In [34]:
# example from: https://machinelearningmastery.com/tutorial-to-implement-k-nearest-neighbors-in-python-from-scratch/
# github: https://github.com/madhug-nadig/Machine-Learning-Algorithms-from-Scratch/blob/master/K%20Means%20Clustering.py

In [35]:
import import_ipynb
from sklearn.datasets import load_iris
from sklearn.utils import shuffle
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, accuracy_score
import pandas as pd
import numpy as np
from math import sqrt
from numpy.linalg import inv
from scipy.interpolate import lagrange
import random
from network import NetworkNode, NetworkShare, merge, reconstruct
from smpc_secrets import ShamirSecretSharing, AdditiveSecretSharing, Vandermonde, P, RandPoly


In [36]:
# calculate the Euclidean distance between two vectors
# each row is the set of feataures for a node
def euclidean_distance(row1, row2):
	distance = 0.0
	for i in range(len(row1)):
		distance += (row1[i] - row2[i])**2
	return sqrt(distance)

In [37]:
def generate_functions(features_arr, R_set=[]):
    all_functions = []
    for feature in range(len(features_arr)):
        # if R_set == []:
        func = RandPoly(name=f"f{feature}", n=2, 
                        R=[(i,x) for i, x in enumerate(list(
            [features_arr[feature],random.randint(2,250),random.randint(2,250)]))])
        all_functions.append(func)
        # else:
        #     r_arr = R_set[feature]
        #     func = RandPoly(name=f"f{feature}", n=2, 
        #                     R=r_arr)
        #     all_functions.append(func)
    return all_functions

def generate_shares(func_array, node_id):
    shares = []
    for func in func_array:
        shares.append(func(node_id))
    return shares

def get_feature_distances(arr1, arr2):
    distances = []
    for feature_a, feature_b in zip(arr1, arr2):
        dist = (feature_a-feature_b)**2
        distances.append(dist)
    return distances

def sum_distances(arr):
    return sum(arr)

def reconstruct(shares):
    x = np.arange(1, len(shares) + 1)
    y = shares
    f = lagrange(x, y)
    return f(0)
    
def simplified_calc(alice_data, bob_data):
    # Alice's functions
    assert len(alice_data) == len(bob_data), "feature length mismatch!"

    # get private functions
    alice_functions_randpoly = [RandPoly(n=2, R=[(0,alice_data[0]),(1,3),(2,5)]),
                       RandPoly(n=2, R=[(0,alice_data[1]),(1,4),(2,6)])]
    bob_functions_randpoly = [RandPoly(n=2, R=[(0,bob_data[0]),(1,7),(2,9)]),
                       RandPoly(n=2, R=[(0,bob_data[1]),(1,8),(2,10)])]
    # alice_functions_randpoly = generate_functions(alice_data)
    # bob_functions_randpoly = generate_functions(bob_data)

    alice_functions = [f.poly for f in alice_functions_randpoly]
    bob_functions = [f.poly for f in bob_functions_randpoly]

    # get shares
    alice_personal_shares = generate_shares(alice_functions, 1)
    alice_from_bob = generate_shares(bob_functions, 1)

    bob_personal_shares = generate_shares(bob_functions, 2)
    bob_from_alice = generate_shares(alice_functions, 2)

    server_from_alice = generate_shares(alice_functions, 3)
    server_from_bob = generate_shares(bob_functions, 3)

    server_from_alice_2 = generate_shares(alice_functions, 4)
    server_from_bob_2 = generate_shares(bob_functions, 4)

    server_from_alice_3 = generate_shares(alice_functions, 5)
    server_from_bob_3 = generate_shares(bob_functions, 5)

    server_from_alice_4 = generate_shares(alice_functions, 6)
    server_from_bob_4 = generate_shares(bob_functions, 6)

    server_from_alice_5 = generate_shares(alice_functions, 7)
    server_from_bob_5 = generate_shares(bob_functions, 7)

    server_from_alice_6 = generate_shares(alice_functions, 8)
    server_from_bob_6 = generate_shares(bob_functions, 8)

    # compute differences for each feature
    alice_diffrences = get_feature_distances(alice_personal_shares, alice_from_bob)
    bob_differences = get_feature_distances(bob_personal_shares, bob_from_alice)
    server_differences = get_feature_distances(server_from_alice, server_from_bob)
    server_differences_2 = get_feature_distances(server_from_alice_2, server_from_bob_2)
    server_differences_3 = get_feature_distances(server_from_alice_3, server_from_bob_3)
    server_differences_4 = get_feature_distances(server_from_alice_4, server_from_bob_4)
    server_differences_5 = get_feature_distances(server_from_alice_5, server_from_bob_5)
    server_differences_6 = get_feature_distances(server_from_alice_6, server_from_bob_6)

    # get sum of distances
    alice_sum = sum_distances(alice_diffrences)
    bob_sum = sum_distances(bob_differences)
    server_sum = sum_distances(server_differences)
    server_sum_2 = sum_distances(server_differences_2)
    server_sum_3 = sum_distances(server_differences_3)
    server_sum_4 = sum_distances(server_differences_4)
    server_sum_5 = sum_distances(server_differences_5)
    server_sum_6 = sum_distances(server_differences_6)
    

    print(alice_sum, bob_sum, server_sum, server_sum_2)


    dist_squared = reconstruct([alice_sum, bob_sum, server_sum, server_sum_2, server_sum_3])

    alice_bob_distance = np.sqrt(abs(dist_squared))

    return alice_bob_distance

In [40]:
# a = [0,0]
# b = [3,3]

a = [0,4]
b = [3,11]

print("regular calc: ", euclidean_distance(a,b), "\n")
simplified_calc(a,b)


regular calc:  7.615773105863909 

346 1690 5626 14458


7.61577310586367