In [1]:
# example from: https://machinelearningmastery.com/tutorial-to-implement-k-nearest-neighbors-in-python-from-scratch/
# github: https://github.com/madhug-nadig/Machine-Learning-Algorithms-from-Scratch/blob/master/K%20Means%20Clustering.py

In [1]:
import import_ipynb

from sklearn.datasets import load_iris
from sklearn.utils import shuffle
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, accuracy_score
import pandas as pd
import numpy as np
from math import sqrt
from numpy.linalg import inv
from scipy.interpolate import lagrange
import random
import os
# NOTE(review): hard-coded absolute path to one user's Windows desktop; the
# notebook will fail at this line on any other machine. Presumably the working
# directory is switched so the project-local imports below (`network`,
# `smpc_secrets`) resolve -- confirm, and consider a configurable/relative path.
os.chdir("c:\\Users\\swart\\Desktop\\secure-mpc-main\\secure_mpc_main")
from network import NetworkNode, NetworkShare, merge, reconstruct
from smpc_secrets import ShamirSecretSharing, AdditiveSecretSharing, Vandermonde, P, RandPoly


In [6]:
# get iris dataset -- a single call with return_X_y=True yields the
# (data, target) pair instead of loading the dataset twice
X_class, y_class = load_iris(return_X_y=True)

# we just want binary classification
# NOTE(review): keeping the first 100 rows assumes the samples are ordered by
# class with the first two classes filling rows 0-99 -- confirm against the
# load_iris documentation.
X_class = X_class[:100]
y_class = y_class[:100]

# fixed random_state keeps the shuffled order reproducible across runs
X_class, y_class = shuffle(X_class, y_class, random_state=20)

# DataFrame views of the shuffled features / labels
x_df = pd.DataFrame(X_class)
y_df = pd.DataFrame(y_class)

In [7]:
# Partition the shuffled samples between the three parties:
# rows 0-89 -> server, rows 90-94 -> alice, rows 95-99 -> bob.
X_class_server = X_class[:90]
y_class_server = y_class[:90]

X_class_alice = X_class[90:95]
y_class_alice = y_class[90:95]

X_class_bob = X_class[95:100]
y_class_bob = y_class[95:100]

In [4]:
# Plain (non-secure) Euclidean distance between two vectors.
# Each row is the set of features for a node.
def euclidean_distance(row1, row2):
	"""Return the Euclidean distance between `row1` and `row2`.

	Iterates over the positions of `row1`; `row2` must be indexable at every
	one of those positions (extra trailing entries in `row2` are ignored).
	"""
	squared_total = 0.0
	for idx in range(len(row1)):
		delta = row1[idx] - row2[idx]
		squared_total += delta ** 2
	return sqrt(squared_total)

In [14]:
def secure_euclidean_distance(data1, data2):
    """Compute a distance between `data1` and `data2` via the project's share API.

    Builds two `NetworkShare` parties (ids 1 and 2) and one `NetworkNode`
    (id 3), all with k=3, has each share-holding party create shares of its
    data, exchanges the "f"-type shares between the parties, merges them per
    party, reconstructs a value from the three merged results and returns
    `np.sqrt(abs(value))`.

    Intermediate shares, merged values and the reconstructed value are
    printed to stdout.

    NOTE(review): the exact semantics of `create_shares`, `get_shares_for`,
    `merge_shares`, `merge_shares_with`, `merge` and `reconstruct` are defined
    in the project modules and are not visible here -- presumably the merged
    values are shares of the squared distance; confirm.
    """
    node1 = NetworkShare("Node1", node_id=1, k=3)
    node2 = NetworkShare("Node2", node_id=2, k=3)
    # NOTE(review): `server` is created but never used below (its only uses
    # are in the commented-out lines).
    server = NetworkNode("Server", node_id=3, k=3)

    # Each data-holding party splits its own feature vector into shares.
    node1_shares = node1.create_shares(data=data1)
    node2_shares = node2.create_shares(data=data2)
    # server_shares = server.create_shares([0]*len(data1))

    # Debug output of the raw shares.
    print(node1_shares, node2_shares)
    
    # Fetch, for every recipient id, the "f" shares the other parties hold for it.
    node1_received_from_node2 = node2.get_shares_for(node_id=1, share_type="f")
    # node1_received_from_server = server.get_shares_for(node_id=1, share_type="f")
    node2_received_from_node1 = node1.get_shares_for(node_id=2, share_type="f") 
    # node2_received_from_server = server.get_shares_for(node_id=2, share_type="f")
    server_received_from_node1 = node1.get_shares_for(node_id=3, share_type="f")
    server_received_from_node2 = node2.get_shares_for(node_id=3, share_type="f")

    # node1_merged = node1.merge_shares(shares=[node1_received_from_node2, node1_received_from_server], by=merge)
    # node2_merged = node2.merge_shares(shares=[node2_received_from_node1, node2_received_from_server], by=merge)   
    node1_merged = node1.merge_shares(shares=node1_received_from_node2, by=merge)
    node2_merged = node2.merge_shares(shares=node2_received_from_node1, by=merge)   
    # NOTE(review): the server's shares are merged through `node2`, not
    # `server` -- looks like it may be unintended; confirm against the
    # NetworkNode / NetworkShare API before changing.
    server_merged = node2.merge_shares_with(shares=[server_received_from_node1, server_received_from_node2], by=merge)  

    # Debug output of the per-party merged values.
    print(node1_merged, node2_merged, server_merged) 

    # NOTE(review): `reconstruct` is imported from `network`, but a later cell
    # in this notebook defines its own module-level `reconstruct`; which one
    # runs here depends on cell execution order.
    distance = reconstruct([node1_merged, node2_merged, server_merged])
    print(distance)

    # abs() guards the square root against a negative reconstructed value.
    return np.sqrt(abs(distance))

In [5]:
# Locate the most similar neighbors
def get_neighbors(X_train, y_train, test_row, num_neighbors):
	"""Return the `num_neighbors` training samples closest to `test_row`.

	The distance of every training row to `test_row` is obtained from
	`secure_euclidean_distance`; rows are then ranked by ascending distance
	(ties keep their original order).

	Returns a list of `[row, label]` pairs, nearest first.
	Raises IndexError if `num_neighbors` exceeds the number of training rows.
	"""
	scored = []
	for idx in range(len(X_train)):
		sample = X_train[idx]
		gap = secure_euclidean_distance(sample, test_row)
		scored.append((sample, gap, y_train[idx]))
	ranked = sorted(scored, key=lambda entry: entry[1])
	return [[ranked[pos][0], ranked[pos][2]] for pos in range(num_neighbors)]

In [6]:
# Make a classification prediction with neighbors
def predict_classification(X_train, y_train, test_row, num_neighbors):
	"""Predict the label of `test_row` by majority vote of its nearest neighbors.

	The neighbors come from `get_neighbors`; the label is the last element of
	each neighbor entry. The most frequent label wins; on a tie the winner is
	whichever tied label `max` meets first when iterating the set of labels.
	"""
	nearest = get_neighbors(X_train, y_train, test_row, num_neighbors)
	votes = [entry[-1] for entry in nearest]
	return max(set(votes), key=votes.count)

In [20]:
def generate_functions(features_arr):
    """Build one `RandPoly(...).poly` object per feature value.

    For the feature at position i, `RandPoly` is constructed with name "f<i>",
    n=1 and R=[(0, feature), (1, r)] where r is a fresh
    `random.randint(2, 250)` draw.

    NOTE(review): presumably this yields a degree-1 polynomial whose constant
    term is the feature value and whose slope is r -- confirm against RandPoly.
    NOTE(review): `random` is not a cryptographic RNG and the slope range is
    small; fine for a demo, confirm before relying on it for secrecy.

    Returns the list of `.poly` attributes, in feature order.
    """
    polys = []
    for idx in range(len(features_arr)):
        secret = features_arr[idx]
        slope = random.randint(2, 250)
        coefficients = [(0, secret), (1, slope)]
        polys.append(RandPoly(name=f"f{idx}", n=1, R=coefficients).poly)
    return polys

def generate_shares(func_array, node_id):
    """Evaluate every callable in `func_array` at `node_id`.

    Returns the list of results, in the same order as `func_array`.
    """
    return [poly(node_id) for poly in func_array]

def get_feature_distances(arr1, arr2):
    """Return the element-wise squared differences of `arr1` and `arr2`.

    Elements are paired with `zip`, so the result has the length of the
    shorter input.
    """
    return [(left - right) ** 2 for left, right in zip(arr1, arr2)]

def sum_distances(arr):
    """Return the total of all values in `arr` (0 for an empty input)."""
    total = sum(arr)
    return total

def reconstruct(shares):
    """Interpolate `shares` and return the interpolant's value at 0.

    The i-th share (1-based) is taken as the value at x = i; a Lagrange
    polynomial is fitted through those points and evaluated at x = 0.

    NOTE(review): this definition shadows the `reconstruct` name imported
    from `network` at the top of the notebook.
    """
    node_ids = np.arange(1, len(shares) + 1)
    interpolant = lagrange(node_ids, shares)
    return interpolant(0)
    
def simplified_calc(alice_data, bob_data):
    """Compute the Euclidean distance between two feature vectors from shares.

    Each party's features are turned into private functions
    (`generate_functions`). Every participant -- ids 1 (alice), 2 (bob) and
    3 (server) -- evaluates both function sets at its own id, squares the
    per-feature differences and sums them. The three sums are printed, passed
    to `reconstruct`, and the square root of the reconstructed value is
    returned.

    Parameters:
        alice_data: sequence of alice's feature values.
        bob_data: sequence of bob's feature values, same length as alice's.

    Returns:
        The reconstructed distance (result of `np.sqrt`).

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    assert len(alice_data) == len(bob_data), "feature length mismatch!"

    # get private functions
    alice_functions = generate_functions(alice_data)
    bob_functions = generate_functions(bob_data)

    # For each participant: shares of both inputs at its id, then the sum of
    # squared per-feature differences of those shares.
    party_sums = []
    for node_id in (1, 2, 3):
        shares_of_alice = generate_shares(alice_functions, node_id)
        shares_of_bob = generate_shares(bob_functions, node_id)
        feature_distances = get_feature_distances(shares_of_alice, shares_of_bob)
        party_sums.append(sum_distances(feature_distances))

    # same output as before: "<alice_sum> <bob_sum> <server_sum>"
    print(*party_sums)

    dist_squared = reconstruct(party_sums)

    # Floating-point interpolation can land marginally below zero (e.g. for
    # identical inputs); clamp so the square root yields 0 instead of nan.
    alice_bob_distance = np.sqrt(max(dist_squared, 0.0))

    return alice_bob_distance

In [27]:
# Sanity check: the share-based computation should agree with the plain
# Euclidean distance on a small example.
a = [0, 0]
b = [3, 3]

print("regular calc: ", euclidean_distance(a, b), "\n")
# last expression of the cell -> displayed as the cell's output
simplified_calc(a, b)


regular calc:  4.242640687119285 

5641 23434 53397


4.242640687119285