In [1]:
# example from: https://machinelearningmastery.com/tutorial-to-implement-k-nearest-neighbors-in-python-from-scratch/
# github: https://github.com/madhug-nadig/Machine-Learning-Algorithms-from-Scratch/blob/master/K%20Means%20Clustering.py

In [5]:
import import_ipynb

from sklearn.datasets import load_iris
from sklearn.utils import shuffle
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, accuracy_score
import pandas as pd
import numpy as np
from math import sqrt
import os
os.chdir("c:\\Users\\swart\\Desktop\\secure-mpc-main\\secure_mpc_main")
from network import NetworkNode, NetworkShare, merge, reconstruct


In [6]:
# get iris dataset
X_class,y_class = load_iris().data, load_iris().target

# we just want binary classification
X_class = X_class[:100]
y_class = y_class[:100]

X_class,y_class = shuffle(X_class, y_class, random_state=20)

x_df = pd.DataFrame(X_class)
y_df = pd.DataFrame(y_class)

In [7]:
# separate data for alice, bob, server
X_class_server, y_class_server = X_class[:90], y_class[:90]
X_class_alice, y_class_alice = X_class[90:95], y_class[90:95]
X_class_bob, y_class_bob = X_class[95:100], y_class[95:100]

In [8]:
# calculate the Euclidean distance between two vectors
# each row is the set of feataures for a node
def euclidean_distance(row1, row2):
	distance = 0.0
	for i in range(len(row1)):
		distance += (row1[i] - row2[i])**2
	return sqrt(distance)

In [10]:
def secure_euclidean_distance(data1, data2):
    node1 = NetworkShare("Node1", node_id=1, k=3)
    node2 = NetworkShare("Node2", node_id=2, k=3)
    server = NetworkShare("Server", node_id=3, k=3)

    node1_shares = node1.create_shares(data=data1)
    node2_shares = node2.create_shares(data=data2)
    server_shares = server.create_shares([0]*len(data1))

    print(node1_shares, node2_shares, server_shares)
    
    node1_received_from_node2 = node2.get_shares_for(node_id=1, share_type="f")
    node1_received_from_server = server.get_shares_for(node_id=1, share_type="f")
    node2_received_from_node1 = node1.get_shares_for(node_id=2, share_type="f") 
    node2_received_from_server = server.get_shares_for(node_id=2, share_type="f")
    server_received_from_node1 = node1.get_shares_for(node_id=3, share_type="f")
    server_received_from_node2 = node2.get_shares_for(node_id=3, share_type="f")

    node1_merged = node1.merge_shares(shares=[node1_received_from_node2, node1_received_from_server], by=merge)
    node2_merged = node2.merge_shares(shares=[node2_received_from_node1, node2_received_from_server], by=merge)   
    server_merged = node2.merge_shares(shares=[server_received_from_node1, server_received_from_node2], by=merge)  

    print(node1_merged, node2_merged, server_merged) 

    distance = reconstruct([node1_merged, node2_merged, server_merged])
    print(distance)

    return np.sqrt(abs(distance))
    


In [11]:
# Locate the most similar neighbors
def get_neighbors(X_train, y_train, test_row, num_neighbors):
	distances = list()
	for i in range(len(X_train)):
		dist = secure_euclidean_distance(X_train[i], test_row)
		distances.append((X_train[i], dist, y_train[i]))
	distances.sort(key=lambda tup: tup[1])
	neighbors = list()
	for i in range(num_neighbors):
		neighbors.append([distances[i][0], distances[i][2]])
	return neighbors

In [12]:
# Make a classification prediction with neighbors
def predict_classification(X_train, y_train, test_row, num_neighbors):
	neighbors = get_neighbors(X_train, y_train, test_row, num_neighbors)
	output_values = [row[-1] for row in neighbors]
	prediction = max(set(output_values), key=output_values.count)
	return prediction

In [17]:
a = [0,0]
b = [3,3]

print("regular calc: ", euclidean_distance(a,b), "\n")
secure_euclidean_distance(a,b)


regular calc:  4.242640687119285 

[1300 1272] [1271 1965] [ 336 2586]
2053125 6475932 14354627
1086206.0


1042.2120705499433

In [74]:
alice = NetworkShare("Alice", node_id=1, k=2)
alice_shares = alice.create_shares(data=[4,2])
print("Alice shares:", alice_shares)

# server = NetworkNode("Server", node_id=2, k=2)
# # server_shares = server.create_shares(data = b)
# # print("Server's shares:", server_shares)

# alice_received_from_server = server.get_shares_for(node_id=1, share_type="f")
# server_received_from_alice = alice.get_shares_for(node_id=2, share_type="f")

# # alice_received_from_server = server.get_shares_for(node_id=1, share_type="f")
# # # print(alice.merge_shares([alice_shares[1], server_shares[1]]))
# # # # alice.get_shares('d')

# # alice.merge_shares(shares=alice_received_from_server, by=merge)

# alice_merged = alice.merge_shares(shares=alice_received_from_server, by=merge)
# server_merged = server.merge_shares(shares=server_received_from_alice, by=merge)


# reconstruct(shares=[alice_merged, server_merged])
# server = NetworkNode("Server", k=3, node_id=2, port=9997)

# server_received_from_alice = alice.get_shares_for(node_id=2, share_type="f")
# print(f"server received: {server_received_from_alice}")
# distance = server.merge_shares_with(shares=server_received_from_alice, own_shares=server.get_shares_for(node_id=2, share_type="f"))

# # distance = int(server.merge_shares(node_id=1, shares=server_received_from_alice, share_type="f").get('data'))
# print("distance:", distance)




Alice shares: [ 156 1131]


In [12]:
alice = NetworkShare("Alice", node_id=1, k=2)
alice_shares = alice.create_shares(data=[4,2])
print("Alice shares:", alice_shares)
server_received_from_alice = alice.get_shares_for(node_id=2, share_type="f")
print(server_received_from_alice)


Alice shares: [1141 1062]
[2278 2122]


In [13]:
server = NetworkShare("Server", node_id=2, k=2)
alice_received_from_server = server.get_shares_for(node_id=2, share_type="f")
print(f"received: {alice_received_from_server}")
# alice.merge_shares(shares=alice_received_from_bob, by=merge)
# print(f"alice shares: {alice.shares}")
# print(f"alice d: {alice.get_shares('d')}")

ConnectionRefusedError: [WinError 10061] No connection could be made because the target machine actively refused it

In [None]:
# Alice submits her shares in a secure way



# Server calcs shares and distance



# Server returns predictions for Alice




In [None]:
class ShamirShare:
    def __init__(self,id, t, n, secrets):
        self.computations = []
        self.all_shares = []
        self.id = id
        for secret in secrets:
            shamir = ShamirSecretSharing(n=n, t=t)
            shares = shamir.generate(s=secret)
            self.all_shares.append(shares)
            self.computations.append((shamir, shares))
        self.shares = self.get_share(id)
        
    
    def get_share(self,id):
        return [shares[id][1] for shares in self.all_shares]
        
    def reconstruct(self):
        secrets = []
        for shamir, shares in self.computations:
            secrets.append(shamir.reconstruct(shares=shares))
        return secrets
    
    def _compute(self, other, op):
        assert len(self.shares) == len(other)
        for i in range(len(self.shares)):
            self.shares[i] = op(self.shares[i], other[i])

In [None]:

class CustomKNN:
	
	def __init__(self):
		self.accurate_predictions = 0
		self.total_predictions = 0
		self.accuracy = 0.0

	def predict(self, training_data, to_predict, k = 3):
		if len(training_data) >= k:
			print("K cannot be smaller than the total voting groups(ie. number of training data points)")
			return
		
		distributions = []
		for group in training_data:
			for features in training_data[group]:
				euclidean_distance = np.linalg.norm(np.array(features)- np.array(to_predict))
				distributions.append([euclidean_distance, group])
		
		results = [i[1] for i in sorted(distributions)[:k]]
		result = Counter(results).most_common(1)[0][0]
		confidence = Counter(results).most_common(1)[0][1]/k
		
		return result, confidence
	
	def test(self, test_set, training_set):
		for group in test_set:
			for data in test_set[group]:
				predicted_class,confidence = self.predict(training_set, data, k =3)
				if predicted_class == group:
					self.accurate_predictions += 1
				else:
					print("Wrong classification with confidence " + str(confidence * 100) + " and class " + str(predicted_class))
				self.total_predictions += 1
		self.accuracy = 100*(self.accurate_predictions/self.total_predictions)
		print("\nAcurracy :", str(self.accuracy) + "%")

In [None]:
def knn_from_scratch_secure(X_train, X_test, y_train, y_test, num_neighbors):
	y_pred = []
	for test_row in X_test:
		y_pred.append(predict_classification(X_train, y_train, test_row, num_neighbors))

	f1_binary = f1_score(y_test, y_pred, average="binary")
	accuracy = accuracy_score(y_test, y_pred)

	# print("ypred:", y_pred)
	# print("ytest:", y_test)

	return f1_binary, accuracy

Unnamed: 0,0
0,2
1,2
2,1
3,2
4,0
5,0
6,1
7,0
8,2
9,0


In [None]:
#testing

numFolds = 10
num_neighbors = 9
stratifiedKFold = StratifiedKFold(
    n_splits=numFolds, shuffle=True, random_state=86
)

count = 1
avgF1 = 0
avgAcc = 0

X = X_class
y = y_class

for train_index, test_index in stratifiedKFold.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    f1, acc = knn_from_scratch(X_train, X_test, y_train, y_test, num_neighbors)
    avgF1 += f1
    avgAcc += acc

    # print(f"\tFold {count}:: Average accuracy: {avgAcc}, Average F1 score: {avgF1}")

    count += 1

avgF1 = avgF1/numFolds
avgAcc = avgAcc/numFolds

print(f"Average accuracy: {avgAcc}, Average F1 score: {avgF1}")

In [None]:
def main():
	#20% of the available data will be used for testing
	test_size = 0.2

	#The keys of the dict are the classes that the data is classfied into
	training_set = {2: [], 4:[]}
	test_set = {2: [], 4:[]}
	
	#Split data into training and test for cross validation
	training_data = x_df[:-int(test_size * len(x_df))]
	test_data = y_df[-int(test_size * len(y_df)):]
	
	#Insert data into the training set
	for record in training_data:
		training_set[record[-1]].append(record[:-1]) # Append the list in the dict will all the elements of the record except the class

	#Insert data into the test set
	for record in test_data:
		test_set[record[-1]].append(record[:-1]) # Append the list in the dict will all the elements of the record except the class

	# split into test and training sets

	


	s = time.clock()
	knn = CustomKNN()
	knn.test(test_set, training_set)
	e = time.clock()
	print("Exec Time:" ,e-s)