<a href="https://colab.research.google.com/github/md-hussain28/ArkhamAthletics/blob/main/Another_copy_of_Edit_of_Final_copy_of_Homomorphic_Encryption_and_Federated_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install phe



In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes, load_breast_cancer
import phe as paillier

In [None]:
import hashlib
import time

In [None]:
class Block:
    def __init__(self, index, timestamp, about, data, loss, previous_hash):
        self.index = index
        self.timestamp = timestamp
        self.about = about
        self.data = data
        self.loss = loss
        self.previous_hash = previous_hash
        self.hash = self.calculate_hash()

    def calculate_hash(self):
        block_string = str(self.index) + str(self.timestamp) + str(self.data) + str(self.previous_hash)
        return hashlib.sha256(block_string.encode('utf-8')).hexdigest()

In [None]:
class Blockchain:
    def __init__(self):
        self.chain = [self.create_genesis_block()]

    def create_genesis_block(self):
        return Block(0, int(time.time()), "Genesis Block", "Genesis Block", "", "0")

    def add_block(self, new_block):
        new_block.previous_hash = self.chain[-1].hash
        new_block.hash = new_block.calculate_hash()
        self.chain.append(new_block)

In [None]:
blockchain = Blockchain()
blocknumber = 1

In [None]:
from sklearn.preprocessing import LabelEncoder

def get_data(n_clients, data_frame):
    print("Loading data")
    y = data_frame.target
    X = data_frame.data
    print(data_frame.feature_names)

    # Encoding categorical columns
    label_encoders = {}
    for i, col in enumerate(data_frame.feature_names):
        if not np.issubdtype(X[:, i].dtype, np.number):  # Check if data type is not numeric
            le = LabelEncoder()
            X[:, i] = le.fit_transform(X[:, i])
            label_encoders[col] = le

    X = np.c_[X, np.ones(X.shape[0])]

    # Shuffle
    perm = np.random.permutation(X.shape[0])
    X, y = X[perm, :], y[perm]

    # Test selected at random
    test_size = 50
    test_idx = np.random.choice(X.shape[0], size=test_size, replace=False)
    train_idx = np.ones(X.shape[0], dtype=bool)
    train_idx[test_idx] = False
    X_test, y_test = X[test_idx, :], y[test_idx]
    X_train, y_train = X[train_idx, :], y[train_idx]

    X, y = [], []
    step = int(X_train.shape[0] / n_clients)
    for c in range(n_clients):
        X.append(X_train[step * c: step * (c + 1), :])
        y.append(y_train[step * c: step * (c + 1)])

    return X, y, X_test, y_test, label_encoders


In [None]:
def mean_square_error(y_pred, y):
    return np.mean((y - y_pred) ** 2)

In [None]:
def binary_cross_entropy(y_pred, y):
    epsilon = 1e-15  # Small constant to avoid numerical instability
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)  # Clip predicted values to avoid taking the log of 0
    bce_loss = -(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))  # Binary Cross-Entropy formula
    return np.mean(bce_loss)  # Return the mean of the BCE loss across all samples


In [None]:
def encrypt_vector(public_key, x):
    return [public_key.encrypt(i) for i in x] #encyption usin Paillier

In [None]:
def decrypt_vector(private_key, x):
    return np.array([private_key.decrypt(i) for i in x])

In [None]:
def sum_encrypted_vectors(x, y):
    if len(x) != len(y):
        raise ValueError('Encrypted vectors must have the same size')
    return [x[i] + y[i] for i in range(len(x))]

In [None]:
def printResult(X, y, y_pred, model, frame):
    X = pd.DataFrame(X)
    y = pd.DataFrame(y)
    y_pred = pd.DataFrame(y_pred)
    # features = frame.feature_names
    # if model == 'cls':
    #   features.tolist()
    # # features = features.tolist()
    # cols = X.shape[1]
    # if len(features) != cols:
    #   features.append("bias term")
    # X.columns = features
    result = X.reset_index()
    result = result.assign(actual=y)
    result = result.assign(predicted=y_pred)
    print(result)

In [None]:
def addPatientName(y_pred):
    # y = ['Patient {}'.format(i) for i in range(1, n_clients + 1)]
    patients = len(y_pred)
    y = []
    for i in range(1, patients+1):
        y.append(["Patient " + str(i), y_pred[i-1]])
    return y

# Server

In [None]:
class Server:

    def __init__(self, key_length):
        keypair = paillier.generate_paillier_keypair(n_length=key_length)
        self.pubkey, self.privkey = keypair

    def decrypt_aggregate(self, input_model, n_clients):
        return decrypt_vector(self.privkey, input_model) / n_clients

# Client

In [None]:
class Client:
    def __init__(self, name, X, y, pubkey, task):
        self.name = name
        self.pubkey = pubkey
        self.X, self.y = X, y
        self.weights = np.zeros(X.shape[1])
        self.task = task

    def fit(self, n_iter, eta=0.01):
        for _ in range(n_iter):
            gradient = self.compute_gradient()
            self.gradient_step(gradient, eta)

    def gradient_step(self, gradient, eta=0.01):
        # updation of model with the given gradient
        self.weights -= eta * gradient

    def compute_gradient(self):
        if self.task == 'reg':  # Regression task
            delta = self.predict(self.X) - self.y
            return delta.dot(self.X)
        elif self.task == 'cls':  # Classification task
            delta = self.predict_proba(self.X) - self.y
            return delta.dot(self.X)

    def predict(self, X):
        return X.dot(self.weights)

    def predict_proba(self, X):
        # For binary classification, apply sigmoid function
        # For multiclass classification, apply softmax function
        # Here, let's assume binary classification
        return 1 / (1 + np.exp(-X.dot(self.weights)))

    def encrypted_gradient(self, sum_to=None):
        gradient = self.compute_gradient()
        encrypted_gradient = encrypt_vector(self.pubkey, gradient)

        if sum_to is not None:
            return sum_encrypted_vectors(sum_to, encrypted_gradient)
        else:
            return encrypted_gradient

# Machine Learning


In [None]:
def federated_learning(n_iter, eta, n_clients, key_length, data_frame):
    blocknumber = 1
    cluster_number = 1
    for name, frame, task in data_frame:
        print(f'Working on Dataset: {name}; current task: {task}')
        names = ['Client {}'.format(i) for i in range(1, n_clients + 1)]

        X, y, X_test, y_test, label_encoder = get_data(n_clients=n_clients, data_frame = frame)

        # Instantiate the server and generate private and public keys
        server = Server(key_length=key_length)

        # Instantiate the clients.
        clients = []
        for i in range(n_clients):
            clients.append(Client(names[i], X[i], y[i], server.pubkey, task))

        print('Loss (MSE or BCE) that each client gets on test set by '
            'training only on own local data:')
        for c in clients:
            c.fit(n_iter, eta)
            y_pred = c.predict(X_test)
            if task == 'reg':
                loss = mean_square_error(y_pred, y_test)
            elif task == 'cls':
                loss = binary_cross_entropy(y_pred, y_test)
                y_pred_binary = np.where(y_pred>=0.5, 1, 0)
                y_pred = y_pred_binary

            printResult(X_test, y_test, y_pred, task, frame)
            print('{:s}:\t{:.2f}'.format(c.name, loss))

        print('Running distributed gradient aggregation for {:d} iterations'
            .format(n_iter))
        for i in range(n_iter):

            encrypt_aggr = clients[0].encrypted_gradient(sum_to=None)
            for c in clients:
                encrypt_aggr = c.encrypted_gradient(sum_to=encrypt_aggr)

            aggr = server.decrypt_aggregate(encrypt_aggr, n_clients)

            for c in clients:
                c.gradient_step(aggr, eta)

            if i==n_iter-1 :
              blockchain.add_block(Block(blocknumber, time.time(), "Model post FL", str(aggr), "", blockchain.chain[-1].hash))
              blocknumber += 1

        print('Loss that each client gets after running the protocol:')
        for c in clients:
            y_pred = c.predict(X_test)
            if task == 'reg':
                loss = mean_square_error(y_pred, y_test)
            elif task == 'cls':
                loss = binary_cross_entropy(y_pred, y_test)
                y_pred_binary = np.where(y_pred>=0.5, 1, 0)
                y_pred = y_pred_binary
            printResult(X_test, y_test, y_pred, task, frame)
            print('{:s}:\t{:.2f}'.format(c.name, loss))
            y_patients = addPatientName(y_pred)
            blockchain.add_block(Block(blocknumber, time.time(), "Cluster " + str(cluster_number) + "\t" + c.name, str(y_patients), str(loss), blockchain.chain[-1].hash))
            blocknumber += 1
        cluster_number += 1


# End Result

In [None]:
breast_cancer = load_breast_cancer()
diabetes = load_diabetes()

data_frames = [('Diabetes', diabetes, 'reg'), ('Breast Cancer', breast_cancer, 'cls')]

In [None]:
federated_learning(n_iter=50, eta=0.01, n_clients=3, key_length=1024, data_frame = data_frames)

Working on Dataset: Diabetes; current task: reg
Loading data
['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
Loss (MSE or BCE) that each client gets on test set by training only on own local data:
    index         0         1         2         3         4         5  \
0       0  0.019913  0.050680 -0.012673  0.070072 -0.011201  0.007141   
1       1 -0.103593 -0.044642 -0.037463 -0.026328  0.002559  0.019980   
2       2  0.063504  0.050680 -0.004050 -0.012556  0.103003  0.048790   
3       3 -0.023677 -0.044642  0.059541 -0.040099 -0.042848 -0.043589   
4       4 -0.103593  0.050680 -0.023451 -0.022885 -0.086878 -0.067701   
5       5 -0.009147 -0.044642  0.011039 -0.057313 -0.024960 -0.042963   
6       6  0.074401 -0.044642  0.114509  0.028758  0.024574  0.024991   
7       7  0.012648  0.050680  0.000261 -0.011420  0.039710  0.057245   
8       8 -0.012780 -0.044642  0.060618  0.052858  0.047965  0.029375   
9       9 -0.005515 -0.044642  0.043373  0.087287  0.0135

  return 1 / (1 + np.exp(-X.dot(self.weights)))
  return 1 / (1 + np.exp(-X.dot(self.weights)))
  return 1 / (1 + np.exp(-X.dot(self.weights)))
  return 1 / (1 + np.exp(-X.dot(self.weights)))


Loss that each client gets after running the protocol:
    index       0      1       2       3        4        5         6  \
0       0  23.090  19.83  152.10  1682.0  0.09342  0.12750  0.167600   
1       1  13.850  15.18   88.99   587.4  0.09516  0.07688  0.044790   
2       2  15.130  29.81   96.71   719.5  0.08320  0.04605  0.046860   
3       3  14.590  22.68   96.39   657.1  0.08473  0.13300  0.102900   
4       4  11.320  27.08   71.76   395.7  0.06883  0.03813  0.016330   
5       5  12.560  19.07   81.92   485.8  0.08760  0.10380  0.103000   
6       6  10.910  12.35   69.14   363.7  0.08518  0.04721  0.012360   
7       7  13.710  18.68   88.73   571.0  0.09916  0.10700  0.053850   
8       8  15.060  19.83  100.30   705.6  0.10390  0.15530  0.170000   
9       9  13.530  10.94   87.91   559.2  0.12910  0.10470  0.068770   
10     10  11.680  16.17   75.49   420.5  0.11280  0.09263  0.042790   
11     11  13.610  24.69   87.76   572.6  0.09258  0.07862  0.052850   
12     12

In [None]:
for block in blockchain.chain:
    print("Block #" + str(block.index))
    print("Timestamp: " + str(block.timestamp))
    print("About: " + block.about)
    print("Data: " + block.data)
    print("Loss:" + block.loss)
    print("Hash: " + block.hash)
    print("Previous Hash: " + block.previous_hash)
    print("\n")

Block #0
Timestamp: 1716960661
About: Genesis Block
Data: Genesis Block
Loss:
Hash: 8fda0c94f9a1758ae08b39021e074c7530844a598c3e684cf9d91f9bced80540
Previous Hash: 0


Block #1
Timestamp: 1716960712.9305336
About: Model post FL
Data: [ -13.74318122   52.61527383 -164.39484434 -119.52449245    3.2451182
   31.48514676   74.24956009  -52.88700444 -149.27632879  -67.04291677
   -0.79032086]
Loss:
Hash: 9182236119050933aadb53ea50aa1b24d1aaba108df77ffd3310c3ee076ad913
Previous Hash: 8fda0c94f9a1758ae08b39021e074c7530844a598c3e684cf9d91f9bced80540


Block #2
Timestamp: 1716960712.9468818
About: Cluster 1	Client 1
Data: [['Patient 1', 167.93176887623886], ['Patient 2', 116.5937155681395], ['Patient 3', 162.84803755435067], ['Patient 4', 150.0768155625327], ['Patient 5', 106.00484606996486], ['Patient 6', 139.2919380332461], ['Patient 7', 181.13864413154033], ['Patient 8', 166.66597972029498], ['Patient 9', 194.70721900124485], ['Patient 10', 184.1472009688394], ['Patient 11', 156.076405943247