In [334]:
# hide
%reload_ext autoreload
%autoreload 2

In [442]:
import random
import numpy as np
import pandas as pd
import torch
import copy
from clientClass import *
from dataFunction import *
from HE_functions import *
import tenseal as ts
from cryptotree.preprocessing import Featurizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
RANDOM_STATE = 123
from collections import OrderedDict
from base64 import b64encode, b64decode
import matplotlib.pyplot as plt
%matplotlib inline
from time import time
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from scipy.special import expit
from functools import reduce
torch.random.manual_seed(11007303)
random.seed(11007303)

In [443]:
n_clients = 4
n_features = 18
glob_model = LR(n_features)
EPOCHS = 80
poly_mod_degree = 4096
coeff_mod_bit_sizes = [40, 20, 40]

# create TenSEALContext
ctx_eval = ts.context(ts.SCHEME_TYPE.CKKS, poly_mod_degree, -1, coeff_mod_bit_sizes)

# scale of ciphertext to use
ctx_eval.global_scale = 2 ** 20

# this key is needed for doing dot-product operations
ctx_eval.generate_galois_keys()


        

# Make Dataframes for every dataset

In [444]:

cleveland = "FL_HE_2/processed.cleveland.data"
switzerland = "FL_HE_2/processed.switzerland.data"
va = "FL_HE_2/processed.va.data"
hungarian = "FL_HE_2/reprocessed.hungarian.data"
cleveland_df, switzerland_df, va_df, hungarian_df = import_data(cleveland, switzerland, va, hungarian)
df_dict ={
    'Cleveland': cleveland_df,
    'Switzerland': switzerland_df,
    'VA Long Beach': va_df,
    'Hungary': hungarian_df        
    }



In [445]:
class LR(torch.nn.Module):

    def __init__(self, n_features):
        super(LR, self).__init__()
        self.lr = torch.nn.Linear(n_features, 1)
        
    def forward(self, x):
        out = torch.sigmoid(self.lr(x))
        return out

In [446]:
clients = []
for i in list(df_dict.keys()):
    df = df_dict.get(i)
    df_dict[i] = new_df(df)
    location_data = new_df(df)
    y = location_data.HeartDisease
    location_data = location_data.drop(columns="HeartDisease")
    cat_feat = ['ChestPainType', 'RestingECG', 'ST_Slope']
    location_data = make_dummies(location_data, cat_feat)
    numeric_feature_names = ['Age', 'MaxHR', 'RestingBP',  'Cholesterol', 'Oldpeak']
    for j in numeric_feature_names:
        if location_data[j].std() != 0: 
            location_data[j] = (location_data[j] - location_data[j].mean()) / location_data[j].std()
    x = location_data
    client_model = copy.deepcopy(glob_model)
    lr = 3
    lr_decay = 1
    clients.append(Client(i, x, y, cat_feat, client_model, lr, torch.nn.BCELoss()))



In [447]:
validation_X_set = torch.tensor(())
validation_y_set = torch.tensor(())
for i in range(len(clients)):
    validation_X_set = torch.cat((validation_X_set, clients[i].X_test), 0)
    validation_y_set = torch.cat((validation_y_set, clients[i].y_test), 0)


In [450]:
loss_train = []
net_best = None
best_acc = None
best_epoch = None
results = []
min_loss_client = []
glob_model = LR(n_features)
glob_model.eval()
for iter in range(EPOCHS):
    loss_locals = []
    client_state_dicts = []
    for client in clients:
        client.set_state_dict(glob_model.state_dict())
        client_state_dict, loss = client.train()
        
        loss_locals.append(copy.deepcopy(loss))
        min_loss_client.append(min(loss))
        client_state_dicts.append(client_state_dict)

    enrypted_state_dicts = encrypt_state_dicts(copy.deepcopy(client_state_dicts), ctx_eval)
    averaged_encrypted_state_dict = average_state_dict(enrypted_state_dicts)
    decrypted_state_dicts = decrypt_state_dicts(averaged_encrypted_state_dict)
    glob_model.load_state_dict(decrypted_state_dicts)

    loss_avg = sum(min_loss_client) / len(min_loss_client)
    loss_train.append(loss_avg)        
        
    acc_test, loss_test =  accuracy_loss_LR(glob_model,validation_X_set, validation_y_set)

    print('Round {:3d}, Average loss {:.3f}, Test loss {:.3f}, Test accuracy: {:.2f}'.format(
        iter, loss_avg, loss_test, acc_test))


    if best_acc is None or acc_test > best_acc:
        net_best = copy.deepcopy(glob_model)
        best_acc = acc_test
        best_epoch = iter

    results.append(np.array([iter, loss_avg, loss_test, acc_test, best_acc]))
    final_results = np.array(results)
    final_results = pd.DataFrame(final_results, columns=['epoch', 'loss_avg', 'loss_test', 'acc_test', 'best_acc'])

print('Best model, iter: {}, acc: {}'.format(best_epoch, best_acc))    
    

Round   0, Average loss 0.696, Test loss 0.659, Test accuracy: 0.64
Round   1, Average loss 0.625, Test loss 0.547, Test accuracy: 0.71
Round   2, Average loss 0.582, Test loss 0.560, Test accuracy: 0.69
Round   3, Average loss 0.555, Test loss 0.530, Test accuracy: 0.75
Round   4, Average loss 0.536, Test loss 0.537, Test accuracy: 0.72
Round   5, Average loss 0.522, Test loss 0.529, Test accuracy: 0.75
Round   6, Average loss 0.511, Test loss 0.532, Test accuracy: 0.74
Round   7, Average loss 0.502, Test loss 0.530, Test accuracy: 0.74
Round   8, Average loss 0.495, Test loss 0.531, Test accuracy: 0.74
Round   9, Average loss 0.489, Test loss 0.531, Test accuracy: 0.74
Round  10, Average loss 0.484, Test loss 0.532, Test accuracy: 0.74
Round  11, Average loss 0.480, Test loss 0.533, Test accuracy: 0.74
Round  12, Average loss 0.476, Test loss 0.533, Test accuracy: 0.74
Round  13, Average loss 0.473, Test loss 0.534, Test accuracy: 0.74
Round  14, Average loss 0.470, Test loss 0.535, 

In [458]:
averaged_state_dict = average_state_dict(client_state_dicts)
for key in decrypted_state_dicts.keys():
    if key == 'lr.weight':
        Distance  = torch.cdist(decrypted_state_dicts['lr.weight'],averaged_state_dict['lr.weight'])**2
    else:
        Distance = ((decrypted_state_dicts[key]-averaged_state_dict[key])**2)
    print(Distance)


tensor([[0.0053]])
tensor([0.0004])
