In [21]:
# hide
%reload_ext autoreload
%autoreload 2

In [22]:
import random
import numpy as np
import pandas as pd
import torch
import copy
from clientClass import *
from dataFunction import *
from HE_functions import *
import tenseal as ts
RANDOM_STATE = 123
%matplotlib inline
from time import time
torch.random.manual_seed(11007303)
random.seed(11007303)

In [23]:
class LR(torch.nn.Module):

    def __init__(self, n_features):
        super(LR, self).__init__()
        self.lr = torch.nn.Linear(n_features, 1)
        
    def forward(self, x):
        out = torch.sigmoid(self.lr(x))
        return out

In [24]:
n_clients = 4
n_features = 18
glob_model = LR(n_features)
EPOCHS = 80
poly_mod_degree = 4096
coeff_mod_bit_sizes = [40, 20, 40]

# create TenSEALContext
ctx_eval = ts.context(ts.SCHEME_TYPE.CKKS, poly_mod_degree, -1, coeff_mod_bit_sizes)

# scale of ciphertext to use
ctx_eval.global_scale = 2 ** 20

# this key is needed for doing dot-product operations
ctx_eval.generate_galois_keys()


        

# Make Dataframes for every dataset

In [25]:

cleveland = "FL_HE_2/processed.cleveland.data"
switzerland = "FL_HE_2/processed.switzerland.data"
va = "FL_HE_2/processed.va.data"
hungarian = "FL_HE_2/reprocessed.hungarian.data"
cleveland_df, switzerland_df, va_df, hungarian_df = import_data(cleveland, switzerland, va, hungarian)
df_dict ={
    'Cleveland': cleveland_df,
    'Switzerland': switzerland_df,
    'VA Long Beach': va_df,
    'Hungary': hungarian_df        
    }



In [26]:
# import matplotlib.pyplot as plt
# for i in clients:
#     plt.hist(i.y)
#     plt.show()

In [27]:
clients = []
glob_model = LR(n_features)
for i in list(df_dict.keys()):
    df = df_dict.get(i)
    df_dict[i] = new_df(df)
    location_data = new_df(df)
    y = location_data.HeartDisease
    location_data = location_data.drop(columns="HeartDisease")
    cat_feat = ['ChestPainType', 'RestingECG', 'ST_Slope']
    location_data = make_dummies(location_data, cat_feat)
    numeric_feature_names = ['Age', 'MaxHR', 'RestingBP',  'Cholesterol', 'Oldpeak']
    for j in numeric_feature_names:
        if location_data[j].std() != 0: 
            location_data[j] = (location_data[j] - location_data[j].mean()) / location_data[j].std()
    x = location_data
    # print(ty pe(y))
    client_model = copy.deepcopy(glob_model)
    lr = 3
    lr_decay = 1
    clients.append(Client(i, x, y, client_model, lr, torch.nn.BCELoss()))



In [28]:
validation_X_set = torch.tensor(())
validation_y_set = torch.tensor(())
for i in range(len(clients)):
    validation_X_set = torch.cat((validation_X_set, clients[i].X_test), 0)
    validation_y_set = torch.cat((validation_y_set, clients[i].y_test), 0)


In [29]:
# plt.hist(validation_y_set)

In [30]:
loss_train = []
net_best = None
best_acc = None
best_epoch = None
results = []
min_loss_client = []
glob_model = LR(n_features)
glob_model.eval()
for iter in range(50):
    loss_locals = []
    client_state_dicts = []
    for client in clients:
        client.set_state_dict(glob_model.state_dict())
        client_state_dict, loss = client.train()
        
        loss_locals.append(copy.deepcopy(loss))
        min_loss_client.append(min(loss))
        client_state_dicts.append(client_state_dict)

    enrypted_state_dicts = encrypt_state_dicts(copy.deepcopy(client_state_dicts), ctx_eval)
    averaged_encrypted_state_dict = average_state_dict(enrypted_state_dicts)
    decrypted_state_dicts = decrypt_state_dicts(averaged_encrypted_state_dict)
    glob_model.load_state_dict(decrypted_state_dicts)

    loss_avg = sum(min_loss_client) / len(min_loss_client)
    loss_train.append(loss_avg)        
        
    acc_test, loss_test =  accuracy_loss_LR(glob_model,validation_X_set, validation_y_set)

    print('Round {:3d}, Average loss {:.3f}, Test loss {:.3f}, Test accuracy: {:.2f}'.format(
        iter, loss_avg, loss_test, acc_test))


    if best_acc is None or acc_test > best_acc:
        net_best = copy.deepcopy(glob_model)
        best_acc = acc_test
        best_epoch = iter

    results.append(np.array([iter, loss_avg, loss_test, acc_test, best_acc]))
    final_results = np.array(results)
    final_results = pd.DataFrame(final_results, columns=['epoch', 'loss_avg', 'loss_test', 'acc_test', 'best_acc'])

print('Best model, iter: {}, acc: {}'.format(best_epoch, best_acc))    
    

Round   0, Average loss 0.322, Test loss 0.556, Test accuracy: 0.73
Round   1, Average loss 0.320, Test loss 0.545, Test accuracy: 0.75
Round   2, Average loss 0.319, Test loss 0.541, Test accuracy: 0.74
Round   3, Average loss 0.318, Test loss 0.540, Test accuracy: 0.74
Round   4, Average loss 0.317, Test loss 0.540, Test accuracy: 0.74
Round   5, Average loss 0.316, Test loss 0.540, Test accuracy: 0.74
Round   6, Average loss 0.316, Test loss 0.541, Test accuracy: 0.74
Round   7, Average loss 0.316, Test loss 0.541, Test accuracy: 0.74
Round   8, Average loss 0.316, Test loss 0.541, Test accuracy: 0.74
Round   9, Average loss 0.315, Test loss 0.542, Test accuracy: 0.74
Round  10, Average loss 0.315, Test loss 0.542, Test accuracy: 0.74
Round  11, Average loss 0.315, Test loss 0.543, Test accuracy: 0.74
Round  12, Average loss 0.315, Test loss 0.543, Test accuracy: 0.73
Round  13, Average loss 0.315, Test loss 0.543, Test accuracy: 0.73
Round  14, Average loss 0.315, Test loss 0.543, 