In [60]:
from typing import Tuple, Union, List
import numpy as np
from sklearn.linear_model import LogisticRegression
import openml
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, precision_score, recall_score

In [61]:
import pickle
import warnings
# Suppress every warning for the remainder of the kernel session.
# NOTE(review): a blanket 'ignore' may also hide warnings raised while
# unpickling the saved models (e.g. library version mismatches) — consider
# narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [62]:
# Load the evaluation data from data/test.csv; the bare `df` below renders it.
# NOTE(review): the rendered header starts with an "Unnamed: 0" column, which
# usually means the CSV was written together with its index. Confirm whether
# the saved models were trained with that column before keeping it as a feature.
df = pd.read_csv("data/test.csv")
df

Unnamed: 0,loan_amnt,int_rate,installment,annual_inc,dti,delinq_2yrs,inq_last_6mths,open_acc,pub_rec,revol_bal,...,home_ownership_OWN,home_ownership_RENT,verification_status_Source Verified,verification_status_Verified,issue_d_Jan-2019,issue_d_Mar-2019,initial_list_status_w,next_pymnt_d_May-2019,application_type_Joint App,loan_status
0,8400,0.1797,303.56,81600,11.78,0,0,10,0,5126,...,0,1,0,1,0,0,1,1,0,0
1,35000,0.1298,1178.96,140000,27.93,5,0,29,0,51257,...,0,0,1,0,0,0,1,1,0,0
2,30000,0.0646,918.93,283000,5.22,0,1,25,0,43694,...,1,0,0,0,0,0,1,1,0,0
3,15000,0.2727,460.46,77000,10.38,1,1,14,1,7837,...,0,0,0,0,0,0,1,1,0,0
4,25000,0.2437,985.69,85000,12.33,0,0,10,1,64320,...,0,0,1,0,0,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,9600,0.0819,301.68,70000,22.15,1,0,5,0,3327,...,0,1,0,0,0,0,1,1,0,1
96,40000,0.0819,814.70,161000,18.11,3,0,11,0,71158,...,0,0,1,0,0,0,1,1,0,1
97,21000,0.2534,620.58,77000,26.14,0,2,10,0,6265,...,0,0,1,0,0,0,0,1,1,1
98,10000,0.1614,352.27,65000,23.76,0,0,5,0,651,...,0,1,1,0,0,0,1,1,0,1


In [63]:
# Features are every column except the last one; the target is the last
# column (rendered as `loan_status` in the frame preview).
X_test = df.iloc[:, :-1]
Y_test = df.iloc[:, -1]

In [64]:
from sklearn.preprocessing import StandardScaler

# Standardise the feature columns before they are handed to the models.
# NOTE(review): the scaler is fitted on the evaluation data itself. If the
# saved models were trained on data standardised with a training-set scaler,
# that fitted scaler should be reused here instead — confirm.
scaler = StandardScaler().fit(X_test)
X_scaler = scaler  # `fit` returns the estimator itself, so both names share one object
X_test_scaled = scaler.transform(X_test)

In [65]:
from os import listdir
from os.path import isfile, join
def _list_model_files(directory):
    """Return the names of the regular files directly inside ``directory``.

    Sub-directories are skipped. The result is sorted because ``os.listdir``
    returns entries in arbitrary order, which would otherwise make the order
    of the per-client reports depend on the file system.

    Raises:
        FileNotFoundError: if ``directory`` does not exist.
    """
    return sorted(f for f in listdir(directory) if isfile(join(directory, f)))


# One list of saved-model file names per client, plus the aggregated models.
client1 = _list_model_files('model/client1')
client2 = _list_model_files('model/client2')
client3 = _list_model_files('model/client3')
client4 = _list_model_files('model/client4')
agg_models = _list_model_files('model/agg_models')

In [66]:
# Show the file names found in model/client1.
client1

['client_1_round_2_model.sav',
 'client_1_round_3_model.sav',
 'client_1_round_4_model.sav']

In [67]:
# Show the file names found in model/client2.
client2

['client_2_round_1_model.sav',
 'client_2_round_2_model.sav',
 'client_2_round_5_model.sav']

In [68]:
# Show the file names found in model/client3.
client3

['client_3_round_3_model.sav',
 'client_3_round_4_model.sav',
 'client_3_round_5_model.sav']

In [69]:
# Show the file names found in model/client4.
client4

['client_4_round_1_model.sav',
 'client_4_round_2_model.sav',
 'client_4_round_3_model.sav',
 'client_4_round_4_model.sav',
 'client_4_round_5_model.sav']

In [70]:
# Show the file names found in model/agg_models.
agg_models

['agg_round_1_model.sav',
 'agg_round_2_model.sav',
 'agg_round_3_model.sav',
 'agg_round_4_model.sav',
 'agg_round_5_model.sav']

In [71]:
# Flatten the per-client file lists into one list (aggregated models are kept separate).
all_models = [*client1, *client2, *client3, *client4]

In [72]:
# Derive, for every client model file, its round label and its client
# directory name from the underscore-separated file name, e.g.
# 'client_2_round_1_model.sav' -> round 'round_1', client 'client2'.
rounds, clients = [], []
for model in all_models:
    tokens = model.split("_")
    rounds.append(f"{tokens[2]}_{tokens[3]}")
    clients.append(f"{tokens[0]}{tokens[1]}")

In [74]:
# Turn the Python lists into NumPy arrays so that boolean masks such as
# `rounds == "round_1"` can be used to select matching entries.
rounds, clients, all_models, agg_models = (
    np.array(values) for values in (rounds, clients, all_models, agg_models)
)

In [75]:
# Sanity check: select the client model files whose parsed round label is "round_1".
all_models[rounds=="round_1"]

array(['client_2_round_1_model.sav', 'client_4_round_1_model.sav'],
      dtype='<U26')

In [79]:
def _load_pickled_model(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes (the bare ``pickle.load(open(...))`` form leaves it open).

    NOTE(security): ``pickle.load`` can execute arbitrary code while loading;
    only use this on model files that come from a trusted source.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
    """
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


# For every round: print a classification report for each client model saved
# in that round, then one for that round's aggregated model, all evaluated on
# the same scaled test features.
num_of_rounds = 5
for i in range(1, num_of_rounds + 1):
    print(f"========================== ROUND {i}========================================================")
    # Compute the round mask once and reuse it for both parallel arrays.
    in_round = rounds == f"round_{i}"
    model_arr = all_models[in_round]
    client_arr = clients[in_round]
    for x, y in zip(model_arr, client_arr):
        # `y` is the client directory name, `x` the model file name inside it.
        model = _load_pickled_model(f'model/{y}/{x}')
        preds = model.predict(X_test_scaled)
        # \033[1m ... \033[0m are ANSI escapes that render the client name in bold.
        print(f"-----------\033[1m {y} \033[0m ---------")
        print(classification_report(Y_test, preds))
        print("=================================================================")
    print("========================= AGGREGATED ============================")
    agg_model = _load_pickled_model(f'model/agg_models/agg_round_{i}_model.sav')
    agg_preds = agg_model.predict(X_test_scaled)
    print(classification_report(Y_test, agg_preds))
    print("==========================================================================================")

-----------[1m client2 [0m ---------
              precision    recall  f1-score   support

           0       0.59      0.58      0.59        50
           1       0.59      0.60      0.59        50

    accuracy                           0.59       100
   macro avg       0.59      0.59      0.59       100
weighted avg       0.59      0.59      0.59       100

-----------[1m client4 [0m ---------
              precision    recall  f1-score   support

           0       0.76      0.64      0.70        50
           1       0.69      0.80      0.74        50

    accuracy                           0.72       100
   macro avg       0.73      0.72      0.72       100
weighted avg       0.73      0.72      0.72       100

              precision    recall  f1-score   support

           0       0.70      0.62      0.66        50
           1       0.66      0.74      0.70        50

    accuracy                           0.68       100
   macro avg       0.68      0.68      0.68       