In [1]:
from typing import Tuple, Union, List
import numpy as np
from sklearn.linear_model import LogisticRegression
import openml
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, precision_score, recall_score

In [2]:
import pickle
import warnings
# Silence every warning for the rest of the session (no category or module
# filter is given), so deprecation/convergence warnings will not be shown.
warnings.filterwarnings('ignore')

In [4]:
# Read the held-out evaluation set from disk and display it.
df = pd.read_csv(filepath_or_buffer="data/test.csv")
df

Unnamed: 0,loan_amnt,int_rate,installment,annual_inc,dti,delinq_2yrs,inq_last_6mths,open_acc,pub_rec,revol_bal,...,home_ownership_OWN,home_ownership_RENT,verification_status_Source Verified,verification_status_Verified,issue_d_Jan-2019,issue_d_Mar-2019,initial_list_status_w,next_pymnt_d_May-2019,application_type_Joint App,loan_status
0,8400,0.1797,303.56,81600,11.78,0,0,10,0,5126,...,0,1,0,1,0,0,1,1,0,0
1,35000,0.1298,1178.96,140000,27.93,5,0,29,0,51257,...,0,0,1,0,0,0,1,1,0,0
2,30000,0.0646,918.93,283000,5.22,0,1,25,0,43694,...,1,0,0,0,0,0,1,1,0,0
3,15000,0.2727,460.46,77000,10.38,1,1,14,1,7837,...,0,0,0,0,0,0,1,1,0,0
4,25000,0.2437,985.69,85000,12.33,0,0,10,1,64320,...,0,0,1,0,0,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,9600,0.0819,301.68,70000,22.15,1,0,5,0,3327,...,0,1,0,0,0,0,1,1,0,1
96,40000,0.0819,814.70,161000,18.11,3,0,11,0,71158,...,0,0,1,0,0,0,1,1,0,1
97,21000,0.2534,620.58,77000,26.14,0,2,10,0,6265,...,0,0,1,0,0,0,0,1,1,1
98,10000,0.1614,352.27,65000,23.76,0,0,5,0,651,...,0,1,1,0,0,0,1,1,0,1


In [5]:
# Positional split: every column except the last is a feature, the last
# column (loan_status in the displayed frame) is the label.
# NOTE(review): this keeps the leading "Unnamed: 0" column (the CSV's saved
# row index) as a feature — confirm the models were trained with it.
X_test = df.iloc[:, :-1]
Y_test = df.iloc[:, -1]

In [6]:
from sklearn.preprocessing import StandardScaler

# Standardise the test features to zero mean / unit variance.
# NOTE(review): the scaler is fitted on the test set itself, so the statistics
# differ from whatever scaling was applied at training time — confirm this is
# intended, or load the scaler that was fitted on the training data instead.
scaler = StandardScaler()
X_test_scaled = scaler.fit_transform(X_test)
# fit() returns the estimator itself, so X_scaler is the same fitted object.
X_scaler = scaler

In [24]:
import re
from os import listdir
from os.path import isfile, join


def _round_sort_key(filename):
    """Return a sort key that orders checkpoint files by their round number.

    The round number is the last run of digits before the file extension
    (e.g. ``client_1_model_12.sav`` -> 12). Sorting numerically keeps round 10
    after round 9. Names without a number sort last, alphabetically.
    """
    match = re.search(r'(\d+)\.\w+$', filename)
    if match is None:
        return (1, 0, filename)
    return (0, int(match.group(1)), filename)


def _list_model_files(directory):
    """List the regular files in ``directory``, ordered by round number.

    ``os.listdir`` returns entries in arbitrary order, but the evaluation loop
    pairs the per-client lists by position, so every list must be put into the
    same deterministic round order.
    """
    files = [f for f in listdir(directory) if isfile(join(directory, f))]
    return sorted(files, key=_round_sort_key)


# One list of checkpoint filenames per participant, index i == round i.
client1 = _list_model_files('model/client1')
client2 = _list_model_files('model/client2')
client3 = _list_model_files('model/client3')
client4 = _list_model_files('model/client4')
agg_models = _list_model_files('model/agg_models')

In [25]:
# Show the checkpoint filenames found for client 1.
client1

['client_1_model_0.sav',
 'client_1_model_1.sav',
 'client_1_model_2.sav',
 'client_1_model_3.sav']

In [26]:
# Show the checkpoint filenames found for client 2.
client2

['client_2_model_0.sav',
 'client_2_model_1.sav',
 'client_2_model_2.sav',
 'client_2_model_3.sav']

In [27]:
# Show the checkpoint filenames found for client 3.
client3

['client_3_model_0.sav',
 'client_3_model_1.sav',
 'client_3_model_2.sav',
 'client_3_model_3.sav']

In [28]:
# Show the checkpoint filenames found for client 4.
client4

['client_4_model_0.sav',
 'client_4_model_1.sav',
 'client_4_model_2.sav',
 'client_4_model_3.sav']

In [29]:
# Show the checkpoint filenames found for the aggregated model.
agg_models

['agg_model_0.sav', 'agg_model_1.sav', 'agg_model_2.sav', 'agg_model_3.sav']

In [31]:
import pickle


def _load_pickled_model(path):
    """Load one pickled model from ``path`` and close the file afterwards.

    NOTE(review): ``pickle.load`` can execute arbitrary code from the file;
    only use this on checkpoints produced by a trusted training run.
    """
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


# Evaluate every round: the i-th checkpoint of each client and of the
# aggregated model is scored against the same scaled test set.
# zip() stops at the shortest list, so a client with fewer checkpoints
# silently truncates the comparison.
count = 0
for client1m, client2m, client3m, client4m, aggmodelm in zip(client1, client2, client3, client4, agg_models):
    c1_model = _load_pickled_model(f'model/client1/{client1m}')
    c2_model = _load_pickled_model(f'model/client2/{client2m}')
    c3_model = _load_pickled_model(f'model/client3/{client3m}')
    c4_model = _load_pickled_model(f'model/client4/{client4m}')
    agg_model = _load_pickled_model(f'model/agg_models/{aggmodelm}')

    # All predictions are computed before anything is printed, so a failing
    # checkpoint does not leave a half-printed report for the round.
    c1_preds = c1_model.predict(X_test_scaled)
    c2_preds = c2_model.predict(X_test_scaled)
    c3_preds = c3_model.predict(X_test_scaled)
    c4_preds = c4_model.predict(X_test_scaled)
    agg_preds = agg_model.predict(X_test_scaled)

    print(f"-----------\033[1m MODEL-{count} (ROUND {count}) \033[0m ---------")
    for title, preds in (
        ("CLIENT-1", c1_preds),
        ("CLIENT-2", c2_preds),
        ("CLIENT-3", c3_preds),
        ("CLIENT-4", c4_preds),
        ("Aggregated Model", agg_preds),
    ):
        print(f"-----------\033[1m {title} \033[0m ---------")
        print(classification_report(Y_test, preds))
    print("=================================================================")
    count += 1

-----------[1m MODEL-0 (ROUND 0) [0m ---------
-----------[1m CLIENT-1 [0m ---------
              precision    recall  f1-score   support

           0       0.87      0.80      0.83        50
           1       0.81      0.88      0.85        50

    accuracy                           0.84       100
   macro avg       0.84      0.84      0.84       100
weighted avg       0.84      0.84      0.84       100

-----------[1m CLIENT-2 [0m ---------
              precision    recall  f1-score   support

           0       0.75      0.80      0.78        50
           1       0.79      0.74      0.76        50

    accuracy                           0.77       100
   macro avg       0.77      0.77      0.77       100
weighted avg       0.77      0.77      0.77       100

-----------[1m CLIENT-3 [0m ---------
              precision    recall  f1-score   support

           0       0.60      0.54      0.57        50
           1       0.58      0.64      0.61        50

    accuracy 