In [1]:
import numpy as np
import pandas as pd
import random
import sys
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
import torch
from scipy.sparse import csgraph
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from Code.Graph_composition import *
from Code.Modeling import *
import easydict
from sklearn import metrics    

In [2]:
# Load the sample used throughout the notebook (path is relative to the notebook's working directory).
# NOTE(review): later cells index rows 0..13999 positionally, so the file presumably holds
# at least 14000 rows ordered by class — confirm against the CSV.
sample_dataset = pd.read_csv('Data/Sample_dataset.csv')

In [None]:
# Predictor-only copy: 'loan_status' is the column later one-hot encoded into the labels,
# so it is removed before feature distances are computed.
sample_dataset1 = sample_dataset.drop('loan_status', axis = 1)

In [None]:
# Distance-matrix pipeline built from project helpers (presumably provided by the
# `from Code.Graph_composition import *` star import — confirm):
# scale -> discretise -> per-feature distances -> weights -> pairwise distance matrix.
all_feature_graph = Numerical_min_max(sample_dataset1)
all_feature_graph_discreted = Discretization(all_feature_graph)
all_feature_name, all_feature_distance_by_factor_exercise = Algo_distance(all_feature_graph_discreted)
all_feature_weight_vector_exercise = Weight(all_feature_distance_by_factor_exercise)
all_feature_distance_matrix_exercise = Distance(all_feature_graph, all_feature_weight_vector_exercise, all_feature_distance_by_factor_exercise, all_feature_name)
# ndarray.tofile writes raw bytes only (no shape or dtype), so any reader has to know both.
# NOTE(review): the loading cell reads loan_/history_/soft_feature_distance_matrix.dat, none of
# which is written here — those files must come from runs on the respective feature subsets.
all_feature_distance_matrix_exercise.tofile('all_feature_distance_matrix.dat')


In [None]:
sample_dataset

In [3]:
# Side length of every pairwise-distance matrix stored on disk.
N_OBSERVATIONS = 14000


def _read_distance_matrix(path):
    """Read a raw float dump from ``path`` and restore it to a square 2-D array.

    The file carries no shape information, so the side length is taken from
    ``N_OBSERVATIONS``; ``reshape`` raises if the element count does not match.
    """
    return np.fromfile(path, dtype=float).reshape(N_OBSERVATIONS, N_OBSERVATIONS)


# One distance matrix per feature group.
loan_feature_distance_matrix_exercise = _read_distance_matrix('loan_feature_distance_matrix.dat')
history_feature_distance_matrix_exercise = _read_distance_matrix('history_feature_distance_matrix.dat')
soft_feature_distance_matrix_exercise = _read_distance_matrix('soft_feature_distance_matrix.dat')


In [4]:
def CV_dataset_Extraction(sample_dataset, loan_feature_distance_matrix_exercise, history_feature_distance_matrix_exercise, soft_feature_distance_matrix_exercise, cv = 1, n_per_class = 7000, fold_size = 1000):
    """Cut one cross-validation fold out of the dataset and its three distance matrices.

    Rows are selected purely by position: positions ``0 .. n_per_class-1`` are treated as
    the "Fully Paid" observations and ``n_per_class .. 2*n_per_class-1`` as the "Default"
    observations. The labels themselves are never inspected, so the caller must supply
    data in that order. Fold ``cv`` holds out ``fold_size`` consecutive rows of each class
    as the test set; the remaining rows of both classes form the training set.

    Parameters
    ----------
    sample_dataset : pandas.DataFrame
        Full dataset, indexed positionally with ``iloc``.
    loan_feature_distance_matrix_exercise, history_feature_distance_matrix_exercise,
    soft_feature_distance_matrix_exercise : 2-D array
        Square pairwise-distance matrices whose rows/columns follow ``sample_dataset``.
    cv : int
        1-based fold number; ``cv * fold_size`` must not exceed ``n_per_class``.
    n_per_class : int
        Number of observations in each class block (default 7000).
    fold_size : int
        Number of held-out observations per class (default 1000).

    Returns
    -------
    tuple
        ``(train_dataset, test_dataset,
        loan_train, history_train, soft_train,
        loan_test, history_test, soft_test)`` where the ``*_train`` matrices are
        train-by-train and the ``*_test`` matrices are test-by-train.

    Raises
    ------
    ValueError
        If ``cv`` does not designate a fold that lies entirely inside a class block.
    """
    if cv < 1 or cv * fold_size > n_per_class:
        raise ValueError(
            "cv must satisfy 1 <= cv <= n_per_class // fold_size "
            "(got cv={}, n_per_class={}, fold_size={})".format(cv, n_per_class, fold_size))

    fold_start = (cv - 1) * fold_size
    fold_stop = cv * fold_size

    ### Train and Test indexing for Fully Paid Observations
    # Built in ascending order explicitly instead of relying on set iteration order.
    test_Fully_paid_index = list(range(fold_start, fold_stop))
    train_Fully_paid_index = [i for i in range(n_per_class) if not (fold_start <= i < fold_stop)]

    ### Train and Test indexing for Default Observations (same fold, shifted by one class block)
    test_Default_index = [n_per_class + i for i in test_Fully_paid_index]
    train_Default_index = [n_per_class + i for i in train_Fully_paid_index]

    train_index = train_Fully_paid_index + train_Default_index
    test_index = test_Fully_paid_index + test_Default_index

    ### Train and Test dataset partition
    train_dataset = sample_dataset.iloc[train_index, :].reset_index(drop = True)
    test_dataset = sample_dataset.iloc[test_index, :].reset_index(drop = True)

    # np.ix_ selects the row/column cross product in one step, avoiding the
    # intermediate (rows x all-columns) copy of each large matrix.
    train_block = np.ix_(train_index, train_index)
    test_block = np.ix_(test_index, train_index)

    ### Train distance matrix (train rows x train columns)
    loan_feature_distance_matrix_train = loan_feature_distance_matrix_exercise[train_block]
    history_feature_distance_matrix_train = history_feature_distance_matrix_exercise[train_block]
    soft_feature_distance_matrix_train = soft_feature_distance_matrix_exercise[train_block]

    ### Test distance matrix (test rows x train columns)
    loan_feature_distance_matrix_test = loan_feature_distance_matrix_exercise[test_block]
    history_feature_distance_matrix_test = history_feature_distance_matrix_exercise[test_block]
    soft_feature_distance_matrix_test = soft_feature_distance_matrix_exercise[test_block]

    return(train_dataset, test_dataset, loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test)
        



In [5]:
### Distance matrix to Adjacency matrix and Node feature matrix Generation
def Matrix_Generation(loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test, train_dataset, test_dataset, bandwidth = 10):
    """Build adjacency matrices, node-feature matrices and labels for one fold.

    For each of the three feature groups (loan, history, soft) the function:
      * turns the train and test distance matrices into adjacency matrices via
        ``Distance_Weight`` / ``Distance_Weight_Test`` with the given ``bandwidth``;
      * builds a combined (train + test) square adjacency matrix in which the test
        rows are appended below the train rows and the columns belonging to test
        nodes are all zero;
      * adds the identity matrix and passes the result through ``normalize``;
      * builds node features as the output of ``Default_Neighbor_count`` followed by
        the output of ``Model_matrix`` for that group's columns.

    The helpers ``Distance_Weight``, ``Distance_Weight_Test``, ``normalize``,
    ``Model_matrix`` and ``Default_Neighbor_count`` are not defined in this notebook;
    their exact semantics are not visible here.

    Feature groups are taken by column position from ``train_dataset``:
    columns 0-6 (loan), 7-15 (history) and 16-20 (soft).

    Returns an 18-tuple:
    ``(train_loan_adj, train_history_adj, train_soft_adj,
    test_loan_adj, test_history_adj, test_soft_adj,
    train_loan_node_feature, train_history_node_feature, train_soft_node_feature,
    test_loan_node_feature, test_history_node_feature, test_soft_node_feature,
    base_category, train_y, test_y, DN_count_loan, DN_count_history, DN_count_soft)``.
    All ``test_*`` outputs and ``test_y`` cover train rows followed by test rows, and
    the returned ``DN_count_*`` values are the ones computed on that combined graph
    (the train-only values are overwritten before returning).
    """
    ### Transformation for Train dataset
    loan_feature_adjacency_matrix_train = Distance_Weight(loan_feature_distance_matrix_train, bandwidth)
    history_feature_adjacency_matrix_train = Distance_Weight(history_feature_distance_matrix_train, bandwidth)
    soft_feature_adjacency_matrix_train = Distance_Weight(soft_feature_distance_matrix_train, bandwidth)
    
    ### Transformation for Test dataset
    loan_feature_adjacency_matrix_test = Distance_Weight_Test(loan_feature_distance_matrix_test, bandwidth)
    history_feature_adjacency_matrix_test = Distance_Weight_Test(history_feature_distance_matrix_test, bandwidth)
    soft_feature_adjacency_matrix_test = Distance_Weight_Test(soft_feature_distance_matrix_test, bandwidth)

    ### Normalize for train dataset (identity added first, i.e. every node gets a self-connection)
    train_loan_adj = normalize(loan_feature_adjacency_matrix_train + sp.eye(loan_feature_adjacency_matrix_train.shape[0]))
    train_history_adj = normalize(history_feature_adjacency_matrix_train + sp.eye(history_feature_adjacency_matrix_train.shape[0]))
    train_soft_adj = normalize(soft_feature_adjacency_matrix_train + sp.eye(soft_feature_adjacency_matrix_train.shape[0]))

    ### Matrix generation for Test dataset
    ### Step 1: stack the test rows underneath the train adjacency (default axis 0).
    loan_feature_adjacency_matrix_test = np.concatenate([loan_feature_adjacency_matrix_train,loan_feature_adjacency_matrix_test])
    history_feature_adjacency_matrix_test = np.concatenate([history_feature_adjacency_matrix_train,history_feature_adjacency_matrix_test])
    soft_feature_adjacency_matrix_test = np.concatenate([soft_feature_adjacency_matrix_train,soft_feature_adjacency_matrix_test])

    ### Step 2: append one all-zero column per test row so the matrix becomes square;
    ### no node therefore has an edge pointing to a test node.
    loan_feature_adjacency_matrix_test = np.concatenate((loan_feature_adjacency_matrix_test, np.zeros((loan_feature_adjacency_matrix_test.shape[0], loan_feature_distance_matrix_test.shape[0]))), axis = 1)
    history_feature_adjacency_matrix_test = np.concatenate((history_feature_adjacency_matrix_test, np.zeros((history_feature_adjacency_matrix_test.shape[0], history_feature_distance_matrix_test.shape[0]))), axis = 1)
    soft_feature_adjacency_matrix_test = np.concatenate((soft_feature_adjacency_matrix_test, np.zeros((soft_feature_adjacency_matrix_test.shape[0], soft_feature_distance_matrix_test.shape[0]))), axis = 1)

    ### Normalize the combined (train + test) adjacency, again with the identity added
    test_loan_adj = normalize(loan_feature_adjacency_matrix_test + sp.eye(loan_feature_adjacency_matrix_test.shape[0]))
    test_history_adj = normalize(history_feature_adjacency_matrix_test + sp.eye(history_feature_adjacency_matrix_test.shape[0]))
    test_soft_adj = normalize(soft_feature_adjacency_matrix_test + sp.eye(soft_feature_adjacency_matrix_test.shape[0]))


    ### One-hot Encoding and Train X, Train Y
    temp_dummies = pd.get_dummies(train_dataset)
    ### Label is the 'Charged Off' indicator column of the one-hot encoded loan_status.
    train_y = list(temp_dummies['loan_status_Charged Off'])
    ### NOTE(review): positional indices into the dummy columns — presumably the reference
    ### level of each categorical variable; they break silently if the column layout changes.
    base_category = temp_dummies.columns[[11, 13, 18, 19, 21, 24, 26, 29, 32, 34, 40, 43]]
    ### NOTE(review): train_x is neither used below nor returned.
    train_x = Model_matrix(train_dataset, base_category)
    train_x = train_x.drop('loan_status_Charged Off', axis = 1)

    ### Train Node feature Matrix (feature groups are selected by column position)
    loan_features = train_dataset.columns[0:7]
    train_loan_features_dataset = train_dataset[loan_features] 
    history_features = train_dataset.columns[7:16]
    train_history_features_dataset = train_dataset[history_features] 
    soft_features = train_dataset.columns[16:21]
    train_soft_features_dataset = train_dataset[soft_features] 
    
    loan_x = Model_matrix(train_loan_features_dataset, base_category)
    history_x = Model_matrix(train_history_features_dataset, base_category)
    soft_x = Model_matrix(train_soft_features_dataset, base_category)
    ### Node features = Default_Neighbor_count output placed in front of the group's model matrix
    DN_count_loan = Default_Neighbor_count(loan_feature_adjacency_matrix_train, train_y)
    train_loan_node_feature = np.concatenate((DN_count_loan, loan_x), axis= 1)
    DN_count_history = Default_Neighbor_count(history_feature_adjacency_matrix_train, train_y)
    train_history_node_feature = np.concatenate((DN_count_history, history_x), axis= 1)
    DN_count_soft = Default_Neighbor_count(soft_feature_adjacency_matrix_train, train_y)
    train_soft_node_feature = np.concatenate((DN_count_soft, soft_x), axis= 1)

    ### Combined frame: train rows first, then test rows, with a fresh 0..n-1 index
    all_dataset = pd.concat([train_dataset, test_dataset])
    all_dataset = all_dataset.reset_index()
    all_dataset = all_dataset.drop(columns=['index'])

    temp_dummies = pd.get_dummies(all_dataset)
    ### test_y holds labels for ALL rows of the combined frame (train followed by test)
    test_y = list(temp_dummies['loan_status_Charged Off'])

    ### Test Node feature matrix (computed on the combined train + test frame)
    loan_features = all_dataset.columns[0:7]
    test_loan_features_dataset = all_dataset[loan_features] 
    history_features = all_dataset.columns[7:16]
    test_history_features_dataset = all_dataset[history_features] 
    soft_features = all_dataset.columns[16:21]
    test_soft_features_dataset = all_dataset[soft_features] 
    
    loan_x = Model_matrix(test_loan_features_dataset, base_category)
    history_x = Model_matrix(test_history_features_dataset, base_category)
    soft_x = Model_matrix(test_soft_features_dataset, base_category)
    
    ### The DN_count_* names are reused here, so the train-only values computed above are
    ### overwritten and the combined-graph values are what the function returns.
    DN_count_loan = Default_Neighbor_count(loan_feature_adjacency_matrix_test, test_y)
    test_loan_node_feature = np.concatenate((DN_count_loan, loan_x), axis= 1)
    
    DN_count_history = Default_Neighbor_count(history_feature_adjacency_matrix_test, test_y)
    test_history_node_feature = np.concatenate((DN_count_history, history_x), axis= 1)
    
    DN_count_soft = Default_Neighbor_count(soft_feature_adjacency_matrix_test, test_y)
    test_soft_node_feature = np.concatenate((DN_count_soft, soft_x), axis= 1)
    
    return(train_loan_adj, train_history_adj, train_soft_adj, test_loan_adj, test_history_adj, test_soft_adj, train_loan_node_feature, train_history_node_feature, train_soft_node_feature, test_loan_node_feature, test_history_node_feature, test_soft_node_feature, base_category, train_y, test_y, DN_count_loan, DN_count_history, DN_count_soft)
    





# Baseline models

## Train, Test dataset

In [6]:
from sklearn.svm import SVC
from sklearn import metrics    
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import re
from Code.Test_sampling import *
import pandas as pd
from sklearn.neural_network import MLPClassifier

In [7]:
# Extract fold 1: train/test frames plus the matching train-by-train and test-by-train distance matrices.
train_dataset, test_dataset, loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test = CV_dataset_Extraction(sample_dataset, loan_feature_distance_matrix_exercise, history_feature_distance_matrix_exercise, soft_feature_distance_matrix_exercise, 
            cv = 1)

# Remember the column order so it can be restored after scaling
# (presumably the scaling helpers reorder columns — confirm).
col_order = train_dataset.columns

### normalize
# Order matters: the test frame is scaled first, while train_dataset still holds the
# unscaled values that Numerical_min_max_for_test receives; only then is train_dataset overwritten.
test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)
train_dataset = Numerical_min_max(train_dataset)
train_dataset = train_dataset[col_order]
test_dataset = test_dataset[col_order]
del col_order

# Note: bandwidth = 20 here overrides Matrix_Generation's default of 10.
train_loan_adj, train_history_adj, train_soft_adj, test_loan_adj, test_history_adj, test_soft_adj, train_loan_node_feature, train_history_node_feature, train_soft_node_feature, test_loan_node_feature, test_history_node_feature, test_soft_node_feature, base_category, train_y, test_y, DN_count_loan, DN_count_history, DN_count_soft = Matrix_Generation(loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test, train_dataset, test_dataset, 
    bandwidth = 20)


In [8]:
def _as_cuda_float(array):
    """Wrap a NumPy array as a float32 tensor and move it to the GPU."""
    return torch.from_numpy(array).float().cuda()


# Node features and adjacency matrices of the combined (train + test) graph.
# The names are rebound in place, so this cell cannot be re-run on its own output.
test_loan_node_feature = _as_cuda_float(test_loan_node_feature)
test_history_node_feature = _as_cuda_float(test_history_node_feature)
test_soft_node_feature = _as_cuda_float(test_soft_node_feature)
test_loan_adj = _as_cuda_float(test_loan_adj)
test_history_adj = _as_cuda_float(test_history_adj)
test_soft_adj = _as_cuda_float(test_soft_adj)

# Labels come in as a Python list and are kept as integers.
test_y = torch.tensor(test_y, dtype=torch.long).cuda()



In [None]:
# GCN training run. Requires the cells that define train() and GCNNet (further down in
# this notebook) to have been executed first. The seed is set before GCNNet(args) is
# constructed, so any random initialisation done there draws from it.
torch.manual_seed(47137)
    
# NOTE(review): "seed", "cuda" and args.act are not read by train() or by the GCNNet
# variants visible in this notebook; the effective seed is the manual_seed call above.
args = easydict.EasyDict({ "seed": 714, "epochs": 3210, "lr": 0.0005, "weight_decay": 5e-4, "n_layer": 3, 
                                         "n_block": 1 , "hidden_dim": 16 , "pred_dim1": 16 , "pred_dim2": 24, "pred_dim3": 8,
                                         "bn": True ,"sc": "gc","cuda": True})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# net and optimizer are module-level names that train() and test() pick up as globals.
net = GCNNet(args)
    
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One train() call = one full-batch update followed by a printed test-set evaluation.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 2830 acc_train: 0.8071 Test set results: acc_test: 0.8105 F1_test: 0.8300 auc_test: 0.8815


In [None]:
test()

In [None]:
# GCN training run (same setup as the other runs; only the seed and epoch count differ).
# Requires the cells that define train() and GCNNet to have been executed first.
torch.manual_seed(97213)
    
# NOTE(review): "seed", "cuda" and args.act are not read by train() or by the GCNNet
# variants visible in this notebook; the effective seed is the manual_seed call above.
args = easydict.EasyDict({ "seed": 714, "epochs": 2646, "lr": 0.0005, "weight_decay": 5e-4, "n_layer": 3, 
                                         "n_block": 1 , "hidden_dim": 16 , "pred_dim1": 16 , "pred_dim2": 24, "pred_dim3": 8,
                                         "bn": True ,"sc": "gc","cuda": True})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# net and optimizer are module-level names that train() and test() pick up as globals.
net = GCNNet(args)
    
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One train() call = one full-batch update followed by a printed test-set evaluation.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3377 acc_train: 0.8085 Test set results: acc_test: 0.8025 F1_test: 0.8228 auc_test: 0.8721

In [None]:
test()

In [None]:
# GCN training run (same setup as the other runs; only the seed and epoch count differ).
# Requires the cells that define train() and GCNNet to have been executed first.
torch.manual_seed(214831)
    
# NOTE(review): "seed", "cuda" and args.act are not read by train() or by the GCNNet
# variants visible in this notebook; the effective seed is the manual_seed call above.
args = easydict.EasyDict({ "seed": 714, "epochs": 2264, "lr": 0.0005, "weight_decay": 5e-4, "n_layer": 3, 
                                         "n_block": 1 , "hidden_dim": 16 , "pred_dim1": 16 , "pred_dim2": 24, "pred_dim3": 8,
                                         "bn": True ,"sc": "gc","cuda": True})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# net and optimizer are module-level names that train() and test() pick up as globals.
net = GCNNet(args)
    
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One train() call = one full-batch update followed by a printed test-set evaluation.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3808 acc_train: 0.8064 Test set results: acc_test: 0.8000 F1_test: 0.8236 auc_test: 0.8640

In [None]:
test()

In [None]:
# GCN training run. Unlike the neighbouring runs this one uses lr = 0.001 and 5000 epochs.
# Requires the cells that define train() and GCNNet to have been executed first.
torch.manual_seed(8731147)
    
# NOTE(review): "seed", "cuda" and args.act are not read by train() or by the GCNNet
# variants visible in this notebook; the effective seed is the manual_seed call above.
args = easydict.EasyDict({ "seed": 714, "epochs": 5000, "lr": 0.001, "weight_decay": 5e-4, "n_layer": 3, 
                                         "n_block": 1 , "hidden_dim": 16 , "pred_dim1": 16 , "pred_dim2": 24, "pred_dim3": 8,
                                         "bn": True ,"sc": "gc","cuda": True})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# net and optimizer are module-level names that train() and test() pick up as globals.
net = GCNNet(args)
    
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One train() call = one full-batch update followed by a printed test-set evaluation.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    

### Epoch: 4783 acc_train: 0.8162 Test set results: acc_test: 0.7980 F1_test: 0.8233 auc_test: 0.8690

In [None]:
# GCN training run (same setup as the other runs; only the seed and epoch count differ).
# Requires the cells that define train() and GCNNet to have been executed first.
torch.manual_seed(722)
    
# NOTE(review): "seed", "cuda" and args.act are not read by train() or by the GCNNet
# variants visible in this notebook; the effective seed is the manual_seed call above.
args = easydict.EasyDict({ "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4, "n_layer": 3, 
                                         "n_block": 1 , "hidden_dim": 16 , "pred_dim1": 16 , "pred_dim2": 24, "pred_dim3": 8,
                                         "bn": True ,"sc": "gc","cuda": True})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# net and optimizer are module-level names that train() and test() pick up as globals.
net = GCNNet(args)
    
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One train() call = one full-batch update followed by a printed test-set evaluation.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
#
### Epoch: 1409 acc_train: 0.8008 Test set results: acc_test: 0.8125 F1_test: 0.8312 auc_test: 0.8804

### Epoch: 2815 acc_train: 0.8033 Test set results: acc_test: 0.8095 F1_test: 0.8331 auc_test: 0.8815

In [None]:
# GCN training run (same setup as the other runs; only the seed and epoch count differ).
# Requires the cells that define train() and GCNNet to have been executed first.
torch.manual_seed(17314)
    
# NOTE(review): "seed", "cuda" and args.act are not read by train() or by the GCNNet
# variants visible in this notebook; the effective seed is the manual_seed call above.
args = easydict.EasyDict({ "seed": 714, "epochs": 4065, "lr": 0.0005, "weight_decay": 5e-4, "n_layer": 3, 
                                         "n_block": 1 , "hidden_dim": 16 , "pred_dim1": 16 , "pred_dim2": 24, "pred_dim3": 8,
                                         "bn": True ,"sc": "gc","cuda": True})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# net and optimizer are module-level names that train() and test() pick up as globals.
net = GCNNet(args)
    
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One train() call = one full-batch update followed by a printed test-set evaluation.
for epoch in range(args.epochs):
    train(epoch, args, criterion)

### Epoch: 3299 acc_train: 0.8066 Test set results: acc_test: 0.7995 F1_test: 0.8209 auc_test: 0.8600


In [None]:
test()

In [None]:
# GCN training run (same setup as the other runs; only the seed and epoch count differ).
# Requires the cells that define train() and GCNNet to have been executed first.
torch.manual_seed(5122)
    
# NOTE(review): "seed", "cuda" and args.act are not read by train() or by the GCNNet
# variants visible in this notebook; the effective seed is the manual_seed call above.
args = easydict.EasyDict({ "seed": 714, "epochs": 3998, "lr": 0.0005, "weight_decay": 5e-4, "n_layer": 3, 
                                         "n_block": 1 , "hidden_dim": 16 , "pred_dim1": 16 , "pred_dim2": 24, "pred_dim3": 8,
                                         "bn": True ,"sc": "gc","cuda": True})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# net and optimizer are module-level names that train() and test() pick up as globals.
net = GCNNet(args)
    
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One train() call = one full-batch update followed by a printed test-set evaluation.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
test()

In [None]:
net

In [None]:
# Feature/label split for the baseline (non-graph) models, using the scaled train and
# test frames of the current fold. X_Y_split is presumably supplied by this star import — confirm.
from Code.Test_sampling import *
train_x, train_y_save = X_Y_split(train_dataset)
test_x, test_y_save = X_Y_split(test_dataset)

In [None]:
# Replace '[', ']' and '<' in column names with '_'.
# (Presumably needed because XGBoost refuses feature names containing those characters — confirm.)
regex = re.compile(r"\[|\]|<", re.IGNORECASE)

# Non-string column labels are left untouched: the pattern is searched in str(col),
# and substitution only runs when a match was found.
train_x.columns = [regex.sub("_", col) if regex.search(str(col)) else col for col in train_x.columns.values]
test_x.columns = [regex.sub("_", col) if regex.search(str(col)) else col for col in test_x.columns.values]


In [None]:
def Evaluation(model, Test_x, Test_y):
    """Print accuracy, precision, recall, F1 and ROC AUC for a fitted classifier.

    ``model`` must expose ``predict`` and ``predict_proba``. Class 1 is treated as the
    positive class, and the AUC is computed from the second ``predict_proba`` column.
    Nothing is returned; the five metrics are printed one per line.
    """
    predicted_labels = model.predict(Test_x)

    # Ranking metric: ROC curve over the scores of the positive class.
    positive_scores = model.predict_proba(Test_x)[:, 1]
    false_pos_rate, true_pos_rate, _ = metrics.roc_curve(Test_y, positive_scores, pos_label=1)
    roc_auc = metrics.auc(false_pos_rate, true_pos_rate)

    # Threshold metrics on the hard predictions.
    accuracy = metrics.accuracy_score(Test_y, predicted_labels)
    print("Accuracy = {}".format(accuracy))
    precision = metrics.precision_score(Test_y, predicted_labels, pos_label=1)
    print("Precision = {}".format(precision))
    recall = metrics.recall_score(Test_y, predicted_labels, pos_label=1)
    print("Recall = {}".format(recall))
    f1 = metrics.f1_score(Test_y, predicted_labels, pos_label=1)
    print("F1 score = {}".format(f1))
    print("AUC = {}".format(roc_auc))

### RandomForest

In [None]:
# Random-forest baseline: 500 shallow trees (depth 3), 4 candidate features per split.
# No random_state is passed, so the fitted forest and the printed metrics vary between runs.
rf_model = RandomForestClassifier(n_estimators=500, max_depth = 3, max_features = 4)
rf_model.fit(train_x, train_y_save)
Evaluation(rf_model, test_x, test_y_save)


### SVM

In [None]:
# SVM baseline. probability = True is required because Evaluation calls predict_proba.
svm_model = SVC(C = 3, gamma = 'scale', probability = True)
svm_model.fit(train_x, train_y_save)
Evaluation(svm_model, test_x, test_y_save)

### XGBoost

In [None]:
# Gradient-boosted trees baseline.
# Note the plural `n_estimators`: a misspelt `n_estimator` is not a recognised
# XGBClassifier argument and leaves the number of boosting rounds at the library default.
xgb_model = xgb.XGBClassifier(n_estimators = 500, learning_rate = 0.001, max_depth = 5, gamma = 0.1)
xgb_model.fit(train_x, train_y_save)
Evaluation(xgb_model, test_x, test_y_save)

In [None]:
### MLP

In [None]:
# MLP baseline: five hidden layers of 20 units, L-BFGS solver, random_state 9054.
# NOTE(review): scikit-learn documents early_stopping as effective only for the 'sgd' and
# 'adam' solvers, so it presumably has no effect with solver='lbfgs' — confirm.
mlp_model = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(20, 20, 20, 20, 20), random_state=9054, max_iter = 5000, early_stopping = True)
mlp_model.fit(train_x, train_y_save)
Evaluation(mlp_model, test_x, test_y_save)

In [None]:
# MLP baseline, repeated with random_state 54 (rebinds mlp_model).
# NOTE(review): scikit-learn documents early_stopping as effective only for the 'sgd' and
# 'adam' solvers, so it presumably has no effect with solver='lbfgs' — confirm.
mlp_model = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(20, 20, 20, 20, 20), random_state=54, max_iter = 5000, early_stopping = True)
mlp_model.fit(train_x, train_y_save)
Evaluation(mlp_model, test_x, test_y_save)

In [None]:
# MLP baseline, repeated with random_state 1254 (rebinds mlp_model).
# NOTE(review): scikit-learn documents early_stopping as effective only for the 'sgd' and
# 'adam' solvers, so it presumably has no effect with solver='lbfgs' — confirm.
mlp_model = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(20, 20, 20, 20, 20), random_state=1254, max_iter = 5000, early_stopping = True)
mlp_model.fit(train_x, train_y_save)
Evaluation(mlp_model, test_x, test_y_save)

In [None]:
# MLP baseline with random_state 19127 (rebinds mlp_model). Unlike the other MLP cells,
# max_iter and early_stopping are not passed here, so the library defaults apply.
mlp_model = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(20, 20, 20, 20, 20), random_state=19127)
mlp_model.fit(train_x, train_y_save)
Evaluation(mlp_model, test_x, test_y_save)

# GCN

In [9]:
def train(epoch, args, criterion, n_train = 12000):
    """Run one full-batch optimisation step, then print metrics on the held-out nodes.

    Relies on module-level state: ``net``, ``optimizer``, the ``test_*_node_feature`` and
    ``test_*_adj`` tensors of the combined (train + test) graph, and ``test_y``. The first
    ``n_train`` nodes are the training nodes; everything after them is the test set.

    Parameters
    ----------
    epoch : int
        Zero-based epoch counter, used only for the printed line.
    args : object
        Unused here; kept so existing calls keep working.
    criterion : callable
        Loss applied to the network output and the float training labels.
    n_train : int
        Number of leading nodes that form the training graph (default 12000).

    Returns nothing; one line with accuracy, precision, recall, F1 and AUC is printed.
    """
    # ---- optimisation step on the training sub-graph ----
    net.train()
    optimizer.zero_grad()
    outputs = net(test_loan_node_feature[0:n_train, :], test_history_node_feature[0:n_train, :], test_soft_node_feature[0:n_train, :],
                  test_loan_adj[0:n_train, 0:n_train], test_history_adj[0:n_train, 0:n_train], test_soft_adj[0:n_train, 0:n_train])
    loss = criterion(outputs, test_y[0:n_train].float())
    loss.backward()
    optimizer.step()

    # ---- evaluation on the full graph; only the rows after n_train are scored ----
    net.eval()
    # No gradients are needed for evaluation, so skip building the autograd graph.
    with torch.no_grad():
        outputs = net(test_loan_node_feature, test_history_node_feature, test_soft_node_feature,
                      test_loan_adj, test_history_adj, test_soft_adj)
    outputs = outputs[n_train:]
    outputs_list = outputs.tolist()

    test_y_for_test = test_y[n_train:].float()
    test_y_for_test_list = test_y_for_test.tolist()

    pred = (outputs > 0.5).float()
    # pred is transposed before the comparison so it lines up with the 1-D label vector.
    correct = (pred.transpose(0,1) == test_y_for_test).float().sum()
    acc_test = correct / len(test_y_for_test)
    pred_list = pred.tolist()

    f1 = metrics.f1_score(test_y_for_test_list, pred_list, pos_label=1)
    fpr, tpr, thresholds = metrics.roc_curve(test_y_for_test_list, outputs_list, pos_label=1)
    roc_auc = metrics.auc(fpr, tpr)

    print('Epoch: {:04d}'.format(epoch+1),
          "Test set results:",
          'acc: {:.4f}'.format(acc_test),
          "Pre = {}".format(metrics.precision_score(test_y_for_test_list, pred_list, pos_label=1)),
          "Recall = {}".format(metrics.recall_score(test_y_for_test_list, pred_list, pos_label=1)),
          'F1: {:.4f}'.format(f1),
          'auc: {:.4f}'.format(roc_auc))

    
def test(n_train = 12000):
    """Evaluate the current global ``net`` on the held-out nodes and print the metrics.

    Relies on module-level state: ``net``, the ``test_*_node_feature`` and ``test_*_adj``
    tensors of the combined (train + test) graph, and ``test_y``. The network is run on
    the whole graph and only the rows after the first ``n_train`` nodes are scored.

    Parameters
    ----------
    n_train : int
        Number of leading nodes that belong to the training set (default 12000).

    Returns nothing; accuracy, precision, recall, F1 and AUC are printed one per line.
    """
    net.eval()
    # No gradients are needed for evaluation, so skip building the autograd graph.
    with torch.no_grad():
        outputs = net(test_loan_node_feature, test_history_node_feature, test_soft_node_feature,
                      test_loan_adj, test_history_adj, test_soft_adj)

    outputs = outputs[n_train:]
    outputs_list = outputs.tolist()

    test_y_for_test_list = test_y[n_train:].float().tolist()

    # Hard predictions at the 0.5 threshold.
    pred = (outputs > 0.5).float().tolist()

    fpr, tpr, thresholds = metrics.roc_curve(test_y_for_test_list, outputs_list, pos_label=1)
    roc_auc = metrics.auc(fpr, tpr)

    print("Accuracy = {}".format(metrics.accuracy_score(test_y_for_test_list, pred)))
    print("Precision = {}".format(metrics.precision_score(test_y_for_test_list, pred, pos_label=1)))
    print("Recall = {}".format(metrics.recall_score(test_y_for_test_list, pred, pos_label=1)))
    print("F1 score = {}".format(metrics.f1_score(test_y_for_test_list, pred, pos_label=1)))
    print("AUC = {}".format(roc_auc))

    

In [10]:
class GCNNet(nn.Module):
    """Three parallel GCN branches (loan, history, soft) joined by one sigmoid layer.

    Each branch is a stack of ``args.n_block`` ``GCNBlock`` modules. The first block of a
    branch takes the raw node features, whose widths are hard-coded (9 for loan, 13 for
    history, 12 for soft); later blocks take ``args.hidden_dim`` inputs. The branch
    outputs are concatenated along dim 1 and passed to a single ``Classifier`` with a
    sigmoid activation that produces one value per node.
    """

    def __init__(self, args):
        super().__init__()

        def build_branch(input_dim):
            # Only the first block sees the raw feature width.
            return nn.ModuleList([
                GCNBlock(args.n_layer,
                         input_dim if idx == 0 else args.hidden_dim,
                         args.hidden_dim,
                         args.hidden_dim,   # output dim, passed separately
                         args.bn,
                         args.sc)
                for idx in range(args.n_block)
            ])

        # Construction order (loan, history, soft, head) is kept as-is.
        self.blocks_loan = build_branch(9)
        self.blocks_history = build_branch(13)
        self.blocks_soft = build_branch(12)

        self.pred1 = Classifier(3 * args.hidden_dim, 1, act = nn.Sigmoid())

    def forward(self, loan_node_feature, history_node_feature, soft_node_feature, loan_adj, history_adj, soft_adj):
        """Run the three branches and return the output of the prediction head."""
        # Every block returns (features, adjacency); both are threaded to the next block.
        for depth, block in enumerate(self.blocks_loan):
            loan_in = loan_node_feature if depth == 0 else loan_out
            loan_out, loan_adj = block(loan_in, loan_adj)

        for depth, block in enumerate(self.blocks_history):
            history_in = history_node_feature if depth == 0 else history_out
            history_out, history_adj = block(history_in, history_adj)

        for depth, block in enumerate(self.blocks_soft):
            soft_in = soft_node_feature if depth == 0 else soft_out
            soft_out, soft_adj = block(soft_in, soft_adj)

        merged = torch.cat([loan_out, history_out, soft_out], dim = 1)
        return self.pred1(merged)
   

In [None]:
class GCNNet(nn.Module):
    """Three parallel GCN branches (loan, history, soft) with a two-layer prediction head.

    Each branch is a stack of ``args.n_block`` ``GCNBlock`` modules. The first block of a
    branch takes the raw node features, whose widths are hard-coded (9 for loan, 13 for
    history, 12 for soft); later blocks take ``args.hidden_dim`` inputs. The branch
    outputs are concatenated along dim 1 and passed through a ReLU ``Classifier`` of
    width ``args.pred_dim1`` followed by a sigmoid ``Classifier`` with one output.
    """

    def __init__(self, args):
        super().__init__()

        def build_branch(input_dim):
            # Only the first block sees the raw feature width.
            return nn.ModuleList([
                GCNBlock(args.n_layer,
                         input_dim if idx == 0 else args.hidden_dim,
                         args.hidden_dim,
                         args.hidden_dim,   # output dim, passed separately
                         args.bn,
                         args.sc)
                for idx in range(args.n_block)
            ])

        # Construction order (loan, history, soft, head) is kept as-is.
        self.blocks_loan = build_branch(9)
        self.blocks_history = build_branch(13)
        self.blocks_soft = build_branch(12)

        self.pred1 = Classifier(3 * args.hidden_dim, args.pred_dim1, act = nn.ReLU())
        self.pred2 = Classifier(args.pred_dim1, 1, act = nn.Sigmoid())

    def forward(self, loan_node_feature, history_node_feature, soft_node_feature, loan_adj, history_adj, soft_adj):
        """Run the three branches and return the output of the prediction head."""
        # Every block returns (features, adjacency); both are threaded to the next block.
        for depth, block in enumerate(self.blocks_loan):
            loan_in = loan_node_feature if depth == 0 else loan_out
            loan_out, loan_adj = block(loan_in, loan_adj)

        for depth, block in enumerate(self.blocks_history):
            history_in = history_node_feature if depth == 0 else history_out
            history_out, history_adj = block(history_in, history_adj)

        for depth, block in enumerate(self.blocks_soft):
            soft_in = soft_node_feature if depth == 0 else soft_out
            soft_out, soft_adj = block(soft_in, soft_adj)

        merged = torch.cat([loan_out, history_out, soft_out], dim = 1)
        hidden = self.pred1(merged)
        return self.pred2(hidden)
   

In [None]:
class GCNNet(nn.Module):
    """Three parallel GCN branches (loan, history, soft) with a three-layer prediction head.

    Each branch is a stack of ``args.n_block`` ``GCNBlock`` modules. The first block of a
    branch takes the raw node features, whose widths are hard-coded (9 for loan, 13 for
    history, 12 for soft); later blocks take ``args.hidden_dim`` inputs. The branch
    outputs are concatenated along dim 1 and passed through two ReLU ``Classifier``
    layers (widths ``args.pred_dim1`` and ``args.pred_dim2``) followed by a sigmoid
    ``Classifier`` with one output.
    """

    def __init__(self, args):
        super().__init__()

        def build_branch(input_dim):
            # Only the first block sees the raw feature width.
            return nn.ModuleList([
                GCNBlock(args.n_layer,
                         input_dim if idx == 0 else args.hidden_dim,
                         args.hidden_dim,
                         args.hidden_dim,   # output dim, passed separately
                         args.bn,
                         args.sc)
                for idx in range(args.n_block)
            ])

        # Construction order (loan, history, soft, head) is kept as-is.
        self.blocks_loan = build_branch(9)
        self.blocks_history = build_branch(13)
        self.blocks_soft = build_branch(12)

        self.pred1 = Classifier(3 * args.hidden_dim, args.pred_dim1, act = nn.ReLU())
        self.pred2 = Classifier(args.pred_dim1, args.pred_dim2, act = nn.ReLU())
        self.pred3 = Classifier(args.pred_dim2, 1, act = nn.Sigmoid())

    def forward(self, loan_node_feature, history_node_feature, soft_node_feature, loan_adj, history_adj, soft_adj):
        """Run the three branches and return the output of the prediction head."""
        # Every block returns (features, adjacency); both are threaded to the next block.
        for depth, block in enumerate(self.blocks_loan):
            loan_in = loan_node_feature if depth == 0 else loan_out
            loan_out, loan_adj = block(loan_in, loan_adj)

        for depth, block in enumerate(self.blocks_history):
            history_in = history_node_feature if depth == 0 else history_out
            history_out, history_adj = block(history_in, history_adj)

        for depth, block in enumerate(self.blocks_soft):
            soft_in = soft_node_feature if depth == 0 else soft_out
            soft_out, soft_adj = block(soft_in, soft_adj)

        merged = torch.cat([loan_out, history_out, soft_out], dim = 1)
        hidden = self.pred1(merged)
        hidden = self.pred2(hidden)
        return self.pred3(hidden)
   

In [None]:
class GCNNet(nn.Module):
    """Three-branch graph network (loan / history / soft) with a four-stage head.

    Each branch is an ``nn.ModuleList`` of ``args.n_block`` ``GCNBlock`` modules.
    The branch outputs are concatenated along dimension 1 and passed through
    ``pred1`` .. ``pred4``; the last stage is constructed with ``nn.Sigmoid()``.

    ``args`` must provide ``n_block``, ``n_layer``, ``hidden_dim``, ``bn``,
    ``sc``, ``pred_dim1``, ``pred_dim2`` and ``pred_dim3``.

    NOTE: this notebook defines ``GCNNet`` in more than one cell; the name
    resolves to whichever of those cells was executed last.
    """

    def __init__(self, args):
        super().__init__()

        def build_branch(first_in_dim):
            # Only the first block of a branch sees the raw feature width;
            # every later block takes args.hidden_dim as its input width.
            return nn.ModuleList([
                GCNBlock(args.n_layer,
                         first_in_dim if idx == 0 else args.hidden_dim,
                         args.hidden_dim,
                         args.hidden_dim,   # output dim is passed separately
                         args.bn,
                         args.sc)
                for idx in range(args.n_block)
            ])

        # NOTE(review): 9 / 13 / 12 are presumably the node-feature widths of
        # the loan, history and soft views -- confirm against the data cells.
        self.blocks_loan = build_branch(9)
        self.blocks_history = build_branch(13)
        self.blocks_soft = build_branch(12)

        # Prediction head: 3 * hidden_dim -> pred_dim1 -> pred_dim2 -> pred_dim3 -> 1.
        self.pred1 = Classifier(3 * args.hidden_dim, args.pred_dim1, act = nn.ReLU())
        self.pred2 = Classifier(args.pred_dim1, args.pred_dim2, act = nn.ReLU())
        self.pred3 = Classifier(args.pred_dim2, args.pred_dim3, act = nn.ReLU())
        self.pred4 = Classifier(args.pred_dim3, 1, act = nn.Sigmoid())

    @staticmethod
    def _run_branch(blocks, features, adj):
        """Feed ``features`` through ``blocks`` in order and return the last output."""
        # An empty block list leaves `hidden` unbound, so at least one block
        # per branch is required.
        for idx, blk in enumerate(blocks):
            hidden, adj = blk(hidden if idx else features, adj)
        return hidden

    def forward(self, loan_node_feature, history_node_feature, soft_node_feature, loan_adj, history_adj, soft_adj):
        """Return the head's output for the three (node feature, adjacency) inputs."""
        loan_out = self._run_branch(self.blocks_loan, loan_node_feature, loan_adj)
        history_out = self._run_branch(self.blocks_history, history_node_feature, history_adj)
        soft_out = self._run_branch(self.blocks_soft, soft_node_feature, soft_adj)

        fused = torch.cat([loan_out, history_out, soft_out], dim = 1)
        result = fused
        for stage in (self.pred1, self.pred2, self.pred3, self.pred4):
            result = stage(result)
        return result
   

In [None]:
class GCNNet(nn.Module):
    """Three-branch graph network whose blocks use the same width throughout.

    Unlike a ``hidden_dim``-based variant, every ``GCNBlock`` in a branch is
    built with one width for all three of its dimension arguments (9 for loan,
    13 for history, 12 for soft).  The head's first stage therefore takes
    9 + 13 + 12 = 34 inputs; ``args.hidden_dim`` is not read by this class.

    ``args`` must provide ``n_block``, ``n_layer``, ``bn``, ``sc``,
    ``pred_dim1``, ``pred_dim2`` and ``pred_dim3``.

    NOTE: this notebook defines ``GCNNet`` in more than one cell; the name
    resolves to whichever of those cells was executed last.
    """

    def __init__(self, args):
        super().__init__()

        def stack(width):
            # args.n_block identical-width blocks for one branch.
            blocks = nn.ModuleList()
            for _ in range(args.n_block):
                blocks.append(GCNBlock(args.n_layer,
                                       width,
                                       width,
                                       width,   # output dim is passed separately
                                       args.bn,
                                       args.sc))
            return blocks

        # NOTE(review): presumably the node-feature widths of each view -- confirm.
        loan_dim, history_dim, soft_dim = 9, 13, 12

        self.blocks_loan = stack(loan_dim)
        self.blocks_history = stack(history_dim)
        self.blocks_soft = stack(soft_dim)

        # Head input is the concatenation of the three branch outputs (34 wide).
        self.pred1 = Classifier(loan_dim + history_dim + soft_dim, args.pred_dim1, act = nn.ReLU())
        self.pred2 = Classifier(args.pred_dim1, args.pred_dim2, act = nn.ReLU())
        self.pred3 = Classifier(args.pred_dim2, args.pred_dim3, act = nn.ReLU())
        self.pred4 = Classifier(args.pred_dim3, 1, act = nn.Sigmoid())

    def forward(self, loan_node_feature, history_node_feature, soft_node_feature, loan_adj, history_adj, soft_adj):
        """Propagate each view through its branch, concatenate, and apply the head."""
        def propagate(blocks, features, adj):
            # First block gets the raw features; later ones get the previous
            # output.  With no blocks `hidden` stays unbound, so each branch
            # must contain at least one block.
            for position, blk in enumerate(blocks):
                hidden, adj = blk(hidden if position else features, adj)
            return hidden

        loan_out = propagate(self.blocks_loan, loan_node_feature, loan_adj)
        history_out = propagate(self.blocks_history, history_node_feature, history_adj)
        soft_out = propagate(self.blocks_soft, soft_node_feature, soft_adj)

        fused = torch.cat([loan_out, history_out, soft_out], dim = 1)
        return self.pred4(self.pred3(self.pred2(self.pred1(fused))))
   

seed : 4199 -> Test Acc : 0.795, F1: 0.814 Auc : 0.864


In [None]:
# Convert the test-fold inputs to float32 CUDA tensors and test_y to an int64
# CUDA tensor.  The cell overwrites the names it reads, so it uses
# torch.as_tensor (which accepts NumPy arrays *and* existing tensors) instead
# of torch.from_numpy (which raises TypeError on a tensor); that keeps the cell
# safe to run a second time.
test_loan_node_feature = torch.as_tensor(test_loan_node_feature).float().cuda()
test_history_node_feature = torch.as_tensor(test_history_node_feature).float().cuda()
test_soft_node_feature = torch.as_tensor(test_soft_node_feature).float().cuda()
test_loan_adj = torch.as_tensor(test_loan_adj).float().cuda()
test_history_adj = torch.as_tensor(test_history_adj).float().cuda()
test_soft_adj = torch.as_tensor(test_soft_adj).float().cuda()
test_y = torch.as_tensor(test_y, dtype=torch.long).cuda()



In [None]:
### CV 1

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(122147)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Evaluate the network trained in the preceding cell.  test() is defined
# elsewhere in the notebook and is called without arguments, so it presumably
# reads `net` and the test tensors from notebook globals -- confirm.
test()

### CV 2

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(484312384)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Evaluate the network trained in the preceding cell.  test() is defined
# elsewhere in the notebook and is called without arguments, so it presumably
# reads `net` and the test tensors from notebook globals -- confirm.
test()

### CV 3

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(214831)

args = easydict.EasyDict({
    "seed": 714, "epochs": 3808, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3808 acc_train: 0.8064 Test set results: acc_test: 0.8000 F1_test: 0.8236 auc_test: 0.8640

In [None]:
# Evaluate the network trained in the preceding cell.  test() is defined
# elsewhere in the notebook and is called without arguments, so it presumably
# reads `net` and the test tensors from notebook globals -- confirm.
test()

### CV 4

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(8731147)

# This run uses lr = 0.001, unlike the neighbouring runs (0.0005).
args = easydict.EasyDict({
    "seed": 714, "epochs": 4783, "lr": 0.001, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    

### Epoch: 4783 acc_train: 0.8162 Test set results: acc_test: 0.7980 F1_test: 0.8233 auc_test: 0.8690

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(5871379)

args = easydict.EasyDict({
    "seed": 714, "epochs": 2316, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    

In [None]:
# Evaluate the network trained in the preceding cell.  test() is defined
# elsewhere in the notebook and is called without arguments, so it presumably
# reads `net` and the test tensors from notebook globals -- confirm.
test()

### CV 5

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(62912271)

args = easydict.EasyDict({
    "seed": 714, "epochs": 3023, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3023 acc_train: 0.8077 Test set results: acc_test: 0.8150 F1_test: 0.8369 auc_test: 0.8836

In [None]:
# Evaluate the network trained in the preceding cell.  test() is defined
# elsewhere in the notebook and is called without arguments, so it presumably
# reads `net` and the test tensors from notebook globals -- confirm.
test()

### CV 6

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(17314)

args = easydict.EasyDict({
    "seed": 714, "epochs": 3299, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)

### Epoch: 3299 acc_train: 0.8066 Test set results: acc_test: 0.7995 F1_test: 0.8209 auc_test: 0.8600


In [None]:
# Evaluate the network trained in the preceding cell.  test() is defined
# elsewhere in the notebook and is called without arguments, so it presumably
# reads `net` and the test tensors from notebook globals -- confirm.
test()

### CV 7

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(5122)

args = easydict.EasyDict({
    "seed": 714, "epochs": 3792, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Evaluate the network trained in the preceding cell.  test() is defined
# elsewhere in the notebook and is called without arguments, so it presumably
# reads `net` and the test tensors from notebook globals -- confirm.
test()

# CV Test 1

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(122147)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(233258)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(391814)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(488147)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(522122)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(695122)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(724414)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(822222)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(914781)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(2338471)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(3981472)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(4813579)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(9182347)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(7513587)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(821357138)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

# CV Test 2

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(821357138)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(187318)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(348731)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(513843)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(683184)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(219831218)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(484312384)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(6843123157)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(86313185131)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

In [None]:
# Seed PyTorch's global RNG before the model is built.
# NOTE(review): this literal differs from args["seed"] (714) below; whether
# train() reads args.seed is not visible here -- confirm which one is intended.
torch.manual_seed(997312584)

args = easydict.EasyDict({
    "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4,
    "n_layer": 3, "n_block": 1, "hidden_dim": 16,
    "pred_dim1": 16, "pred_dim2": 24, "pred_dim3": 8,
    "bn": True, "sc": "gc", "cuda": True,
})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Put the model on the GPU first, then build Adam over its parameters: the
# torch.optim docs advise moving a model with .cuda() before constructing
# its optimizer.
net = GCNNet(args).cuda()
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() is defined elsewhere in the notebook; it is called once per epoch.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
    
### Epoch: 3792 acc_train: 0.8101 Test set results: acc_test: 0.7915 F1_test: 0.8131 auc_test: 0.8731

# All node feature

In [None]:
sample_dataset = pd.read_csv('Data/Sample_dataset.csv')

In [None]:
all_feature_distance_matrix_exercise = np.fromfile('all_feature_distance_matrix.dat', dtype=float)
all_feature_distance_matrix_exercise = all_feature_distance_matrix_exercise.reshape(14000,14000)

In [None]:
def CV_dataset_Extraction(sample_dataset, all_feature_distance_matrix_exercise, cv = 1,
                          n_per_class = 7000, fold_size = 1000):
    """Split the dataset and its distance matrix into train/test parts for fold ``cv``.

    The split is purely positional: rows ``0 .. n_per_class-1`` are treated as
    the "Fully Paid" group and rows ``n_per_class .. 2*n_per_class-1`` as the
    "Default" group.  Fold ``cv`` holds out the ``cv``-th slice of
    ``fold_size`` rows from each group; labels are not inspected.

    NOTE: an earlier cell of this notebook defines a function with the same
    name but a three-matrix signature; running this cell rebinds the name.

    Args:
        sample_dataset: DataFrame indexed positionally via ``.iloc``.
        all_feature_distance_matrix_exercise: square array of pairwise
            distances whose row/column order matches ``sample_dataset``.
        cv: 1-based fold number, ``1 <= cv <= n_per_class // fold_size``.
        n_per_class: number of rows in each of the two groups (default 7000).
        fold_size: rows held out per group in one fold (default 1000).

    Returns:
        ``(train_dataset, test_dataset, train_matrix, test_matrix)`` where
        ``train_matrix`` is train-by-train and ``test_matrix`` is
        test-by-train (distances from each test row to each train row).
        Both DataFrames have a fresh 0..n-1 index.

    Raises:
        ValueError: if ``cv`` is outside the valid fold range.  Without this
            check ``cv = 0`` would wrap around to negative row positions and
            put test rows into the training set.
    """
    n_folds = n_per_class // fold_size
    if not 1 <= cv <= n_folds:
        raise ValueError("cv must be between 1 and %d, got %r" % (n_folds, cv))

    fold_start = (cv - 1) * fold_size
    fold_stop = cv * fold_size

    ### Train and Test indexing for Fully Paid observations
    # Built in ascending order explicitly rather than through a set
    # difference, whose iteration order is not guaranteed.
    test_Fully_paid_index = list(range(fold_start, fold_stop))
    train_Fully_paid_index = [i for i in range(n_per_class)
                              if not fold_start <= i < fold_stop]

    ### Train and Test indexing for Default observations (same slices, offset by n_per_class)
    test_Default_index = [n_per_class + i for i in test_Fully_paid_index]
    train_Default_index = [n_per_class + i for i in train_Fully_paid_index]

    train_index = train_Fully_paid_index + train_Default_index
    test_index = test_Fully_paid_index + test_Default_index

    ### Train and Test dataset partition
    train_dataset = sample_dataset.iloc[train_index, :].reset_index(drop = True)
    test_dataset = sample_dataset.iloc[test_index, :].reset_index(drop = True)

    ### Train / Test distance matrices
    # np.ix_ selects the row/column cross product in one step, avoiding the
    # large intermediate copy produced by indexing rows and then columns.
    all_feature_distance_matrix_train = all_feature_distance_matrix_exercise[np.ix_(train_index, train_index)]
    all_feature_distance_matrix_test = all_feature_distance_matrix_exercise[np.ix_(test_index, train_index)]

    return (train_dataset, test_dataset, all_feature_distance_matrix_train, all_feature_distance_matrix_test)
        

In [None]:
### Distance matrix to Adjacency matrix and Node feature matrix Generation
def Matrix_Generation(all_feature_distance_matrix_train, all_feature_distance_matrix_test, train_dataset, test_dataset, bandwidth = 10):
    ### Transformation for Train dataset
    all_feature_adjacency_matrix_train = Distance_Weight(all_feature_distance_matrix_train, bandwidth)
    
    ### Transformation for Test dataset
    all_feature_adjacency_matrix_test = Distance_Weight_Test(all_feature_distance_matrix_test, bandwidth)
 
    ### Normalize for train dataset
    train_all_adj = normalize(all_feature_adjacency_matrix_train + sp.eye(all_feature_adjacency_matrix_train.shape[0]))
    
    ### Matrix generation for Test dataset
    all_feature_adjacency_matrix_test = np.concatenate([all_feature_adjacency_matrix_train,all_feature_adjacency_matrix_test]) 
    all_feature_adjacency_matrix_test = np.concatenate((all_feature_adjacency_matrix_test, np.zeros((all_feature_adjacency_matrix_test.shape[0], all_feature_distance_matrix_test.shape[0]))), axis = 1)   
    test_all_adj = normalize(all_feature_adjacency_matrix_test + sp.eye(all_feature_adjacency_matrix_test.shape[0]))

    ### One-hot Encoding and Train X, Train Y   
    temp_dummies = pd.get_dummies(train_dataset)
    train_y = list(temp_dummies['loan_status_Charged Off'])
    base_category = temp_dummies.columns[[11, 13, 18, 19, 21, 24, 26, 29, 32, 34, 40, 43]]
    train_x = Model_matrix(train_dataset, base_category)
    train_x = train_x.drop('loan_status_Charged Off', axis = 1)

    ### Train Node feature Matrix
    DN_count_all = Default_Neighbor_count(all_feature_adjacency_matrix_train, train_y)
    train_all_node_feature = np.concatenate((DN_count_all, train_x), axis= 1)

    all_dataset = pd.concat([train_dataset, test_dataset])
    all_dataset = all_dataset.reset_index()
    all_dataset = all_dataset.drop(columns=['index'])

    temp_dummies = pd.get_dummies(all_dataset)
    test_y = list(temp_dummies['loan_status_Charged Off'])

    ### Test Node feature matrix
    all_x = Model_matrix(all_dataset, base_category)
    all_x = all_x.drop('loan_status_Charged Off', axis = 1)

    DN_count_all = Default_Neighbor_count(all_feature_adjacency_matrix_test, test_y)
    test_all_node_feature = np.concatenate((DN_count_all, all_x), axis= 1)

    
    return(train_all_adj, test_all_adj, train_all_node_feature, test_all_node_feature, base_category, train_y, test_y, DN_count_all)
    

In [None]:
# Fold 1: split the data, rescale it and build the all-feature graph inputs.
(train_dataset,
 test_dataset,
 all_feature_distance_matrix_train,
 all_feature_distance_matrix_test) = CV_dataset_Extraction(
    sample_dataset,
    all_feature_distance_matrix_exercise,
    cv = 1)

col_order = train_dataset.columns

### normalize (the test split is handled first, while train_dataset is still un-normalized)
test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)
train_dataset = Numerical_min_max(train_dataset)[col_order]
test_dataset = test_dataset[col_order]
del col_order

(train_all_adj, test_all_adj,
 train_all_node_feature, test_all_node_feature,
 base_category, train_y, test_y,
 DN_count_all) = Matrix_Generation(
    all_feature_distance_matrix_train,
    all_feature_distance_matrix_test,
    train_dataset,
    test_dataset,
    bandwidth = 20)


In [None]:
class One_block_GCNNet(nn.Module):
    """A stack of ``GCNBlock`` modules followed by a one-output ``Classifier`` with a sigmoid activation."""

    def __init__(self, args):
        """Create ``args.n_block`` GCN blocks and the prediction head.

        ``args`` must provide ``n_block``, ``n_layer``, ``in_dim``,
        ``hidden_dim``, ``bn`` and ``sc``.
        """
        super().__init__()

        # Only the first block receives in_dim; every later block receives hidden_dim.
        self.blocks_gcn = nn.ModuleList([
            GCNBlock(args.n_layer,
                     args.in_dim if block_idx == 0 else args.hidden_dim,
                     args.hidden_dim,
                     args.hidden_dim,   ### output dim, given separately
                     args.bn,
                     args.sc)
            for block_idx in range(args.n_block)
        ])

        self.pred1 = Classifier(args.hidden_dim, 1, act = nn.Sigmoid())

    def forward(self, node_feature, adj):
        """Run the blocks in order on ``(node_feature, adj)`` and return the head's output."""
        for position, gcn_block in enumerate(self.blocks_gcn):
            if position == 0:
                hidden, adj = gcn_block(node_feature, adj)
            else:
                hidden, adj = gcn_block(hidden, adj)

        return self.pred1(hidden)

In [None]:
def train(epoch, args, criterion):
    """Run one full-batch training step, then print metrics on the held-out nodes.

    Uses the notebook globals ``net``, ``optimizer``, ``test_all_node_feature``,
    ``test_all_adj`` and ``test_y`` (torch tensors).  Rows ``[0, 12000)`` are
    the training nodes; the remaining rows are the held-out test nodes.

    Parameters
    ----------
    epoch : int
        Zero-based epoch index (printed as ``epoch + 1``).
    args : object
        Unused; kept so existing calls keep working.
    criterion : callable
        Loss applied to the training outputs and labels.
    """
    n_train = 12000  # training nodes come first in the stacked train + test graph

    ### Optimisation step on the training sub-graph
    net.train()
    optimizer.zero_grad()
    outputs = net(test_all_node_feature[0:n_train, :],
                  test_all_adj[0:n_train, 0:n_train])
    # NOTE(review): outputs appears to be (N, 1) while the target is (N,);
    # recent PyTorch versions may reject that mismatch in BCELoss - confirm.
    loss = criterion(outputs, test_y[0:n_train].float())
    loss.backward()
    optimizer.step()

    ### Evaluation on the full graph; no autograd graph is needed here
    net.eval()
    with torch.no_grad():
        outputs = net(test_all_node_feature,
                      test_all_adj)
    outputs = outputs[n_train:]
    outputs_list = outputs.tolist()

    test_y_for_test = test_y[n_train:].float()
    test_y_for_test_list = test_y_for_test.tolist()

    pred = (outputs > 0.5).float()
    correct = (pred.transpose(0,1) == test_y_for_test).float().sum()
    acc_test = correct / len(test_y_for_test)
    pred = pred.tolist()

    f1_score = metrics.f1_score(test_y_for_test_list, pred, pos_label=1)
    fpr, tpr, thresholds = metrics.roc_curve(test_y_for_test_list, outputs_list, pos_label=1)
    roc_auc = metrics.auc(fpr, tpr)

    print('Epoch: {:04d}'.format(epoch+1),
          "Test set results:",
          'acc: {:.4f}'.format(acc_test),
          "Pre = {}".format(metrics.precision_score(test_y_for_test_list, pred, pos_label=1)),
          "Recall = {}".format(metrics.recall_score(test_y_for_test_list, pred, pos_label=1)),
          'F1: {:.4f}'.format(f1_score),
          'auc: {:.4f}'.format(roc_auc))

    
def test():
    """Print accuracy, precision, recall, F1 and AUC on the held-out nodes.

    Uses the notebook globals ``net``, ``test_all_node_feature``,
    ``test_all_adj`` and ``test_y``.  Rows from index 12000 onward are the
    held-out test nodes.
    """
    n_train = 12000  # training nodes come first in the stacked train + test graph

    ### Forward pass on the full graph; no autograd graph is needed for evaluation
    net.eval()
    with torch.no_grad():
        outputs = net(test_all_node_feature,
                      test_all_adj)
    outputs = outputs[n_train:]
    outputs_list = outputs.tolist()

    test_y_for_test_list = test_y[n_train:].float().tolist()
    pred = (outputs > 0.5).float().tolist()

    fpr, tpr, thresholds = metrics.roc_curve(test_y_for_test_list, outputs_list, pos_label=1)
    roc_auc = metrics.auc(fpr, tpr)

    print("Accuracy = {}".format(metrics.accuracy_score(test_y_for_test_list, pred)))
    print("Precision = {}".format(metrics.precision_score(test_y_for_test_list, pred, pos_label=1)))
    print("Recall = {}".format(metrics.recall_score(test_y_for_test_list, pred, pos_label=1)))
    print("F1 score = {}".format(metrics.f1_score(test_y_for_test_list, pred, pos_label=1)))
    print("AUC = {}".format(roc_auc))

    

In [None]:
# Fold 7: split the data, rescale it and build the all-feature graph inputs.
(train_dataset,
 test_dataset,
 all_feature_distance_matrix_train,
 all_feature_distance_matrix_test) = CV_dataset_Extraction(
    sample_dataset,
    all_feature_distance_matrix_exercise,
    cv = 7)

col_order = train_dataset.columns

### normalize (the test split is handled first, while train_dataset is still un-normalized)
test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)
train_dataset = Numerical_min_max(train_dataset)[col_order]
test_dataset = test_dataset[col_order]
del col_order

(train_all_adj, test_all_adj,
 train_all_node_feature, test_all_node_feature,
 base_category, train_y, test_y,
 DN_count_all) = Matrix_Generation(
    all_feature_distance_matrix_train,
    all_feature_distance_matrix_test,
    train_dataset,
    test_dataset,
    bandwidth = 20)


In [None]:
# Move the all-feature graph inputs and the labels onto the GPU.
test_all_node_feature, test_all_adj = (
    torch.from_numpy(array).float().cuda()
    for array in (test_all_node_feature, test_all_adj)
)
test_y = torch.tensor(test_y, dtype=torch.long).cuda()

In [None]:
# Fresh model, loss and optimiser for a run seeded with 47137.
torch.manual_seed(47137)

args = easydict.EasyDict(dict(
    seed=714, epochs=5000, lr=0.0005, weight_decay=5e-4,
    n_layer=3, n_block=1, in_dim=32, hidden_dim=16,
    bn=True, sc="gc", cuda=True,
))
args.act = nn.ReLU()

criterion = nn.BCELoss()
net = One_block_GCNNet(args)
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One call to train() per epoch; each call prints the held-out metrics.
for epoch in range(args.epochs):
    train(epoch, args, criterion)

### Epoch: 2830 acc_train: 0.8071 Test set results: acc_test: 0.8105 F1_test: 0.8300 auc_test: 0.8815


In [None]:
# Fresh model, loss and optimiser for a run seeded with 97213.
torch.manual_seed(97213)

args = easydict.EasyDict(dict(
    seed=714, epochs=5000, lr=0.0005, weight_decay=5e-4,
    n_layer=3, n_block=1, in_dim=32, hidden_dim=16,
    bn=True, sc="gc", cuda=True,
))
args.act = nn.ReLU()

criterion = nn.BCELoss()
net = One_block_GCNNet(args)
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One call to train() per epoch; each call prints the held-out metrics.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
  

In [None]:
# Fresh model, loss and optimiser for a run seeded with 214831.
torch.manual_seed(214831)

args = easydict.EasyDict(dict(
    seed=714, epochs=5000, lr=0.0005, weight_decay=5e-4,
    n_layer=3, n_block=1, in_dim=32, hidden_dim=16,
    bn=True, sc="gc", cuda=True,
))
args.act = nn.ReLU()

criterion = nn.BCELoss()
net = One_block_GCNNet(args)
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One call to train() per epoch; each call prints the held-out metrics.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
  

In [None]:
# Fresh model, loss and optimiser for a run seeded with 5871379.
torch.manual_seed(5871379)

args = easydict.EasyDict(dict(
    seed=714, epochs=5000, lr=0.0005, weight_decay=5e-4,
    n_layer=3, n_block=1, in_dim=32, hidden_dim=16,
    bn=True, sc="gc", cuda=True,
))
args.act = nn.ReLU()

criterion = nn.BCELoss()
net = One_block_GCNNet(args)
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One call to train() per epoch; each call prints the held-out metrics.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
  


In [None]:
# Fresh model, loss and optimiser for a run seeded with 62912271.
torch.manual_seed(62912271)

args = easydict.EasyDict(dict(
    seed=714, epochs=5000, lr=0.0005, weight_decay=5e-4,
    n_layer=3, n_block=1, in_dim=32, hidden_dim=16,
    bn=True, sc="gc", cuda=True,
))
args.act = nn.ReLU()

criterion = nn.BCELoss()
net = One_block_GCNNet(args)
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One call to train() per epoch; each call prints the held-out metrics.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
  



In [None]:
# Fresh model, loss and optimiser for a run seeded with 17314.
torch.manual_seed(17314)

args = easydict.EasyDict(dict(
    seed=714, epochs=5000, lr=0.0005, weight_decay=5e-4,
    n_layer=3, n_block=1, in_dim=32, hidden_dim=16,
    bn=True, sc="gc", cuda=True,
))
args.act = nn.ReLU()

criterion = nn.BCELoss()
net = One_block_GCNNet(args)
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One call to train() per epoch; each call prints the held-out metrics.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
  



In [None]:
# Fresh model, loss and optimiser for a run seeded with 5122.
torch.manual_seed(5122)

args = easydict.EasyDict(dict(
    seed=714, epochs=5000, lr=0.0005, weight_decay=5e-4,
    n_layer=3, n_block=1, in_dim=32, hidden_dim=16,
    bn=True, sc="gc", cuda=True,
))
args.act = nn.ReLU()

criterion = nn.BCELoss()
net = One_block_GCNNet(args)
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One call to train() per epoch; each call prints the held-out metrics.
for epoch in range(args.epochs):
    train(epoch, args, criterion)
  



# Only one node feature

In [None]:
def CV_dataset_Extraction(sample_dataset, loan_feature_distance_matrix_exercise, history_feature_distance_matrix_exercise, soft_feature_distance_matrix_exercise, cv = 1, n_per_class = 7000, fold_size = 1000):
    """Extract one cross-validation fold for the three per-view distance matrices.

    The rows of ``sample_dataset`` (and the rows/columns of every distance
    matrix) are expected to hold ``n_per_class`` fully-paid observations
    followed by ``n_per_class`` default observations.  Fold ``cv`` uses the
    ``cv``-th consecutive slice of ``fold_size`` rows of each class as the
    test set; every other row goes to the training set.

    Parameters
    ----------
    sample_dataset : pandas.DataFrame
        Observations, fully-paid rows first, default rows second.
    loan_feature_distance_matrix_exercise, history_feature_distance_matrix_exercise, soft_feature_distance_matrix_exercise : numpy.ndarray
        Square pairwise distance matrices aligned with ``sample_dataset``.
    cv : int
        1-based fold number, ``1 <= cv <= n_per_class // fold_size``.
    n_per_class : int
        Number of rows per class (default 7000, the original constant).
    fold_size : int
        Number of test rows taken from each class (default 1000).

    Returns
    -------
    tuple
        ``(train_dataset, test_dataset, loan_train, history_train, soft_train,
        loan_test, history_test, soft_test)``; the ``*_train`` matrices are
        train x train and the ``*_test`` matrices are test x train.

    Raises
    ------
    ValueError
        If ``fold_size`` is not positive or ``cv`` is outside the valid range
        (previously such values silently produced a wrong split).
    """
    if fold_size <= 0:
        raise ValueError("fold_size must be a positive integer")
    n_folds = n_per_class // fold_size
    if not 1 <= cv <= n_folds:
        raise ValueError("cv must be between 1 and {}, got {}".format(n_folds, cv))

    ### Positions inside one class that belong to the requested test fold
    class_positions = np.arange(n_per_class)
    in_fold = (class_positions >= (cv - 1) * fold_size) & (class_positions < cv * fold_size)

    ### Fully-paid rows come first, default rows are offset by n_per_class.
    ### Boolean masks keep the indices in ascending order without relying on
    ### set iteration order.
    test_index = np.concatenate([class_positions[in_fold], n_per_class + class_positions[in_fold]])
    train_index = np.concatenate([class_positions[~in_fold], n_per_class + class_positions[~in_fold]])

    ### Train and Test dataset partition
    train_dataset = sample_dataset.iloc[train_index, :].reset_index(drop = True)
    test_dataset = sample_dataset.iloc[test_index, :].reset_index(drop = True)

    ### np.ix_ selects rows and columns in one step, avoiding the large
    ### row-only intermediate copy of the two-step indexing.
    train_block = np.ix_(train_index, train_index)
    test_block = np.ix_(test_index, train_index)

    ### Train distance matrices
    loan_feature_distance_matrix_train = loan_feature_distance_matrix_exercise[train_block]
    history_feature_distance_matrix_train = history_feature_distance_matrix_exercise[train_block]
    soft_feature_distance_matrix_train = soft_feature_distance_matrix_exercise[train_block]

    ### Test distance matrices
    loan_feature_distance_matrix_test = loan_feature_distance_matrix_exercise[test_block]
    history_feature_distance_matrix_test = history_feature_distance_matrix_exercise[test_block]
    soft_feature_distance_matrix_test = soft_feature_distance_matrix_exercise[test_block]

    return(train_dataset, test_dataset, loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test)
    

In [None]:
### Distance matrices -> adjacency matrices and node feature matrices
def Matrix_Generation(loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test, train_dataset, test_dataset, bandwidth = 10):
    """Build the graph inputs (adjacency + node features) of the loan, history and soft views.

    For every view the train x train distances are converted with
    ``Distance_Weight`` and the test x train distances with
    ``Distance_Weight_Test``.  The "test" graph contains the training rows
    followed by the test rows; the columns that would correspond to test
    nodes are filled with zeros.  Each adjacency has the identity added
    before being passed through ``normalize``.

    Returns
    -------
    tuple
        Train adjacencies (loan, history, soft), test adjacencies, train node
        features, test node features, ``base_category``, ``train_y``,
        ``test_y`` (labels of the training rows followed by the test rows) and
        the three neighbor counts computed on the full (train + test) graphs.
    """
    label_column = 'loan_status_Charged Off'
    train_distances = (loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train)
    test_distances = (loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test)

    def with_identity_normalized(adjacency):
        # Add the identity, then hand the result to normalize.
        return normalize(adjacency + sp.eye(adjacency.shape[0]))

    def full_graph(adjacency_train, adjacency_test_rows, n_test):
        # Test rows go under the training block; test-node columns are zeros.
        stacked_rows = np.concatenate([adjacency_train, adjacency_test_rows])
        return np.concatenate((stacked_rows, np.zeros((stacked_rows.shape[0], n_test))), axis = 1)

    ### Distances -> edge weights (all train views first, then all test views)
    train_adjacencies = [Distance_Weight(distances, bandwidth) for distances in train_distances]
    test_row_blocks = [Distance_Weight_Test(distances, bandwidth) for distances in test_distances]

    ### Normalize for train dataset
    train_loan_adj, train_history_adj, train_soft_adj = [
        with_identity_normalized(adjacency) for adjacency in train_adjacencies
    ]

    ### Matrix generation for Test dataset
    full_adjacencies = [
        full_graph(adjacency, test_rows, distances.shape[0])
        for adjacency, test_rows, distances in zip(train_adjacencies, test_row_blocks, test_distances)
    ]
    del test_row_blocks  # no longer needed once embedded in the full graphs
    test_loan_adj, test_history_adj, test_soft_adj = [
        with_identity_normalized(adjacency) for adjacency in full_adjacencies
    ]

    ### One-hot encoding, training labels and training design matrix
    train_dummies = pd.get_dummies(train_dataset)
    train_y = list(train_dummies[label_column])
    # Positional picks of the dummy columns handed to Model_matrix as base categories.
    base_category = train_dummies.columns[[11, 13, 18, 19, 21, 24, 26, 29, 32, 34, 40, 43]]
    train_x = Model_matrix(train_dataset, base_category).drop(label_column, axis = 1)

    ### Train node feature matrices: neighbor counts followed by the design matrix
    train_loan_node_feature, train_history_node_feature, train_soft_node_feature = [
        np.concatenate((Default_Neighbor_count(adjacency, train_y), train_x), axis= 1)
        for adjacency in train_adjacencies
    ]

    ### Same construction on train + test rows
    all_dataset = pd.concat([train_dataset, test_dataset]).reset_index().drop(columns=['index'])
    test_y = list(pd.get_dummies(all_dataset)[label_column])
    all_x = Model_matrix(all_dataset, base_category).drop(label_column, axis = 1)

    ### Test node feature matrices
    DN_count_loan, DN_count_history, DN_count_soft = [
        Default_Neighbor_count(adjacency, test_y) for adjacency in full_adjacencies
    ]
    test_loan_node_feature = np.concatenate((DN_count_loan, all_x), axis= 1)
    test_history_node_feature = np.concatenate((DN_count_history, all_x), axis= 1)
    test_soft_node_feature = np.concatenate((DN_count_soft, all_x), axis= 1)

    return(train_loan_adj, train_history_adj, train_soft_adj, test_loan_adj, test_history_adj, test_soft_adj, train_loan_node_feature, train_history_node_feature, train_soft_node_feature, test_loan_node_feature, test_history_node_feature, test_soft_node_feature, base_category, train_y, test_y, DN_count_loan, DN_count_history, DN_count_soft)
    

In [None]:
# Fold 1: split the data, rescale it and build the per-view graph inputs.
(train_dataset,
 test_dataset,
 loan_feature_distance_matrix_train,
 history_feature_distance_matrix_train,
 soft_feature_distance_matrix_train,
 loan_feature_distance_matrix_test,
 history_feature_distance_matrix_test,
 soft_feature_distance_matrix_test) = CV_dataset_Extraction(
    sample_dataset,
    loan_feature_distance_matrix_exercise,
    history_feature_distance_matrix_exercise,
    soft_feature_distance_matrix_exercise,
    cv = 1)

col_order = train_dataset.columns

### normalize (the test split is handled first, while train_dataset is still un-normalized)
test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)
train_dataset = Numerical_min_max(train_dataset)[col_order]
test_dataset = test_dataset[col_order]
del col_order

(train_loan_adj, train_history_adj, train_soft_adj,
 test_loan_adj, test_history_adj, test_soft_adj,
 train_loan_node_feature, train_history_node_feature, train_soft_node_feature,
 test_loan_node_feature, test_history_node_feature, test_soft_node_feature,
 base_category, train_y, test_y,
 DN_count_loan, DN_count_history, DN_count_soft) = Matrix_Generation(
    loan_feature_distance_matrix_train,
    history_feature_distance_matrix_train,
    soft_feature_distance_matrix_train,
    loan_feature_distance_matrix_test,
    history_feature_distance_matrix_test,
    soft_feature_distance_matrix_test,
    train_dataset,
    test_dataset,
    bandwidth = 20)


In [None]:
def train(epoch, args, criterion):
    """Run one full-batch training step on the soft-feature graph, then print held-out metrics.

    Uses the notebook globals ``net``, ``optimizer``, ``test_soft_node_feature``,
    ``test_soft_adj`` and ``test_y`` (torch tensors).  Rows ``[0, 12000)`` are
    the training nodes; the remaining rows are the held-out test nodes.

    Parameters
    ----------
    epoch : int
        Zero-based epoch index (printed as ``epoch + 1``).
    args : object
        Unused; kept so existing calls keep working.
    criterion : callable
        Loss applied to the training outputs and labels.
    """
    n_train = 12000  # training nodes come first in the stacked train + test graph

    ### Optimisation step on the training sub-graph
    net.train()
    optimizer.zero_grad()
    outputs = net(test_soft_node_feature[0:n_train, :],
                  test_soft_adj[0:n_train, 0:n_train])
    # NOTE(review): outputs appears to be (N, 1) while the target is (N,);
    # recent PyTorch versions may reject that mismatch in BCELoss - confirm.
    loss = criterion(outputs, test_y[0:n_train].float())
    loss.backward()
    optimizer.step()

    ### Evaluation on the full graph; no autograd graph is needed here
    net.eval()
    with torch.no_grad():
        outputs = net(test_soft_node_feature,
                      test_soft_adj)
    outputs = outputs[n_train:]
    outputs_list = outputs.tolist()

    test_y_for_test = test_y[n_train:].float()
    test_y_for_test_list = test_y_for_test.tolist()

    pred = (outputs > 0.5).float()
    correct = (pred.transpose(0,1) == test_y_for_test).float().sum()
    acc_test = correct / len(test_y_for_test)
    pred = pred.tolist()

    f1_score = metrics.f1_score(test_y_for_test_list, pred, pos_label=1)
    fpr, tpr, thresholds = metrics.roc_curve(test_y_for_test_list, outputs_list, pos_label=1)
    roc_auc = metrics.auc(fpr, tpr)

    print('Epoch: {:04d}'.format(epoch+1),
          "Test set results:",
          'acc: {:.4f}'.format(acc_test),
          "Pre = {}".format(metrics.precision_score(test_y_for_test_list, pred, pos_label=1)),
          "Recall = {}".format(metrics.recall_score(test_y_for_test_list, pred, pos_label=1)),
          'F1: {:.4f}'.format(f1_score),
          'auc: {:.4f}'.format(roc_auc))



    

In [None]:
# Fold 1: split the data, rescale it and build the per-view graph inputs.
(train_dataset,
 test_dataset,
 loan_feature_distance_matrix_train,
 history_feature_distance_matrix_train,
 soft_feature_distance_matrix_train,
 loan_feature_distance_matrix_test,
 history_feature_distance_matrix_test,
 soft_feature_distance_matrix_test) = CV_dataset_Extraction(
    sample_dataset,
    loan_feature_distance_matrix_exercise,
    history_feature_distance_matrix_exercise,
    soft_feature_distance_matrix_exercise,
    cv = 1)

col_order = train_dataset.columns

### normalize (the test split is handled first, while train_dataset is still un-normalized)
test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)
train_dataset = Numerical_min_max(train_dataset)[col_order]
test_dataset = test_dataset[col_order]
del col_order

(train_loan_adj, train_history_adj, train_soft_adj,
 test_loan_adj, test_history_adj, test_soft_adj,
 train_loan_node_feature, train_history_node_feature, train_soft_node_feature,
 test_loan_node_feature, test_history_node_feature, test_soft_node_feature,
 base_category, train_y, test_y,
 DN_count_loan, DN_count_history, DN_count_soft) = Matrix_Generation(
    loan_feature_distance_matrix_train,
    history_feature_distance_matrix_train,
    soft_feature_distance_matrix_train,
    loan_feature_distance_matrix_test,
    history_feature_distance_matrix_test,
    soft_feature_distance_matrix_test,
    train_dataset,
    test_dataset,
    bandwidth = 20)

# Move the soft-feature graph inputs and the labels onto the GPU.
test_soft_node_feature, test_soft_adj = (
    torch.from_numpy(array).float().cuda()
    for array in (test_soft_node_feature, test_soft_adj)
)
test_y = torch.tensor(test_y, dtype=torch.long).cuda()

In [None]:
# Fresh model, loss and optimiser for a run seeded with 47137.
torch.manual_seed(47137)

args = easydict.EasyDict(dict(
    seed=714, epochs=5000, lr=0.0005, weight_decay=5e-4,
    n_layer=3, n_block=1, in_dim=32, hidden_dim=16,
    bn=True, sc="gc", cuda=True,
))
args.act = nn.ReLU()

criterion = nn.BCELoss()
net = One_block_GCNNet(args)
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One call to train() per epoch; each call prints the held-out metrics.
for epoch in range(args.epochs):
    train(epoch, args, criterion)

### Epoch: 2830 acc_train: 0.8071 Test set results: acc_test: 0.8105 F1_test: 0.8300 auc_test: 0.8815


In [None]:
# Fold 2: split the data, rescale it and build the per-view graph inputs.
(train_dataset,
 test_dataset,
 loan_feature_distance_matrix_train,
 history_feature_distance_matrix_train,
 soft_feature_distance_matrix_train,
 loan_feature_distance_matrix_test,
 history_feature_distance_matrix_test,
 soft_feature_distance_matrix_test) = CV_dataset_Extraction(
    sample_dataset,
    loan_feature_distance_matrix_exercise,
    history_feature_distance_matrix_exercise,
    soft_feature_distance_matrix_exercise,
    cv = 2)

col_order = train_dataset.columns

### normalize (the test split is handled first, while train_dataset is still un-normalized)
test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)
train_dataset = Numerical_min_max(train_dataset)[col_order]
test_dataset = test_dataset[col_order]
del col_order

(train_loan_adj, train_history_adj, train_soft_adj,
 test_loan_adj, test_history_adj, test_soft_adj,
 train_loan_node_feature, train_history_node_feature, train_soft_node_feature,
 test_loan_node_feature, test_history_node_feature, test_soft_node_feature,
 base_category, train_y, test_y,
 DN_count_loan, DN_count_history, DN_count_soft) = Matrix_Generation(
    loan_feature_distance_matrix_train,
    history_feature_distance_matrix_train,
    soft_feature_distance_matrix_train,
    loan_feature_distance_matrix_test,
    history_feature_distance_matrix_test,
    soft_feature_distance_matrix_test,
    train_dataset,
    test_dataset,
    bandwidth = 20)

# Move the soft-feature graph inputs and the labels onto the GPU.
test_soft_node_feature, test_soft_adj = (
    torch.from_numpy(array).float().cuda()
    for array in (test_soft_node_feature, test_soft_adj)
)
test_y = torch.tensor(test_y, dtype=torch.long).cuda()

In [None]:
# Fresh model, loss and optimiser for a run seeded with 97213.
torch.manual_seed(97213)

args = easydict.EasyDict(dict(
    seed=714, epochs=5000, lr=0.0005, weight_decay=5e-4,
    n_layer=3, n_block=1, in_dim=32, hidden_dim=16,
    bn=True, sc="gc", cuda=True,
))
args.act = nn.ReLU()

criterion = nn.BCELoss()
net = One_block_GCNNet(args)
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One call to train() per epoch; each call prints the held-out metrics.
for epoch in range(args.epochs):
    train(epoch, args, criterion)

### Epoch: 2830 acc_train: 0.8071 Test set results: acc_test: 0.8105 F1_test: 0.8300 auc_test: 0.8815


In [None]:
# Fold 3: split the data, rescale it and build the per-view graph inputs.
(train_dataset,
 test_dataset,
 loan_feature_distance_matrix_train,
 history_feature_distance_matrix_train,
 soft_feature_distance_matrix_train,
 loan_feature_distance_matrix_test,
 history_feature_distance_matrix_test,
 soft_feature_distance_matrix_test) = CV_dataset_Extraction(
    sample_dataset,
    loan_feature_distance_matrix_exercise,
    history_feature_distance_matrix_exercise,
    soft_feature_distance_matrix_exercise,
    cv = 3)

col_order = train_dataset.columns

### normalize (the test split is handled first, while train_dataset is still un-normalized)
test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)
train_dataset = Numerical_min_max(train_dataset)[col_order]
test_dataset = test_dataset[col_order]
del col_order

(train_loan_adj, train_history_adj, train_soft_adj,
 test_loan_adj, test_history_adj, test_soft_adj,
 train_loan_node_feature, train_history_node_feature, train_soft_node_feature,
 test_loan_node_feature, test_history_node_feature, test_soft_node_feature,
 base_category, train_y, test_y,
 DN_count_loan, DN_count_history, DN_count_soft) = Matrix_Generation(
    loan_feature_distance_matrix_train,
    history_feature_distance_matrix_train,
    soft_feature_distance_matrix_train,
    loan_feature_distance_matrix_test,
    history_feature_distance_matrix_test,
    soft_feature_distance_matrix_test,
    train_dataset,
    test_dataset,
    bandwidth = 20)

# Move the soft-feature graph inputs and the labels onto the GPU.
test_soft_node_feature, test_soft_adj = (
    torch.from_numpy(array).float().cuda()
    for array in (test_soft_node_feature, test_soft_adj)
)
test_y = torch.tensor(test_y, dtype=torch.long).cuda()

In [None]:
# Fresh model, loss and optimiser for a run seeded with 214831.
torch.manual_seed(214831)

args = easydict.EasyDict(dict(
    seed=714, epochs=5000, lr=0.0005, weight_decay=5e-4,
    n_layer=3, n_block=1, in_dim=32, hidden_dim=16,
    bn=True, sc="gc", cuda=True,
))
args.act = nn.ReLU()

criterion = nn.BCELoss()
net = One_block_GCNNet(args)
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One call to train() per epoch; each call prints the held-out metrics.
for epoch in range(args.epochs):
    train(epoch, args, criterion)

### Epoch: 2830 acc_train: 0.8071 Test set results: acc_test: 0.8105 F1_test: 0.8300 auc_test: 0.8815


In [None]:
# Fold 4: split the data, rescale it and build the per-view graph inputs.
(train_dataset,
 test_dataset,
 loan_feature_distance_matrix_train,
 history_feature_distance_matrix_train,
 soft_feature_distance_matrix_train,
 loan_feature_distance_matrix_test,
 history_feature_distance_matrix_test,
 soft_feature_distance_matrix_test) = CV_dataset_Extraction(
    sample_dataset,
    loan_feature_distance_matrix_exercise,
    history_feature_distance_matrix_exercise,
    soft_feature_distance_matrix_exercise,
    cv = 4)

col_order = train_dataset.columns

### normalize (the test split is handled first, while train_dataset is still un-normalized)
test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)
train_dataset = Numerical_min_max(train_dataset)[col_order]
test_dataset = test_dataset[col_order]
del col_order

(train_loan_adj, train_history_adj, train_soft_adj,
 test_loan_adj, test_history_adj, test_soft_adj,
 train_loan_node_feature, train_history_node_feature, train_soft_node_feature,
 test_loan_node_feature, test_history_node_feature, test_soft_node_feature,
 base_category, train_y, test_y,
 DN_count_loan, DN_count_history, DN_count_soft) = Matrix_Generation(
    loan_feature_distance_matrix_train,
    history_feature_distance_matrix_train,
    soft_feature_distance_matrix_train,
    loan_feature_distance_matrix_test,
    history_feature_distance_matrix_test,
    soft_feature_distance_matrix_test,
    train_dataset,
    test_dataset,
    bandwidth = 20)

# Move the soft-feature graph inputs and the labels onto the GPU.
test_soft_node_feature, test_soft_adj = (
    torch.from_numpy(array).float().cuda()
    for array in (test_soft_node_feature, test_soft_adj)
)
test_y = torch.tensor(test_y, dtype=torch.long).cuda()

In [None]:
# Fresh model, loss and optimiser for a run seeded with 5871379.
torch.manual_seed(5871379)

args = easydict.EasyDict(dict(
    seed=714, epochs=5000, lr=0.0005, weight_decay=5e-4,
    n_layer=3, n_block=1, in_dim=32, hidden_dim=16,
    bn=True, sc="gc", cuda=True,
))
args.act = nn.ReLU()

criterion = nn.BCELoss()
net = One_block_GCNNet(args)
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
net.cuda()

# One call to train() per epoch; each call prints the held-out metrics.
for epoch in range(args.epochs):
    train(epoch, args, criterion)

### Epoch: 2830 acc_train: 0.8071 Test set results: acc_test: 0.8105 F1_test: 0.8300 auc_test: 0.8815


In [None]:
# Fold 5: split the samples and the three full distance matrices into train and test parts.
(
    train_dataset, test_dataset,
    loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train,
    loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test,
) = CV_dataset_Extraction(
    sample_dataset,
    loan_feature_distance_matrix_exercise,
    history_feature_distance_matrix_exercise,
    soft_feature_distance_matrix_exercise,
    cv=5,
)

# Scale the test split against the still-unscaled train split first, then scale the
# train split itself; both are put back into the original column order.
col_order = train_dataset.columns
test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)[col_order]
train_dataset = Numerical_min_max(train_dataset)[col_order]
del col_order

# Adjacency matrices, node-feature matrices and labels for this fold.
(
    train_loan_adj, train_history_adj, train_soft_adj,
    test_loan_adj, test_history_adj, test_soft_adj,
    train_loan_node_feature, train_history_node_feature, train_soft_node_feature,
    test_loan_node_feature, test_history_node_feature, test_soft_node_feature,
    base_category, train_y, test_y,
    DN_count_loan, DN_count_history, DN_count_soft,
) = Matrix_Generation(
    loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train,
    loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test,
    train_dataset, test_dataset,
    bandwidth=20,
)

# Only the soft-feature test tensors and the test labels are moved to the GPU in this cell.
test_soft_node_feature = torch.from_numpy(test_soft_node_feature).float().cuda()
test_soft_adj = torch.from_numpy(test_soft_adj).float().cuda()
test_y = torch.tensor(test_y, dtype=torch.long).cuda()

In [None]:
torch.manual_seed(62912271)

# NOTE(review): args.seed is not read anywhere in this cell; the torch RNG is seeded by the
# manual_seed call above. Confirm whether One_block_GCNNet / train consume args.seed.
args = easydict.EasyDict({ "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4, "n_layer": 3,
                           "n_block": 1 , "in_dim": 32, "hidden_dim": 16 ,
                           "bn": True ,"sc": "gc","cuda": True})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Move the model to the GPU *before* constructing the optimizer, so the optimizer is
# built over the parameters as they live on the training device.
net = One_block_GCNNet(args)
net.cuda()

optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() receives only epoch/args/criterion; it presumably picks up net, optimizer and
# the fold's data from notebook globals — verify against its definition.
for epoch in range(args.epochs):
    train(epoch, args, criterion)

# NOTE(review): the result line below appears verbatim under every fold's training cell,
# so it was probably recorded for one run only — re-record it for this fold.
### Epoch: 2830 acc_train: 0.8071 Test set results: acc_test: 0.8105 F1_test: 0.8300 auc_test: 0.8815


In [None]:
# Fold 6: split the samples and the three full distance matrices into train and test parts.
(
    train_dataset, test_dataset,
    loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train,
    loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test,
) = CV_dataset_Extraction(
    sample_dataset,
    loan_feature_distance_matrix_exercise,
    history_feature_distance_matrix_exercise,
    soft_feature_distance_matrix_exercise,
    cv=6,
)

# Scale the test split against the still-unscaled train split first, then scale the
# train split itself; both are put back into the original column order.
col_order = train_dataset.columns
test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)[col_order]
train_dataset = Numerical_min_max(train_dataset)[col_order]
del col_order

# Adjacency matrices, node-feature matrices and labels for this fold.
(
    train_loan_adj, train_history_adj, train_soft_adj,
    test_loan_adj, test_history_adj, test_soft_adj,
    train_loan_node_feature, train_history_node_feature, train_soft_node_feature,
    test_loan_node_feature, test_history_node_feature, test_soft_node_feature,
    base_category, train_y, test_y,
    DN_count_loan, DN_count_history, DN_count_soft,
) = Matrix_Generation(
    loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train,
    loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test,
    train_dataset, test_dataset,
    bandwidth=20,
)

# Only the soft-feature test tensors and the test labels are moved to the GPU in this cell.
test_soft_node_feature = torch.from_numpy(test_soft_node_feature).float().cuda()
test_soft_adj = torch.from_numpy(test_soft_adj).float().cuda()
test_y = torch.tensor(test_y, dtype=torch.long).cuda()

In [None]:
torch.manual_seed(17314)

# NOTE(review): args.seed is not read anywhere in this cell; the torch RNG is seeded by the
# manual_seed call above. Confirm whether One_block_GCNNet / train consume args.seed.
args = easydict.EasyDict({ "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4, "n_layer": 3,
                           "n_block": 1 , "in_dim": 32, "hidden_dim": 16 ,
                           "bn": True ,"sc": "gc","cuda": True})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Move the model to the GPU *before* constructing the optimizer, so the optimizer is
# built over the parameters as they live on the training device.
net = One_block_GCNNet(args)
net.cuda()

optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() receives only epoch/args/criterion; it presumably picks up net, optimizer and
# the fold's data from notebook globals — verify against its definition.
for epoch in range(args.epochs):
    train(epoch, args, criterion)

# NOTE(review): the result line below appears verbatim under every fold's training cell,
# so it was probably recorded for one run only — re-record it for this fold.
### Epoch: 2830 acc_train: 0.8071 Test set results: acc_test: 0.8105 F1_test: 0.8300 auc_test: 0.8815


In [None]:
# Fold 7: split the samples and the three full distance matrices into train and test parts.
(
    train_dataset, test_dataset,
    loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train,
    loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test,
) = CV_dataset_Extraction(
    sample_dataset,
    loan_feature_distance_matrix_exercise,
    history_feature_distance_matrix_exercise,
    soft_feature_distance_matrix_exercise,
    cv=7,
)

# Scale the test split against the still-unscaled train split first, then scale the
# train split itself; both are put back into the original column order.
col_order = train_dataset.columns
test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)[col_order]
train_dataset = Numerical_min_max(train_dataset)[col_order]
del col_order

# Adjacency matrices, node-feature matrices and labels for this fold.
(
    train_loan_adj, train_history_adj, train_soft_adj,
    test_loan_adj, test_history_adj, test_soft_adj,
    train_loan_node_feature, train_history_node_feature, train_soft_node_feature,
    test_loan_node_feature, test_history_node_feature, test_soft_node_feature,
    base_category, train_y, test_y,
    DN_count_loan, DN_count_history, DN_count_soft,
) = Matrix_Generation(
    loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train,
    loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test,
    train_dataset, test_dataset,
    bandwidth=20,
)

# Only the soft-feature test tensors and the test labels are moved to the GPU in this cell.
test_soft_node_feature = torch.from_numpy(test_soft_node_feature).float().cuda()
test_soft_adj = torch.from_numpy(test_soft_adj).float().cuda()
test_y = torch.tensor(test_y, dtype=torch.long).cuda()

In [None]:
torch.manual_seed(5122)

# NOTE(review): args.seed is not read anywhere in this cell; the torch RNG is seeded by the
# manual_seed call above. Confirm whether One_block_GCNNet / train consume args.seed.
args = easydict.EasyDict({ "seed": 714, "epochs": 5000, "lr": 0.0005, "weight_decay": 5e-4, "n_layer": 3,
                           "n_block": 1 , "in_dim": 32, "hidden_dim": 16 ,
                           "bn": True ,"sc": "gc","cuda": True})
args.act = nn.ReLU()

criterion = nn.BCELoss()

# Move the model to the GPU *before* constructing the optimizer, so the optimizer is
# built over the parameters as they live on the training device.
net = One_block_GCNNet(args)
net.cuda()

optimizer = optim.Adam(net.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# train() receives only epoch/args/criterion; it presumably picks up net, optimizer and
# the fold's data from notebook globals — verify against its definition.
for epoch in range(args.epochs):
    train(epoch, args, criterion)

# NOTE(review): the result line below appears verbatim under every fold's training cell,
# so it was probably recorded for one run only — re-record it for this fold.
### Epoch: 2830 acc_train: 0.8071 Test set results: acc_test: 0.8105 F1_test: 0.8300 auc_test: 0.8815


# XAI

In [None]:
from Code.Graph_composition import *

### Data load
sample_dataset = pd.read_csv('Data/Sample_dataset.csv')

# Feature groups are taken by column position: 0-6 loan, 7-15 history, 16-20 soft.
loan_features, history_features, soft_features = (
    sample_dataset.columns[0:7],
    sample_dataset.columns[7:16],
    sample_dataset.columns[16:21],
)

# One sub-frame per feature group.
loan_features_dataset = sample_dataset[loan_features]
history_features_dataset = sample_dataset[history_features]
soft_features_dataset = sample_dataset[soft_features]

In [None]:
xai_loan_feature = []

for i in range(len(loan_features)):
    loan_features_dataset = sample_dataset[loan_features] 
    del loan_features_dataset[loan_features[i]]
    loan_feature_graph = Numerical_min_max(loan_features_dataset)
    loan_feature_graph_discreted = Discretization(loan_feature_graph)
    loan_feature_name, loan_feature_distance_by_factor_exercise = Algo_distance(loan_feature_graph_discreted)
    loan_feature_weight_vector_exercise = Weight(loan_feature_distance_by_factor_exercise)
    loan_feature_distance_matrix_exercise = Distance(loan_feature_graph, loan_feature_weight_vector_exercise, loan_feature_distance_by_factor_exercise, loan_feature_name)
    xai_loan_feature = [xai_loan_feature, loan_feature_distance_matrix_exercise]


In [None]:
np.save('xai_loan_feature', xai_loan_feature1)


In [None]:
xai_history_feature = []

for i in range(len(history_features)):
    history_features_dataset = sample_dataset[history_features] 
    del history_features_dataset[history_features[i]]
    history_feature_graph = Numerical_min_max(history_features_dataset)
    history_feature_graph_discreted = Discretization(history_feature_graph)
    history_feature_name, history_feature_distance_by_factor_exercise = Algo_distance(history_feature_graph_discreted)
    history_feature_weight_vector_exercise = Weight(history_feature_distance_by_factor_exercise)
    history_feature_distance_matrix_exercise = Distance(history_feature_graph, history_feature_weight_vector_exercise, history_feature_distance_by_factor_exercise, history_feature_name)
    xai_history_feature = [xai_history_feature, history_feature_distance_matrix_exercise]


In [None]:
np.save('xai_history_feature', xai_history_feature1)

In [None]:
xai_soft_feature = []

for i in range(0,2):
    soft_features_dataset = sample_dataset[soft_features] 
    del soft_features_dataset[soft_features[i]]
    soft_feature_graph = Numerical_min_max(soft_features_dataset)
    soft_feature_graph_discreted = Discretization(soft_feature_graph)
    soft_feature_name, soft_feature_distance_by_factor_exercise = Algo_distance(soft_feature_graph_discreted)
    soft_feature_weight_vector_exercise = Weight(soft_feature_distance_by_factor_exercise)
    soft_feature_distance_matrix_exercise = Distance(soft_feature_graph, soft_feature_weight_vector_exercise, soft_feature_distance_by_factor_exercise, soft_feature_name)
    xai_soft_feature = [xai_soft_feature, soft_feature_distance_matrix_exercise]


In [None]:
soft_features_dataset = sample_dataset[soft_features] 
del soft_features_dataset[soft_features[2]]    
soft_feature_name, soft_feature_distance_by_factor_exercise = Algo_distance(soft_features_dataset)
soft_feature_distance_matrix_exercise = Distance_only_category(soft_features_dataset, soft_feature_distance_by_factor_exercise, soft_feature_name)
xai_soft_feature = [xai_soft_feature, soft_feature_distance_matrix_exercise]

In [None]:
for i in range(3,5):
    soft_features_dataset = sample_dataset[soft_features] 
    del soft_features_dataset[soft_features[i]]
    soft_feature_graph = Numerical_min_max(soft_features_dataset)
    soft_feature_graph_discreted = Discretization(soft_feature_graph)
    soft_feature_name, soft_feature_distance_by_factor_exercise = Algo_distance(soft_feature_graph_discreted)
    soft_feature_weight_vector_exercise = Weight(soft_feature_distance_by_factor_exercise)
    soft_feature_distance_matrix_exercise = Distance(soft_feature_graph, soft_feature_weight_vector_exercise, soft_feature_distance_by_factor_exercise, soft_feature_name)
    xai_soft_feature = [xai_soft_feature, soft_feature_distance_matrix_exercise]


In [None]:
np.save('xai_soft_feature', xai_soft_feature1)

In [None]:
xai_soft_feature[0][0][0][0][1]

In [None]:
xai_loan_feature[0][0][0][0][0][0][1]

In [None]:
xai_loan_feature1 = [xai_loan_feature[0][0][0][0][0][0][1],xai_loan_feature[0][0][0][0][0][1], xai_loan_feature[0][0][0][0][1], xai_loan_feature[0][0][0][1], xai_loan_feature[0][0][1], xai_loan_feature[0][1], xai_loan_feature[1]]

In [None]:
xai_history_feature1 = [xai_history_feature[0][0][0][0][0][0][0][0][1], xai_history_feature[0][0][0][0][0][0][0][1],xai_history_feature[0][0][0][0][0][0][1],xai_history_feature[0][0][0][0][0][1], xai_history_feature[0][0][0][0][1], xai_history_feature[0][0][0][1], xai_history_feature[0][0][1], xai_history_feature[0][1], xai_history_feature[1]]

In [None]:
xai_soft_feature1 = [xai_soft_feature[0][0][0][0][1], xai_soft_feature[0][0][0][1], xai_soft_feature[0][0][1], xai_soft_feature[0][1], xai_soft_feature[1]]

# XAI Model

In [None]:
# Baseline predictions that the feature-importance loops below compare against.
# NOTE(review): this cell depends on whatever net / test_* tensors are currently in the
# kernel (which fold, and whether they are already CUDA tensors is not visible here).
# Confirm they match the inputs used in the importance loops before trusting the MAE values.
net.eval()  # same mode as the perturbed forward passes, which also call net.eval()
with torch.no_grad():  # inference only; no autograd graph needed
    outputs = net(test_loan_node_feature[0:12000, :], test_history_node_feature[0:12000, :], test_soft_node_feature[0:12000, :],
                  test_loan_adj[0:12000, 0:12000], test_history_adj[0:12000, 0:12000], test_soft_adj[0:12000, 0:12000])


In [None]:
def Model_matrix_without_one_feature(dataset, base_category, feature1, feature2):
    """One-hot encode ``dataset`` and report which encoded columns belong to ``feature1``.

    Args:
        dataset: DataFrame to encode with ``pd.get_dummies``.
        base_category: encoded column names to drop; only those actually present
            are removed.
        feature1: the feature whose encoded column names are returned.
        feature2: a companion column encoded together with ``feature1`` and removed
            again straight away. It must come out of ``get_dummies`` under its own
            name (i.e. not be expanded into dummies), otherwise the ``del`` below
            raises ``KeyError``.

    Returns:
        ``(encoded, colnames, total_colnames)``: the encoded frame without the base
        categories, the encoded column names of ``feature1`` (without base categories),
        and all column names of the encoded frame.
    """
    temp_dummies1 = pd.get_dummies(dataset)
    temp_dummies2 = pd.get_dummies(dataset[[feature2, feature1]])
    del temp_dummies2[feature2]

    # Use the explicit set operation: `Index & Index` no longer means set
    # intersection in current pandas (it is an element-wise logical AND).
    base_category_for_dataset1 = temp_dummies1.columns.intersection(base_category)
    temp_dummies1 = temp_dummies1.drop(columns = base_category_for_dataset1)
    base_category_for_dataset2 = temp_dummies2.columns.intersection(base_category)
    temp_dummies2 = temp_dummies2.drop(columns = base_category_for_dataset2)

    total_colnames = temp_dummies1.columns
    colnames = temp_dummies2.columns

    return(temp_dummies1, colnames, total_colnames)

In [None]:
### Distance matrix to Adjacency matrix and Node feature matrix Generation (loan-feature XAI)
def XAI_Matrix_Generation(loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test, train_dataset, test_dataset, bandwidth = 10, xai_index = 0):
    """Build train-side adjacency and node-feature matrices for the loan-feature XAI run.

    Args:
        *_feature_distance_matrix_train: train distance matrices for the loan, history
            and soft feature groups; each is turned into an adjacency matrix with
            ``Distance_Weight(matrix, bandwidth)``.
        *_feature_distance_matrix_test, test_dataset: accepted for signature
            compatibility; not used by this function.
        train_dataset: training frame; columns 0-6 are treated as loan features,
            7-15 as history features and 16-20 as soft features.
        bandwidth: second argument passed to ``Distance_Weight``.
        xai_index: position (within the loan features) of the feature under study.

    Returns:
        A 15-tuple: the three normalized train adjacency matrices, the three train
        node-feature matrices, ``base_category``, ``train_y``, ``test_y``, the three
        ``Default_Neighbor_count`` results, ``masked_feature``, ``total_colnames``
        and ``col_index``.

    Note:
        ``test_y`` is not computed here; it is read from the enclosing (notebook-global)
        scope and passed through, so it must exist before this function is called.
    """
    ### Distance -> adjacency (train side)
    loan_feature_adjacency_matrix_train = Distance_Weight(loan_feature_distance_matrix_train, bandwidth)
    history_feature_adjacency_matrix_train = Distance_Weight(history_feature_distance_matrix_train, bandwidth)
    soft_feature_adjacency_matrix_train = Distance_Weight(soft_feature_distance_matrix_train, bandwidth)

    ### Add the identity (self-connections) and normalize
    train_loan_adj = normalize(loan_feature_adjacency_matrix_train + sp.eye(loan_feature_adjacency_matrix_train.shape[0]))
    train_history_adj = normalize(history_feature_adjacency_matrix_train + sp.eye(history_feature_adjacency_matrix_train.shape[0]))
    train_soft_adj = normalize(soft_feature_adjacency_matrix_train + sp.eye(soft_feature_adjacency_matrix_train.shape[0]))

    ### Labels and base categories
    temp_dummies = pd.get_dummies(train_dataset)
    train_y = list(temp_dummies['loan_status_Charged Off'])
    # Positional pick of encoded columns — presumably one reference level per
    # categorical variable; re-check these indices if the dataset's columns change.
    base_category = temp_dummies.columns[[11, 13, 18, 19, 21, 24, 26, 29, 32, 34, 40, 43]]

    ### Feature groups
    loan_features = train_dataset.columns[0:7]
    train_loan_features_dataset = train_dataset[loan_features]
    history_features = train_dataset.columns[7:16]
    train_history_features_dataset = train_dataset[history_features]
    soft_features = train_dataset.columns[16:21]
    train_soft_features_dataset = train_dataset[soft_features]

    masked_feature = loan_features[xai_index]
    # Companion column for Model_matrix_without_one_feature; it must differ from the
    # masked feature (assumes loan columns 0 and 2 are numeric — TODO confirm).
    numeric_feature = loan_features[2] if xai_index == 0 else loan_features[0]

    ### Train node-feature matrices
    loan_x, colnames, total_colnames = Model_matrix_without_one_feature(train_loan_features_dataset, base_category, masked_feature, numeric_feature)
    # +1 because DN_count_loan is concatenated in front of loan_x below
    # (assumes it contributes exactly one column — TODO confirm).
    col_index = [list(loan_x.columns).index(x) + 1 for x in colnames]
    history_x = Model_matrix(train_history_features_dataset, base_category)
    soft_x = Model_matrix(train_soft_features_dataset, base_category)

    DN_count_loan = Default_Neighbor_count(loan_feature_adjacency_matrix_train, train_y)
    train_loan_node_feature = np.concatenate((DN_count_loan, loan_x), axis= 1)
    DN_count_history = Default_Neighbor_count(history_feature_adjacency_matrix_train, train_y)
    train_history_node_feature = np.concatenate((DN_count_history, history_x), axis= 1)
    DN_count_soft = Default_Neighbor_count(soft_feature_adjacency_matrix_train, train_y)
    train_soft_node_feature = np.concatenate((DN_count_soft, soft_x), axis= 1)

    return(train_loan_adj, train_history_adj, train_soft_adj, train_loan_node_feature, train_history_node_feature, train_soft_node_feature, base_category, train_y, test_y, DN_count_loan, DN_count_history, DN_count_soft, masked_feature, total_colnames, col_index)
    





# Loan Feature Importance

In [None]:
# Reload the leave-one-out loan distance matrices written by np.save('xai_loan_feature', ...)
# earlier in this notebook; the loop below indexes them as xai_loan_feature[i].
xai_loan_feature = np.load('xai_loan_feature.npy')

In [None]:
# Mean absolute difference between the baseline outputs and the perturbed outputs.
criterion = nn.L1Loss()
net.eval()

# NOTE(review): `outputs` is not computed in this cell; it is the value left by the
# "XAI Model" cell. Confirm it was produced from the same fold and inputs as the tensors
# used here, otherwise the difference is not attributable to the perturbation alone.
for i in range(len(xai_loan_feature)):
    # Same split as fold 5, but with the loan distance matrix built without loan feature i.
    (
        train_dataset, test_dataset,
        loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train,
        loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test,
    ) = CV_dataset_Extraction(sample_dataset, xai_loan_feature[i], history_feature_distance_matrix_exercise, soft_feature_distance_matrix_exercise,
            cv = 5)
    col_order = train_dataset.columns

    ### normalize
    test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)
    train_dataset = Numerical_min_max(train_dataset)
    train_dataset = train_dataset[col_order]
    test_dataset = test_dataset[col_order]
    del col_order

    # Built once per masked feature; each perturbation below works on a copy, so there
    # is no need to regenerate the matrices for every column.
    (
        train_loan_adj, train_history_adj, train_soft_adj,
        train_loan_node_feature, train_history_node_feature, train_soft_node_feature,
        base_category, train_y, test_y,
        DN_count_loan, DN_count_history, DN_count_soft,
        masked_feature, total_colnames, col_index,
    ) = XAI_Matrix_Generation(loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test, train_dataset, test_dataset,
    bandwidth = 20, xai_index = i)

    print(masked_feature)

    # Inputs that stay the same for every perturbed column: upload to the GPU once.
    test_history_node_feature = torch.from_numpy(train_history_node_feature).float().cuda()
    test_soft_node_feature = torch.from_numpy(train_soft_node_feature).float().cuda()
    test_loan_adj = torch.from_numpy(train_loan_adj).float().cuda()
    test_history_adj = torch.from_numpy(train_history_adj).float().cuda()
    test_soft_adj = torch.from_numpy(train_soft_adj).float().cuda()

    for indexs in col_index:
        ### -0.1
        col_index1 = indexs - 1  # col_index is offset by one relative to total_colnames
        print('                %s'%total_colnames[col_index1])
        # Copy first: shifting the shared array in place would accumulate across columns.
        train_loan_node_feature1 = train_loan_node_feature.copy()
        train_loan_node_feature1[:,indexs] = train_loan_node_feature1[:,indexs] - 0.1
        test_loan_node_feature = torch.from_numpy(train_loan_node_feature1).float().cuda()
        with torch.no_grad():
            outputs1 = net(test_loan_node_feature, test_history_node_feature, test_soft_node_feature,
                           test_loan_adj, test_history_adj, test_soft_adj)

        loss = criterion(outputs, outputs1)
        print('        MAE_loss: {:.4f}'.format(loss))

    print('--------------------------')

# History Features

In [None]:
# Reload the leave-one-out history distance matrices written by np.save('xai_history_feature', ...)
# earlier in this notebook; the loop below indexes them as xai_history_feature[i].
xai_history_feature = np.load('xai_history_feature.npy')

In [None]:
### Distance matrix to Adjacency matrix and Node feature matrix Generation (history-feature XAI)
def XAI_Matrix_Generation(loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test, train_dataset, test_dataset, bandwidth = 10, xai_index = 0):
    """Build train-side adjacency and node-feature matrices for the history-feature XAI run.

    This definition replaces any earlier ``XAI_Matrix_Generation`` in the kernel.

    Args:
        *_feature_distance_matrix_train: train distance matrices for the loan, history
            and soft feature groups; each is turned into an adjacency matrix with
            ``Distance_Weight(matrix, bandwidth)``.
        *_feature_distance_matrix_test, test_dataset: accepted for signature
            compatibility; not used by this function.
        train_dataset: training frame; columns 0-6 are treated as loan features,
            7-15 as history features and 16-20 as soft features.
        bandwidth: second argument passed to ``Distance_Weight``.
        xai_index: position (within the history features) of the feature under study.

    Returns:
        A 15-tuple: the three normalized train adjacency matrices, the three train
        node-feature matrices, ``base_category``, ``train_y``, ``test_y``, the three
        ``Default_Neighbor_count`` results, ``masked_feature``, ``total_colnames``
        and ``col_index``.

    Note:
        ``test_y`` is not computed here; it is read from the enclosing (notebook-global)
        scope and passed through, so it must exist before this function is called.
    """
    ### Distance -> adjacency (train side)
    loan_feature_adjacency_matrix_train = Distance_Weight(loan_feature_distance_matrix_train, bandwidth)
    history_feature_adjacency_matrix_train = Distance_Weight(history_feature_distance_matrix_train, bandwidth)
    soft_feature_adjacency_matrix_train = Distance_Weight(soft_feature_distance_matrix_train, bandwidth)

    ### Add the identity (self-connections) and normalize
    train_loan_adj = normalize(loan_feature_adjacency_matrix_train + sp.eye(loan_feature_adjacency_matrix_train.shape[0]))
    train_history_adj = normalize(history_feature_adjacency_matrix_train + sp.eye(history_feature_adjacency_matrix_train.shape[0]))
    train_soft_adj = normalize(soft_feature_adjacency_matrix_train + sp.eye(soft_feature_adjacency_matrix_train.shape[0]))

    ### Labels and base categories
    temp_dummies = pd.get_dummies(train_dataset)
    train_y = list(temp_dummies['loan_status_Charged Off'])
    # Positional pick of encoded columns — presumably one reference level per
    # categorical variable; re-check these indices if the dataset's columns change.
    base_category = temp_dummies.columns[[11, 13, 18, 19, 21, 24, 26, 29, 32, 34, 40, 43]]

    ### Feature groups
    loan_features = train_dataset.columns[0:7]
    train_loan_features_dataset = train_dataset[loan_features]
    history_features = train_dataset.columns[7:16]
    train_history_features_dataset = train_dataset[history_features]
    soft_features = train_dataset.columns[16:21]
    train_soft_features_dataset = train_dataset[soft_features]

    # Use the xai_index argument, not a loop variable from the calling cell.
    masked_feature = history_features[xai_index]
    # Companion column for Model_matrix_without_one_feature; it must differ from the
    # masked feature (assumes history columns 1 and 5 are numeric — TODO confirm).
    numeric_feature = history_features[5] if xai_index == 1 else history_features[1]

    ### Train node-feature matrices
    loan_x = Model_matrix(train_loan_features_dataset, base_category)
    history_x, colnames, total_colnames = Model_matrix_without_one_feature(train_history_features_dataset, base_category, masked_feature, numeric_feature)
    # +1 because DN_count_history is concatenated in front of history_x below
    # (assumes it contributes exactly one column — TODO confirm).
    col_index = [list(history_x.columns).index(x) + 1 for x in colnames]
    soft_x = Model_matrix(train_soft_features_dataset, base_category)

    DN_count_loan = Default_Neighbor_count(loan_feature_adjacency_matrix_train, train_y)
    train_loan_node_feature = np.concatenate((DN_count_loan, loan_x), axis= 1)
    DN_count_history = Default_Neighbor_count(history_feature_adjacency_matrix_train, train_y)
    train_history_node_feature = np.concatenate((DN_count_history, history_x), axis= 1)
    DN_count_soft = Default_Neighbor_count(soft_feature_adjacency_matrix_train, train_y)
    train_soft_node_feature = np.concatenate((DN_count_soft, soft_x), axis= 1)

    return(train_loan_adj, train_history_adj, train_soft_adj, train_loan_node_feature, train_history_node_feature, train_soft_node_feature, base_category, train_y, test_y, DN_count_loan, DN_count_history, DN_count_soft, masked_feature, total_colnames, col_index)
    





In [None]:
# Defined here so this cell does not depend on the loan-importance cell having run:
# mean absolute difference between the baseline outputs and the perturbed outputs.
criterion = nn.L1Loss()
net.eval()

# NOTE(review): `outputs` is not computed in this cell; it is the value left by the
# "XAI Model" cell. Confirm it was produced from the same fold and inputs as the tensors
# used here, otherwise the difference is not attributable to the perturbation alone.
for i in range(len(xai_history_feature)):
    # Same split as fold 5, but with the history distance matrix built without history feature i.
    (
        train_dataset, test_dataset,
        loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train,
        loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test,
    ) = CV_dataset_Extraction(sample_dataset, loan_feature_distance_matrix_exercise, xai_history_feature[i], soft_feature_distance_matrix_exercise,
            cv = 5)
    col_order = train_dataset.columns

    ### normalize
    test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)
    train_dataset = Numerical_min_max(train_dataset)
    train_dataset = train_dataset[col_order]
    test_dataset = test_dataset[col_order]
    del col_order

    # Built once per masked feature; each perturbation below works on a copy, so there
    # is no need to regenerate the matrices for every column.
    (
        train_loan_adj, train_history_adj, train_soft_adj,
        train_loan_node_feature, train_history_node_feature, train_soft_node_feature,
        base_category, train_y, test_y,
        DN_count_loan, DN_count_history, DN_count_soft,
        masked_feature, total_colnames, col_index,
    ) = XAI_Matrix_Generation(loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test, train_dataset, test_dataset,
    bandwidth = 20, xai_index = i)

    print(masked_feature)

    # Inputs that stay the same for every perturbed column: upload to the GPU once.
    test_loan_node_feature = torch.from_numpy(train_loan_node_feature).float().cuda()
    test_soft_node_feature = torch.from_numpy(train_soft_node_feature).float().cuda()
    test_loan_adj = torch.from_numpy(train_loan_adj).float().cuda()
    test_history_adj = torch.from_numpy(train_history_adj).float().cuda()
    test_soft_adj = torch.from_numpy(train_soft_adj).float().cuda()

    for indexs in col_index:
        ### -0.1
        col_index1 = indexs - 1  # col_index is offset by one relative to total_colnames
        print('                %s'%total_colnames[col_index1])
        # Copy first: shifting the shared array in place would accumulate across columns.
        train_history_node_feature1 = train_history_node_feature.copy()
        train_history_node_feature1[:,indexs] = train_history_node_feature1[:,indexs] - 0.1
        test_history_node_feature = torch.from_numpy(train_history_node_feature1).float().cuda()
        with torch.no_grad():
            outputs1 = net(test_loan_node_feature, test_history_node_feature, test_soft_node_feature,
                           test_loan_adj, test_history_adj, test_soft_adj)

        loss = criterion(outputs, outputs1)
        print('        MAE_loss: {:.4f}'.format(loss))

    print('--------------------------')
    

# Soft features

In [None]:
# Reload the leave-one-out soft distance matrices written by np.save('xai_soft_feature', ...)
# earlier in this notebook; the loop below indexes them as xai_soft_feature[i].
xai_soft_feature = np.load('xai_soft_feature.npy')

In [None]:
### Distance matrix to Adjacency matrix and Node feature matrix Generation (soft-feature XAI)
def XAI_Matrix_Generation(loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test, train_dataset, test_dataset, bandwidth = 10, xai_index = 0):
    """Build train-side adjacency and node-feature matrices for the soft-feature XAI run.

    This definition replaces any earlier ``XAI_Matrix_Generation`` in the kernel.

    Args:
        *_feature_distance_matrix_train: train distance matrices for the loan, history
            and soft feature groups; each is turned into an adjacency matrix with
            ``Distance_Weight(matrix, bandwidth)``.
        *_feature_distance_matrix_test, test_dataset: accepted for signature
            compatibility; not used by this function.
        train_dataset: training frame; columns 0-6 are treated as loan features,
            7-15 as history features and 16-20 as soft features.
        bandwidth: second argument passed to ``Distance_Weight``.
        xai_index: position (within the soft features) of the feature under study.

    Returns:
        A 15-tuple: the three normalized train adjacency matrices, the three train
        node-feature matrices, ``base_category``, ``train_y``, ``test_y``, the three
        ``Default_Neighbor_count`` results, ``masked_feature``, ``total_colnames``
        and ``col_index``.

    Note:
        ``test_y`` is not computed here; it is read from the enclosing (notebook-global)
        scope and passed through, so it must exist before this function is called.
    """
    ### Distance -> adjacency (train side)
    loan_feature_adjacency_matrix_train = Distance_Weight(loan_feature_distance_matrix_train, bandwidth)
    history_feature_adjacency_matrix_train = Distance_Weight(history_feature_distance_matrix_train, bandwidth)
    soft_feature_adjacency_matrix_train = Distance_Weight(soft_feature_distance_matrix_train, bandwidth)

    ### Add the identity (self-connections) and normalize
    train_loan_adj = normalize(loan_feature_adjacency_matrix_train + sp.eye(loan_feature_adjacency_matrix_train.shape[0]))
    train_history_adj = normalize(history_feature_adjacency_matrix_train + sp.eye(history_feature_adjacency_matrix_train.shape[0]))
    train_soft_adj = normalize(soft_feature_adjacency_matrix_train + sp.eye(soft_feature_adjacency_matrix_train.shape[0]))

    ### Labels and base categories
    temp_dummies = pd.get_dummies(train_dataset)
    train_y = list(temp_dummies['loan_status_Charged Off'])
    # Positional pick of encoded columns — presumably one reference level per
    # categorical variable; re-check these indices if the dataset's columns change.
    base_category = temp_dummies.columns[[11, 13, 18, 19, 21, 24, 26, 29, 32, 34, 40, 43]]

    ### Feature groups
    loan_features = train_dataset.columns[0:7]
    train_loan_features_dataset = train_dataset[loan_features]
    history_features = train_dataset.columns[7:16]
    train_history_features_dataset = train_dataset[history_features]
    soft_features = train_dataset.columns[16:21]
    train_soft_features_dataset = train_dataset[soft_features]

    # Use the xai_index argument, not a loop variable from the calling cell.
    masked_feature = soft_features[xai_index]
    # Companion column for Model_matrix_without_one_feature. It is always soft column 2,
    # so this function is not meant for xai_index == 2 (the calling loop skips it).
    numeric_feature = soft_features[2]

    ### Train node-feature matrices
    loan_x = Model_matrix(train_loan_features_dataset, base_category)
    history_x = Model_matrix(train_history_features_dataset, base_category)
    soft_x, colnames, total_colnames = Model_matrix_without_one_feature(train_soft_features_dataset, base_category, masked_feature, numeric_feature)
    # +1 because DN_count_soft is concatenated in front of soft_x below
    # (assumes it contributes exactly one column — TODO confirm).
    col_index = [list(soft_x.columns).index(x) + 1 for x in colnames]

    DN_count_loan = Default_Neighbor_count(loan_feature_adjacency_matrix_train, train_y)
    train_loan_node_feature = np.concatenate((DN_count_loan, loan_x), axis= 1)
    DN_count_history = Default_Neighbor_count(history_feature_adjacency_matrix_train, train_y)
    train_history_node_feature = np.concatenate((DN_count_history, history_x), axis= 1)
    DN_count_soft = Default_Neighbor_count(soft_feature_adjacency_matrix_train, train_y)
    train_soft_node_feature = np.concatenate((DN_count_soft, soft_x), axis= 1)

    return(train_loan_adj, train_history_adj, train_soft_adj, train_loan_node_feature, train_history_node_feature, train_soft_node_feature, base_category, train_y, test_y, DN_count_loan, DN_count_history, DN_count_soft, masked_feature, total_colnames, col_index)
    

In [None]:
# Defined here so this cell does not depend on the loan-importance cell having run:
# mean absolute difference between the baseline outputs and the perturbed outputs.
criterion = nn.L1Loss()
net.eval()

# NOTE(review): `outputs` is not computed in this cell; it is the value left by the
# "XAI Model" cell. Confirm it was produced from the same fold and inputs as the tensors
# used here, otherwise the difference is not attributable to the perturbation alone.
# Soft feature index 2 is skipped in this loop.
for i in [0,1,3,4]:
    # Same split as fold 5, but with the soft distance matrix built without soft feature i.
    (
        train_dataset, test_dataset,
        loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train,
        loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test,
    ) = CV_dataset_Extraction(sample_dataset, loan_feature_distance_matrix_exercise, history_feature_distance_matrix_exercise, xai_soft_feature[i],
            cv = 5)
    col_order = train_dataset.columns

    ### normalize
    test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)
    train_dataset = Numerical_min_max(train_dataset)
    train_dataset = train_dataset[col_order]
    test_dataset = test_dataset[col_order]
    del col_order

    # Built once per masked feature; each perturbation below works on a copy, so there
    # is no need to regenerate the matrices for every column.
    (
        train_loan_adj, train_history_adj, train_soft_adj,
        train_loan_node_feature, train_history_node_feature, train_soft_node_feature,
        base_category, train_y, test_y,
        DN_count_loan, DN_count_history, DN_count_soft,
        masked_feature, total_colnames, col_index,
    ) = XAI_Matrix_Generation(loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test, train_dataset, test_dataset,
    bandwidth = 20, xai_index = i)

    print(masked_feature)

    # Inputs that stay the same for every perturbed column: upload to the GPU once.
    test_loan_node_feature = torch.from_numpy(train_loan_node_feature).float().cuda()
    test_history_node_feature = torch.from_numpy(train_history_node_feature).float().cuda()
    test_loan_adj = torch.from_numpy(train_loan_adj).float().cuda()
    test_history_adj = torch.from_numpy(train_history_adj).float().cuda()
    test_soft_adj = torch.from_numpy(train_soft_adj).float().cuda()

    for indexs in col_index:
        ### -0.1
        col_index1 = indexs - 1  # col_index is offset by one relative to total_colnames
        print('                %s'%total_colnames[col_index1])
        # Copy first: shifting the shared array in place would accumulate across columns.
        train_soft_node_feature1 = train_soft_node_feature.copy()
        train_soft_node_feature1[:,indexs] = train_soft_node_feature1[:,indexs] - 0.1
        test_soft_node_feature = torch.from_numpy(train_soft_node_feature1).float().cuda()
        with torch.no_grad():
            outputs1 = net(test_loan_node_feature, test_history_node_feature, test_soft_node_feature,
                           test_loan_adj, test_history_adj, test_soft_adj)

        loss = criterion(outputs, outputs1)
        print('        MAE_loss: {:.4f}'.format(loss))

    print('--------------------------')
    

In [None]:
### Distance matrix to Adjacency matrix and Node feature matrix Generation (XAI variant)
def XAI_Matrix_Generation(loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test, train_dataset, test_dataset, bandwidth = 10, xai_index = 0):
    """Build train-side adjacency and node-feature matrices for the XAI perturbation runs.

    Parameters
    ----------
    loan_feature_distance_matrix_train, history_feature_distance_matrix_train,
    soft_feature_distance_matrix_train :
        Train distance matrices, converted with ``Distance_Weight``.
    loan_feature_distance_matrix_test, history_feature_distance_matrix_test,
    soft_feature_distance_matrix_test :
        Test distance matrices, converted with ``Distance_Weight_Test``.
    train_dataset, test_dataset : pandas.DataFrame
        Columns 0:7 are used as loan features, 7:16 as history features and
        16:21 as soft features; ``pd.get_dummies`` must yield a
        ``'loan_status_Charged Off'`` column.
    bandwidth :
        Forwarded unchanged to ``Distance_Weight`` / ``Distance_Weight_Test``.
    xai_index : int
        Position inside the soft-feature columns (16:21) of the feature that is
        reported back as ``masked_feature``.

    Returns
    -------
    tuple
        ``(train_loan_adj, train_history_adj, train_soft_adj,
        train_loan_node_feature, train_history_node_feature,
        train_soft_node_feature, base_category, train_y, test_y,
        DN_count_loan, DN_count_history, DN_count_soft, masked_feature)``.
        ``test_y`` holds the labels of train followed by test rows, built the
        same way ``Matrix_Generation`` builds it.
    """
    ### Transformation for Train dataset
    loan_feature_adjacency_matrix_train = Distance_Weight(loan_feature_distance_matrix_train, bandwidth)
    history_feature_adjacency_matrix_train = Distance_Weight(history_feature_distance_matrix_train, bandwidth)
    soft_feature_adjacency_matrix_train = Distance_Weight(soft_feature_distance_matrix_train, bandwidth)
    
    ### Transformation for Test dataset
    # NOTE(review): these three results are not used by anything below; kept
    # so the helper calls (and any error they raise) stay as before -- confirm
    # whether they can be dropped.
    loan_feature_adjacency_matrix_test = Distance_Weight_Test(loan_feature_distance_matrix_test, bandwidth)
    history_feature_adjacency_matrix_test = Distance_Weight_Test(history_feature_distance_matrix_test, bandwidth)
    soft_feature_adjacency_matrix_test = Distance_Weight_Test(soft_feature_distance_matrix_test, bandwidth)

    ### Normalize for train dataset (identity added first, i.e. self-loops)
    train_loan_adj = normalize(loan_feature_adjacency_matrix_train + sp.eye(loan_feature_adjacency_matrix_train.shape[0]))
    train_history_adj = normalize(history_feature_adjacency_matrix_train + sp.eye(history_feature_adjacency_matrix_train.shape[0]))
    train_soft_adj = normalize(soft_feature_adjacency_matrix_train + sp.eye(soft_feature_adjacency_matrix_train.shape[0]))

    ### One-hot Encoding and Train X, Train Y   
    temp_dummies = pd.get_dummies(train_dataset)
    train_y = list(temp_dummies['loan_status_Charged Off'])
    # Positional pick of the dummy columns passed to Model_matrix as base categories.
    base_category = temp_dummies.columns[[11, 13, 18, 19, 21, 24, 26, 29, 32, 34, 40, 43]]
    # NOTE(review): train_x is computed but never used or returned.
    train_x = Model_matrix(train_dataset, base_category)
    train_x = train_x.drop('loan_status_Charged Off', axis = 1)

    ### Test labels: train rows followed by test rows, as in Matrix_Generation.
    # Previously `test_y` was returned without ever being assigned here, so the
    # return either raised NameError or leaked a stale notebook global.
    all_dataset = pd.concat([train_dataset, test_dataset])
    all_dataset = all_dataset.reset_index()
    all_dataset = all_dataset.drop(columns=['index'])
    test_y = list(pd.get_dummies(all_dataset)['loan_status_Charged Off'])

    ### Train Node feature Matrix
    loan_features = train_dataset.columns[0:7]
    train_loan_features_dataset = train_dataset[loan_features] 
    history_features = train_dataset.columns[7:16]
    train_history_features_dataset = train_dataset[history_features] 
    soft_features = train_dataset.columns[16:21]
    # Use the xai_index argument; the old code read the notebook-global `i`,
    # silently ignoring the parameter.
    masked_feature = soft_features[xai_index]
    train_soft_features_dataset = train_dataset[soft_features] 
    
    loan_x = Model_matrix(train_loan_features_dataset, base_category)
    history_x = Model_matrix(train_history_features_dataset, base_category)
    soft_x = Model_matrix(train_soft_features_dataset, base_category)
    # NOTE(review): the perturbed column is hard-coded to 'annual_inc' and does
    # not follow `masked_feature` -- confirm this is intended.
    soft_x['annual_inc'] = soft_x['annual_inc'] - 0.1
    # Each node-feature matrix is the default-neighbour count followed by the
    # encoded features of that group.
    DN_count_loan = Default_Neighbor_count(loan_feature_adjacency_matrix_train, train_y)
    train_loan_node_feature = np.concatenate((DN_count_loan, loan_x), axis= 1)
    DN_count_history = Default_Neighbor_count(history_feature_adjacency_matrix_train, train_y)
    train_history_node_feature = np.concatenate((DN_count_history, history_x), axis= 1)
    DN_count_soft = Default_Neighbor_count(soft_feature_adjacency_matrix_train, train_y)
    train_soft_node_feature = np.concatenate((DN_count_soft, soft_x), axis= 1)

    return(train_loan_adj, train_history_adj, train_soft_adj, train_loan_node_feature, train_history_node_feature, train_soft_node_feature, base_category, train_y, test_y, DN_count_loan, DN_count_history, DN_count_soft, masked_feature)
    

In [None]:
# XAI run for soft feature index i (only index 2 here): rebuild the CV split with
# xai_soft_feature[i] passed in the soft distance-matrix slot, regenerate the
# train-side matrices, run the network on them and print the L1 loss against
# the reference predictions.
# NOTE(review): `xai_soft_feature`, `net`, `criterion` and `outputs` are not
# defined in this cell; they must already exist in the kernel.
for i in [2]:
    train_dataset, test_dataset, loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test = CV_dataset_Extraction(sample_dataset, loan_feature_distance_matrix_exercise, history_feature_distance_matrix_exercise, xai_soft_feature[i], 
            cv = 5)
    # Remember the column order so it can be restored after normalisation.
    col_order = train_dataset.columns

    ### normalize
    # The test set is scaled first, while train_dataset is still the
    # un-normalised frame; only afterwards is train_dataset overwritten.
    test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)
    train_dataset = Numerical_min_max(train_dataset)
    train_dataset = train_dataset[col_order]
    test_dataset = test_dataset[col_order]
    del col_order
    
    train_loan_adj, train_history_adj, train_soft_adj, train_loan_node_feature, train_history_node_feature, train_soft_node_feature, base_category, train_y, test_y, DN_count_loan, DN_count_history, DN_count_soft, masked_feature = XAI_Matrix_Generation(loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test, train_dataset, test_dataset, 
    bandwidth = 20, xai_index = i)
    
    # The test_* tensors below are built from the train_* arrays: the network
    # is evaluated on the training graph, not on held-out rows.
    test_loan_node_feature = torch.from_numpy(train_loan_node_feature).float().cuda()
    test_history_node_feature = torch.from_numpy(train_history_node_feature).float().cuda()
    test_soft_node_feature = torch.from_numpy(train_soft_node_feature).float().cuda()
    test_loan_adj = torch.from_numpy(train_loan_adj).float().cuda()
    test_history_adj = torch.from_numpy(train_history_adj).float().cuda()
    test_soft_adj = torch.from_numpy(train_soft_adj).float().cuda()
    
    print(masked_feature)
    net.eval()
    outputs1 = net(test_loan_node_feature, test_history_node_feature, test_soft_node_feature, 
                   test_loan_adj, test_history_adj, test_soft_adj)
        
    # Compare against `outputs` (presumably the unperturbed predictions from an
    # earlier cell -- TODO confirm).
    loss = criterion(outputs, outputs1)
    print('        MAE_loss: {:.4f}'.format(loss))

    

# Network features

In [None]:
### Build adjacency matrices and node-feature matrices from the distance matrices
def Matrix_Generation(loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test, train_dataset, test_dataset, bandwidth = 10):
    """Produce train and train+test graph inputs for the loan, history and soft networks.

    For each of the three feature groups the train distance matrix goes through
    ``Distance_Weight`` and the test one through ``Distance_Weight_Test``.  The
    "test" graph is the train adjacency with the test rows appended and zero
    columns added for the test nodes.  Both graphs get an identity matrix added
    and are passed through ``normalize``.

    Node features are the ``Default_Neighbor_count`` output followed by the
    ``Model_matrix`` encoding of dataset columns 0:7 (loan), 7:16 (history) and
    16:21 (soft).  The test-side variants are computed on train and test rows
    concatenated.

    Returns an 18-tuple: the three train adjacencies, the three test
    adjacencies, the three train node-feature matrices, the three test
    node-feature matrices, ``base_category``, ``train_y``, ``test_y`` and the
    three default-neighbour counts computed on the combined graph.
    """
    # Column ranges of the loan / history / soft feature groups.
    group_slices = ((0, 7), (7, 16), (16, 21))

    def _with_self_loops(graph):
        # Add the identity (self-loops) and normalise.
        return normalize(graph + sp.eye(graph.shape[0]))

    def _node_features(frame, graphs, labels):
        # Encode every feature group first, then prepend its neighbour count.
        subsets = [frame[frame.columns[start:stop]] for start, stop in group_slices]
        encoded = [Model_matrix(subset, base_category) for subset in subsets]
        counts = []
        features = []
        for graph, x in zip(graphs, encoded):
            neighbor_count = Default_Neighbor_count(graph, labels)
            counts.append(neighbor_count)
            features.append(np.concatenate((neighbor_count, x), axis= 1))
        return counts, features

    train_distances = (loan_feature_distance_matrix_train,
                       history_feature_distance_matrix_train,
                       soft_feature_distance_matrix_train)
    test_distances = (loan_feature_distance_matrix_test,
                      history_feature_distance_matrix_test,
                      soft_feature_distance_matrix_test)

    # Distance -> adjacency, train graphs first, then the test rows.
    train_graphs = [Distance_Weight(distance, bandwidth) for distance in train_distances]
    test_rows = [Distance_Weight_Test(distance, bandwidth) for distance in test_distances]

    train_loan_adj, train_history_adj, train_soft_adj = [_with_self_loops(graph) for graph in train_graphs]

    # Append the test rows under the train graph, then pad zero columns so the
    # combined matrix has one column per train+test node.
    stacked = [np.concatenate([train_graph, rows]) for train_graph, rows in zip(train_graphs, test_rows)]
    full_graphs = [
        np.concatenate((rows, np.zeros((rows.shape[0], distance.shape[0]))), axis = 1)
        for rows, distance in zip(stacked, test_distances)
    ]
    test_loan_adj, test_history_adj, test_soft_adj = [_with_self_loops(graph) for graph in full_graphs]

    # Labels and base categories come from the one-hot encoded training frame.
    train_dummies = pd.get_dummies(train_dataset)
    train_y = list(train_dummies['loan_status_Charged Off'])
    base_category = train_dummies.columns[[11, 13, 18, 19, 21, 24, 26, 29, 32, 34, 40, 43]]
    # Full design matrix without the label column; the result is not used further.
    Model_matrix(train_dataset, base_category).drop('loan_status_Charged Off', axis = 1)

    _, train_features = _node_features(train_dataset, train_graphs, train_y)
    train_loan_node_feature, train_history_node_feature, train_soft_node_feature = train_features

    # Train rows followed by test rows, re-indexed from zero.
    all_dataset = pd.concat([train_dataset, test_dataset]).reset_index().drop(columns=['index'])
    test_y = list(pd.get_dummies(all_dataset)['loan_status_Charged Off'])

    combined_counts, test_features = _node_features(all_dataset, full_graphs, test_y)
    DN_count_loan, DN_count_history, DN_count_soft = combined_counts
    test_loan_node_feature, test_history_node_feature, test_soft_node_feature = test_features

    return(train_loan_adj, train_history_adj, train_soft_adj, test_loan_adj, test_history_adj, test_soft_adj, train_loan_node_feature, train_history_node_feature, train_soft_node_feature, test_loan_node_feature, test_history_node_feature, test_soft_node_feature, base_category, train_y, test_y, DN_count_loan, DN_count_history, DN_count_soft)
    





In [None]:
# Rebuild the CV split (cv = 5) with the original soft distance matrix,
# normalise it and regenerate all adjacency / node-feature matrices.
train_dataset, test_dataset, loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test = CV_dataset_Extraction(sample_dataset, loan_feature_distance_matrix_exercise, history_feature_distance_matrix_exercise, soft_feature_distance_matrix_exercise, 
            cv = 5)
# Remember the column order so it can be restored after normalisation.
col_order = train_dataset.columns

    ### normalize
# The test set is scaled first, while train_dataset is still un-normalised;
# only afterwards is train_dataset itself overwritten.
test_dataset = Numerical_min_max_for_test(train_dataset, test_dataset)
train_dataset = Numerical_min_max(train_dataset)
train_dataset = train_dataset[col_order]
test_dataset = test_dataset[col_order]
del col_order
    
train_loan_adj, train_history_adj, train_soft_adj, test_loan_adj, test_history_adj, test_soft_adj, train_loan_node_feature, train_history_node_feature, train_soft_node_feature, test_loan_node_feature, test_history_node_feature, test_soft_node_feature, base_category, train_y, test_y, DN_count_loan, DN_count_history, DN_count_soft = Matrix_Generation(loan_feature_distance_matrix_train, history_feature_distance_matrix_train, soft_feature_distance_matrix_train, loan_feature_distance_matrix_test, history_feature_distance_matrix_test, soft_feature_distance_matrix_test, train_dataset, test_dataset, 
    bandwidth = 20)

# The test_*_adj values returned above are overwritten here with GPU tensors
# built from the TRAIN adjacencies.
test_loan_adj = torch.from_numpy(train_loan_adj).float().cuda()
test_history_adj = torch.from_numpy(train_history_adj).float().cuda()
test_soft_adj = torch.from_numpy(train_soft_adj).float().cuda()

    

In [None]:
# Display the current value of test_loan_node_feature (depends on which cell last assigned it).
test_loan_node_feature

In [None]:
# L1 loss used to compare perturbed predictions with the reference `outputs`.
criterion = nn.L1Loss()
    
### loan_network
# Shift column 0 of the loan node features by -0.1 (column 0 is the block that
# Matrix_Generation fills from Default_Neighbor_count) and measure how far the
# predictions move. History and soft inputs are left untouched.
# NOTE(review): `net` and `outputs` must already exist in the kernel.
print('loan_network')
test_history_node_feature = torch.from_numpy(train_history_node_feature).float().cuda()
test_soft_node_feature = torch.from_numpy(train_soft_node_feature).float().cuda()
# Work on a copy: a plain assignment only aliases the array, so the in-place
# subtraction would also alter train_loan_node_feature, which the later
# history/soft cells use as the unperturbed input, and the shift would
# accumulate every time this cell is re-run.
train_loan_node_feature1 = train_loan_node_feature.copy()
train_loan_node_feature1[:,0] = train_loan_node_feature1[:,0] - 0.1
test_loan_node_feature = torch.from_numpy(train_loan_node_feature1).float().cuda()


net.eval()
outputs1 = net(test_loan_node_feature, test_history_node_feature, test_soft_node_feature, 
               test_loan_adj, test_history_adj, test_soft_adj)
loss = criterion(outputs, outputs1)
print('        MAE_loss: {:.4f}'.format(loss))
     


In [None]:
### history_network
# Shift column 0 of the history node features by -0.1 (column 0 is the block
# that Matrix_Generation fills from Default_Neighbor_count) and measure how far
# the predictions move. Loan and soft inputs are left untouched.
# NOTE(review): `net`, `criterion` and `outputs` must already exist in the kernel.
print('history_network')
test_loan_node_feature = torch.from_numpy(train_loan_node_feature).float().cuda()
test_soft_node_feature = torch.from_numpy(train_soft_node_feature).float().cuda()
# Work on a copy: a plain assignment only aliases the array, so the in-place
# subtraction would also alter train_history_node_feature, which other cells
# use as the unperturbed input, and the shift would accumulate on re-run.
train_history_node_feature1 = train_history_node_feature.copy()
train_history_node_feature1[:,0] = train_history_node_feature1[:,0] - 0.1
test_history_node_feature = torch.from_numpy(train_history_node_feature1).float().cuda()


net.eval()
outputs1 = net(test_loan_node_feature, test_history_node_feature, test_soft_node_feature, 
               test_loan_adj, test_history_adj, test_soft_adj)
        
loss = criterion(outputs, outputs1)
print('        MAE_loss: {:.4f}'.format(loss))


In [None]:
# Display the current value of test_loan_node_feature (depends on which cell last assigned it).
test_loan_node_feature

In [None]:
# Display the current value of test_history_node_feature (depends on which cell last assigned it).
test_history_node_feature

In [None]:
# Display the current value of test_soft_node_feature (depends on which cell last assigned it).
test_soft_node_feature

In [None]:
### soft_network
# Shift column 0 of the soft node features by -0.1 (column 0 is the block that
# Matrix_Generation fills from Default_Neighbor_count) and measure how far the
# predictions move. Loan and history inputs are left untouched.
# NOTE(review): `net`, `criterion` and `outputs` must already exist in the kernel.
print('soft_network')
test_history_node_feature = torch.from_numpy(train_history_node_feature).float().cuda()
test_loan_node_feature = torch.from_numpy(train_loan_node_feature).float().cuda()
# Work on a copy: a plain assignment only aliases the array, so the in-place
# subtraction would also alter train_soft_node_feature, which other cells use
# as the unperturbed input, and the shift would accumulate on re-run.
train_soft_node_feature1 = train_soft_node_feature.copy()
train_soft_node_feature1[:,0] = train_soft_node_feature1[:,0] - 0.1
test_soft_node_feature = torch.from_numpy(train_soft_node_feature1).float().cuda()


net.eval()
outputs1 = net(test_loan_node_feature, test_history_node_feature, test_soft_node_feature, 
               test_loan_adj, test_history_adj, test_soft_adj)
        
loss = criterion(outputs, outputs1)
print('        MAE_loss: {:.4f}'.format(loss))


In [None]:
# Display the current value of test_history_node_feature (depends on which cell last assigned it).
test_history_node_feature

In [None]:
# Display the current value of test_soft_node_feature (depends on which cell last assigned it).
test_soft_node_feature