In [56]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers.core import Activation
from keras.layers.core import Dense
from keras import optimizers
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, classification_report, accuracy_score, f1_score

In [4]:
# Load the bank-loan dataset from a CSV file resolved relative to the
# current working directory.
credit = pd.read_csv('NN_workshop_bankloan.csv')
# Preview the first rows to sanity-check the parsed columns.
credit.head()

Unnamed: 0,CustomerID,Age,Education,Working,Address,Income,Loan,CardLoan,OtherLocan,CreditScore
0,1,41,3,17,12,176,9.3,11.36,5.01,1
1,2,27,1,10,6,31,17.3,1.36,4.0,0
2,3,40,1,15,14,55,5.5,0.86,2.17,0
3,4,41,1,15,14,120,2.9,2.66,0.82,0
4,5,24,2,2,0,28,17.3,1.79,3.06,1


In [9]:
# Show the dtype pandas inferred for each column.
credit.dtypes

CustomerID       int64
Age              int64
Education        int64
Working          int64
Address          int64
Income           int64
Loan           float64
CardLoan       float64
OtherLocan     float64
CreditScore      int64
dtype: object

In [18]:
# Raw ndarray view of the frame plus the column labels as a plain list;
# later cells index into both by position.
col_names = list(credit.columns)
credit_values = credit.values
col_names

['CustomerID',
 'Age',
 'Education',
 'Working',
 'Address',
 'Income',
 'Loan',
 'CardLoan',
 'OtherLocan',
 'CreditScore']

In [10]:
# Summary statistics for every column (include='all' does not restrict by dtype).
credit.describe(include = 'all')

Unnamed: 0,CustomerID,Age,Education,Working,Address,Income,Loan,CardLoan,OtherLocan,CreditScore
count,700.0,700.0,700.0,700.0,700.0,700.0,700.0,700.0,700.0,700.0
mean,350.5,34.86,1.722857,8.388571,8.278571,45.601429,10.260571,1.553457,3.058229,0.261429
std,202.21688,7.997342,0.928206,6.658039,6.824877,36.814226,6.827234,2.117209,3.287524,0.439727
min,1.0,20.0,1.0,0.0,0.0,14.0,0.4,0.01,0.05,0.0
25%,175.75,29.0,1.0,3.0,3.0,24.0,5.0,0.37,1.0475,0.0
50%,350.5,34.0,1.0,7.0,7.0,34.0,8.6,0.855,1.985,0.0
75%,525.25,40.0,2.0,12.0,12.0,55.0,14.125,1.905,3.9275,1.0
max,700.0,56.0,5.0,31.0,34.0,446.0,41.3,20.56,27.03,1.0


In [11]:
# Count missing values per column.
credit.isnull().sum()

CustomerID     0
Age            0
Education      0
Working        0
Address        0
Income         0
Loan           0
CardLoan       0
OtherLocan     0
CreditScore    0
dtype: int64

In [13]:
# (number of rows, number of columns) of the loaded frame.
credit.shape

(700, 10)

In [19]:
# Columns to rescale into [0, 1]. CustomerID is intentionally absent, so it
# is dropped from the modelling matrix. The target (CreditScore) is included
# so that it stays as the last column of `credit_norm`.
norm_list = ['Age', 'Education', 'Working', 'Address', 'Income', 'Loan', 'CardLoan', 'OtherLocan', 'CreditScore']

# Positions of the selected columns, kept in the dataset's own column order
# (not the order of `norm_list`).
norm_col_idx = [idx for idx, name in enumerate(col_names) if name in norm_list]
new_colnames = [col_names[idx] for idx in norm_col_idx]

# minmax_scale works column-wise on a 2-D array (axis=0 by default), so one
# call replaces scaling each column separately and stacking the results.
# NOTE(review): the min/max are computed over all rows, i.e. before the
# train/test split performed in a later cell, so test rows influence the
# scaling of the training data. Consider fitting the scaler on the training
# rows only.
credit_norm = minmax_scale(credit_values[:, norm_col_idx].astype(float))

In [114]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffled split repeatable.
train, test = train_test_split(
    credit_norm,
    test_size=0.3,
    shuffle=True,
    random_state=123,
)

In [115]:
# Column positions inside the normalised matrix: the first eight columns are
# the model inputs, column 8 is the target. Indexing with a list keeps the
# target 2-D, with a single column.
x_column_index = list(range(8))
y_column_index = [8]

x_train, y_train = train[:, x_column_index], train[:, y_column_index]
x_test, y_test = test[:, x_column_index], test[:, y_column_index]

In [136]:
# Small fully connected binary classifier: three ReLU hidden layers
# (10 -> 5 -> 6 units) followed by a single sigmoid output unit.
model = Sequential()
model.add(Dense(10, input_dim = x_train.shape[1], activation = "relu"))
model.add(Dense(5, activation = "relu"))
model.add(Dense(6, activation = "relu"))
model.add(Dense(1, activation = "sigmoid"))
model.summary()

# Use the canonical `SGD` class rather than the lowercase `sgd` alias, which
# is not part of the documented API and is not available in every Keras
# release.
model.compile(loss = 'binary_crossentropy', optimizer = optimizers.SGD(lr=0.1))

Model: "sequential_21"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_81 (Dense)             (None, 10)                90        
_________________________________________________________________
dense_82 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_83 (Dense)             (None, 6)                 36        
_________________________________________________________________
dense_84 (Dense)             (None, 1)                 7         
Total params: 188
Trainable params: 188
Non-trainable params: 0
_________________________________________________________________


In [137]:
# Weight each class by (total training rows / rows of that class) so the
# less frequent class contributes more to the loss.
n_train_rows = len(y_train)
class_weights = {label: n_train_rows / np.sum(y_train == label) for label in (0, 1)}

print("Class weight:", class_weights)

# Train for 100 epochs with the class weights applied; verbose=0 keeps the
# per-epoch log out of the notebook.
hist = model.fit(
    x_train,
    y_train,
    epochs=100,
    class_weight=class_weights,
    verbose=0,
)

Class weight: {0: 1.3687150837988826, 1: 3.712121212121212}


In [138]:
# `Sequential.predict_classes` is deprecated and absent from newer Keras /
# TensorFlow releases. For a single sigmoid output it is equivalent to
# thresholding the predicted probability at 0.5, which works on every version.
y_predict_class = (model.predict(x_test) > 0.5).astype("int32")

# Confusion matrix with rows = true class and columns = predicted class.
print(pd.DataFrame(confusion_matrix(y_test, y_predict_class), index=['true:0', 'true:1'], columns=['pred:0', 'pred:1']))


        pred:0  pred:1
true:0     128      31
true:1      11      40


In [139]:
# Score the thresholded test-set predictions. Each entry pairs the printed
# label with the sklearn metric that produces the value.
metric_report = (
    ('Accuracy: ', accuracy_score),
    ('F1: ', f1_score),
    ('Recall: ', recall_score),
    ('Precision: ', precision_score),
)
for metric_label, metric_fn in metric_report:
    print(metric_label, metric_fn(y_test, y_predict_class))

Accuracy:  0.8
F1:  0.6557377049180328
Recall:  0.7843137254901961
Precision:  0.5633802816901409
