# Payments prediction with Neural Network

In this notebook we predict the default payments of clients in Taiwan, based on data from April to September 2005. The neural networks are built and trained step by step.

## Importing libraries

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
import joblib

# Render matplotlib figures inline, using the 'retina' figure format.
%matplotlib inline
%config InlineBackend.figure_format='retina'

# Global seaborn theme applied to the plots created after this cell.
sns.set(style='whitegrid', palette='muted', font_scale=1.5)

# Default figure size (width, height) for every new figure.
rcParams['figure.figsize'] = 16,10

# One seed shared by NumPy and TensorFlow so that runs are repeatable.
RANDOM_SEED = 60

np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

In [None]:
# Read the training features, the training target and the test features
# from the CSV files in the data/ folder.
X_train = pd.read_csv('data/X_train.csv')
y_train = pd.read_csv('data/y_train.csv')
X_test = pd.read_csv('data/X_test.csv')

## Exploration

In [None]:
# Report (rows, columns) of each loaded frame.
for split_name, frame in (("x test", X_test), ("x train", X_train), ("y train", y_train)):
    print(f"Shape {split_name} {frame.shape}")

In [None]:
# Column names of the training features.
X_train.columns

In [None]:
# Column names of the test features.
X_test.columns

In [None]:
# Column names of the training target frame.
y_train.columns

In [None]:
# Count the missing values of every training feature and list only the
# columns that have at least one, most affected first.
missing = X_train.isna().sum()
missing.loc[missing.gt(0)].sort_values(ascending=False)

In [None]:
# Same missing-value check for the target frame.
missing_y = y_train.isna().sum()
missing_y.loc[missing_y.gt(0)].sort_values(ascending=False)

In [None]:
# Use the ID values as row labels; the ID column itself is still kept here.
X_train = X_train.set_index('ID', drop=False)
X_test = X_test.set_index('ID', drop=False)

In [None]:
# Dropping the ID column (the IDs remain available as the row index)
X_train = X_train.drop(columns='ID')
X_test = X_test.drop(columns='ID')

In [None]:
# First rows of the training features after the ID column was removed.
X_train.head()

In [None]:
# First rows of the test features after the ID column was removed.
X_test.head()

In [None]:
# Give the target column a short name that can be used as an attribute
# (y_train.def_payment) in the cells below.
y_train = y_train.rename(columns={"default.payment.next.month":"def_payment"})

In [None]:
# Number of missing values in every column of the test features.
X_test.isnull().sum()

In [None]:
# Frequency of each SEX code, with missing values counted as well.
X_train.SEX.value_counts(dropna=False)

In [None]:
# Frequency of each EDUCATION code, with missing values counted as well.
X_train.EDUCATION.value_counts(dropna=False)

In [None]:
# Rename PAY_0 to PAY_1 in both frames so the repayment-status columns are
# numbered PAY_1 .. PAY_6.
pay_rename = {"PAY_0": "PAY_1"}
X_train = X_train.rename(columns=pay_rename)
X_test = X_test.rename(columns=pay_rename)

X_train.head()

In [None]:
# Column names after the PAY_0 -> PAY_1 rename.
X_train.columns

## Data visualization

In [None]:
# Histogram of the SEX codes in the training set.
plt.style.use('fivethirtyeight')
sex_ax = X_train['SEX'].hist()
sex_ax.set_xlabel('SEX')
sex_ax.set_ylabel('COUNT')
sex_ax.set_title('SEX - COUNT')

In [None]:
# Histogram of the target to show the class imbalance.
# The legend in the title previously read "Default = 0, Not Default = 1";
# it is corrected on the understanding that the dataset encodes a default
# as 1 and a non-default as 0 (confirm against the dataset description).
plt.style.use('fivethirtyeight')
y_train.def_payment.hist()
plt.xlabel('DEFAULT_PAY')
plt.ylabel('COUNT')
plt.title('Default Credit Card Clients - target value - data unbalance\n (Not Default = 0, Default = 1)')

In [None]:
# Histogram of the EDUCATION codes in the training set.
plt.style.use('fivethirtyeight')
education_ax = X_train['EDUCATION'].hist()
education_ax.set_xlabel('EDUCATION')
education_ax.set_ylabel('COUNT')
education_ax.set_title('EDUCATION - COUNT')

In [None]:
# Histogram of the MARRIAGE codes in the training set.
plt.style.use('fivethirtyeight')
marriage_ax = X_train['MARRIAGE'].hist()
marriage_ax.set_xlabel('MARRIAGE')
marriage_ax.set_ylabel('COUNT')
marriage_ax.set_title('MARRIAGE - COUNT')

In [None]:
# Bar plot of LIMIT_BAL against SEX (one bar per SEX code, coloured by SEX).
sns.barplot(x='SEX',y='LIMIT_BAL',data=X_train,hue='SEX')

In [None]:
# Number of training rows per SEX code.
sns.countplot(x='SEX',data=X_train,hue='SEX')

In [None]:
# Number of training rows per target value.
sns.countplot(x='def_payment', data=y_train, hue="def_payment", palette="muted")

In [None]:
# Helper that builds the names of the six numbered columns of one family
def getFeatures(prefix):
    """Return ``[prefix + '1', ..., prefix + '6']``.

    Example: ``getFeatures('PAY_')`` gives ``['PAY_1', ..., 'PAY_6']``.
    """
    return [prefix + suffix for suffix in map(str, range(1, 7))]

In [None]:
# 2x3 grid with the value distribution of each repayment-status column.
# Previously both series drawn per panel were the same `value_counts()`, so
# the red/yellow overlay carried no information. The red bars now show only
# the clients whose target is 1; the translucent yellow bars show everyone.
pay_status_columns = getFeatures('PAY_')
figure, ax = plt.subplots(2,3)
figure.set_size_inches(18,8)

# Positional mask: assumes y_train rows are in the same order as X_train
# rows (the training cells rely on the same alignment) -- TODO confirm.
is_default = y_train.def_payment.values == 1

for i, column in enumerate(pay_status_columns):
    row, col = divmod(i, 3)

    d = X_train.loc[is_default, column].value_counts()
    x = X_train[column].value_counts()
    ax[row,col].bar(d.index, d, align='center', color='red', label='def_payment = 1')
    ax[row,col].bar(x.index, x, align='center', color='yellow', alpha=0.7, label='all clients')
    ax[row,col].set_title(column)

# One legend is enough: every panel uses the same two colours.
ax[0,0].legend()
plt.show()

In [None]:
# Distribution of AGE for each MARRIAGE code.
sns.boxplot(x='MARRIAGE',y='AGE',data=X_train,palette='rainbow')

In [None]:
# Distribution of AGE for each EDUCATION code.
sns.boxplot(x='EDUCATION',y='AGE',data=X_train,palette='rainbow')

In [None]:
# Distribution of LIMIT_BAL with a KDE overlay.
# `sns.distplot` is deprecated; `histplot` with stat='density' keeps the
# density-normalised histogram that distplot drew when kde=True.
sns.histplot(X_train.LIMIT_BAL, kde=True, bins=30, stat='density')

In [None]:
# Observing the correlation between the features of the dataset
correlation = X_train.corr()
corr_fig, corr_ax = plt.subplots(figsize=(30,10))
sns.heatmap(correlation, square=True, annot=True, fmt=".1f", ax=corr_ax)

## Preprocessing

In [None]:
# Merge the EDUCATION codes 0, 5 and 6 into code 4 (training set).
fil = X_train.EDUCATION.isin([0, 5, 6])
X_train.loc[fil, 'EDUCATION'] = 4
X_train.EDUCATION.value_counts()

In [None]:
# Merge the EDUCATION codes 0, 5 and 6 into code 4 (test set).
fil = X_test.EDUCATION.isin([0, 5, 6])
X_test.loc[fil, 'EDUCATION'] = 4
X_test.EDUCATION.value_counts()

In [None]:
# EDUCATION frequencies (missing values included) for train, then test.
for frame in (X_train, X_test):
    print(frame['EDUCATION'].value_counts(dropna=False))

In [None]:
# Fold MARRIAGE code 0 into code 3 (training set).
X_train['MARRIAGE'] = X_train['MARRIAGE'].replace(0, 3)
X_train.MARRIAGE.value_counts()

In [None]:
# Fold MARRIAGE code 0 into code 3 (test set).
X_test['MARRIAGE'] = X_test['MARRIAGE'].replace(0, 3)
X_test.MARRIAGE.value_counts()

In [None]:
# First rows of the training features after the recoding steps.
X_train.head()

In [None]:
# Last rows of the training features after the recoding steps.
X_train.tail()

In [None]:
# Histogram of the PAY_1 repayment status.
X_train.plot.hist(y='PAY_1')
plt.legend()
plt.show()

In [None]:
# Column dtypes and non-null counts of the training features.
X_train.info()

In [None]:
# Number of distinct SEX codes.
X_train.SEX.nunique()

In [None]:
# Summary statistics of the six PAY_AMT columns.
X_train[getFeatures('PAY_AMT')].describe()

In [None]:
# Summary statistics of the six BILL_AMT columns.
X_train[getFeatures('BILL_AMT')].describe()

## Encoding of the categorical variable

In [None]:

# Names of the columns that hold categorical codes.
categorical_vars = ['SEX','EDUCATION','MARRIAGE','PAY_1','PAY_2','PAY_3','PAY_4','PAY_5','PAY_6']
# NOTE: this cell used to call `.astype(str)` on these columns without
# assigning the result, which left both frames untouched. Those no-op
# statements were removed; the columns deliberately stay numeric here because
# the scaling cell further down applies MinMaxScaler to every column.
X_train.head()

In [None]:
# Lower-case every column name in both feature frames.
for frame in (X_train, X_test):
    frame.columns = frame.columns.str.lower()

In [None]:
# First rows of the training features with the lower-cased column names.
X_train.head()

## Feature scaling

In [None]:
# Standardising the continuous columns: (value - mean) / std
col_to_norm = ['limit_bal', 'age', 'bill_amt1', 'bill_amt2', 'bill_amt3', 'bill_amt4',
       'bill_amt5', 'bill_amt6', 'pay_amt1', 'pay_amt2', 'pay_amt3',
       'pay_amt4', 'pay_amt5', 'pay_amt6']
# The statistics are computed on the training frame only and reused for the
# test frame, so both are expressed on the same scale (the test frame used to
# be standardised with its own mean/std).
# ddof=0 matches what np.std produced in the previous implementation.
train_mean = X_train[col_to_norm].mean()
train_std = X_train[col_to_norm].std(ddof=0)
X_test[col_to_norm] = (X_test[col_to_norm] - train_mean) / train_std
X_train[col_to_norm] = (X_train[col_to_norm] - train_mean) / train_std

In [None]:
# First five rows after standardising the continuous columns.
X_train.head(5)

In [None]:
# NOTE(review): OneHotEncoder is imported but not used in this cell.
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import make_column_transformer

# X is the training feature frame, y the target values as a NumPy array.
X = X_train
y = np.array(y_train.def_payment.values)

# A MinMaxScaler is applied to every column of the training frame; the
# transformer is fitted on the training features only.
transformer = make_column_transformer(
    (MinMaxScaler(), X_train.columns))
transformer.fit(X)

In [None]:
# scaling: X is replaced by the output of the fitted transformer
# NOTE(review): this rebinds X, so the cell is meant to be run once after
# the fitting cell above.
X = transformer.transform(X)

## Splitting the training and test data

In [None]:
# Hold out 20% of the scaled training data for evaluation (seeded split).
# NOTE(review): this rebinds X_train, X_test and y_train. The frame read from
# data/X_test.csv is no longer reachable afterwards, so every later
# `predict(X_test)` runs on this hold-out split -- confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_SEED)
X_train.shape

# Neural Network Models

## 1. Neural Network with 3 layers

In [None]:
# The following method plots a training curve recorded by `model.fit`
def plot_f1(history, metric='loss'):
    """Plot one recorded metric and its validation counterpart per epoch.

    The function name is kept for the existing callers, but note that it has
    always plotted the *loss* (no F1 score is recorded during training). The
    axis and legend labels now say so instead of claiming to show F1.

    Parameters
    ----------
    history : object returned by ``model.fit``
        Must expose ``history.history`` (dict of per-epoch values) and
        ``history.epoch``.
    metric : str, default 'loss'
        Key in ``history.history`` to plot; ``'val_' + metric`` is plotted
        alongside it.
    """
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch
    pretty = metric.replace('_', ' ')

    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel(pretty.capitalize())
    plt.plot(hist['epoch'], hist[metric],
             label=f'Train {pretty}')
    plt.plot(hist['epoch'], hist[f'val_{metric}'],
             label=f'Val {pretty}')
    plt.legend()
    plt.show()

In [None]:
# Model 1: three hidden ReLU layers (32 -> 64 -> 128 units) followed by a
# single sigmoid unit, trained with Adam on binary cross-entropy.
model1 = keras.Sequential()
model1.add(keras.layers.Dense(units=32, activation="relu", input_shape=[X_train.shape[1]]))
model1.add(keras.layers.Dense(units=64, activation="relu"))
model1.add(keras.layers.Dense(units=128, activation='relu'))

model1.add(keras.layers.Dense(1, activation="sigmoid"))

model1.compile(
    optimizer=keras.optimizers.Adam(0.0001),
    loss = 'binary_crossentropy',
    metrics = ['accuracy'])

BATCH_SIZE = 32

# Stop when the validation loss has not improved for 10 epochs.
early_stop = keras.callbacks.EarlyStopping(
  monitor='val_loss',
  mode="min",
  patience=10
)

# The callback was created but never handed to `fit`, so early stopping
# never ran; it is now passed through `callbacks`.
history = model1.fit(
  x=X_train,
  y=y_train,
  shuffle=True,
  epochs=50,
  validation_split=0.2,
  batch_size=BATCH_SIZE,
  callbacks=[early_stop]
)

plot_f1(history)

## 2. Neural Network with SGD Optimizer (4-layers)

In [None]:
# Model 2: four hidden layers (32 relu -> 64 selu -> 128 selu -> 256 relu)
# followed by a single sigmoid unit, trained with SGD.
model2 = keras.Sequential()
model2.add(keras.layers.Dense(units=32, activation="relu", input_shape=[X_train.shape[1]]))
model2.add(keras.layers.Dense(units=64, activation="selu"))
model2.add(keras.layers.Dense(units=128, activation="selu"))
model2.add(keras.layers.Dense(units=256, activation="relu"))
model2.add(keras.layers.Dense(1, activation='sigmoid'))

model2.compile(
    optimizer=keras.optimizers.SGD(0.0001),
    loss='binary_crossentropy',
    metrics = ['accuracy'])

BATCH_SIZE = 64

# Stop when the validation loss has not improved for 10 epochs.
early_stop = keras.callbacks.EarlyStopping(
  monitor='val_loss',
  mode="min",
  patience=10
)

# The callback was created but never handed to `fit`, so early stopping
# never ran; it is now passed through `callbacks`.
history = model2.fit(
  x=X_train,
  y=y_train,
  shuffle=True,
  epochs=100,
  validation_split=0.2,
  batch_size=BATCH_SIZE,
  callbacks=[early_stop]
)

plot_f1(history)

## 3. Neural Network with 4 layers and Adagrad Optimizer

In [None]:
# Model 3: four hidden layers (64 relu -> 128 linear -> 256 selu -> 512 relu)
# followed by a single sigmoid unit, trained with Adagrad.
model3 = keras.Sequential()
model3.add(keras.layers.Dense(units=64, activation="relu", input_shape=[X_train.shape[1]]))
model3.add(keras.layers.Dense(units=128, activation="linear"))
model3.add(keras.layers.Dense(units=256, activation="selu"))
model3.add(keras.layers.Dense(units=512, activation="relu"))
model3.add(keras.layers.Dense(1, activation='sigmoid'))

model3.compile(
    optimizer=keras.optimizers.Adagrad(0.0001),
    loss='binary_crossentropy',
    metrics = ['accuracy'])

BATCH_SIZE = 64

# Stop when the validation loss has not improved for 10 epochs.
early_stop = keras.callbacks.EarlyStopping(
  monitor='val_loss',
  mode="min",
  patience=10
)

# The callback was created but never handed to `fit`, so early stopping
# never ran; it is now passed through `callbacks`.
history = model3.fit(
  x=X_train,
  y=y_train,
  shuffle=True,
  epochs=100,
  validation_split=0.2,
  batch_size=BATCH_SIZE,
  callbacks=[early_stop]
)

plot_f1(history)

## 4. Neural Network with dropout regularization at 30%

In [None]:
# Model 4: 30% dropout on the inputs and after each of the three hidden ReLU
# layers (128 -> 256 -> 512 units), then a single sigmoid unit; trained with SGD.
model4 = keras.Sequential()
model4.add(keras.layers.Dropout(0.3, input_shape=(X_train.shape[1],)))
model4.add(keras.layers.Dense(units=128, activation="relu"))
model4.add(keras.layers.Dropout(0.3))
model4.add(keras.layers.Dense(units=256, activation="relu"))
model4.add(keras.layers.Dropout(0.3))
model4.add(keras.layers.Dense(units=512, activation="relu"))
model4.add(keras.layers.Dropout(0.3))
model4.add(keras.layers.Dense(1, activation='sigmoid'))

model4.compile(
    optimizer=keras.optimizers.SGD(0.0001),
    loss = 'binary_crossentropy',
    metrics = ['accuracy'])

BATCH_SIZE = 64

# Stop when the validation loss has not improved for 15 epochs.
early_stop = keras.callbacks.EarlyStopping(
  monitor='val_loss',
  mode="min",
  patience=15
)

# The callback was created but never handed to `fit`, so early stopping
# never ran; it is now passed through `callbacks`.
history = model4.fit(
  x=X_train,
  y=y_train,
  shuffle=True,
  epochs=100,
  validation_split=0.2,
  batch_size=BATCH_SIZE,
  callbacks=[early_stop]
)

plot_f1(history)

## Model Evaluation

In [None]:
# Metric functions used by the evaluation cells below.
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, roc_auc_score

# We add the predicted score to a text file.
# The file is opened in append mode ("a"), so re-running the notebook adds to
# the existing content. The handle stays open across the following cells
# until the cell that calls f.close().
f = open("Mirko_Lantieri_858278_score2.txt", "a")

In [None]:
# Append the rounded predictions of model 1 to the score file.
# Formatting a large array directly in an f-string writes NumPy's abbreviated
# "..." representation; raising the threshold to the array size writes every
# value in the same layout.
y_pred = model1.predict(X_test)
y_round = np.around(y_pred)
f.write(f"{np.array2string(y_round, threshold=y_round.size)}\n")

In [None]:
# Append the rounded predictions of model 2 to the score file.
# Formatting a large array directly in an f-string writes NumPy's abbreviated
# "..." representation; raising the threshold to the array size writes every
# value in the same layout.
y_pred = model2.predict(X_test)
y_round = np.around(y_pred)
f.write(f"{np.array2string(y_round, threshold=y_round.size)}\n")

In [None]:
# Append the rounded predictions of model 3 to the score file.
# Formatting a large array directly in an f-string writes NumPy's abbreviated
# "..." representation; raising the threshold to the array size writes every
# value in the same layout.
y_pred = model3.predict(X_test)
y_round = np.around(y_pred)
f.write(f"{np.array2string(y_round, threshold=y_round.size)}\n")

In [None]:
# Append the rounded predictions of model 4 to the score file.
# Formatting a large array directly in an f-string writes NumPy's abbreviated
# "..." representation; raising the threshold to the array size writes every
# value in the same layout.
y_pred = model4.predict(X_test)
y_round = np.around(y_pred)
f.write(f"{np.array2string(y_round, threshold=y_round.size)}\n")

In [None]:
# Close the score file so the appended predictions are flushed to disk.
f.close()


## Metrics evaluation Model 1

In [None]:
# Predict once and reuse the result for every metric.
# ROC-AUC is computed on the predicted probabilities. The label-based metrics
# need hard 0/1 predictions (passing the raw sigmoid outputs makes
# scikit-learn raise a ValueError), so the probabilities are rounded the same
# way as when the score file is written.
y_prob = model1.predict(X_test).ravel()
y_label = np.around(y_prob).astype(int)

roc = roc_auc_score(y_test, y_prob)
f1 = f1_score(y_test, y_label)
acc = accuracy_score(y_test, y_label)
prec = precision_score(y_test, y_label)
recall = recall_score(y_test, y_label)

In [None]:
# One-row summary for model 1. The row used to be labelled
# 'Logistic Regression' although it reports the 3-layer neural network.
results = pd.DataFrame([['Neural Network 3 layers (Adam)', acc,prec,recall, f1,roc]],
               columns = ['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score','ROC'])
results

## Metrics evaluation Model 2

In [None]:
# Predict once and reuse the result for every metric.
# ROC-AUC is computed on the predicted probabilities. The label-based metrics
# need hard 0/1 predictions (passing the raw sigmoid outputs makes
# scikit-learn raise a ValueError), so the probabilities are rounded the same
# way as when the score file is written.
y_prob = model2.predict(X_test).ravel()
y_label = np.around(y_prob).astype(int)

roc = roc_auc_score(y_test, y_prob)
f1 = f1_score(y_test, y_label)
acc = accuracy_score(y_test, y_label)
prec = precision_score(y_test, y_label)
recall = recall_score(y_test, y_label)

In [None]:
# One-row summary for model 2. The row used to be labelled
# 'Logistic Regression' although it reports the 4-layer SGD network.
results = pd.DataFrame([['Neural Network 4 layers (SGD)', acc,prec,recall, f1,roc]],
               columns = ['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score','ROC'])
results

## Metrics evaluation Model 3

In [None]:
# Predict once and reuse the result for every metric.
# ROC-AUC is computed on the predicted probabilities. The label-based metrics
# need hard 0/1 predictions (passing the raw sigmoid outputs makes
# scikit-learn raise a ValueError), so the probabilities are rounded the same
# way as when the score file is written.
y_prob = model3.predict(X_test).ravel()
y_label = np.around(y_prob).astype(int)

roc = roc_auc_score(y_test, y_prob)
f1 = f1_score(y_test, y_label)
acc = accuracy_score(y_test, y_label)
prec = precision_score(y_test, y_label)
recall = recall_score(y_test, y_label)

In [None]:
# One-row summary for model 3. The row used to be labelled
# 'Logistic Regression' although it reports the 4-layer Adagrad network.
results = pd.DataFrame([['Neural Network 4 layers (Adagrad)', acc,prec,recall, f1,roc]],
               columns = ['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score','ROC'])
results

## Metrics evaluation Model 4

In [None]:
# Predict once and reuse the result for every metric.
# ROC-AUC is computed on the predicted probabilities. The label-based metrics
# need hard 0/1 predictions (passing the raw sigmoid outputs makes
# scikit-learn raise a ValueError), so the probabilities are rounded the same
# way as when the score file is written.
y_prob = model4.predict(X_test).ravel()
y_label = np.around(y_prob).astype(int)

roc = roc_auc_score(y_test, y_prob)
f1 = f1_score(y_test, y_label)
acc = accuracy_score(y_test, y_label)
prec = precision_score(y_test, y_label)
recall = recall_score(y_test, y_label)

In [None]:
# One-row summary for model 4. The row used to be labelled
# 'Logistic Regression' although it reports the dropout network.
results = pd.DataFrame([['Neural Network dropout 30% (SGD)', acc,prec,recall, f1,roc]],
               columns = ['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score','ROC'])
results

In [None]:

from sklearn import metrics

# ROC curves of the four networks on the hold-out split.
# false positive rate, fpr = FP/(TN+FP) = 1 - specificity; tpr = sensitivity
y_pred_1 = model1.predict(X_test)
y_pred_2 = model2.predict(X_test)

y_pred_3 = model3.predict(X_test)
y_pred_4 = model4.predict(X_test)

model = [model1,model2,model3,model4]

models=[y_pred_1,y_pred_2,y_pred_3,y_pred_4]
# Legend names follow what distinguishes each network; model 1 is the Adam
# network and was previously mislabelled 'Logistic'.
label=['Adam','SGD','Adagrad','Dropout']

# plotting ROC curves
plt.figure(figsize=(10, 8))
for name, y_score in zip(label, models):
    # Reuse the predictions computed above instead of predicting again
    # for the AUC.
    y_score = y_score.ravel()
    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_score)
    auc = metrics.roc_auc_score(y_test, y_score)
    plt.plot(fpr, tpr, label='%s ROC (area = %0.2f)' % (name, auc))
# Diagonal = performance of a random classifier.
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('1-Specificity(False Positive Rate)')
plt.ylabel('Sensitivity(True Positive Rate)')
plt.title('AUROC')
plt.legend(loc="lower right")
plt.show()