# Neural Network

Goals of this notebook: 
- Experiment with different neural network architectures. Try 
    - leaky relu activation, 
    - different drop outs, 
    - lower # of epochs, 
    - undersampling, 
    - a larger validation split.
- try hyperparameter optimization

In [118]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from torchvision import datasets, transforms

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import regularizers

from imblearn.under_sampling import RandomUnderSampler

import matplotlib.pyplot as plt
import bokeh
import bokeh.plotting
from bokeh.layouts import column, row
from bokeh.models import HoverTool,WheelZoomTool, PanTool, ResetTool
bokeh.io.output_notebook()

#### Load the data, normalize, and split into a training and validation set

In [119]:
def data_processed():
    '''
    Used to read & normalize processed test and train data
    '''
    data_train = pd.read_csv("data/2022-02-07_LOANS_TRAIN.csv")
    data_test = pd.read_csv("data/2022-02-07_LOANS_TEST.csv")
    y_train = data_train['loan_status']
    data_train.drop('loan_status', 1, inplace=True)
    data_train.drop('id', 1, inplace=True)
    data_test.drop('id', 1, inplace=True)
    
    scaler = StandardScaler()
    scaler.fit(data_train)
    normal_data_train = scaler.transform(data_train)
    normal_data_test = scaler.transform(data_test)
    
    return normal_data_train, normal_data_test, y_train

def to_submission(y_test_predicted_probability):
    y_test = pd.DataFrame(y_test_predicted_probability, columns=['loan_status'])
    y_test.to_csv('data/submission.csv', index_label='id')
    return

In [120]:
X_train, X_test, y_train = data_processed()

print(f"""
X_train shape: {X_train.shape}
y_train shape: {y_train.shape}
""")

  data_train.drop('loan_status', 1, inplace=True)
  data_train.drop('id', 1, inplace=True)
  data_test.drop('id', 1, inplace=True)



X_train shape: (197250, 92)
y_train shape: (197250,)



Feature names must be in the same order as they were in fit.



### Try undersampling the data

In [121]:
fraction = 1
rus = RandomUnderSampler(random_state=0, sampling_strategy=fraction)
X_train, y_train = rus.fit_resample(X_train, y_train)

print(f"""
X_train shape: {X_train.shape}
y_train shape: {y_train.shape}
""")


X_train shape: (60448, 92)
y_train shape: (60448,)



We only use ~30% of the dataset.

In [122]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.33, random_state=42, shuffle=True)

print(f"""
X_train shape: {X_train.shape}
y_train shape: {y_train.shape}
X_val shape: {X_val.shape}
y_val shape: {y_val.shape}
""")


X_train shape: (40500, 92)
y_train shape: (40500,)
X_val shape: (19948, 92)
y_val shape: (19948,)



In [123]:
# change to one hot encoding
y_train, y_val = to_categorical(y_train), to_categorical(y_val)

Write function to plot training history

In [124]:
def plot_results(history):
    tools = [PanTool(), WheelZoomTool(), ResetTool(), HoverTool()]
    p = bokeh.plotting.figure(title='Training and validation loss', tools=tools)
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(loss) + 1)
    p.line(epochs, loss, color='blue', legend_label='Training loss')
    p.line(epochs, val_loss, color='red', legend_label='Validation loss')
    p.yaxis.axis_label = "Loss"
    p.xaxis.axis_label = 'Epochs'
    p.legend.click_policy="hide"
    p.legend.visible=False

    q = bokeh.plotting.figure(title='Training and validation accuracy', tools=tools)
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    q.line(epochs, acc, color='blue', legend_label='training')
    q.line(epochs, val_acc, color='red', legend_label='validation')
    q.yaxis.axis_label = "Accuracy"
    q.xaxis.axis_label = 'Epochs'
    q.legend.click_policy="hide"
    q.legend.location="bottom_right"
    bokeh.io.show(row(p, q))

#### Experiment 1a:

In [149]:
network = models.Sequential()
network.add(layers.Dense(200, activation="leaky_relu", input_shape=(X_train.shape[1],)))
network.add(layers.Dropout(0.3))
network.add(layers.Dense(50, activation="leaky_relu"))
network.add(layers.Dense(2, activation="softmax"))
network.compile(loss="categorical_crossentropy",
                optimizer="adam",
                metrics=["accuracy"])

In [150]:
history = network.fit(X_train, y_train, batch_size=16, epochs=10, validation_split=0.1, shuffle=True, verbose=False)

In [151]:
test_loss, test_acc = network.evaluate(X_val, y_val)
print('the final accuracy on the holdout set was ', np.round(test_acc * 100, 2), '%')

the final accuracy on the holdout set was  63.77 %


In [152]:
y_pred = network.predict(X_val)

In [153]:
roc_auc_score(y_val, y_pred)

0.6850796944330741

In [154]:
plot_results(history)

#### Experiment 2a:

In [None]:
network = models.Sequential()
network.add(layers.Dense(300, activation="leaky_relu", input_shape=(X_train.shape[1],)))
network.add(layers.Dropout(0.2))
network.add(layers.Dense(100, activation="leaky_relu"))
network.add(layers.Dropout(0.2))
network.add(layers.Dense(50, activation="leaky_relu"))
network.add(layers.Dense(2, activation="softmax"))
network.compile(loss="categorical_crossentropy",
                optimizer="adam",
                metrics=["accuracy"])

In [None]:
history = network.fit(X_train, y_train, batch_size=16, epochs=8, validation_split=0.2, shuffle=True, verbose=False)

In [None]:
test_loss, test_acc = network.evaluate(X_val, y_val)
print('the final accuracy on the holdout set was ', np.round(test_acc * 100, 2), '%')

In [None]:
y_pred = network.predict(X_val)
roc_auc_score(y_val, y_pred)

In [None]:
plot_results(history)

### Experiment 3a:

In [None]:
network = models.Sequential()
network.add(layers.Dense(400, activation="leaky_relu", input_shape=(X_train.shape[1],)))
network.add(layers.Dropout(0.4))
network.add(layers.Dense(200, activation="leaky_relu"))
network.add(layers.Dropout(0.2))
network.add(layers.Dense(100, activation="leaky_relu"))
network.add(layers.Dense(10, activation="leaky_relu"))
network.add(layers.Dense(2, activation="softmax"))
network.compile(loss="categorical_crossentropy",
                optimizer="adam",
                metrics=["accuracy"])

In [None]:
history = network.fit(X_train, y_train, batch_size=16, epochs=8, validation_split=0.2, shuffle=True, verbose=False)

In [None]:
test_loss, test_acc = network.evaluate(X_val, y_val)
print('the final accuracy on the holdout set was ', np.round(test_acc * 100, 2), '%')

In [None]:
y_pred = network.predict(X_val)
roc_auc_score(y_val, y_pred)

In [None]:
plot_results(history)

## Implement PCA upstream of neural networks

Plot the cumulative variance versus the number of components to get information about the explained variance

In [None]:
pca = PCA().fit(X_train)

plt.rcParams["figure.figsize"] = (15,6)

fig, ax = plt.subplots()
xi = np.arange(1, 30, step=1)
y = np.cumsum(pca.explained_variance_ratio_)

plt.ylim(0.0,1.1)
plt.plot(xi, y, marker='o', linestyle='--', color='b')

plt.xlabel('Number of Components')
plt.xticks(np.arange(0, 30, step=1)) 
plt.ylabel('Cumulative variance (%)')
plt.title('The number of components needed to explain variance')

plt.axhline(y=0.95, color='r', linestyle='-')
plt.text(0.5, 0.85, '95% cut-off threshold', color = 'red', fontsize=16)

ax.grid(axis='x')
plt.show()

#### We want to select a number of components that captures 95% explained variance. We don't need to infer from the above graph what that value would be - the sklearn implementation allows you to define the desired explained variance using the `n_components` argument for the PCA function.

In [None]:
pca = PCA(n_components = 0.95)
pca.fit(X_train)
X_train = pca.transform(X_train)

Apply the same transform to the test data

In [None]:
X_val = pca.transform(X_val)

#### Now, repeat the experiments with the PCA transformed data

#### Experiment 1b:

In [None]:
network = models.Sequential()
network.add(layers.Dense(200, activation="leaky_relu", input_shape=(X_train.shape[1],)))
network.add(layers.Dropout(0.2))
network.add(layers.Dense(50, activation="leaky_relu"))
network.add(layers.Dense(2, activation="softmax"))
network.compile(loss="categorical_crossentropy",
                optimizer="adam",
                metrics=["accuracy"])

In [None]:
history = network.fit(X_train, y_train, batch_size=16, epochs=10, validation_split=0.1, shuffle=True, verbose=False)

In [None]:
test_loss, test_acc = network.evaluate(X_val, y_val)
print('the final accuracy on the holdout set was ', np.round(test_acc * 100, 2), '%')

In [None]:
y_pred = network.predict(X_val)
roc_auc_score(y_val, y_pred)

In [None]:
plot_results(history)

#### Experiment 2b:

In [None]:
network = models.Sequential()
network.add(layers.Dense(300, activation="leaky_relu", input_shape=(X_train.shape[1],)))
network.add(layers.Dropout(0.2))
network.add(layers.Dense(100, activation="leaky_relu"))
network.add(layers.Dropout(0.2))
network.add(layers.Dense(50, activation="leaky_relu"))
network.add(layers.Dense(2, activation="softmax"))
network.compile(loss="categorical_crossentropy",
                optimizer="adam",
                metrics=["accuracy"])

In [None]:
history = network.fit(X_train, y_train, batch_size=16, epochs=10, validation_split=0.2, shuffle=True, verbose=False)

In [None]:
plot_results_loss, test_acc = network.evaluate(X_val, y_val)
print('the final accuracy on the holdout set was ', np.round(test_acc * 100, 2), '%')

In [None]:
y_pred = network.predict(X_val)
roc_auc_score(y_val, y_pred)

In [None]:
plot_results(history)

#### Experiment 3b: 

In [None]:
network = models.Sequential()
network.add(layers.Dense(400, activation="leaky_relu", input_shape=(X_train.shape[1],)))
network.add(layers.Dropout(0.4))
network.add(layers.Dense(200, activation="leaky_relu"))
network.add(layers.Dropout(0.2))
network.add(layers.Dense(100, activation="leaky_relu"))
network.add(layers.Dense(10, activation="leaky_relu"))
network.add(layers.Dense(2, activation="softmax"))
network.compile(loss="categorical_crossentropy",
                optimizer="adam",
                metrics=["accuracy"])

In [None]:
history = network.fit(X_train, y_train, batch_size=16, epochs=10, validation_split=0.2, shuffle=True, verbose=False)

In [None]:
test_loss, test_acc = network.evaluate(X_val, y_val)
print('the final accuracy on the holdout set was ', np.round(test_acc * 100, 2), '%')

In [None]:
y_pred = network.predict(X_val)
roc_auc_score(y_val, y_pred)

In [None]:
plot_results(history)

## Summary of findings:  ** need to update this 

|Experiment #|PCA?|# of hidden layers|# of nodes/layer|activation|dropout|batch_size|epochs|val_split|roc_auc|
|------------|----|------------------|----------------|----------|-------|----------|------|---------|-------|
| # 1 A      | NO |     2            |     200, 50    |leaky relu| 0.2   | 16       | 10   | 0.1     | 0.68  |
| # 1 B      | YES|     2            |     200, 50    |leaky relu| 0.2   | 16       | 10   | 0.1     | 0.67  |
| # 2 A      | NO |     3            |  300, 100, 50  |leaky relu| 2-0.2 | 16       | 30   | 0.1     | 0.68  |
| # 2 B      | YES|     3            |  300, 100, 50  |leaky relu| 2-0.2 | 16       | 30   | 0.1     | 0.67  |
| # 3 A      | NO |     4            |400, 200, 100,50|leaky relu|0.4,0.2| 16       | 20   | 0.1     | 0.68  |
| # 3 B      | YES|     4            |400, 200, 100,50|leaky relu|0.4,0.2| 16       | 20   | 0.1     | 0.67  |