### Model Candidate - Neural Network

Our objective is to outperform the model currently used by our client, Lisboa Bank.

In [16]:
###Insert Baseline Model Performance Metrics Here

We will start by creating a simple neural network and seeing how it performs

In [17]:
%matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Using matplotlib backend: Qt5Agg


In [18]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn import preprocessing
#from keras import keras_metrics

In [19]:
# Loading the dataframe after it has been augmented in iteration 2.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that were produced by this project.
df = pd.read_pickle('../data/pickle_files/df_pickle_w_all_stats')
# Excluding the duration variable as it cannot be used in our baseline.
df = df.drop(columns = ['duration'])

In [20]:
#Checking dtypes have loaded correctly (should return empty index)
# Printed explicitly: Jupyter only echoes a bare expression when it is the
# last statement of a cell, so the original check was silently discarded.
print(df.select_dtypes(exclude = ['float64', 'int64']).columns)

# Target vector and feature matrix ('Date' is excluded from the features).
y = df['y']
X = df.drop(columns=['y', 'Date'])

In [21]:
# Standardize every feature column.
# NOTE(review): the scaler is fit on the full feature matrix before the
# train/test split further down, so test rows contribute to the fitted
# statistics - confirm this is acceptable for the reported test scores.
scaler = preprocessing.StandardScaler()
X_transformed = scaler.fit_transform(X)

In [22]:
# Will work with numpy arrays from here on.
# Both names are rebound: y becomes an array of the target, and X becomes an
# array built from the scaled features in X_transformed.
y = np.array(y)
X = np.array(X_transformed)

In [23]:
X.shape

(41188, 72)

In [24]:
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold

In [25]:
# Hold out a test set from the scaled features; random_state is fixed so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, random_state = 4)

In [26]:
model = Sequential()
#keras.layers.Dense(units, activation=None, use_bias=True, 
#kernel_initializer='glorot_uniform', bias_initializer='zeros', 
#kernel_regularizer=None, bias_regularizer=None, 
#activity_regularizer=None, kernel_constraint=None, bias_constraint=None)

In [27]:
# Two 72-unit ReLU hidden layers, each followed by 50% dropout, feeding a
# single sigmoid output unit. Layers are appended in list order.
first_model_layers = [
    Dense(72, input_dim=72, activation='relu'),
    Dropout(0.5),
    Dense(72, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
]
for layer in first_model_layers:
    model.add(layer)

In [28]:
# Binary classification setup: cross-entropy loss, rmsprop optimizer, and
# accuracy tracked as the reported metric.
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [29]:
# Train on the training split for 50 epochs in batches of 100 samples;
# verbose=0 suppresses the per-epoch progress output.
model.fit(X_train, y_train,
          epochs=50,
          batch_size=100, verbose=0)

<keras.callbacks.History at 0xb3243e438>

In [30]:
score = model.evaluate(X_test, y_test, batch_size=100)



In [None]:
print(model.metrics_names)

In [31]:
score

[0.28148761418267015, 0.899776634879861]

Let's create some more simple models and see how changing basic hyperparameters affects their performance.

In [74]:
drop_out = 0.5

In [75]:
def create_simple_model(dropout_rate=None):
    """Build and compile a small fully connected binary classifier.

    The network takes 72 input features and stacks two 64-unit ReLU layers,
    each followed by dropout, before a single sigmoid output unit.

    Parameters
    ----------
    dropout_rate : float, optional
        Dropout rate applied after each hidden layer. When omitted, the
        notebook-level ``drop_out`` variable is read at call time, which keeps
        existing zero-argument calls working unchanged.

    Returns
    -------
    The compiled ``Sequential`` model (binary cross-entropy loss, rmsprop
    optimizer, accuracy metric).
    """
    # Prefer the explicit argument; fall back to the notebook global so the
    # function no longer *requires* hidden state to be set beforehand.
    rate = drop_out if dropout_rate is None else dropout_rate

    model = Sequential()

    model.add(Dense(64, input_dim=72, activation='relu'))
    model.add(Dropout(rate))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(rate))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    return model

In [33]:
#from keras.utils import plot_model
#plot_model(model, to_file='model.png')

In [34]:
def fit_model(epochs, batch_size, verbose, val_split):
    """Fit the notebook-level ``model`` on ``X_train`` / ``y_train``.

    ``model``, ``X_train`` and ``y_train`` are not parameters; they are read
    from the notebook namespace when the function is called.

    Parameters
    ----------
    epochs : number of training epochs.
    batch_size : number of samples per batch.
    verbose : verbosity level forwarded to ``model.fit``.
    val_split : value forwarded as ``validation_split``.

    Returns
    -------
    The object returned by ``model.fit``.
    """
    fit_options = dict(
        epochs=epochs,
        validation_split=val_split,
        batch_size=batch_size,
        verbose=verbose,
    )
    return model.fit(X_train, y_train, **fit_options)

In [35]:
def plot_scores(history, val = False):
    """Plot per-epoch training accuracy, optionally with validation accuracy.

    Parameters
    ----------
    history : object with a ``history`` dict of per-epoch metric lists, as
        returned by ``model.fit``.
    val : bool
        When true, the validation accuracy curve is drawn as well. The model
        must then have been fit with validation data.
    """
    logged = history.history
    # Older Keras releases log accuracy under 'acc', newer ones under
    # 'accuracy'; accept either so the plot survives a library upgrade.
    acc_key = 'acc' if 'acc' in logged else 'accuracy'

    plt.plot(logged[acc_key])
    labels = ['Train']
    if val:
        plt.plot(logged['val_' + acc_key])
        # This curve comes from the validation split, not the held-out test set.
        labels.append('Validation')

    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    # Only label the curves that were actually drawn.
    plt.legend(labels, loc='upper left')
    plt.show()

In [36]:
def plot_loss(history, val = False):
    """Plot per-epoch training loss, optionally with validation loss.

    Parameters
    ----------
    history : object with a ``history`` dict of per-epoch metric lists, as
        returned by ``model.fit``.
    val : bool
        When true, the validation loss curve is drawn as well. The model must
        then have been fit with validation data.
    """
    logged = history.history

    plt.plot(logged['loss'])
    labels = ['Train']
    if val:
        plt.plot(logged['val_loss'])
        # This curve comes from the validation split, not the held-out test set.
        labels.append('Validation')

    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    # Only label the curves that were actually drawn.
    plt.legend(labels, loc='upper left')
    plt.show()

In [37]:
#Setting model parameters
drop_out = 0.5

In [38]:
model = create_simple_model()

In [39]:
history = fit_model(500, 1000, 0, None)

In [40]:
plot_scores(history)

In [41]:
plot_loss(history)

In [42]:
score = model.evaluate(X_test, y_test, batch_size=100)



In [43]:
print(model.metrics_names)
score

['loss', 'acc']


[0.2839228126526203, 0.8964747056239112]

After a few runs of this basic model with different values for units and optimizer, we notice that it consistently tops out at roughly 90% accuracy.

Let's try to improve this first model with the following:
    - Validation
    - Higher Regularization
    - Random Weight Initialization
    - Hyperparameter Tuning

In [44]:
#Setting new drop out regularization term
drop_out = 0.7

In [45]:
model = create_simple_model()

In [46]:
history = fit_model(100, 100, 0, .2)

In [47]:
plot_scores(history, True)

In [48]:
# This run was fit with a validation split, so show the validation loss too
# (matching the accuracy plot for the same run).
plot_loss(history, True)

In [49]:
score = model.evaluate(X_test, y_test, batch_size=100)



In [50]:
print(model.metrics_names)
score

['loss', 'acc']


[0.29451234053324366, 0.8957948912124304]

Evaluating Model with Stratified K Cross Validation

In [51]:
from keras.wrappers.scikit_learn import KerasClassifier

In [58]:
def create_base_model():
    """Build and compile the baseline network used for cross-validation.

    A single 72-unit ReLU hidden layer on 72 input features, followed by a
    sigmoid output unit; compiled with binary cross-entropy, the adam
    optimizer and the accuracy metric.
    """
    network = Sequential([
        Dense(72, input_dim=72, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

In [63]:
estimator = KerasClassifier(build_fn=create_base_model, epochs=100, batch_size=100, verbose=0)

In [64]:
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=4)

In [65]:
results = cross_val_score(estimator, X_train, y_train, cv=skf) # NOTE(review): no n_jobs is passed, so parallel cores are not requested here
# Report mean and standard deviation of the per-fold scores as percentages.
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Results: 89.86% (0.35%)


Now trying again with earlier simple model function, which actually produces a more complex neural network than the base model function

In [76]:
estimator = KerasClassifier(build_fn=create_simple_model, epochs=100, batch_size=100, verbose=0)

In [77]:
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=4)

In [None]:
results = cross_val_score(estimator, X_train, y_train, cv=skf) # NOTE(review): no n_jobs is passed, so parallel cores are not requested here
# Report mean and standard deviation of the per-fold scores as percentages.
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

New Round of CV using more complex network architecture:

In [None]:
drop_out = 0.6

In [None]:
def create_complex_model(dropout_rate=None):
    """Build and compile the deeper network used in the second CV round.

    Layer stack on 72 input features: Dense(72) -> Dropout -> Dense(24) ->
    Dense(72) -> Dropout -> Dense(24) -> Dense(1, sigmoid). Hidden layers use
    ReLU.

    Parameters
    ----------
    dropout_rate : float, optional
        Dropout rate for both dropout layers. When omitted, the notebook-level
        ``drop_out`` variable is read at call time, which keeps existing
        zero-argument calls working unchanged.

    Returns
    -------
    The compiled ``Sequential`` model (binary cross-entropy loss, adam
    optimizer, accuracy metric).
    """
    # Prefer the explicit argument; fall back to the notebook global so the
    # function no longer *requires* hidden state to be set beforehand.
    rate = drop_out if dropout_rate is None else dropout_rate

    model = Sequential()
    model.add(Dense(72, input_dim=72, activation='relu'))
    model.add(Dropout(rate))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(72, activation='relu'))
    model.add(Dropout(rate))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [None]:
estimator = KerasClassifier(build_fn=create_complex_model, epochs=100, batch_size=100, verbose=0)

In [None]:
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=4)

In [None]:
results = cross_val_score(estimator, X_train, y_train, cv=skf) # NOTE(review): no n_jobs is passed, so parallel cores are not requested here
# Report mean and standard deviation of the per-fold scores as percentages.
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Testing Model with Hyperparameter Tuning

In [None]:
#creating a new model function using the network architecture 
#that seemed to do best in the cross validation trials above
def create_new_model(optimizer='adam', init='glorot_uniform'):
    """Build and compile the network used for hyperparameter tuning.

    A single 72-unit ReLU hidden layer on 72 input features, followed by a
    sigmoid output unit.

    Parameters
    ----------
    optimizer : str
        Optimizer name passed to ``model.compile``.
    init : str
        Kernel initializer name applied to both Dense layers.

    Both arguments exist so a parameter grid containing ``optimizer`` and
    ``init`` keys can be routed to this build function; without them the
    scikit-learn wrapper rejects those keys as illegal parameters. The
    defaults reproduce the previous zero-argument behaviour.

    Returns
    -------
    The compiled ``Sequential`` model (binary cross-entropy loss, accuracy
    metric).
    """
    #TBD
    model = Sequential()
    model.add(Dense(72, input_dim=72, activation='relu', kernel_initializer=init))
    model.add(Dense(1, activation='sigmoid', kernel_initializer=init))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
estimator = KerasClassifier(build_fn=create_new_model, verbose=0)

In [None]:
optimizers = ['rmsprop', 'adam']
inits = ['glorot_uniform', 'normal', 'uniform']
epochs = [300, 500, 700]
batches = [50,100,500]

In [None]:
# Hyperparameter grid: every combination of optimizer, epochs, batch size and
# initializer is tried.
# NOTE(review): 'optimizer' and 'init' are not fit arguments, so the build
# function given to the estimator must accept parameters with those names - confirm.
param_grid = dict(optimizer=optimizers, epochs=epochs, batch_size=batches, init=inits)
# n_jobs=-1 requests all available cores for the search.
gs = GridSearchCV(estimator=estimator, param_grid=param_grid, n_jobs=-1)

In [None]:
grid_result = gs.fit(X_train, y_train)

In [None]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
# One line per parameter combination: mean score, its standard deviation
# across folds, and the parameters that produced it.
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))