Data uploading

In [1]:
# Ask for the training and test CSV files through the Colab upload widget.
# Each result is later indexed by filename to obtain the raw file bytes.
from google.colab import files
train_uploaded = files.upload()
test_uploaded = files.upload()

Saving train.csv to train.csv


Saving test.csv to test.csv


Library loading

In [2]:
import collections
import sys
import os
import io
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display
%matplotlib inline

from plotly.offline import init_notebook_mode, iplot
import plotly.offline as py
import plotly.figure_factory as ff
import plotly.express as px
import plotly.graph_objs as go
import plotly.tools as tls
py.init_notebook_mode(connected=True)

import warnings
warnings.filterwarnings('ignore')

Plotly Code

In [3]:
def configure_plotly_browser_state():
  """Emit the RequireJS bootstrap that lets Plotly figures render in this cell.

  Displays an HTML snippet that loads RequireJS and maps the `plotly` module
  name to the Plotly CDN bundle. It is called at the top of each plotting cell.
  """
  import IPython

  loader_html = '''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-latest.min.js?noext',
            },
          });
        </script>
        '''
  snippet = IPython.core.display.HTML(loader_html)
  display(snippet)

In [12]:
# Decode the uploaded byte payloads into DataFrames (training and test sets).
df = pd.read_csv(io.BytesIO(train_uploaded['train.csv']))
df_test = pd.read_csv(io.BytesIO(test_uploaded['test.csv']))
# Snapshot of the training column labels before any preprocessing.
df_name = df.columns
# Summarise dtypes and non-null counts to spot columns with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


### Data preprocessing

1. Check for null values

In [13]:
# Number of missing values in each column of the training set.
df.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

Age, Cabin and Embarked contain null values in the training set. Let's explore the data distribution further.

In [14]:
# Distribution plot of the known ages (missing values are dropped first),
# used to decide how the gaps in Age should be imputed.
configure_plotly_browser_state()
hist_data = [df['Age'].dropna()]
group_labels = ['Age']
colors = ['rgb(0, 200, 200)']

# bin_size=5 groups ages into 5-year histogram bins.
fig = ff.create_distplot(hist_data, group_labels, bin_size=5, colors=colors)
fig.update_layout(title_text='Distplot of Age', template='plotly_white')
iplot(fig, filename='Distplot')

The age data is right-skewed, so imputing with the mean could bias the results by filling in ages that are older than desired. The median is a more robust choice here.

In [15]:
from sklearn.impute import SimpleImputer

# Replace missing ages with the median age.
# The imputer works on a 2-D (n_samples, 1) array, so the result is flattened
# back to 1-D before it overwrites the column. The previous
# `df.drop('Age', axis=1)` call discarded its result and has been removed.
imputer = SimpleImputer(strategy='median')
age = imputer.fit_transform(df['Age'].values.reshape(-1, 1)).ravel()
df['Age'] = age

In [16]:
# Share of rows without a Cabin value, expressed as a percentage.
cabin_percentage = df['Cabin'].isnull().mean() * 100
print('Percent of missing Cabin records is %.2f%%' % cabin_percentage)

Percent of missing Cabin records is 77.10%


Since more than 70% of the Cabin values are missing, it is wiser to drop the entire column.

In [17]:
# Remove the Cabin column; most of its values are missing.
df = df.drop('Cabin', axis=1)

Since there are only two missing values in Embarked, it is reasonable to impute them with the most common value.

In [19]:
# Fill the missing Embarked entries with the most frequent port.
# The imputer returns a 2-D (n_samples, 1) array, so it is flattened before
# being written back. The previous `df.drop('Embarked', axis=1)` call
# discarded its result and has been removed.
imputer = SimpleImputer(strategy='most_frequent')
embarked = imputer.fit_transform(df['Embarked'].values.reshape(-1, 1)).ravel()
df['Embarked'] = embarked

2. Encode object values

In [20]:
from sklearn.preprocessing import LabelEncoder

In [21]:
# List the remaining object-dtype columns.
df.select_dtypes('object').columns

Index(['Name', 'Sex', 'Ticket', 'Embarked'], dtype='object')

In [22]:
# Replace the Sex strings with integer class labels.
# The previous `df.drop('Sex', axis=1)` call discarded its result (a no-op
# that copied the frame), so the column is simply overwritten.
label_encoder = LabelEncoder()
Sex_encoded = label_encoder.fit_transform(df['Sex'].values)
df['Sex'] = Sex_encoded

In [23]:
# Replace the Embarked port codes with integer class labels.
# The previous `df.drop('Embarked', axis=1)` call discarded its result (a
# no-op that copied the frame), so the column is simply overwritten.
label_encoder = LabelEncoder()
embarked_encoded = label_encoder.fit_transform(df['Embarked'].values)
df['Embarked'] = embarked_encoded

3. Remove duplicated values

In [None]:
# Count fully duplicated rows.
df.duplicated().sum()

0

4. Additional Variable

According to the Kaggle data dictionary, both SibSp and Parch relate to traveling with family. For simplicity's sake and to account for possible multicollinearity, combining the effect of these variables into one categorical predictor would be better: whether or not that individual was traveling alone.

In [25]:
# 1 when the passenger has no siblings/spouses/parents/children aboard, else 0.
df['TravelAlone'] = np.where((df['SibSp'] + df['Parch']) > 0, 0, 1)
# Drop both source columns in one non-mutating call instead of two
# `inplace=True` drops.
df = df.drop(columns=['SibSp', 'Parch'])

Title exploratory

In [26]:
def Barplot(df, feature):
  """Show a Plotly bar chart of the `Survived` column against `feature`.

  Args:
    df: DataFrame containing `feature` and a `Survived` column.
    feature: Name of the column to put on the x-axis.
  """
  configure_plotly_browser_state()

  bars = go.Bar(x=df[feature], y=df['Survived'])
  figure_layout = go.Layout(
      title='Survivability by {}'.format(feature),
      template='plotly_white',
  )

  go.Figure(data=[bars], layout=figure_layout).show()

In [27]:
import re

# Extract the honorific that precedes the first '.' in each name.
# The pattern is a raw string so that `\.` reaches the regex engine as an
# escaped dot instead of being an invalid Python string escape.
df['Title'] = df.Name.apply(lambda x: re.search(r' ([A-Z][a-z]+)\.', x).group(1))
Barplot(df, 'Title')

It seems that we can apply some social status to this data.

In [28]:
# Raw titles grouped by the coarser status they are collapsed into.
_title_groups = {
    'Officer': ['Capt', 'Col', 'Major', 'Dr', 'Rev'],
    'Royalty': ['Jonkheer', 'Don', 'Sir', 'Countess', 'Dona', 'Lady'],
    'Mrs': ['Mme', 'Ms', 'Mrs'],
    'Miss': ['Mlle', 'Miss'],
    'Mr': ['Mr'],
    'Master': ['Master'],
}

# Flatten to the raw-title -> group lookup used by `Series.map`.
Title_Dictionary = {
    raw_title: group
    for group, raw_titles in _title_groups.items()
    for raw_title in raw_titles
}

# Titles missing from the lookup become NaN.
df['Title'] = df.Title.map(Title_Dictionary)

In [29]:
# Replace the grouped titles with integer class labels.
# The previous `df.drop('Title', axis=1)` call discarded its result (a no-op
# that copied the frame), so the column is simply overwritten.
label_encoder = LabelEncoder()
title_encoded = label_encoder.fit_transform(df['Title'].values)
df['Title'] = title_encoded

Age exploratory

In [None]:
# Compare the age distributions of survivors and non-survivors.
configure_plotly_browser_state()

# One series per outcome; the order matches `group_labels` and `colors`.
hist_data = [df['Age'][df['Survived'] == 1], df['Age'][df['Survived'] == 0]]
group_labels = ['Survived', 'Not Survived']
colors = ['rgb(0, 0, 100)', 'rgb(0, 200, 200)']

fig = ff.create_distplot(hist_data, group_labels, bin_size=5, colors=colors)
fig.update_layout(title_text='Distplot of Age', template='plotly_white')
iplot(fig, filename='Distplot')

We can provide a category for age and group them accordingly.

In [30]:
# Bin edges for the age groups; there is one more edge than there are labels.
interval = (0, 5, 12, 18, 65, 120)
category = ['Babies', 'Children', 'Teenager', 'Adult', 'Senior']
# Bucket each age into its labelled group.
df['Category'] = pd.cut(df.Age, interval, labels=category)

In [31]:
# Replace the age-group labels with integer class labels.
# The previous `df.drop('Category', axis=1)` call discarded its result (a
# no-op that copied the frame), so the column is simply overwritten.
# NOTE(review): LabelEncoder numbers classes in sorted label order, so the
# codes do not follow the age ordering of the groups - confirm this is acceptable.
label_encoder = LabelEncoder()
category_encoded = label_encoder.fit_transform(df['Category'].values)
df['Category'] = category_encoded

Fare exploratory

In [32]:
# Bin edges for the fare groups; the first bin (-1, 0] is labelled 'NoInf'.
interval = (-1, 0, 8, 15, 31, 600)
category = ['NoInf', 'Quarter 1', 'Quarter 2', 'Quarter 3', 'Quarter 4']
# Bucket each fare into its labelled group.
df['Quarter'] = pd.cut(df.Fare, interval, labels=category)

In [33]:
# Replace the fare-group labels with integer class labels.
# The previous `df.drop('Quarter', axis=1)` call discarded its result (a
# no-op that copied the frame), so the column is simply overwritten.
label_encoder = LabelEncoder()
quarter_encoded = label_encoder.fit_transform(df['Quarter'].values)
df['Quarter'] = quarter_encoded

Now we can eliminate some of the features to reduce the dimensionality of the data. PassengerId and Name are unique per passenger, so we can remove them. Ticket is dropped as well, since it carries no obvious signal for fitting the model. We can also remove Age and Fare, as we have already categorized them.

In [None]:
# Drop the identifier columns and the raw Age/Fare columns (already binned)
# in a single call instead of five chained drops, each of which copied the frame.
df = df.drop(columns=['PassengerId', 'Name', 'Ticket', 'Age', 'Fare'])

### Data training

In [35]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.pipeline import Pipeline

from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

In [36]:
# Target vector and feature matrix.
y = df['Survived']
X = df.drop(columns='Survived')

# Hold out 25% of the rows, keeping the class balance of the target.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0, stratify=y
)

Let's start off with the basic parameter of neural network first.

In [None]:
def CreateModel():
  """Build and compile the untuned baseline network.

  Architecture: 7 input features -> Dense(12, relu) -> Dense(1, sigmoid),
  compiled with binary cross-entropy loss, the Adam optimizer and accuracy
  as the tracked metric.

  Returns:
    The compiled Keras Sequential model.
  """
  model = Sequential()
  model.add(Dense(12, input_dim=7, activation='relu'))
  model.add(Dense(1, activation='sigmoid'))
  model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

  return model

In [37]:
def ScoreDataFrame(names, results):
  """Return a one-row DataFrame holding a model label and its rounded score.

  Args:
    names: Label stored in the `Model` column.
    results: A single score or an array-like of scores; the mean is reported.

  Returns:
    DataFrame with columns `Model` and `Score` (mean rounded to 4 decimals).
  """
  def floatingDecimals(f_val, dec=3):
    prc = '{:.' + str(dec) + 'f}'
    return float(prc.format(f_val))

  # Use the `results` argument; the body previously read a global named
  # `result`, so the value passed by the caller was silently ignored.
  score = floatingDecimals(np.mean(results), 4)
  scoreDataFrame = pd.DataFrame({'Model': [names], 'Score': [score]})

  return scoreDataFrame

In [38]:
def BasedLine(X_train, y_train, model, name='KerasNN'):
  """Cross-validate `model` with 10-fold stratified CV and report accuracy.

  Args:
    X_train: Training features.
    y_train: Training labels.
    model: Estimator or pipeline accepted by `cross_val_score`.
    name: Label used in the printed summary and returned to the caller.
      It used to be read from an undefined global of the same name.

  Returns:
    Tuple `(name, result)` where `result` is the mean CV accuracy.
  """
  num_folds = 10
  scoring = 'accuracy'

  # `random_state` only has an effect when shuffling is enabled; recent
  # scikit-learn releases raise ValueError if it is set with shuffle=False.
  kfold = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)
  cv_results = cross_val_score(model, X_train, y_train, cv=kfold, scoring=scoring, n_jobs=-1)
  result = cv_results.mean()
  message = '%s: %f (%f)' % (name, result, cv_results.std())
  print(message)

  return name, result

In [None]:
clf = KerasClassifier(build_fn=CreateModel, verbose=0)
classifier = Pipeline(steps=[('Keras', clf)])

# Set the label before the baseline run so it is already defined when the
# cross-validation summary is reported (it was previously assigned afterwards).
name = 'KerasNN'
name, result = BasedLine(X_train, y_train, classifier)

KerasNN: 0.556988 (0.135359)


In [None]:
# Wrap the baseline cross-validation score in a one-row table and display it.
basedLineScore = ScoreDataFrame(name, result)
basedLineScore

Unnamed: 0,Model,Score
0,KerasNN,0.557


Data standardisation

In [None]:
def CreatePipeline(nameOfScaler, clf):
  """Chain a feature scaler and a classifier into a scikit-learn Pipeline.

  Args:
    nameOfScaler: 'standard' selects StandardScaler; any other value selects
      MinMaxScaler. It also prefixes the classifier step name.
    clf: The estimator placed after the scaler.

  Returns:
    Pipeline with steps 'Scaler' and '<nameOfScaler>Keras'.
  """
  if nameOfScaler == 'standard':
    scaler = StandardScaler()
  else:
    scaler = MinMaxScaler()

  steps = [('Scaler', scaler), (nameOfScaler + 'Keras', clf)]
  return Pipeline(steps=steps)

In [None]:
# Cross-validate the network behind a StandardScaler step.
clf = KerasClassifier(build_fn=CreateModel, verbose=0)
classifier = CreatePipeline('standard', clf)
result = BasedLine(X_train, y_train, classifier)[1]
nameStandard = 'standardKerasNN'

KerasNN: 0.591022 (0.149215)


In [None]:
# Pass `result` (the score returned by the preceding BasedLine call); the
# name `results` is never defined in this notebook.
scaledScoreStandard = ScoreDataFrame(nameStandard, result)
compareModels = pd.concat([basedLineScore, scaledScoreStandard], axis=0)
compareModels

Unnamed: 0,Model,Score
0,KerasNN,0.557
0,standardKerasNN,0.591


In [None]:
# Cross-validate the network behind a MinMaxScaler step.
clf = KerasClassifier(build_fn=CreateModel, verbose=0)
classifier = CreatePipeline('MinMax', clf)
result = BasedLine(X_train, y_train, classifier)[1]
nameMinMax = 'MinMaxKerasNN'

KerasNN: 0.613569 (0.099737)


In [None]:
# Pass `result` (the score returned by the preceding BasedLine call); the
# name `results` is never defined in this notebook.
scaledScoreMinMax = ScoreDataFrame(nameMinMax, result)
compareModels = pd.concat([basedLineScore, scaledScoreStandard, scaledScoreMinMax], axis=0)
compareModels

Unnamed: 0,Model,Score
0,KerasNN,0.557
0,standardKerasNN,0.591
0,MinMaxKerasNN,0.6136


Hyperparameter tuning

In [43]:
def LayerNodes(n_layers, first_layer_nodes, last_layer_nodes):
  """Return a linear schedule of layer widths from the first to the last layer.

  Args:
    n_layers: Number of widths to generate.
    first_layer_nodes: Width of the first layer.
    last_layer_nodes: Width of the last layer.

  Returns:
    A list of `n_layers` integers, each rounded up with `math.ceil`. The list
    is empty when `n_layers` is below 1 and holds only the first width when
    `n_layers` is 1.
  """
  layers = []

  # With a single layer there is no step to take; guarding this avoids the
  # division by zero that `n_layers - 1` would otherwise cause.
  if n_layers > 1:
    nodes_increment = (last_layer_nodes - first_layer_nodes) / (n_layers - 1)
  else:
    nodes_increment = 0

  nodes = first_layer_nodes
  for _ in range(n_layers):
    layers.append(math.ceil(nodes))
    nodes = nodes + nodes_increment

  return layers

1. Tune the number of layers and neurons in each layer alongside with the activation function and loss

In [39]:
import math
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

from keras.optimizers import Nadam
from keras.constraints import maxnorm
from keras.layers import Dropout

In [53]:
def CreateModel(n_layers, first_layer_nodes, last_layer_nodes, activation, loss, final_activation):
  """Build and compile a dense binary classifier for the architecture search.

  Layer widths come from LayerNodes(n_layers, first_layer_nodes,
  last_layer_nodes). The first `n_layers - 1` of them are stacked as hidden
  layers using `activation`, followed by a single-unit output layer using
  `final_activation`. The network expects 7 input features and is compiled
  with the Adam optimizer, the given `loss` and accuracy as the metric.

  NOTE(review): the last entry of the width schedule (`last_layer_nodes`)
  is never added as a layer - confirm that this is intended.
  """
  n_features = 7
  network = Sequential()
  widths = LayerNodes(n_layers, first_layer_nodes, last_layer_nodes)

  for position, width in enumerate(widths[:n_layers - 1]):
    if position == 0:
      # Only the first layer declares the input dimension.
      network.add(Dense(first_layer_nodes, input_dim=n_features, activation=activation))
    else:
      network.add(Dense(width, activation=activation))
  network.add(Dense(1, activation=final_activation))

  network.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
  return network

In [55]:
# Candidate values for the architecture / activation / loss search.
# NOTE(review): 'softmax' on a single output unit always yields 1.0 - confirm
# that it belongs in this grid.
activation = ['elu', 'relu', 'selu', 'tanh']
final_activation = ['sigmoid', 'softmax']
loss = ['binary_crossentropy', 'hinge']
n_layers = [5]
first_layer_nodes = [16, 32, 64]
last_layer_nodes = [4, 8]
epochs = [20, 40, 60, 80, 100, 120]

param_grid = {
    'activation': activation,
    'loss': loss,
    'n_layers': n_layers,
    'first_layer_nodes': first_layer_nodes,
    'last_layer_nodes': last_layer_nodes,
    'batch_size': [100],
    'epochs': epochs,
    'final_activation': final_activation,
}

# Exhaustive 3-fold search over every combination in the grid.
model = KerasClassifier(build_fn=CreateModel, verbose=False)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid.fit(X_train, y_train)

print(f'Best Score: {grid.best_score_}')
print(f'Best Parameters: {grid.best_params_}')

Best Score: 0.8158270319302877
Best Parameters: {'activation': 'selu', 'batch_size': 100, 'epochs': 60, 'final_activation': 'sigmoid', 'first_layer_nodes': 32, 'last_layer_nodes': 4, 'loss': 'binary_crossentropy', 'n_layers': 5}


Let us take the best parameters and estimate other parameters.

2. Tune optimization algorithm

In [61]:
def CreateModel(optimizer):
  """Build the SELU network (widths stepping from 32 towards 4) for the optimizer search.

  The width schedule is LayerNodes(5, 32, 4); its first four entries become
  hidden layers, followed by a single sigmoid output unit. The model expects
  7 input features and is compiled with `optimizer`, binary cross-entropy
  loss and accuracy as the metric.
  """
  depth, widest, narrowest = 5, 32, 4
  hidden_activation = 'selu'
  n_features = 7

  net = Sequential()
  widths = LayerNodes(depth, widest, narrowest)

  for position, width in enumerate(widths[:depth - 1]):
    if position == 0:
      # Only the first layer declares the input dimension.
      net.add(Dense(widest, input_dim=n_features, activation=hidden_activation))
    else:
      net.add(Dense(width, activation=hidden_activation))
  net.add(Dense(1, activation='sigmoid'))

  net.compile(optimizer, loss='binary_crossentropy', metrics=['accuracy'])
  return net

In [66]:
# Optimizers to compare, with batch size and epoch count held fixed.
optimizer = ['Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = {'optimizer': optimizer}

model = KerasClassifier(build_fn=CreateModel, batch_size=100, epochs=100, verbose=False)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid.fit(X_train, y_train)

print(f'Best Score: {grid.best_score_}')
print(f'Best Parameters: {grid.best_params_}')

Best Score: 0.8053367336591085
Best Parameters: {'optimizer': 'Adam'}


The grid search selects Adam as the best optimizer (score ≈ 0.805), which is slightly lower than the best score of the previous step (≈ 0.816).

3. Tune learning rate of optimization algorithm

In [None]:
def CreateModel(learning_rate, beta_1, beta_2):
  """Build the ReLU network (widths stepping from 64 towards 4) for the Nadam search.

  The width schedule is LayerNodes(4, 64, 4); its first three entries become
  hidden layers, followed by a single sigmoid output unit. The model expects
  7 input features and is compiled with a Nadam optimizer configured from
  `learning_rate`, `beta_1` and `beta_2`.
  """
  depth, widest, narrowest = 4, 64, 4
  hidden_activation = 'relu'
  n_features = 7

  net = Sequential()
  widths = LayerNodes(depth, widest, narrowest)

  for position, width in enumerate(widths[:depth - 1]):
    if position == 0:
      # Only the first layer declares the input dimension.
      net.add(Dense(widest, input_dim=n_features, activation=hidden_activation))
    else:
      net.add(Dense(width, activation=hidden_activation))
  net.add(Dense(1, activation='sigmoid'))

  nadam = Nadam(learning_rate=learning_rate, beta_1=beta_1, beta_2=beta_2)
  net.compile(optimizer=nadam, loss='binary_crossentropy', metrics=['accuracy'])
  return net

In [None]:
# Candidate Nadam settings; every combination is evaluated with 3-fold CV.
learning_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
beta_1 = [0.4, 0.6, 0.8, 0.9, 0.95]
beta_2 = [0.4, 0.6, 0.8, 0.9, 0.95]
param_grid = {
    'learning_rate': learning_rate,
    'beta_1': beta_1,
    'beta_2': beta_2,
}

model = KerasClassifier(build_fn=CreateModel, batch_size=100, epochs=60, verbose=False)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid.fit(X_train, y_train)

print(f'Best Score: {grid.best_score_}')
print(f'Best Parameters: {grid.best_params_}')

Best Score: 0.8038554191589355
Best Parameters: {'beta_1': 0.4, 'beta_2': 0.95, 'learning_rate': 0.01}


3. Tune network weight initialization

In [None]:
def CreateModel(kernel_initializer):
  """Build the ReLU network for the weight-initialisation search.

  The width schedule is LayerNodes(4, 64, 4); its first three entries become
  hidden layers initialised with `kernel_initializer`, followed by a single
  sigmoid output unit. The model expects 7 input features and is compiled
  with Nadam(learning_rate=0.01, beta_1=0.4, beta_2=0.95).
  """
  depth, widest, narrowest = 4, 64, 4
  hidden_activation = 'relu'
  n_features = 7

  widths = LayerNodes(depth, widest, narrowest)
  net = Sequential()

  for position, width in enumerate(widths[:depth - 1]):
    if position == 0:
      # Only the first layer declares the input dimension.
      net.add(Dense(widest, input_dim=n_features, activation=hidden_activation, kernel_initializer=kernel_initializer))
    else:
      net.add(Dense(width, activation=hidden_activation, kernel_initializer=kernel_initializer))
  net.add(Dense(1, activation='sigmoid'))

  nadam = Nadam(learning_rate=0.01, beta_1=0.4, beta_2=0.95)
  net.compile(optimizer=nadam, loss='binary_crossentropy', metrics=['accuracy'])
  return net

In [None]:
# Weight initialisers to compare with 3-fold CV.
kernel_initializer = [
    'uniform', 'lecun_uniform', 'normal', 'zero',
    'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform',
]
param_grid = {'kernel_initializer': kernel_initializer}

model = KerasClassifier(build_fn=CreateModel, batch_size=100, epochs=60, verbose=False)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid.fit(X_train, y_train)

print(f'Best Score: {grid.best_score_}')
print(f'Best Parameters: {grid.best_params_}')

Best Score: 0.8113562067349752
Best Parameters: {'kernel_initializer': 'glorot_normal'}


4. Tune dropout regularization

In [None]:
def CreateModel(dropout_rate, weight_constraint):
  """Build the ReLU network for the dropout / max-norm search.

  The width schedule is LayerNodes(4, 64, 4); its first three entries become
  hidden layers. Each hidden layer uses glorot_normal initialisation and a
  max-norm constraint of `weight_constraint`, and is followed by
  Dropout(dropout_rate). The output is a single sigmoid unit. The model
  expects 7 input features and is compiled with
  Nadam(learning_rate=0.01, beta_1=0.4, beta_2=0.95).
  """
  depth, widest, narrowest = 4, 64, 4
  hidden_activation = 'relu'
  initializer = 'glorot_normal'
  n_features = 7

  widths = LayerNodes(depth, widest, narrowest)
  net = Sequential()

  for position, width in enumerate(widths[:depth - 1]):
    if position == 0:
      # Only the first layer declares the input dimension.
      dense = Dense(widest, input_dim=n_features, activation=hidden_activation, kernel_initializer=initializer, kernel_constraint=maxnorm(weight_constraint))
    else:
      dense = Dense(width, activation=hidden_activation, kernel_initializer=initializer, kernel_constraint=maxnorm(weight_constraint))
    net.add(dense)
    net.add(Dropout(dropout_rate))
  net.add(Dense(1, activation='sigmoid'))

  nadam = Nadam(learning_rate=0.01, beta_1=0.4, beta_2=0.95)
  net.compile(optimizer=nadam, loss='binary_crossentropy', metrics=['accuracy'])
  return net

In [None]:
# Max-norm limits and dropout rates to compare with 3-fold CV.
weight_constraint = [1, 2, 3, 4, 5]
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
param_grid = {
    'weight_constraint': weight_constraint,
    'dropout_rate': dropout_rate,
}

model = KerasClassifier(build_fn=CreateModel, batch_size=100, epochs=60, verbose=False)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid.fit(X_train, y_train)

print(f'Best Score: {grid.best_score_}')
print(f'Best Parameters: {grid.best_params_}')

Best Score: 0.8173285325368246
Best Parameters: {'dropout_rate': 0.2, 'weight_constraint': 4}


Fitting the model

In [None]:
def CreateModel():
  """Build the final network with the hyperparameters fixed in this cell.

  The width schedule is LayerNodes(4, 64, 4); its first three entries become
  ReLU hidden layers with glorot_normal initialisation and a max-norm
  constraint of 4, each followed by Dropout(0.2). The output is a single
  sigmoid unit. The model expects 7 input features and is compiled with
  Nadam(learning_rate=0.01, beta_1=0.4, beta_2=0.95), binary cross-entropy
  loss and accuracy as the metric.
  """
  depth, widest, narrowest = 4, 64, 4
  hidden_activation = 'relu'
  initializer = 'glorot_normal'
  drop_fraction = 0.2
  max_weight_norm = 4
  n_features = 7

  widths = LayerNodes(depth, widest, narrowest)
  net = Sequential()

  for position, width in enumerate(widths[:depth - 1]):
    if position == 0:
      # Only the first layer declares the input dimension.
      dense = Dense(widest, input_dim=n_features, activation=hidden_activation, kernel_initializer=initializer, kernel_constraint=maxnorm(max_weight_norm))
    else:
      dense = Dense(width, activation=hidden_activation, kernel_initializer=initializer, kernel_constraint=maxnorm(max_weight_norm))
    net.add(dense)
    net.add(Dropout(drop_fraction))
  net.add(Dense(1, activation='sigmoid'))

  nadam = Nadam(learning_rate=0.01, beta_1=0.4, beta_2=0.95)
  net.compile(optimizer=nadam, loss='binary_crossentropy', metrics=['accuracy'])
  return net

In [None]:
# Train the final network on the training split, then report loss and
# accuracy on the held-out test split.
model = CreateModel()
history = model.fit(X_train, y_train, batch_size=100, epochs=60, verbose=0)
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {accuracy}')

Loss: 0.45272117853164673
Accuracy: 0.8161435127258301


Data visualization

In [None]:
# One x-axis value per recorded epoch. The length is taken from the recorded
# loss values rather than a hard-coded 60, so the plots still line up when
# training runs for a different number of epochs (e.g. with early stopping).
epoch = list(range(len(history.history['loss'])))
history.history['epoch'] = epoch

In [None]:
# Training loss per epoch.
configure_plotly_browser_state()
fig = px.line(history.history, x='epoch', y='loss', title='Model Loss')
fig.update_layout(template='plotly_white')
fig.show()

In [None]:
# Training accuracy per epoch.
configure_plotly_browser_state()
fig = px.line(history.history, x='epoch', y='accuracy', title='Accuracy')
fig.update_layout(template='plotly_white')
fig.show()

In [None]:
# Combined view: accuracy per epoch as bars, overlaid with markers whose
# colour encodes the loss at the same epoch.
configure_plotly_browser_state()
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x = history.history['epoch'],
        y = history.history['accuracy'],
        mode = 'markers',
        opacity = 0.75,
        name = 'Scatter',
        marker = dict(
            # Marker colour follows the loss value of each epoch.
            color = history.history['loss'],
            size = 20,
            colorscale = 'blues',
            reversescale = True,
        )
    )
)

fig.add_trace(
    go.Bar(
        x = history.history['epoch'],
        y = history.history['accuracy'],
        name = 'Bar',
    )
)

fig.update_layout(template='plotly_white')
fig.show()

From the graph, it seems that the model reaches a lower loss and a higher accuracy as training progresses. We could try to utilize early stopping in this scenario.

In [40]:
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model

In [41]:
def CreateModel():
  """Build the final network used for the early-stopping run.

  NOTE(review): this definition repeats the zero-argument CreateModel
  defined earlier in the notebook and shadows it.

  The width schedule is LayerNodes(4, 64, 4); its first three entries become
  ReLU hidden layers with glorot_normal initialisation and a max-norm
  constraint of 4, each followed by Dropout(0.2). The output is a single
  sigmoid unit. The model expects 7 input features and is compiled with
  Nadam(learning_rate=0.01, beta_1=0.4, beta_2=0.95), binary cross-entropy
  loss and accuracy as the metric.
  """
  depth = 4
  widths = LayerNodes(depth, 64, 4)
  net = Sequential()

  def hidden_layer(units, **extra):
    # Every hidden layer shares activation and initialiser, and gets its own
    # max-norm constraint instance.
    return Dense(units, activation='relu', kernel_initializer='glorot_normal', kernel_constraint=maxnorm(4), **extra)

  for position, width in enumerate(widths[:depth - 1]):
    if position == 0:
      # Only the first layer declares the input dimension.
      net.add(hidden_layer(64, input_dim=7))
    else:
      net.add(hidden_layer(width))
    net.add(Dropout(0.2))
  net.add(Dense(1, activation='sigmoid'))

  nadam = Nadam(learning_rate=0.01, beta_1=0.4, beta_2=0.95)
  net.compile(optimizer=nadam, loss='binary_crossentropy', metrics=['accuracy'])
  return net

In [52]:
model = CreateModel()
# Both callbacks watch the training accuracy: stop after 30 epochs without
# improvement, and keep only the best-scoring weights on disk.
# NOTE(review): no validation data is passed to fit(), so the monitored
# metric is the training accuracy - confirm this is intended.
early_stopping = EarlyStopping(monitor='accuracy', mode='max', verbose=1, patience=30)
model_checkpoint = ModelCheckpoint('best_model.h5', monitor='accuracy', mode='max', verbose=1, save_best_only=True)

history = model.fit(X_train, y_train, batch_size=100, epochs=60, verbose=1, callbacks=[early_stopping, model_checkpoint])
saved_model = load_model('best_model.h5')
loss, accuracy = saved_model.evaluate(X_test, y_test)
# Evaluate on the training split so the value printed as "Train" really is
# the training metric (it was previously computed on the test split again).
train = saved_model.evaluate(X_train, y_train)
print(f'Loss: {loss}')
print(f'Accuracy: {accuracy}')
print(f'Train: {train}')

Epoch 1/60

Epoch 00001: accuracy improved from -inf to 0.63473, saving model to best_model.h5
Epoch 2/60

Epoch 00002: accuracy improved from 0.63473 to 0.73204, saving model to best_model.h5
Epoch 3/60

Epoch 00003: accuracy did not improve from 0.73204
Epoch 4/60

Epoch 00004: accuracy improved from 0.73204 to 0.76347, saving model to best_model.h5
Epoch 5/60

Epoch 00005: accuracy did not improve from 0.76347
Epoch 6/60

Epoch 00006: accuracy improved from 0.76347 to 0.76946, saving model to best_model.h5
Epoch 7/60

Epoch 00007: accuracy improved from 0.76946 to 0.77994, saving model to best_model.h5
Epoch 8/60

Epoch 00008: accuracy did not improve from 0.77994
Epoch 9/60

Epoch 00009: accuracy did not improve from 0.77994
Epoch 10/60

Epoch 00010: accuracy did not improve from 0.77994
Epoch 11/60

Epoch 00011: accuracy improved from 0.77994 to 0.79641, saving model to best_model.h5
Epoch 12/60

Epoch 00012: accuracy did not improve from 0.79641
Epoch 13/60

Epoch 00013: accuracy