# Shallow Neural Network (Multi-Layer Perceptron)

In [1]:
# You need keras for this notebook.
# for windows OS
!py -m pip install keras
# for unix OS
#!pip install keras



You are using pip version 9.0.1, however version 19.0.3 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' command.


In [107]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Input, Dropout
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, StratifiedKFold
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import keras
import pickle
import numpy as np
import pandas as pd

init_notebook_mode(connected=True)
np.random.seed(42)

## Import data sources
We import crafted features from previous preprocessing steps.

In [3]:
# import data
X = np.loadtxt('./data/feature_extraction.csv',delimiter=',')
y_ = pd.read_csv('./data/y_train.csv').values # remove headers
print('X shape:',X.shape,type(X))
print('y shape:',y_.shape,type(y_))

m = X.shape[0]

X shape: (3810, 81) <class 'numpy.ndarray'>
y shape: (3810, 3) <class 'numpy.ndarray'>


## Normalise features for better training outcomes

In [None]:
scaler = StandardScaler()
X = scalar.fit_transform(X) #scales each feature column

## OHE of y labels
While it is necessary to do one-hot encoding of labels, the tools we use here do not require OHE. StratifiedKFold directly uses the class number without requiring one-hot encoding. Similarly by using sparse_categorical_crossentropy as our model loss, we can directly use the class number. However we still need to convert class in string format into number format.

In [4]:
y = y_[:,-1] # last column is the y label

encoder = LabelEncoder()
y_multiclass = encoder.fit_transform(y)
print('y_multiclass shape:',y_multiclass.shape)
print('Encoded classes:',['{}:{}'.format(tup[0],tup[1]) for tup in enumerate(encoder.classes_)])

y_multiclass shape: (3810,)
Encoded classes: ['0:carpet', '1:concrete', '2:fine_concrete', '3:hard_tiles', '4:hard_tiles_large_space', '5:soft_pvc', '6:soft_tiles', '7:tiled', '8:wood']


## Determine set of hyperparameters for K-fold evaluation
In our model, we shall attempt to find the best of hyperparameters. The tunable hyperparameters are:
- Number of Neurons in hidden layer 1
- Dropout value (post-activation at hidden layer 1)
- Epochs for training

In Keras we can also see the validation accuracy on every epoch.

In [5]:
hidden_neurons = [150,300,500]
dropouts = [0.1,0.25,0.5]
epochs = [250,500,750]

## Create Model
We create a hidden layer with 50 neurons.

In [6]:
def create_model(hyperparams):
    # Create shallow NN model
    input_dim = X.shape[1]
    output_dim = len(encoder.classes_) # number of classes 
    hidden_dim_1 = hyperparams['hidden_layer']
    hidden_dim_2 = output_dim*5

    model = Sequential()
    model.add(Dense(hidden_dim_1, input_dim=input_dim, activation='relu'))
    model.add(Dropout(hyperparams['dropout']))
    model.add(Dense(hidden_dim_2, activation='relu'))
    model.add(Dense(output_dim, activation='softmax'))

#     model.summary()
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

## Split Train-Test Set for later evaluation

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y_multiclass, test_size=0.2)

## Train the model and evaluate each set of hyperparameters using K-Fold Cross Validation
As defined earlier, we will create various permutations of hyperparameters. Since there are 3 possible options for each adjustable hyperparameter, we will obtain a total of 27 possible cases.

In [8]:
kfold_splits = 5

# Instantiate the cross validator
skf = StratifiedKFold(n_splits=kfold_splits, shuffle=True)

val_loss_all = {}

# Enumerate over all possible combos
for i,neurons in enumerate(hidden_neurons):
    for j,dropout in enumerate(dropouts):
        for k,epoch in enumerate(epochs):
            print('Model hidden neurons = {}, dropout rate = {}, # epochs = {}'.format(neurons,dropout,epoch))
        
            val_loss_kfold = []

            # Loop through the indices the split() method returns
            for index, (train_indices, val_indices) in enumerate(skf.split(X_train, y_train)):
                print("Training on fold " + str(index+1) + "/{}...".format(kfold_splits))

                # Generate batches from indices
                xtrain, xval = X[train_indices], X[val_indices]
                ytrain, yval = y_multiclass[train_indices], y_multiclass[val_indices]

                #define hyperparams set
                hyperparams = {}
                hyperparams['hidden_layer'] = neurons
                hyperparams['dropout'] = dropout
                hyperparams['epoch'] = epoch

                # Clear model, and create it
                model = None
                model = create_model(hyperparams)

                history = model.fit(xtrain, ytrain, epochs=hyperparams['epoch'], validation_data=(xval,yval),verbose=0)

                hist = history.history
                val_loss = hist['val_loss']
                loss = hist['loss']

                # for choosing best val acc
                val_loss_kfold.append(hist['val_loss'])
                print("Last training accuracy: " + str(hist['acc'][-1]) + ", last validation accuracy: " + str(hist['val_acc'][-1]))

            print('Average Loss: {}'.format(np.mean(val_loss_kfold)))
            val_loss_all[(neurons,dropout,epoch)] = np.mean(val_loss_kfold)
    
    

Model hidden neurons = 150, dropout rate = 0.1, # epochs = 150
Training on fold 1/5...
Last training accuracy: 0.7778234086364691, last validation accuracy: 0.6867862970949581
Training on fold 2/5...
Last training accuracy: 0.7968814115716044, last validation accuracy: 0.6972176760386328
Training on fold 3/5...
Last training accuracy: 0.784747847527351, last validation accuracy: 0.6584564860426929
Training on fold 4/5...
Last training accuracy: 0.7897540983606557, last validation accuracy: 0.7072368421052632
Training on fold 5/5...
Last training accuracy: 0.7906595658249954, last validation accuracy: 0.6902800648177082
Average Loss: 1.1562583283555086
Model hidden neurons = 150, dropout rate = 0.1, # epochs = 250
Training on fold 1/5...
Last training accuracy: 0.8427104722792608, last validation accuracy: 0.7292006527230168
Training on fold 2/5...
Last training accuracy: 0.8395568321829308, last validation accuracy: 0.7463175123725116
Training on fold 3/5...
Last training accuracy: 0.8

Last training accuracy: 0.8097580975931948, last validation accuracy: 0.7044334975369458
Training on fold 4/5...
Last training accuracy: 0.8102459016393443, last validation accuracy: 0.7203947368421053
Training on fold 5/5...
Last training accuracy: 0.7906595658005774, last validation accuracy: 0.7199341011597258
Average Loss: 1.1202140906832931
Model hidden neurons = 300, dropout rate = 0.25, # epochs = 250
Training on fold 1/5...
Last training accuracy: 0.8669404517453799, last validation accuracy: 0.7308319739960925
Training on fold 2/5...
Last training accuracy: 0.8666393106400502, last validation accuracy: 0.7070376432566323
Training on fold 3/5...
Last training accuracy: 0.8540385403854038, last validation accuracy: 0.7257799671592775
Training on fold 4/5...
Last training accuracy: 0.8676229508196721, last validation accuracy: 0.7203947368421053
Training on fold 5/5...
Last training accuracy: 0.8611224908557206, last validation accuracy: 0.7512355837633425
Average Loss: 1.0768899

Last training accuracy: 0.7893223819546631, last validation accuracy: 0.7422512235153363
Training on fold 2/5...
Last training accuracy: 0.8153467378168259, last validation accuracy: 0.6988543372497621
Training on fold 3/5...
Last training accuracy: 0.8081180811930309, last validation accuracy: 0.7241379310344828
Training on fold 4/5...
Last training accuracy: 0.7983606557377049, last validation accuracy: 0.7319078947368421
Training on fold 5/5...
Last training accuracy: 0.8070462925519087, last validation accuracy: 0.7116968702445117
Average Loss: 1.0598773589714925
Model hidden neurons = 500, dropout rate = 0.5, # epochs = 350
Training on fold 1/5...
Last training accuracy: 0.8443531827760183, last validation accuracy: 0.7210440457256155
Training on fold 2/5...
Last training accuracy: 0.8301189987689782, last validation accuracy: 0.7168576104746317
Training on fold 3/5...
Last training accuracy: 0.8388683886961059, last validation accuracy: 0.7372742200328407
Training on fold 4/5...


In [124]:
# convert dict of validation losses to 3d coordinate format + value
heatmap_3d = np.zeros([0,4])
for key,value in val_loss_all.items():
    heatmap_3d = np.vstack((heatmap_3d,[hidden_neurons.index(key[0]),dropouts.index(key[1]),epochs.index(key[2]),value]))

# saving val losses for various combinations
def write_pickle(file,data):
    with open(file, 'wb') as outfile:
        serialized = pickle.dumps(data)
        outfile.write(serialized)
        
def read_pickle(file):
    with open(file,'rb') as datafile:
        serialized = datafile.read()
        data = pickle.loads(serialized)
        return data
        
write_pickle('./history/val_loss.pickle',val_loss_all)

## Visualise the best hyperparameter selection
In the diagram, we want to visualise the losses in a 3d space, allowing us to quickly draw trends or get the lowest/highest losses. Drag & hold to visualise the 3d heatmap from different angles.

In [101]:
# Using plotly's Scatter3d

trace1 = go.Scatter3d(
    x=heatmap_3d[:,0],
    y=heatmap_3d[:,1],
    z=heatmap_3d[:,2],
    mode='markers',
    marker=dict(
        size=24,
        color=heatmap_3d[:,3],                # set color to an array/list of desired values
        colorscale='Viridis',   # choose a colorscale
        opacity=0.8,
        showscale=True
    )
)

data = [trace1]
layout = go.Layout(
    title='Validation Loss',
    width=800,
    height=800,
    scene = dict(
    xaxis=dict(
        ticktext=hidden_neurons,
        tickvals=[0,1,2],
        nticks=3,
        title='# Hidden Neurons'
    ),
    yaxis=dict(
        ticktext=dropouts,
        tickvals=[0,1,2],
        nticks=3,
        title='Dropout Rate'
    ),
    zaxis=dict(
        ticktext=epochs,
        tickvals=[0,1,2],
        nticks=3,
        title='# Epochs'
    )
))

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='3d-scatter-colorscale')

From the above plot, we compare between any of the 2 hyperparameter dimensions. All validation losses are referred to as K-fold validation losses. Let us explain some of the observations:

**1) dropout vs hidden neurons (@ epochs = 150)**

We observe that with increased learning capacity (hidden neurons = 500), increasing dropout rate actually reduces the validation loss. However, with lower learning capacity (hidden neurons = 150,300), increasing dropout actually harms the performance of the model by increasing the validation loss. This is expected, because with increasingly complex models, it is more likely to overfit, hence dropouts serve as regularisation parameters, which improves the model's ability to generalise. We also observe that for higher dropout rates (0.25,0.5), performance improves with increasing hidden neurons. If we try to apply more regularisation to models with lower learning capacity, we will observe higher bias and poorer performance.

**2) Epochs vs hidden neurons (@ dropout = 0.1)**

We observe that with more hidden neurons, performance is better with less training epochs. The lowest validation loss is at 350 epochs for a model with 150 hidden neurons, whereas the lowest validation loss is at 150 epochs for a model with 300 & 500 hidden neurons. With more neurons, we increase the complexity & expressiveness of the model, hence resulting in faster convergence. Any further training resulted in worsening performance (increased validation losses). Furthermore, due to the low dropout rate for this case, it is easy for the model to overfit.

**3) Epochs vs dropout (@ hidden neurons = 500)**

We see that for low dropout rates, we quickly converge and any further training hurts the performance. At increasing dropout rates, performance improved with more epochs, as more iterations are required for the model to converge. For a more complex model, regardless at any epoch counts, we observe that performance improves with increased dropout.

In [138]:
# Get best hyperparameter set based on val loss
val_loss_best,best_hyper = min(val_loss_all.values()),min(val_loss_all,key=val_loss_all.get)
print('The best combinaton belongs to {}, with a lowest val loss of {}.'.format(best_hyper,val_loss_best))

The best combinaton belongs to (300, 0.5, 350), with a lowest val loss of 1.0518331272969335.


## Retrain the best model
From the previous step, we have determined that the best model has the hyperparameters:
- Hidden Layer Neurons = 300
- Dropout = 0.5
- Epochs = 350

Now we will retrain this permutation using all available training data and visualise the process.

In [142]:
best_hyper[0]

300

In [143]:
#define hyperparams set
best_hyperparams = {}
best_hyperparams['hidden_layer'] = best_hyper[0]
best_hyperparams['dropout'] = best_hyper[1]
best_hyperparams['epoch'] = best_hyper[2]

model = create_model(best_hyperparams)
history = model.fit(X_train, y_train, epochs=hyperparams['epoch'],verbose=1)
    
# plot loss history
hist = history.history
val_loss = hist['val_loss']
loss = hist['loss']
plt.figure()
plt.plot(val_loss)
plt.plot(loss)
plt.legend(['val loss','loss'])

Epoch 1/350
Epoch 2/350
Epoch 3/350
Epoch 4/350
Epoch 5/350
Epoch 6/350
Epoch 7/350
Epoch 8/350
Epoch 9/350
Epoch 10/350
Epoch 11/350
Epoch 12/350
Epoch 13/350
Epoch 14/350
Epoch 15/350
Epoch 16/350
Epoch 17/350
Epoch 18/350
Epoch 19/350
Epoch 20/350
Epoch 21/350
Epoch 22/350
Epoch 23/350
Epoch 24/350
Epoch 25/350
Epoch 26/350
Epoch 27/350
Epoch 28/350
Epoch 29/350
Epoch 30/350
Epoch 31/350
Epoch 32/350
Epoch 33/350
Epoch 34/350
Epoch 35/350
Epoch 36/350
Epoch 37/350
Epoch 38/350
Epoch 39/350
Epoch 40/350
Epoch 41/350
Epoch 42/350
Epoch 43/350
Epoch 44/350
Epoch 45/350
Epoch 46/350
Epoch 47/350
Epoch 48/350
Epoch 49/350
Epoch 50/350
Epoch 51/350
Epoch 52/350
Epoch 53/350
Epoch 54/350
Epoch 55/350
Epoch 56/350
Epoch 57/350
Epoch 58/350
Epoch 59/350
Epoch 60/350
Epoch 61/350
Epoch 62/350
Epoch 63/350
Epoch 64/350
Epoch 65/350
Epoch 66/350
Epoch 67/350
Epoch 68/350
Epoch 69/350
Epoch 70/350
Epoch 71/350
Epoch 72/350
Epoch 73/350
Epoch 74/350
Epoch 75/350
Epoch 76/350
Epoch 77/350
Epoch 78

Epoch 82/350
Epoch 83/350
Epoch 84/350
Epoch 85/350
Epoch 86/350
Epoch 87/350
Epoch 88/350
Epoch 89/350
Epoch 90/350
Epoch 91/350
Epoch 92/350
Epoch 93/350
Epoch 94/350
Epoch 95/350
Epoch 96/350
Epoch 97/350
Epoch 98/350
Epoch 99/350
Epoch 100/350
Epoch 101/350
Epoch 102/350
Epoch 103/350
Epoch 104/350
Epoch 105/350
Epoch 106/350
Epoch 107/350
Epoch 108/350
Epoch 109/350
Epoch 110/350
Epoch 111/350
Epoch 112/350
Epoch 113/350
Epoch 114/350
Epoch 115/350
Epoch 116/350
Epoch 117/350
Epoch 118/350
Epoch 119/350
Epoch 120/350
Epoch 121/350
Epoch 122/350
Epoch 123/350
Epoch 124/350
Epoch 125/350
Epoch 126/350
Epoch 127/350
Epoch 128/350
Epoch 129/350
Epoch 130/350
Epoch 131/350
Epoch 132/350
Epoch 133/350
Epoch 134/350
Epoch 135/350
Epoch 136/350
Epoch 137/350
Epoch 138/350
Epoch 139/350
Epoch 140/350
Epoch 141/350
Epoch 142/350
Epoch 143/350
Epoch 144/350
Epoch 145/350
Epoch 146/350
Epoch 147/350
Epoch 148/350
Epoch 149/350
Epoch 150/350
Epoch 151/350
Epoch 152/350
Epoch 153/350
Epoch 154/

Epoch 162/350
Epoch 163/350
Epoch 164/350
Epoch 165/350
Epoch 166/350
Epoch 167/350
Epoch 168/350
Epoch 169/350
Epoch 170/350
Epoch 171/350
Epoch 172/350
Epoch 173/350
Epoch 174/350
Epoch 175/350
Epoch 176/350
Epoch 177/350
Epoch 178/350
Epoch 179/350
Epoch 180/350
Epoch 181/350
Epoch 182/350
Epoch 183/350
Epoch 184/350
Epoch 185/350
Epoch 186/350
Epoch 187/350
Epoch 188/350
Epoch 189/350
Epoch 190/350
Epoch 191/350
Epoch 192/350
Epoch 193/350
Epoch 194/350
Epoch 195/350
Epoch 196/350
Epoch 197/350
Epoch 198/350
Epoch 199/350
Epoch 200/350
Epoch 201/350
Epoch 202/350
Epoch 203/350
Epoch 204/350
Epoch 205/350
Epoch 206/350
Epoch 207/350
Epoch 208/350
Epoch 209/350
Epoch 210/350
Epoch 211/350
Epoch 212/350
Epoch 213/350
Epoch 214/350
Epoch 215/350
Epoch 216/350
Epoch 217/350
Epoch 218/350
Epoch 219/350
Epoch 220/350
Epoch 221/350
Epoch 222/350
Epoch 223/350
Epoch 224/350
Epoch 225/350
Epoch 226/350
Epoch 227/350
Epoch 228/350
Epoch 229/350
Epoch 230/350
Epoch 231/350
Epoch 232/350
Epoch 

Epoch 242/350
Epoch 243/350
Epoch 244/350
Epoch 245/350
Epoch 246/350
Epoch 247/350
Epoch 248/350
Epoch 249/350
Epoch 250/350
Epoch 251/350
Epoch 252/350
Epoch 253/350
Epoch 254/350
Epoch 255/350
Epoch 256/350
Epoch 257/350
Epoch 258/350
Epoch 259/350
Epoch 260/350
Epoch 261/350
Epoch 262/350
Epoch 263/350
Epoch 264/350
Epoch 265/350
Epoch 266/350
Epoch 267/350
Epoch 268/350
Epoch 269/350
Epoch 270/350
Epoch 271/350
Epoch 272/350
Epoch 273/350
Epoch 274/350
Epoch 275/350
Epoch 276/350
Epoch 277/350
Epoch 278/350
Epoch 279/350
Epoch 280/350
Epoch 281/350
Epoch 282/350
Epoch 283/350
Epoch 284/350
Epoch 285/350
Epoch 286/350
Epoch 287/350
Epoch 288/350
Epoch 289/350
Epoch 290/350
Epoch 291/350
Epoch 292/350
Epoch 293/350
Epoch 294/350
Epoch 295/350
Epoch 296/350
Epoch 297/350
Epoch 298/350
Epoch 299/350
Epoch 300/350
Epoch 301/350
Epoch 302/350
Epoch 303/350
Epoch 304/350
Epoch 305/350
Epoch 306/350
Epoch 307/350
Epoch 308/350
Epoch 309/350
Epoch 310/350
Epoch 311/350
Epoch 312/350
Epoch 

Epoch 322/350
Epoch 323/350
Epoch 324/350
Epoch 325/350
Epoch 326/350
Epoch 327/350
Epoch 328/350
Epoch 329/350
Epoch 330/350
Epoch 331/350
Epoch 332/350
Epoch 333/350
Epoch 334/350
Epoch 335/350
Epoch 336/350
Epoch 337/350
Epoch 338/350
Epoch 339/350
Epoch 340/350
Epoch 341/350
Epoch 342/350
Epoch 343/350
Epoch 344/350
Epoch 345/350
Epoch 346/350
Epoch 347/350
Epoch 348/350
Epoch 349/350
Epoch 350/350


KeyError: 'val_loss'

In [None]:
# evaluate the model
scores = model.evaluate(X_test, y_test)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

## Analysis of Results
overfitting, underfitting, performance of dropout (reg)