# Neural Net Analysis Notebook
## W207 Final Project
### T. P. Goter
### July 6, 2019

This workbook is used to assess various models created as part of the Facial Keypoint Detection project for W207.

In [29]:
# Import the packages we need
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import numpy as np
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
import os

In [9]:
# Load the pickled dataframe for the baseline single layer neural net.
# NOTE: unpickling can execute arbitrary code - only load pickle files produced by this project.
bl_sl_df = pd.read_pickle("OutputData/single_layer_df.pkl")
# Fixed random_state so the preview shows the same rows on every Restart & Run All
bl_sl_df.sample(10, random_state=0)

Unnamed: 0,loss,mean_squared_error,val_loss,val_mean_squared_error,epoch,RMSE,val_RMSE,times,hunits,activation,optimizer,lrate
396,77.049095,77.049088,111.200488,111.200485,396,8.777761,10.545164,0.474865,150,relu,adam,0.001
76,2525.411537,2525.411377,2520.734831,2520.734863,76,50.253471,50.20692,0.320276,100,tanh,sgd,0.01
305,2525.412289,2525.412354,2520.73562,2520.735596,305,50.253481,50.206928,0.527155,100,tanh,adagrad,0.01
244,2525.41177,2525.411377,2520.735104,2520.735107,244,50.253471,50.206923,0.319467,100,sigmoid,sgd,0.01
306,2427.31786,2427.318359,2424.645512,2424.645752,306,49.267823,49.240692,0.448901,200,relu,sgd,0.01
261,174.564472,174.564453,198.644428,198.64444,261,13.212284,14.094128,0.673629,150,relu,adagrad,0.01
100,2525.412145,2525.411865,2520.735503,2520.735596,100,50.253476,50.206928,0.316765,100,sigmoid,sgd,0.01
238,2619.589233,2619.589111,2614.870316,2614.870605,238,51.181922,51.135806,0.294078,100,relu,sgd,0.01
186,2525.411534,2525.411865,2520.734893,2520.735107,186,50.253476,50.206923,0.355427,100,tanh,sgd,0.01
101,89.210516,89.210503,117.918552,117.918556,101,9.445131,10.859031,0.446759,150,relu,adam,0.001


In [36]:
# Create a plotting function to pass to the interact widget function
def plot_validation_loss(optimizer = bl_sl_df.optimizer.unique(),
                    activation = bl_sl_df.activation.unique()):
    """Plot validation RMSE and per-epoch fit time for one optimizer/activation pair.

    The keyword defaults are the arrays of distinct optimizer and activation
    names found in ``bl_sl_df``; ``interact_manual`` (called below) turns them
    into dropdowns, so at call time each argument is a single name.

    Parameters
    ----------
    optimizer : str
        Optimizer name to select; compared for exact equality with
        ``bl_sl_df.optimizer``.
    activation : str
        Activation name to select; compared for exact equality with
        ``bl_sl_df.activation``.

    Draws a 1x2 figure: validation RMSE vs. epoch on the left and fit time
    vs. epoch on the right, with one series per hidden-unit count (``hunits``).
    Returns nothing.
    """
    # Subset the baseline df by exact name. Equality is used rather than
    # str.match, which treats the name as a regex anchored only at the start
    # and would also select any longer name sharing the same prefix.
    is_selected = (bl_sl_df.optimizer == optimizer) & (bl_sl_df.activation == activation)
    sub_df = bl_sl_df[is_selected]

    fig, (loss_ax, time_ax) = plt.subplots(1, 2, figsize=(15, 10))

    # One series per hidden-unit count. Grouping by the scalar key (not a
    # one-element list) keeps the group name a plain value on every pandas
    # version, so the legend reads "100 ..." rather than "(100,) ...".
    for hunits, group in sub_df.groupby('hunits'):
        loss_ax.plot(group.epoch, group.val_RMSE, label=str(hunits)+' Validation Loss')
        # times is scaled by 1000 to match the millisecond axis label
        # (presumably stored in seconds - confirm against the training script)
        time_ax.scatter(group.epoch, group.times*1000, label=str(hunits)+' Fit Time')

    # Axis decoration does not depend on the group, so it is applied once
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Root Mean Square Error')
    time_ax.set_xlabel('Epoch')
    time_ax.set_ylabel('Fit Time (milliseconds)')
    time_ax.set_ylim([0,1000])
    loss_ax.set_title("{} Optimizer and {} Activation".format(optimizer, activation))

    # Data-dependent limits and legends only make sense when something was drawn
    if not sub_df.empty:
        loss_ax.set_ylim([0,sub_df.val_RMSE.max()])
        loss_ax.legend()
        time_ax.legend()

    # Adjust the spacing of the subplots
    fig.subplots_adjust(left=0.03, right=0.97, hspace=0.1, wspace=0.15)

    # Add an overarching title for these plots
    fig.suptitle("Performance Comparison for Single Layer, Fully Connected Neural Nets",
                 fontsize=18, y=0.93)

interact_manual(plot_validation_loss)
# print() is the last statement, presumably so the value returned by
# interact_manual is not echoed as the cell output
print()

interactive(children=(Dropdown(description='optimizer', options=('adam', 'sgd', 'nadam', 'adagrad'), value='ad…




### Assessment of Baseline Results
1. Adam and Adagrad Optimizers are working well. 
2. Adam is faster and works well with 200 hidden units
3. Adagrad is slower but works best with 100 hidden units.

In the evaluation above, both the hidden layer and the output layer used the activation function specified by the user. For the study below, the activation function of the output layer was set to softmax, which is the multiclass generalization of the sigmoid function. The plots below help to assess whether the choice of activation function for the output layer significantly alters our perception of which activation functions and optimizers work well for our neural network.

In [34]:
# Load the pickled dataframe for the single layer neural nets run with a softmax output layer.
# NOTE: unpickling can execute arbitrary code - only load pickle files produced by this project.
sm_sl_df = pd.read_pickle("OutputData/single_layer_softmax_df.pkl")
# Fixed random_state so the preview shows the same rows on every Restart & Run All
sm_sl_df.sample(10, random_state=0)

Unnamed: 0,loss,mean_squared_error,val_loss,val_mean_squared_error,epoch,RMSE,val_RMSE,times,hunits,activation,optimizer,lrate
168,2615.206895,2615.207275,2610.494079,2610.494141,168,51.139097,51.092995,0.20707,50,relu,adam,0.001
187,2614.113165,2614.113525,2609.399187,2609.399414,187,51.128402,51.082281,0.785358,200,tanh,adagrad,0.01
391,2614.315019,2614.315186,2609.594234,2609.594238,391,51.130374,51.084188,0.418621,200,sigmoid,sgd,0.01
350,2614.113151,2614.112793,2609.399146,2609.398926,350,51.128395,51.082276,0.37662,50,sigmoid,nadam,0.002
204,2614.544201,2614.544434,2609.843739,2609.84375,204,51.132616,51.08663,0.280232,50,relu,adagrad,0.01
41,2615.206869,2615.206543,2610.493995,2610.493896,41,51.13909,51.092993,0.685046,100,relu,nadam,0.002
309,2614.272288,2614.272705,2609.577266,2609.577148,309,51.129959,51.08402,0.298626,100,tanh,sgd,0.01
222,2615.878739,2615.878174,2611.163881,2611.163818,222,51.145656,51.099548,1.040879,150,relu,nadam,0.002
284,2616.416941,2616.416748,2611.699417,2611.699463,284,51.150921,51.104789,0.380148,50,relu,nadam,0.002
252,2614.113117,2614.113281,2609.399095,2609.39917,252,51.1284,51.082278,0.382913,150,sigmoid,adam,0.001


In [35]:
# Create a plotting function to pass to the interact widget function
def plot_validation_loss(optimizer = sm_sl_df.optimizer.unique(),
                    activation = sm_sl_df.activation.unique()):
    """Plot validation RMSE and per-epoch fit time for one optimizer/activation pair.

    The keyword defaults are the arrays of distinct optimizer and activation
    names found in ``sm_sl_df``; ``interact_manual`` (called below) turns them
    into dropdowns, so at call time each argument is a single name.

    Parameters
    ----------
    optimizer : str
        Optimizer name to select; compared for exact equality with
        ``sm_sl_df.optimizer``.
    activation : str
        Activation name to select; compared for exact equality with
        ``sm_sl_df.activation``.

    Draws a 1x2 figure: validation RMSE vs. epoch on the left and fit time
    vs. epoch on the right, with one series per hidden-unit count (``hunits``).
    Returns nothing.
    """
    # Subset the softmax df by exact name. Equality is used rather than
    # str.match, which treats the name as a regex anchored only at the start
    # and would also select any longer name sharing the same prefix.
    is_selected = (sm_sl_df.optimizer == optimizer) & (sm_sl_df.activation == activation)
    sub_df = sm_sl_df[is_selected]

    fig, (loss_ax, time_ax) = plt.subplots(1, 2, figsize=(15, 10))

    # One series per hidden-unit count. Grouping by the scalar key (not a
    # one-element list) keeps the group name a plain value on every pandas
    # version, so the legend reads "100 ..." rather than "(100,) ...".
    for hunits, group in sub_df.groupby('hunits'):
        loss_ax.scatter(group.epoch, group.val_RMSE, label=str(hunits)+' Validation Loss')
        # times is scaled by 1000 to match the millisecond axis label
        # (presumably stored in seconds - confirm against the training script)
        time_ax.scatter(group.epoch, group.times*1000, label=str(hunits)+' Fit Time')

    # Axis decoration does not depend on the group, so it is applied once
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Root Mean Square Error')
    time_ax.set_xlabel('Epoch')
    time_ax.set_ylabel('Fit Time (milliseconds)')
    time_ax.set_ylim([0,1000])
    loss_ax.set_title("{} Optimizer and {} Activation".format(optimizer, activation))

    # Data-dependent limits and legends only make sense when something was drawn
    if not sub_df.empty:
        loss_ax.set_ylim([0,sub_df.val_RMSE.max()])
        loss_ax.legend()
        time_ax.legend()

    # Adjust the spacing of the subplots
    fig.subplots_adjust(left=0.03, right=0.97, hspace=0.1, wspace=0.15)

    # Add an overarching title for these plots
    fig.suptitle("Performance Comparison for Single Layer, Fully Connected Neural Nets",
                 fontsize=18, y=0.93)

interact_manual(plot_validation_loss)
# print() is the last statement, presumably so the value returned by
# interact_manual is not echoed as the cell output
print()

interactive(children=(Dropdown(description='optimizer', options=('adam', 'sgd', 'nadam', 'adagrad'), value='ad…




In [38]:
# Load the pickled dataframe for the single layer neural net "relu" experiment
# (presumably the runs with a ReLU output-layer activation, per the file name - confirm).
# NOTE: unpickling can execute arbitrary code - only load pickle files produced by this project.
relu_sl_df = pd.read_pickle("OutputData/single_layer_relu_df.pkl")
# Fixed random_state so the preview shows the same rows on every Restart & Run All
relu_sl_df.sample(10, random_state=0)

Unnamed: 0,loss,mean_squared_error,val_loss,val_mean_squared_error,epoch,RMSE,val_RMSE,times,hunits,activation,optimizer,lrate
209,693.430185,693.430115,691.275398,691.275391,209,26.333061,26.292117,0.312216,100,tanh,adam,0.001
294,693.777985,693.777954,692.507681,692.507629,294,26.339665,26.31554,0.329013,150,sigmoid,sgd,0.01
239,921.213388,921.213501,919.227193,919.227112,239,30.351499,30.318758,0.261787,50,sigmoid,adagrad,0.01
62,285.910883,285.910858,286.802552,286.802521,62,16.908899,16.935245,0.383565,150,sigmoid,adam,0.001
170,1108.915862,1108.916016,1104.156994,1104.156982,170,33.300391,33.228858,0.422945,50,sigmoid,nadam,0.002
75,640.055802,640.055786,639.210816,639.210754,75,25.299324,25.282618,0.484436,100,sigmoid,adagrad,0.01
256,693.432417,693.432373,691.271096,691.271057,256,26.333104,26.292034,0.310465,100,tanh,adam,0.001
261,487.449163,487.449127,486.786071,486.786011,261,22.07825,22.063228,0.760776,200,tanh,adagrad,0.01
120,1107.811088,1107.811279,1103.192954,1103.192993,120,33.283799,33.214349,0.37664,150,tanh,adam,0.001
223,466.308174,466.308167,464.690888,464.690857,223,21.59417,21.556689,0.36067,50,tanh,nadam,0.002


In [39]:
# Create a plotting function to pass to the interact widget function
def plot_validation_loss(optimizer = relu_sl_df.optimizer.unique(),
                    activation = relu_sl_df.activation.unique()):
    """Plot validation RMSE and per-epoch fit time for one optimizer/activation pair.

    The keyword defaults are the arrays of distinct optimizer and activation
    names found in ``relu_sl_df``; ``interact_manual`` (called below) turns
    them into dropdowns, so at call time each argument is a single name.

    Parameters
    ----------
    optimizer : str
        Optimizer name to select; compared for exact equality with
        ``relu_sl_df.optimizer``.
    activation : str
        Activation name to select; compared for exact equality with
        ``relu_sl_df.activation``.

    Draws a 1x2 figure: validation RMSE vs. epoch on the left and fit time
    vs. epoch on the right, with one series per hidden-unit count (``hunits``).
    Returns nothing.
    """
    # Subset the relu df by exact name. Equality is used rather than
    # str.match, which treats the name as a regex anchored only at the start
    # and would also select any longer name sharing the same prefix.
    is_selected = (relu_sl_df.optimizer == optimizer) & (relu_sl_df.activation == activation)
    sub_df = relu_sl_df[is_selected]

    fig, (loss_ax, time_ax) = plt.subplots(1, 2, figsize=(15, 10))

    # One series per hidden-unit count. Grouping by the scalar key (not a
    # one-element list) keeps the group name a plain value on every pandas
    # version, so the legend reads "100 ..." rather than "(100,) ...".
    for hunits, group in sub_df.groupby('hunits'):
        loss_ax.scatter(group.epoch, group.val_RMSE, label=str(hunits)+' Validation Loss')
        # times is scaled by 1000 to match the millisecond axis label
        # (presumably stored in seconds - confirm against the training script)
        time_ax.scatter(group.epoch, group.times*1000, label=str(hunits)+' Fit Time')

    # Axis decoration does not depend on the group, so it is applied once
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Root Mean Square Error')
    time_ax.set_xlabel('Epoch')
    time_ax.set_ylabel('Fit Time (milliseconds)')
    time_ax.set_ylim([0,1000])
    loss_ax.set_title("{} Optimizer and {} Activation".format(optimizer, activation))

    # Data-dependent limits and legends only make sense when something was drawn
    if not sub_df.empty:
        loss_ax.set_ylim([0,sub_df.val_RMSE.max()])
        loss_ax.legend()
        time_ax.legend()

    # Adjust the spacing of the subplots
    fig.subplots_adjust(left=0.03, right=0.97, hspace=0.1, wspace=0.15)

    # Add an overarching title for these plots
    fig.suptitle("Performance Comparison for Single Layer, Fully Connected Neural Nets",
                 fontsize=18, y=0.93)

interact_manual(plot_validation_loss)
# print() is the last statement, presumably so the value returned by
# interact_manual is not echoed as the cell output
print()

interactive(children=(Dropdown(description='optimizer', options=('adam', 'sgd', 'nadam', 'adagrad'), value='ad…




In [42]:
# Lowest validation RMSE reached with each optimizer across all recorded epochs and configurations
relu_sl_df.groupby('optimizer')['val_RMSE'].min()

optimizer
adagrad    22.046067
adam        3.142767
nadam      13.423687
sgd         7.542013
Name: val_RMSE, dtype: float64

### Single Layer Assessment
Based on the three different experiments run, we will use a ReLU activation function for the final (output) layer. We will continue to assess the Adagrad and Adam optimizers. We likely do not need to train for more than 200 epochs or so to get reasonably converged nets.

In [40]:
# Load the pickled dataframe for the single layer "relu" learning-rate study
# (optimizer labels in this frame carry a learning-rate suffix, e.g. adam_005 with lrate 0.005).
# NOTE: unpickling can execute arbitrary code - only load pickle files produced by this project.
relu_sl_lr_df = pd.read_pickle("OutputData/single_layer_relu_lr_df.pkl")
# Fixed random_state so the preview shows the same rows on every Restart & Run All
relu_sl_lr_df.sample(10, random_state=0)

Unnamed: 0,loss,mean_squared_error,val_loss,val_mean_squared_error,epoch,RMSE,val_RMSE,times,hunits,activation,optimizer,lrate
20,580.891349,580.891296,580.287992,580.287964,20,24.101687,24.089167,0.297721,50,tanh,adagrad_005,0.05
3,187.435284,187.435287,185.993254,185.99324,3,13.690701,13.637934,0.526855,100,tanh,adagrad_005,0.05
161,814.482202,814.482361,813.40233,813.402344,161,28.539137,28.520209,0.256438,50,tanh,adagrad_02,0.02
160,135.214011,135.214005,133.959131,133.959122,160,11.628156,11.574071,0.818475,200,tanh,adagrad_005,0.05
31,240.97226,240.972244,240.499103,240.4991,31,15.523281,15.508033,0.493412,100,tanh,adagrad_02,0.02
22,35.179296,35.179291,35.044818,35.044819,22,5.931213,5.919866,0.453727,150,tanh,adam_005,0.005
72,815.030874,815.030945,813.811062,813.811035,72,28.548747,28.527373,0.261414,50,tanh,adagrad_02,0.02
181,35.293242,35.293247,35.034047,35.034046,181,5.940812,5.918957,0.393565,150,tanh,adam_005,0.005
9,152.25199,152.251999,151.503053,151.503036,9,12.339044,12.308657,0.600839,150,tanh,adagrad_02,0.02
56,680.404938,680.404968,675.654405,675.654419,56,26.084573,25.993353,0.200003,50,tanh,adam_0005,0.0005


In [41]:
# Create a plotting function to pass to the interact widget function
def plot_validation_loss(optimizer = relu_sl_lr_df.optimizer.unique(),
                    activation = relu_sl_lr_df.activation.unique()):
    """Plot validation RMSE and per-epoch fit time for one optimizer/activation pair.

    The keyword defaults are the arrays of distinct optimizer and activation
    names found in ``relu_sl_lr_df``; ``interact_manual`` (called below) turns
    them into dropdowns, so at call time each argument is a single name.

    Parameters
    ----------
    optimizer : str
        Optimizer label to select (in this frame the labels carry a
        learning-rate suffix such as ``adam_005``); compared for exact
        equality with ``relu_sl_lr_df.optimizer``.
    activation : str
        Activation name to select; compared for exact equality with
        ``relu_sl_lr_df.activation``.

    Draws a 1x2 figure: validation RMSE vs. epoch on the left and fit time
    vs. epoch on the right, with one series per hidden-unit count (``hunits``).
    Returns nothing.
    """
    # Subset the learning-rate df by exact label. Equality is used rather than
    # str.match, which treats the label as a regex anchored only at the start
    # and would also select any longer label sharing the same prefix.
    is_selected = (relu_sl_lr_df.optimizer == optimizer) & (relu_sl_lr_df.activation == activation)
    sub_df = relu_sl_lr_df[is_selected]

    fig, (loss_ax, time_ax) = plt.subplots(1, 2, figsize=(15, 10))

    # One series per hidden-unit count. Grouping by the scalar key (not a
    # one-element list) keeps the group name a plain value on every pandas
    # version, so the legend reads "100 ..." rather than "(100,) ...".
    for hunits, group in sub_df.groupby('hunits'):
        loss_ax.scatter(group.epoch, group.val_RMSE, label=str(hunits)+' Validation Loss')
        # times is scaled by 1000 to match the millisecond axis label
        # (presumably stored in seconds - confirm against the training script)
        time_ax.scatter(group.epoch, group.times*1000, label=str(hunits)+' Fit Time')

    # Axis decoration does not depend on the group, so it is applied once
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Root Mean Square Error')
    time_ax.set_xlabel('Epoch')
    time_ax.set_ylabel('Fit Time (milliseconds)')
    time_ax.set_ylim([0,1000])
    loss_ax.set_title("{} Optimizer and {} Activation".format(optimizer, activation))

    # Data-dependent limits and legends only make sense when something was drawn
    if not sub_df.empty:
        loss_ax.set_ylim([0,sub_df.val_RMSE.max()])
        loss_ax.legend()
        time_ax.legend()

    # Adjust the spacing of the subplots
    fig.subplots_adjust(left=0.03, right=0.97, hspace=0.1, wspace=0.15)

    # Add an overarching title for these plots
    fig.suptitle("Performance Comparison for Single Layer, Fully Connected Neural Nets",
                 fontsize=18, y=0.93)

interact_manual(plot_validation_loss)
# print() is the last statement, presumably so the value returned by
# interact_manual is not echoed as the cell output
print()

interactive(children=(Dropdown(description='optimizer', options=('adam_005', 'adam_0005', 'adagrad_02', 'adagr…




In [43]:
# Lowest validation RMSE reached with each optimizer/learning-rate label across all recorded epochs and configurations
relu_sl_lr_df.groupby('optimizer')['val_RMSE'].min()

optimizer
adagrad_005    11.573482
adagrad_02     12.259225
adam_0005      16.874466
adam_005        5.915614
Name: val_RMSE, dtype: float64