In [1]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, output_notebook, show
from bokeh.palettes import Category10_10
from bokeh.models import LinearAxis, Range1d
import holoviews as hv
import warnings
# Suppress every warning for the rest of the kernel session; note this also
# hides deprecation notices raised by the plotting libraries used below.
warnings.filterwarnings('ignore')
import os
# Step one directory up so the relative 'results/...' paths used by the
# loading cell resolve from there.
# NOTE(review): the path is relative to the current working directory, so
# re-running this cell climbs one more level each time - run it once per kernel.
os.chdir('../')

Uncomment exactly one of the lines in the next cell to select a results file, then run the rest of the notebook for the analysis.

In [43]:
# Pick the results file to analyse: keep exactly one assignment active.
# The trailing notes are the run labels / epoch counts recorded by the author.
# results_csv = 'results/0d90126cf4364e20bf9791647ce8388f.csv' # 1500 epoch
# results_csv = 'results/76efa25e8dc344c99d9dc9c0a22ebb19.csv' #2 - 500 epoch
# results_csv = 'results/fae6349bab72457192fc46201e5ef10e.csv' #3 - 1000 epoch
results_csv = 'results/4f1ea36bd1a240efaa22a782fd665afa.csv' #4 - 1000 epoch
# Later cells read the columns epoch, acc, val_acc, loss and val_loss from df.
df = pd.read_csv(results_csv)

In [44]:
# idxmax() yields the index label of the row holding the highest val_acc.
best_epoch = df.val_acc.idxmax()
print('The model had it\'s best performance in epoch: {}.'.format(best_epoch))

The model had it's best performance in epoch: 821.


In [45]:
hv.extension('matplotlib')
# Build the two accuracy curves separately, then overlay them on shared axes.
train_acc_curve = hv.Curve(df, 'epoch', 'acc', label='train')
test_acc_curve = hv.Curve(df, 'epoch', 'val_acc', label='test')
(train_acc_curve * test_acc_curve).opts(
    aspect=6,
    fig_size=800,
    ylim=(0.7, 1),
    title='Accuracy - train vs test',
)

In [46]:
hv.extension('matplotlib')
# Build the two loss curves separately, then overlay them on shared axes.
train_loss_curve = hv.Curve(df, 'epoch', 'loss', label='train')
test_loss_curve = hv.Curve(df, 'epoch', 'val_loss', label='test')
(train_loss_curve * test_loss_curve).opts(
    aspect=6,
    fig_size=800,
    title='Loss - train vs test',
)

In [47]:
def get_max_acc(early_stop_th, val_accs=None):
    """Simulate patience-based early stopping over a validation-accuracy curve.

    The accuracies are visited in order. Each value that beats the best seen
    so far resets a counter; every other value increments it. The walk stops
    as soon as the counter exceeds ``early_stop_th``.

    Args:
        early_stop_th: Number of consecutive non-improving epochs tolerated
            before stopping.
        val_accs: Iterable of per-epoch validation accuracies. When omitted,
            the notebook-level ``df.val_acc`` is used, so existing
            single-argument calls behave as before.

    Returns:
        A tuple ``(max_acc, improovement)``: the best accuracy reached before
        stopping, and the amount by which that best value exceeded the
        previous best (the starting best is 0). ``(0, 0)`` is returned when
        no value is greater than 0, e.g. for an empty sequence.
    """
    if val_accs is None:
        # Fall back to the notebook's global results frame.
        val_accs = df.val_acc
    max_acc = 0
    acc_age = 0  # epochs elapsed since the last new best
    improovement = 0
    for acc in val_accs:
        if acc > max_acc:
            improovement = acc - max_acc
            max_acc = acc
            acc_age = 0
        else:
            acc_age += 1
        # Checked on every epoch (not only on non-improving ones) to keep the
        # original behaviour for any threshold value.
        if acc_age > early_stop_th:
            break
    return max_acc, improovement
    

# Run the early-stopping simulation once per candidate threshold (0..999) and
# unpack both results, instead of simulating every threshold twice.
early_stop_results = [get_max_acc(early_stop_th) for early_stop_th in range(1000)]
stop_accs = [max_acc for max_acc, _ in early_stop_results]
improovements = [improovement for _, improovement in early_stop_results]
# Collapse to the distinct improvement values (sorted ascending) together with
# the first threshold at which each value appears.
improovements, improovements_indices = np.unique(improovements, return_index=True)

In [48]:
# argmax returns the first (smallest) threshold that reaches the top accuracy.
optimal_early_stop_th = np.argmax(stop_accs)
print('The optimal early stopping threshold is {} epoch.'.format(optimal_early_stop_th))

The optimal early stopping threshold is 411 epoch.


In [49]:
width = 800
height = 600
output_notebook()
p = figure(title='Validation accuracy vs early stopping threshold', plot_width=width, plot_height=height)
p.line(x=range(len(stop_accs)), y=stop_accs, legend='Final accuracy')
# Label the default axes while the figure still has a single y-axis:
# p.yaxis addresses every y-axis on the plot, so assigning the label after the
# right-hand axis is added would label that axis 'Accuracy' as well.
p.yaxis.axis_label = 'Accuracy'
p.xaxis.axis_label = 'Early stopping threshold'
# Secondary y-range for the improvement bars, drawn against the right axis.
p.extra_y_ranges = {'imp': Range1d(0, 0.03)}
p.vbar(x=improovements_indices, top=improovements, width=0.9, color='red', y_range_name='imp', legend='Improovement (right)')
# Give the right-hand axis its own label so the two scales are distinguishable.
p.add_layout(LinearAxis(y_range_name="imp", axis_label='Accuracy improvement'), 'right')
show(p)

In [52]:
hv.extension('matplotlib')
# Table of distinct improvements, ordered by the threshold where each first occurs.
improovement_table = pd.DataFrame({
    'early stopping threshold': improovements_indices,
    'accuracy improovement': improovements,
}).sort_values(by=['early stopping threshold'])
accuracy_curve = hv.Curve(stop_accs, 'early stopping threshold', 'accuracy')
improovement_bars = hv.Bars(improovement_table)
# `+` places the curve and the bar chart side by side in one layout.
(accuracy_curve + improovement_bars).opts(title='Validation accuracy vs early stopping threshold')

In [51]:
# Show the distinct improvement values found by the threshold sweep.
improovements_message = "Improovements: {}".format(improovements)
print(improovements_message)

Improovements: [0.0001 0.0004 0.0008 0.0016 0.0023 0.0043 0.0049 0.0064 0.0234]


In [53]:
# One row per distinct improvement, ordered by the threshold at which it first appears.
improovement_columns = {
    'early stopping threshold': improovements_indices,
    'accuracy improovement': improovements,
}
pd.DataFrame(improovement_columns).sort_values(by=['early stopping threshold'])

Unnamed: 0,early stopping threshold,accuracy improovement
8,0,0.0234
4,1,0.0023
5,2,0.0043
6,8,0.0049
1,19,0.0004
7,28,0.0064
3,45,0.0016
2,189,0.0008
0,411,0.0001
