In [1]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, output_notebook, show
from bokeh.palettes import Category10_10
from bokeh.models import LinearAxis, Range1d

In [2]:
def show_wide_lines(lines, legends, title, height=200, width=800):
    """Draw several series as lines on one Bokeh figure shown in the notebook.

    Every series is plotted against its positional index (0, 1, 2, ...) and
    coloured from the ``Category10_10`` palette in order.

    Args:
        lines: sequences of y-values, one per line to draw.
        legends: legend labels, paired with ``lines`` by position (``zip``
            stops at the shorter of the two).
        title: figure title.
        height: forwarded to ``figure`` as ``plot_height``.
        width: forwarded to ``figure`` as ``plot_width``.
    """
    output_notebook()
    p = figure(title=title, plot_width=width, plot_height=height)
    palette_size = len(Category10_10)
    for i, (line, legend) in enumerate(zip(lines, legends)):
        # Wrap around the palette so that more lines than palette entries
        # reuse colours instead of raising IndexError.
        color = Category10_10[i % palette_size]
        p.line(x=range(len(line)), y=line, color=color, legend=legend)
    show(p)

In [3]:
# Result files from other runs, kept for reference (swap the path to analyse one):
#   results/0d90126cf4364e20bf9791647ce8388f.csv  - 1500 epoch
#   results/76efa25e8dc344c99d9dc9c0a22ebb19.csv  - #2, 500 epoch
#   results/fae6349bab72457192fc46201e5ef10e.csv  - #3, 1000 epoch
# Currently loaded: #4, 1000 epoch.
results_csv = 'results/4f1ea36bd1a240efaa22a782fd665afa.csv'
df = pd.read_csv(results_csv)

In [4]:
# Preview the first five rows of the loaded training history.
df.head(n=5)

Unnamed: 0,epoch,acc,loss,val_acc,val_loss
0,0,0.43468,1.54805,0.5727,1.196283
1,1,0.59358,1.142452,0.6697,0.937697
2,2,0.6621,0.96444,0.7031,0.84397
3,3,0.69734,0.86032,0.7284,0.788456
4,4,0.7224,0.792648,0.7518,0.712628


In [5]:
# idxmax() returns the row *label* of the highest validation accuracy. Read the
# 'epoch' column for that row so the printed number is the epoch itself, even
# if the row labels ever stop coinciding with the epoch numbers.
best_row = df.val_acc.idxmax()
print('The model had it\'s best performance in epoch: {}.'.format(df.loc[best_row, 'epoch']))

The model had it's best performance in epoch: 821.


In [6]:
# Accuracy per epoch: the 'acc' column against the 'val_acc' column.
show_wide_lines(
    lines=[df['acc'], df['val_acc']],
    legends=['train', 'test'],
    title='Accuracy - train vs test',
)

In [7]:
# Loss per epoch: the 'loss' column against the 'val_loss' column.
show_wide_lines(
    lines=[df['loss'], df['val_loss']],
    legends=['train', 'test'],
    title='Loss - train vs test',
)

In [8]:
def get_max_acc(early_stop_th, val_acc=None):
    """Simulate early stopping over a per-epoch validation-accuracy history.

    Walks the accuracies in order while tracking the best value seen so far,
    and stops as soon as more than ``early_stop_th`` consecutive epochs have
    passed without a new best.

    Args:
        early_stop_th: number of consecutive non-improving epochs tolerated;
            the walk stops once this count is exceeded.
        val_acc: iterable of per-epoch accuracies. When omitted, the
            notebook-level ``df.val_acc`` is used, so existing one-argument
            calls behave as before.

    Returns:
        Tuple ``(max_acc, improvement)``: the best accuracy seen before
        stopping, and the amount by which it exceeded the previous best
        (the previous best starts at 0). Both are 0 if no accuracy is
        greater than 0, which includes an empty history.
    """
    if val_acc is None:
        # Fall back to the history loaded by the notebook.
        val_acc = df.val_acc
    max_acc = 0
    acc_age = 0  # epochs elapsed since the last new best
    improvement = 0
    for acc in val_acc:
        if acc > max_acc:
            improvement = acc - max_acc
            max_acc = acc
            acc_age = 0
        else:
            acc_age += 1
        if acc_age > early_stop_th:
            break
    return max_acc, improvement
    

# Run the early-stopping simulation once per threshold and unpack both results,
# rather than repeating the whole sweep for each element of the returned tuple.
early_stop_results = [get_max_acc(early_stop_th) for early_stop_th in range(1000)]
stop_accs = [max_acc for max_acc, _ in early_stop_results]
improovements = [last_gain for _, last_gain in early_stop_results]
# np.unique returns the distinct improvement values in sorted order; with
# return_index=True it also returns the index of each value's first occurrence.
# List position equals the threshold, so that index is the smallest threshold
# producing the given improvement.
improovements, improovements_indices = np.unique(improovements, return_index=True)

In [9]:
# np.argmax returns the first position of the maximum, i.e. the smallest
# threshold that reaches the highest final accuracy.
best_threshold = np.argmax(stop_accs)
print('The optimal early stopping threshold is {} epoch.'.format(best_threshold))

The optimal early stopping threshold is 411 epoch.


In [10]:
width = 800
height = 600
output_notebook()
p = figure(title='Validation accuracy vs early stopping threshold', plot_width=width, plot_height=height)
# Primary (left) axis: final accuracy reached for each early-stopping threshold.
p.line(x=range(len(stop_accs)), y=stop_accs, legend='Final accuracy')
# Label the axes before the secondary axis is added: p.yaxis addresses every
# y-axis on the plot, so assigning the label afterwards would also caption the
# right-hand axis 'Accuracy'.
p.yaxis.axis_label = 'Accuracy'
p.xaxis.axis_label = 'Early stopping threshold'
# Secondary (right) axis: one bar per distinct improvement value, placed at the
# first threshold where it occurs. The 0-0.03 range is fixed by hand.
# NOTE(review): widen it if a run produces improvements above 0.03.
p.extra_y_ranges = {'imp': Range1d(0, 0.03)}
p.vbar(x=improovements_indices, top=improovements, width=0.9, color='red', y_range_name='imp', legend='Improovement (right)')
p.add_layout(LinearAxis(y_range_name="imp", axis_label='Improvement'), 'right')
show(p)

In [11]:
# Show the sorted distinct improvement values found across all thresholds.
improvements_report = "Improovements: {}".format(improovements)
print(improvements_report)

Improovements: [0.0001 0.0004 0.0008 0.0016 0.0023 0.0043 0.0049 0.0064 0.0234]


In [12]:
# Index of the first occurrence of each unique improvement value in the
# per-threshold list, i.e. the smallest early-stopping threshold that yields
# that improvement (same order as the sorted values in `improovements`).
improovements_indices

array([411,  19, 189,  45,   1,   2,   8,  28,   0])