I need to plot the training vs testing error as a function of time. We will define the error as 
 $$ MSE(x_{ s \leq t \leq T}, \tilde{x}^s_{ s \leq t \leq T}) $$
where $\tilde{x}^{s}_{s \leq t \leq T}$ is the time series of the prediction made using the forced initial value problem. We will use the first 40 days of the simulation as a training set, and the second 40 days as a testing set.

The expected prediction error will be taken over all points in the testing dataset.

In [None]:
%matplotlib inline
import seaborn as sns

# Select seaborn's 'whitegrid' plot style.
sns.set_style('whitegrid')

In [None]:
import json, glob, os, re
from toolz import merge
import pandas as pd

In [None]:
def open_json(path, **kwargs):
    """Load one training-log JSON file into a DataFrame.

    The file must contain a JSON object with an ``'args'`` mapping and a
    ``'training'`` entry that ``pd.DataFrame`` can consume.  The merged
    metadata must contain an indexable ``'nhidden'`` value.

    Parameters
    ----------
    path : str
        Location of the JSON file.
    **kwargs
        Extra metadata; entries here override same-named entries of
        ``'args'``.

    Returns
    -------
    pandas.DataFrame
        The ``'training'`` records, with one additional constant column per
        metadata entry (the ``'args'`` values, ``kwargs``, and ``'path'``).
    """
    # Use a context manager so the file handle is closed deterministically;
    # the bare ``json.load(open(path))`` left it open.
    with open(path) as f:
        d = json.load(f)

    # Later mappings win, so kwargs take precedence over the stored args.
    metadata = {**d['args'], **kwargs}
    metadata['path'] = path
    # Only the first element of 'nhidden' is kept.
    metadata['nhidden'] = metadata['nhidden'][0]

    return pd.DataFrame(d['training']).assign(**metadata)

def open_model(path, **kwargs):
    """Yield one DataFrame per ``*.json`` file found directly under ``path``.

    Each file is loaded with ``open_json``; the file name without its
    extension is passed along as ``seed`` and ``kwargs`` are forwarded
    unchanged.
    """
    for json_path in glob.glob(path + "/*.json"):
        stem, _ext = os.path.splitext(os.path.basename(json_path))
        yield open_json(json_path, seed=stem, **kwargs)


def _open_jsons(path):
    models = glob.glob(path + "/model.*")
    models = filter(os.path.isdir, models)
    for model in models:
        m = re.search(r"model\.(.+)", os.path.basename(model))
        yield from open_model(model, model=m.group(1))
    
    
def open_jsons(path):
    """Stack every frame produced by ``_open_jsons(path)`` row-wise."""
    frames = _open_jsons(path)
    return pd.concat(frames, axis=0)
    

In [None]:
df = open_jsons("../data/output/")

# Blank out the training loss on rows where batch == 0.
# float('nan') is used instead of np.NaN: `np` is not imported in any visible
# cell, and the `np.NaN` alias was removed in NumPy 2.0.
df.loc[df.batch == 0, 'train_loss'] = float('nan')
# only keep a subset of the variables
variables = ['nhidden', 'window_size', 'test_loss', 'train_loss', 'epoch', 'batch', 'seed']
df = df[variables]

# df already holds exactly `variables`, so no second selection is needed
df.head()

The test error decreases strongly with the number of training batches. Unfortunately, this is not a fair measure, because the total number of training steps is much smaller between samples.

Median loss statistics

# Sensitivity to hyper parameters

In [None]:
# Get vary num hidden experiment
nhid = df[df.window_size == 10]
vt = df[df.nhidden == 128]

In [None]:
# Show the rows past epoch 1 whose test loss is still above 200
nhid.loc[(nhid['epoch'] > 1) & (nhid['test_loss'] > 200)]

It looks like seed 6 of the 256 hidden layer experiment is very far off, so let's just remove it. Also, the window_size = 2 experiment does not converge at all.

In [None]:
# Keep every row except those that are both seed '6' and 256 hidden nodes
nhid = nhid.loc[(nhid['seed'] != '6') | (nhid['nhidden'] != 256)]
# Discard the window_size == 2 runs
vt = vt.loc[vt['window_size'] != 2]

Here, let's plot the distribution of the test errors.

In [None]:
# NOTE(review): `plt` is not imported in any visible cell; presumably
# matplotlib.pyplot. Confirm.
# Two panels with a shared y axis and no gap; the left one gets 66% of the width.
fig, (axn, axt) = plt.subplots(
    1, 2,
    sharey=True,
    figsize=(7, 3),
    gridspec_kw={"width_ratios": (.66, .34), "wspace": 0.0},
)

# Left panel: test loss per epoch, one box per hidden-node count
sns.boxplot(data=nhid, x="epoch", y="test_loss", hue="nhidden", ax=axn)
axn.legend(title="Hidden Nodes", loc="lower left", ncol=2)
axn.set_ylabel('Test Error')

# Right panel: test loss per epoch, one box per window size
sns.boxplot(data=vt, x="epoch", y="test_loss", hue="window_size", ax=axt)
axt.set_ylabel('')

# The y axis is shared, so these limits apply to both panels
axt.set_ylim((75, 200))

axt.set_xlabel("Epoch")

This plot is a little busy, so it is probably better to just plot the performance over the last 4 epochs.

In [None]:
# NOTE(review): `plt` is not imported in any visible cell; presumably
# matplotlib.pyplot. Confirm.
fig, (axn, axt) = plt.subplots(1, 2, figsize=(6, 6/1.62), sharey=True,
                              gridspec_kw=dict(width_ratios=(.67, .33), wspace=0))

# `join=True` was removed: the points are connected by default, and the
# `join` parameter is deprecated since seaborn 0.13 (passing it warns).
kws = dict(capsize=.2)

# Only rows with epoch > 2 enter the point estimates.
# NOTE(review): the figure title says "last 4 epochs"; confirm that this
# matches the `epoch > 2` filter for the number of epochs actually run.
sns.pointplot(x="nhidden", y="test_loss", data=nhid[nhid.epoch > 2], ax=axn, **kws)
sns.pointplot(x="window_size", y="test_loss", data=vt[vt.epoch > 2], ax=axt, **kws)

# The y axis is shared, so setting the limits on one panel applies to both.
axt.set_ylim([120, 155])
axt.set_ylabel('')
axn.set_ylabel('Error')

axn.set_xlabel("Hidden Nodes")
axt.set_xlabel("Window Size")
fig.suptitle("Test error for last 4 epochs")

# Median Training Error

I do not show the testing/training loss after a number of steps in the analysis above because it is kind of noisy. Here I will plot the median training and testing errors for each epoch.

In [None]:
def plot_train_test(val, axtrain, axtest, **kwargs):
    """Plot the per-epoch median test and training loss of ``val``.

    Parameters
    ----------
    val : pandas.DataFrame
        Must provide the columns ``'epoch'``, ``'test_loss'`` and
        ``'train_loss'``.
    axtrain, axtest
        Axes handed to ``Series.plot`` (as ``ax=``) for the training and the
        test curve respectively.
    **kwargs
        Forwarded unchanged to both ``Series.plot`` calls.
    """
    # Aggregate only the two loss columns.  Taking the median of the whole
    # frame fails on non-numeric columns with pandas >= 2.0, where
    # GroupBy.median no longer drops them silently.
    stats = val.groupby('epoch')[['test_loss', 'train_loss']].median()
    stats['test_loss'].plot(ax=axtest, **kwargs)
    stats['train_loss'].plot(ax=axtrain, **kwargs)
    

# NOTE(review): `plt` is not imported in any visible cell; presumably
# matplotlib.pyplot. Confirm.
# Anchor point shared by both legends (axes coordinates of the test panel).
legend_box = (1.0, .7)
fig, (axtrain, axtest) = plt.subplots(1, 2, figsize=(6, 3))

kws = dict(marker='s')

# Hidden-node runs: blue, one curve per `nhidden` value.  Opacity ramps
# linearly up to 1.0 across the groups (0.2, 0.4, ... for five groups), so it
# stays inside matplotlib's valid [0, 1] alpha range for any number of groups.
nhid_groups = list(nhid.groupby('nhidden'))
lines_nhid = []
for i, (n, val) in enumerate(nhid_groups, start=1):
    plot_train_test(val, axtrain, axtest, color='b',
                    alpha=i / len(nhid_groups), label=n, **kws)
    lines_nhid.append(n)

# Everything drawn on the test panel so far belongs to the hidden-node runs.
nhid_handles = list(axtest.get_lines())
leg1 = axtest.legend(nhid_handles, lines_nhid, title='Hidden Nodes',
                     loc="upper left", bbox_to_anchor=legend_box)
# A later legend() call replaces the axes' legend, so keep the first one
# alive by adding it as a plain artist.
axtest.add_artist(leg1)

# Window-size runs: red, one curve per `window_size` value.
vt_groups = list(vt.groupby('window_size'))
lines_vt = []
for i, (T, val) in enumerate(vt_groups, start=1):
    plot_train_test(val, axtrain, axtest, color='r', label=T,
                    alpha=i / len(vt_groups), **kws)
    lines_vt.append(T)

# The lines added after the hidden-node ones are the window-size curves;
# slicing by count replaces the hard-coded "last two lines".
leg2 = axtest.legend(axtest.get_lines()[len(nhid_handles):], lines_vt,
                     title='Window Size',
                     loc="lower left", bbox_to_anchor=legend_box)

axtest.set_ylim([120, 160])

# labels
axtrain.set_title('Training Loss')
axtest.set_title('Median Test Error')