In [9]:
import json
import os
import numpy as np
import matplotlib.pyplot as plt

import pylab
import operator

In [10]:
# Configuration: which experiment to analyse and where its results live.
experiment_name = 'baseline_network'
# Root directory holding one sub-directory per experiment. The original machine-specific
# location stays the default; set EXPERIMENT_RESULTS_ROOT to run the notebook elsewhere
# without editing this cell.
results_root = os.environ.get('EXPERIMENT_RESULTS_ROOT',
                              '/home/n1no/Documents/ethz/master_thesis/code/project/results/runs')
experiment_path = '%s/%s' % (results_root, experiment_name)
# all figures produced by this notebook are written below this directory
plot_output_path = experiment_path + '/plots'

In [11]:
#------------------------------------------------------------------------------------------------------------------------
# cross-validation error statistics
#------------------------------------------------------------------------------------------------------------------------
# For every run directory below `experiment_path`, plot the per-epoch train and test error of each
# cross-validation run together with the mean over runs, and save the figure as
# <plot_output_path>/<dir_name>.png.

# exist_ok avoids the race between a separate existence check and the creation
os.makedirs(plot_output_path, exist_ok=True)

# os.listdir returns entries in arbitrary order; sorting makes the processing order (and therefore
# which arrays are left in the namespace after the loop) deterministic
dir_names = sorted(dir_name for dir_name in os.listdir(experiment_path)
                   if os.path.isdir(os.path.join(experiment_path, dir_name)) and dir_name != 'plots')
for dir_name in dir_names:
    run_path = os.path.join(experiment_path, dir_name)

    # get experiment specification: the name is parsed as <model>_<key>_<value>_<key>_<value>...
    dir_spec_list = dir_name.split('_')
    dir_spec_dict = {'model' : dir_spec_list[0]}
    # zip stops at the shorter slice, so a trailing key without a value is ignored instead of
    # raising IndexError
    dir_spec_dict.update(zip(dir_spec_list[1::2], dir_spec_list[2::2]))

    # format experiment title (a KeyError here means the directory name lacks one of these keys)
    experiment_title = '%s (grid=%s, batch=%s, lr=%s, mom=%s, slice=%s)' % (dir_spec_dict['model'].title(),
                                                    str(dir_spec_dict['grid']),
                                                    str(dir_spec_dict['bs']),
                                                    str(dir_spec_dict['lr']),
                                                    str(dir_spec_dict['mom']),
                                                    str(dir_spec_dict['sl']))

    # load data; only error_statistic is plotted below, the remaining arrays are loaded as before
    # and stay available in the namespace for inspection
    with open(os.path.join(run_path, 'experiment_info.json')) as json_data:
        experiment_information = json.load(json_data)
    run_times = np.load(os.path.join(run_path, 'run_times.dat'))
    epoch_times = np.load(os.path.join(run_path, 'epoch_times.dat'))
    train_times = np.load(os.path.join(run_path, 'train_times.dat'))
    test_times = np.load(os.path.join(run_path, 'test_times.dat'))
    error_statistic = np.load(os.path.join(run_path, 'error_statistic.dat'))
    skip_statistic = np.load(os.path.join(run_path, 'skip_statistic.dat'))

    # shape of error statistics: dim_0=Runs, dim_1=Epochs, dim_2=[train_err, test_err]
    runs, epochs, _ = error_statistic.shape

    # epochs are numbered from 1 on the x axis
    x = np.arange(1, epochs + 1)
    # mean over runs, one column per error type
    mean_error = np.mean(error_statistic, axis=0)

    fig, axes = plt.subplots(2, sharey=True, figsize=(20, 20))
    for err_idx, (ax, panel_title) in enumerate(zip(axes, ('Train Error', 'Test Error'))):
        for run in range(runs):
            ax.plot(x, error_statistic[run, :, err_idx], alpha=0.5, label='Run %s' % run)
        ax.plot(x, mean_error[:, err_idx], 'r--', label='Mean')

        ax.set_title(panel_title, fontsize=16)
        ax.set_xticks(x)
        ax.set_ylabel('RMSE [°C]', fontsize=16)
        ax.grid(True)
        ax.legend()

        # annotations are positioned in data coordinates (epoch 1, RMSE 0.35 / 0.25)
        ax.text(1, 0.35, 'Min. Mean Error: %s' % "{:2.3f}".format(np.min(mean_error[:, err_idx])), fontsize=18, fontweight='bold')
        ax.text(1, 0.25, 'Min. Error: %s' % "{:2.3f}".format(np.min(error_statistic[:, :, err_idx])), fontsize=18)

    axes[1].set_xlabel('Epoch', fontsize=16)

    # the y axis is shared, so the limit must cover train AND test error; using the train error
    # alone clipped the test curves whenever the test error was larger
    axes[1].set_ylim(0, 1.2 * np.max(error_statistic[:, :, :2]))
    fig.suptitle(experiment_title, fontsize=20)
    fig.tight_layout(rect=[0, 0.03, 1, 0.96], h_pad=4)
    fig.savefig(os.path.join(plot_output_path, '%s.png' % dir_name))
    # close this figure explicitly so figures do not accumulate across directories
    plt.close(fig)

In [None]:
# Display the skip statistics array left in the namespace by the last iteration of a loading loop.
# NOTE(review): this relies on state leaked from another cell and raises NameError on a fresh
# kernel unless a cell that assigns `skip_statistic` has been executed first.
skip_statistic

In [16]:
#------------------------------------------------------------------------------------------------------------------------
# skipped samples statistics due to corrupted measurement stations
#------------------------------------------------------------------------------------------------------------------------
# Writes three figures to `plot_output_path`:
#   skip_sample_statistic.png - stacked bars of the mean processed / skipped sample counts per experiment
#   skip_to_train_error.png   - per-run mean train error vs. mean skipped train samples
#   skip_to_test_error.png    - per-run mean test error vs. mean skipped test samples
# and prints the per-run mean / standard deviation (over epochs) of the skip statistics.


def _save_error_skip_scatter(error_skip_pairs, title, file_name):
    """Scatter error (x) against skipped samples (y) and save the figure.

    error_skip_pairs: list of (error, skipped_samples) tuples, one per run.
    title:            figure title.
    file_name:        file name of the figure inside `plot_output_path`.
    """
    errors = [pair[0] for pair in error_skip_pairs]
    skipped = [pair[1] for pair in error_skip_pairs]

    fig, ax = plt.subplots(figsize=(20, 10))
    ax.set_ylabel('Skipped Samples')
    ax.set_xlabel('Error [°C]')
    ax.set_title(title, fontsize=22)
    ax.scatter(errors, skipped)
    fig.savefig(os.path.join(plot_output_path, file_name))
    plt.close(fig)


# skip statistics data format: [processed_train_samples,skipped_train_samples,processed_test_samples,skipped_test_samples]
# exist_ok avoids the race between a separate existence check and the creation
os.makedirs(plot_output_path, exist_ok=True)

dir_names = sorted(dir_name for dir_name in os.listdir(experiment_path)
                   if os.path.isdir(os.path.join(experiment_path, dir_name)) and dir_name != 'plots')

N = len(dir_names)
# one column per experiment, rows follow the skip statistics data format above
data = np.zeros((4, N))
experiment_title = []

# for scatter of error to skip rate
train_error_skip = []
test_error_skip = []

for idx, dir_name in enumerate(dir_names):
    run_path = os.path.join(experiment_path, dir_name)

    # get experiment specification: the name is parsed as <model>_<key>_<value>_<key>_<value>...
    dir_spec_list = dir_name.split('_')
    dir_spec_dict = {'model' : dir_spec_list[0]}
    # zip stops at the shorter slice, so a trailing key without a value is ignored instead of
    # raising IndexError
    dir_spec_dict.update(zip(dir_spec_list[1::2], dir_spec_list[2::2]))

    # format experiment title (multi-line, used as x tick label of the bar chart)
    experiment_title += ['%s\n(grid=%s, bs=%s,\nlr=%s,\nmom=%s,\nsl=%s)' % (dir_spec_dict['model'].title(),
                                                    str(dir_spec_dict['grid']),
                                                    str(dir_spec_dict['bs']),
                                                    str(dir_spec_dict['lr']),
                                                    str(dir_spec_dict['mom']),
                                                    str(dir_spec_dict['sl']))]

    error_statistic = np.load(os.path.join(run_path, 'error_statistic.dat'))
    skip_statistic = np.load(os.path.join(run_path, 'skip_statistic.dat'))

    # shape of skip statistics: dim_0=Runs, dim_1=Epochs,
    # dim_2=[processed_train, skipped_train, processed_test, skipped_test]
    # (the unpacking also fails early if the array is not 3-dimensional)
    runs, epochs, _ = skip_statistic.shape

    # mean over all runs and epochs of this experiment
    data[:,idx] = np.mean(skip_statistic, axis=(0,1))

    # per-run statistics over epochs
    mean_per_run = np.mean(skip_statistic, axis=1)
    std_per_run = np.std(skip_statistic, axis=1)
    # name the experiment first, otherwise the "Run n" lines of different experiments are
    # indistinguishable in the output
    print('Experiment: %s' % dir_name)
    for run, (run_mean, run_std) in enumerate(zip(mean_per_run, std_per_run)):
        print('Run %s' % run )
        print('MEAN: Processed Train:\t%s, Skipped Train:\t%s, Processed Test:\t%s, Skipped Test:\t%s' % tuple(["{:6.0f}".format(x) for x in run_mean]))
        print('STD:  Processed Train:\t%s, Skipped Train:\t%s, Processed Test:\t%s, Skipped Test:\t%s\n' % tuple(["{:6.0f}".format(x) for x in run_std]))

    # gather (error, skip) tuples for all experiments and runs to scatter them
    train_error_skip += list(zip(np.mean(error_statistic[:,:,0],axis=1), np.mean(skip_statistic[:,:,1],axis=1)))
    test_error_skip += list(zip(np.mean(error_statistic[:,:,1],axis=1), np.mean(skip_statistic[:,:,3],axis=1)))

ind = np.arange(N)    # the x locations for the groups
width = 0.35       # the width of the bars: can also be len(x) sequence

if N == 0:
    # without run directories there is nothing to plot; the plotting code below would fail on
    # the empty arrays
    print('No run directories found in %s, skipping plots.' % experiment_path)
else:
    fig, ax = plt.subplots(figsize=(20, 10))

    # bottoms[k] is the height at which segment k starts: the sum of all segments below it
    bottoms = np.vstack([np.zeros(N), np.cumsum(data, axis=0)[:-1]])
    p1, p2, p3, p4 = [ax.bar(ind, data[row], width, bottom=bottoms[row]) for row in range(4)]

    ax.set_ylabel('Samples')
    ax.set_title('Processed / Skipped Samples', fontsize=22)
    ax.set_xticks(ind)
    ax.set_xticklabels(experiment_title)
    # extra room on the right so the legend does not cover the last bar
    ax.set_xlim(-0.5, np.max(ind) + 1)
    ax.legend((p1[0], p2[0], p3[0], p4[0]), ('Processed Train', 'Skipped Train', 'Processed Test', 'Skipped Test'), loc='upper right')
    fig.savefig(os.path.join(plot_output_path, 'skip_sample_statistic.png'))
    plt.close(fig)

    _save_error_skip_scatter(train_error_skip, 'Train Error vs. Skipped Samples', 'skip_to_train_error.png')
    _save_error_skip_scatter(test_error_skip, 'Test Error vs. Skipped Samples', 'skip_to_test_error.png')

Run 0
MEAN: Processed Train:	354216, Skipped Train:	 44664, Processed Test:	 71088, Skipped Test:	  8832
STD:  Processed Train:	   289, Skipped Train:	   289, Processed Test:	     0, Skipped Test:	     0

Run 1
MEAN: Processed Train:	322820, Skipped Train:	 35596, Processed Test:	 69584, Skipped Test:	 10336
STD:  Processed Train:	   229, Skipped Train:	   229, Processed Test:	     0, Skipped Test:	     0

Run 2
MEAN: Processed Train:	320826, Skipped Train:	 42630, Processed Test:	 71024, Skipped Test:	  8896
STD:  Processed Train:	   293, Skipped Train:	   293, Processed Test:	     0, Skipped Test:	     0

Run 0
MEAN: Processed Train:	312224, Skipped Train:	 40288, Processed Test:	 70160, Skipped Test:	  9760
STD:  Processed Train:	   359, Skipped Train:	   359, Processed Test:	     0, Skipped Test:	     0

Run 1
MEAN: Processed Train:	301018, Skipped Train:	 41126, Processed Test:	 70960, Skipped Test:	  8960
STD:  Processed Train:	   325, Skipped Train:	   325, Processed Test:	     

In [8]:
# Errors (first element) of the collected (error, skipped samples) pairs.
# NOTE(review): depends on `train_error_skip` being filled by another cell.
# A comprehension gives [] for an empty list, where indexing the transposed zip raised IndexError.
[pair[0] for pair in train_error_skip]

[3.394809613820629, 3.705718831132419]