# Notes

Based on feedback:

* Better not to compute mean values on the three configurations
* Not really meaningful to examine the MAE and validation MAE of the last epoch


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pickle

In [None]:
# Notebook-wide plotting / pandas configuration through ipypublish.
from ipypublish import nb_setup

# Font sizes for titles, axis labels and tick labels.
# https://stackoverflow.com/a/39566040/11552622
rcparams = {
    'axes.titlesize': 13,
    'axes.labelsize': 9,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
}

# NOTE: these two calls rebind the `plt` and `pd` names imported above to
# whatever nb_setup returns (presumably the configured modules - confirm
# against the ipypublish documentation).
plt = nb_setup.setup_matplotlib(rcparams=rcparams)
pd = nb_setup.setup_pandas()
sns.set(style='whitegrid')

# Load data, setup code

In [None]:
# Load the DNN statistics table. Later cells rely on its RPM, hlayers,
# neurons, aks and val_mae columns.
df = pd.read_csv('data/dnn_stats.csv')
df

In [None]:
# Load the averaged statistics table. Later cells filter it on `aks` and
# read its val_mae, mae, val_loss, loss, hlayers and neurons columns.
avg = pd.read_csv('data/dnn_stats_avg.csv')
avg

In [None]:
# Load the pickled training histories. The plotting helpers index `data`
# with (RPM, hlayers, neurons, aks) tuples and read per-metric sequences
# such as 'loss' and 'val_loss' from each entry.
# NOTE(review): pickle.load can execute arbitrary code; only open files that
# come from a trusted source.
with open('data/dnn_stats_data.bin', 'rb') as f:
    data = pickle.load(f)

In [None]:
def plot_statistics(data, configs, rpms, display_values, fun_descr):
    """Plot training and validation loss on a (configuration x pump speed) grid.

    Each row of the figure corresponds to one entry of ``configs`` and each
    column to one entry of ``rpms``. To the left of every row a text block
    shows ``fun_descr(config)`` followed by, for every name in
    ``display_values``, the mean over ``rpms`` of that metric's last value.

    Parameters
    ----------
    data : mapping
        Indexed with ``(rpm, *config)`` tuples. Every entry must provide
        ``'loss'``, ``'val_loss'`` and each name in ``display_values`` as a
        sequence supporting ``[-1]``.
    configs : sequence of iterables
        Architecture descriptors; their elements are appended to the pump
        speed to build the lookup key.
    rpms : sequence
        Pump speeds, one per column; also used in the column titles.
    display_values : sequence of str
        Metric names whose last value is averaged and printed per row.
    fun_descr : callable
        Maps a configuration to the string used as the row heading.

    Raises
    ------
    KeyError
        If ``data`` has no entry for a requested ``(rpm, *config)`` key.
    """
    nrow = len(configs)
    ncol = len(rpms)
    # squeeze=False keeps the axes container two-dimensional even for a single
    # configuration or a single pump speed; without it the row/column indexing
    # below fails on the squeezed 1-D array or bare Axes.
    fig, axgrid = plt.subplots(nrow, ncol, figsize=(2.8*ncol + .5, 1.75*nrow + .5),
                               sharey='row', sharex='col', squeeze=False)

    for i, (axes, config) in enumerate(zip(axgrid, configs)):
        # Last recorded value of every requested metric, one per pump speed.
        last_val = {val: [] for val in display_values}
        for rpm, a in zip(rpms, axes):
            history = data[(rpm, *config)]
            a.plot(history['loss'], label='loss', lw=.5)
            a.plot(history['val_loss'], label='val loss', lw=.5)
            for val in display_values:
                last_val[val].append(history[val][-1])

        # Only the first row carries the legend and the column titles.
        if i == 0:
            axes[0].legend()
            for rpm, a in zip(rpms, axes):
                a.set_title(f'{rpm} RPM')

        # The y axis is shared along a row, so one label per row is enough.
        axes[0].set_ylabel('Loss')

        # Row annotation: heading plus the mean of the last values, three
        # significant digits each.
        text = '\n'.join(
            'mean ' + val.replace("_", " ") + ': ' +
            '{:.3}'.format(np.mean(last_val[val]))
            for val in display_values
        )
        # NOTE(review): '\textbf{...}' only renders as bold when LaTeX text
        # rendering is active - presumably enabled by the notebook setup; confirm.
        text = '\\textbf{' + fun_descr(config) + '}' + '\n' + text
        # x = -1.5 in axes coordinates places the text left of the first panel.
        axes[0].text(-1.5, .5, text, transform=axes[0].transAxes,
                     verticalalignment='center')

    # The x axis is shared along a column, so only the bottom row is labelled.
    for a in axgrid[-1]:
        a.set_xlabel('Epochs')
    # General plot properties
    fig.subplots_adjust(wspace=.05, hspace=.05)
    plt.tight_layout()

In [None]:
def df2config(df):
    """Return one ``(hlayers, neurons, aks)`` tuple per row of ``df``, in row order."""
    return list(zip(df.hlayers, df.neurons, df.aks))

## Example


In [None]:
# Three architectures given as (hlayers, neurons, aks) tuples.
configs = [
    (2, 64, 40),
    (4, 32, 40),
    (2, 32, 40),
]


def fun_descr(config):
    """Row heading: layer and neuron count of ``config``."""
    return f'{config[0]} layers, {config[1]} neurons'


plot_statistics(
    data,
    configs,
    [4000, 5000, 6000],
    ['loss', 'val_loss', 'mae', 'val_mae'],
    fun_descr,
)
#plt.savefig('figs/dnn_statistics_sorted_40aks.eps', bbox_inches='tight')

# Best architectures - 50 aks

## Mean values

1. We have 312 configurations in total, determined by the number of hidden layers (L), the number of neurons (N), the number of Fourier coefficients (C) and the pump speed (RPM). 

1. A DNN was trained for each configuration, and the losses were extracted

1. We computed the mean values across the different pump speeds

1. This yields 104 DNN architectures:

In [None]:
# First five and last five rows; underscores in the column names are
# replaced by spaces for display only.
(
    avg
    .iloc[np.r_[0:5, -5:0], :]
    .rename(columns=lambda col: col.replace('_', ' '))
)

We then constrain the number of Fourier coefficients to the value that allows a physiologically meaningful reconstruction of the DNN input signals. We choose $C=50$ and sort the architectures by validation MAE (mean absolute error):

In [None]:
# Rows with aks == 50, ordered by ascending validation MAE; the index is
# renumbered from 0 after sorting.
sub = avg[avg.aks == 50].sort_values('val_mae', ignore_index=True)
sub.head()

In [None]:
# Full sorted table, with underscores in the column names shown as spaces.
sub.rename(columns=lambda col: col.replace('_', ' '))

In [None]:
# Validation and training MAE against the row index of the sorted table.
fig, ax = plt.subplots(figsize=(10, 3))
ax.plot(sub.index, sub.val_mae, '-o', label='val mae')
ax.plot(sub.index, sub.mae, '--o', label='mae')
ax.set_ylabel('MAE')
ax.set_xlabel('Index in Table 1.2')
ax.set_xticks(sub.index)
ax.legend();

In [None]:
def fun_descr(config):
    """Row heading listing layers (L), neurons (N) and coefficients (C)."""
    return f'L={config[0]}, N={config[1]}, C={config[2]}'


# Single-hidden-layer networks of increasing width, 50 coefficients each.
plot_statistics(
    data,
    [(1, 8, 50), (1, 16, 50), (1, 32, 50), (1, 64, 50)],
    [4000, 5000, 6000],
    ['loss', 'val_loss', 'mae', 'val_mae'],
    fun_descr,
)

In [None]:
# (hlayers, neurons, aks) tuples of the first seven rows of `sub`.
conf = df2config(sub.head(7))
conf

In [None]:
def fun_descr(config):
    """Row heading listing layers (L), neurons (N) and coefficients (C)."""
    return f'L={config[0]}, N={config[1]}, C={config[2]}'


# Loss curves of the configurations collected in `conf`.
plot_statistics(
    data,
    conf,
    [4000, 5000, 6000],
    ['loss', 'val_loss', 'mae', 'val_mae'],
    fun_descr,
)

In [None]:
# Display the current contents of `sub`.
sub

In [None]:
def plot_loss_comparison(df):
    """Plot loss and validation loss against neuron count, one panel per depth.

    ``df`` must provide the columns ``hlayers``, ``neurons``, ``loss`` and
    ``val_loss``. The figure is a fixed 3 x 2 grid with shared axes; the
    distinct ``hlayers`` values are assigned to panels in ascending order, so
    at most six values are drawn.
    """
    fig, grid = plt.subplots(3, 2, figsize=(8, 6), sharex=True, sharey=True)
    panels = grid.ravel()

    for depth, panel in zip(sorted(df.hlayers.unique()), panels):
        rows = df[df.hlayers == depth].sort_values('neurons')
        panel.plot(rows.neurons, rows.val_loss, '-o', label='val loss')
        panel.plot(rows.neurons, rows.loss, '--o', label='loss')
        panel.set_title(fr'$L = {depth}$')

    # The x axis is shared, so setting the ticks on one panel is enough.
    panels[0].set_xticks(sorted(df.neurons.unique()))
    # Bottom row gets the x label, left column the y label.
    for panel in panels[-2:]:
        panel.set_xlabel('Neurons')
    for panel in panels[::2]:
        panel.set_ylabel('Loss')
    # plt.legend() acts on pyplot's current axes.
    plt.legend()
    plt.subplots_adjust(hspace=.3)

In [None]:
# Loss vs. neuron count for the table currently held in `sub`.
plot_loss_comparison(sub)

# Best architectures - non-averaged values - 50 aks


In [None]:
# NOTE: rebinds `sub`. From here on it holds the aks == 50 rows of `df`,
# no longer the subset of `avg` built earlier in the notebook.
sub = df[df.aks == 50].reset_index(drop=True)

In [None]:
# Display the full statistics table.
df

In [None]:
# First five and last four rows of `sub`; underscores in the column names
# are replaced by spaces for display only.
(
    sub
    .iloc[[0, 1, 2, 3, 4, -4, -3, -2, -1], :]
    .rename(columns=lambda c: c.replace('_', ' '))
)

## 4000 RPM

In [None]:
# Number of top-ranked rows (by validation MAE) shown and plotted per pump speed.
N = 16

In [None]:
# 4000 RPM rows ranked by ascending validation MAE, index renumbered from 0.
sub4000 = sub[sub.RPM == 4000].sort_values('val_mae', ignore_index=True)
# Show the N best rows with display-friendly column names.
sub4000.head(N).rename(columns=lambda c: c.replace('_', ' '))

In [None]:
def plot_statistics(data, configs, fun_descr, rpm, ymin=0.005, ymax=.03):
    """Plot training and validation loss of several configurations at one pump speed.

    The panels are laid out four per row with shared x and y axes; each panel
    is titled with ``fun_descr(conf)``.

    NOTE: this definition replaces the five-argument ``plot_statistics``
    defined earlier in this notebook; cells written for the earlier signature
    must not be re-run after this one.

    Parameters
    ----------
    data : mapping
        Indexed with ``(rpm, *conf)`` tuples. Every entry must provide
        ``'loss'`` and ``'val_loss'`` sequences.
    configs : sequence of iterables
        Configurations to draw, one panel each, filled row by row.
    fun_descr : callable
        Maps a configuration to its panel title.
    rpm
        Pump speed used as the first element of the lookup key.
    ymin, ymax : float
        Limits of the shared y axis.

    Raises
    ------
    KeyError
        If ``data`` has no entry for a requested ``(rpm, *conf)`` key.
    """
    ncol = 4
    # Ceiling division: a partially filled last row still needs its own row.
    nrow, remainder = divmod(len(configs), ncol)
    if remainder:
        nrow += 1

    # squeeze=False keeps `axes` two-dimensional when everything fits on one
    # row; otherwise the row/column indexing below fails on the 1-D array.
    fig, axes = plt.subplots(nrow, ncol, figsize=(9, 1.65*nrow + .5),
                             sharex=True, sharey=True, squeeze=False)

    for a, conf in zip(axes.ravel(), configs):
        history = data[(rpm, *conf)]
        a.plot(history['loss'], label='loss', lw=.6)
        a.plot(history['val_loss'], label='val loss', lw=.6)
        a.set_title(fun_descr(conf))

    # Axes are shared, so only the bottom row and the left column are labelled.
    for a in axes[-1]:
        a.set_xlabel('Epochs')
    for a in axes[:, 0]:
        a.set_ylabel('Loss')

    axes[0, 0].legend()
    # With sharey=True, setting the limits on one panel applies to all of them.
    axes[0, 0].axis(ymin=ymin, ymax=ymax)
    plt.subplots_adjust(wspace=.1, hspace=.3)
    plt.tight_layout()

In [None]:
# Configurations of the N best 4000 RPM rows.
configs = df2config(sub4000.head(N))


def descfun(conf):
    """Panel title giving the layer and neuron count of ``conf``."""
    return f'$L={conf[0]}, N = {conf[1]}$'


plot_statistics(data, configs, descfun, 4000, ymin=0.005, ymax=0.035)

## 5000 RPM

In [None]:
# 5000 RPM rows ranked by ascending validation MAE, index renumbered from 0.
sub5000 = sub[sub.RPM == 5000].sort_values('val_mae', ignore_index=True)
# Show the N best rows with display-friendly column names.
sub5000.head(N).rename(columns=lambda c: c.replace('_', ' '))

In [None]:
# Loss curves of the first N rows of `sub5000`, using the default y limits.
configs = df2config(sub5000[:N])
plot_statistics(data, configs, descfun, 5000)

## 6000 RPM


In [None]:
# 6000 RPM rows ranked by ascending validation MAE, index renumbered from 0.
sub6000 = sub[sub.RPM == 6000].sort_values('val_mae', ignore_index=True)
# Show the N best rows with display-friendly column names.
sub6000.head(N).rename(columns=lambda c: c.replace('_', ' '))

In [None]:
# Loss curves of the first N rows of `sub6000`; the lower y limit is set to
# .004 instead of the default.
configs = df2config(sub6000[:N])
plot_statistics(data, configs, descfun, 6000, ymin=.004)