In [1]:
import glob

import tensorflow as tf

from tensorflow.python.summary.summary_iterator import summary_iterator
from tensorflow.python.framework import tensor_util

from matplotlib import pyplot as plt
import yaml

import itertools

import numpy as np

import pandas as pd

In [2]:
metrics = ['accuracy_L1', 'accuracy_L12', 'loss', "I_XZ", "I_YZ"]

def event_file(path):
    files = glob.glob(f"{path}/*")
    return files[0]

def read_metrics(path):
    data = dict(zip(metrics,  [[] for x in range(len(metrics))]))
    
    for e in summary_iterator(event_file(path)):
        for v in e.summary.value:
            if v.tag in metrics:
                t = tensor_util.MakeNdarray(v.tensor)
                data[v.tag].append(float(t))
    return data


data = read_metrics("../artifacts-server/mnist--various-opts-M1/vdb-mnist--2019-11-25--17-46-49/logs/train");

Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


In [3]:
def read_model_metrics(model_path, metric, average_n_last_epochs=5):
    with open(f"{model_path}/summary.yml", "r") as fh:
        summary = yaml.safe_load(fh)


    del summary["metrics"]
    del summary["epoch"]
    

    train_res = read_metrics(f"{model_path}/logs/train")
    test_res = read_metrics(f"{model_path}/logs/test")
    
    df = pd.DataFrame(list(range(1, len(train_res["accuracy_L1"])+1)), columns=["epoch"])
    
    for m in metrics:
        df[f"metrics:train:{m}"] = train_res[m]
        df[f"metrics:test:{m}"] = test_res[m]
        
    for k, v in summary.items():
        df[k] = v
        
    summary["_key"] = "M=%d::beta=%1.1e::strategy=%s" % (summary["M"], summary["beta"], summary["strategy"])

    df["metrics:train:error_L1"] =  1 - df["metrics:train:accuracy_L1"]
    df["metrics:test:error_L1"] =  1 - df["metrics:test:accuracy_L1"]
    df["metrics:train:error_L12"] =  1 - df["metrics:train:accuracy_L12"]
    df["metrics:test:error_L12"] =  1 - df["metrics:test:accuracy_L12"]
    
    average_metric = df[f"metrics:{metric}"][(-average_n_last_epochs):].mean()
    summary[metric] = average_metric
    return summary

read_model_metrics(
    "../artifacts-server/mnist--various-opts-M1/vdb-mnist--2019-11-25--17-46-49",
    "test:error_L1",
    average_n_last_epochs=5
)

{'M': 1,
 'beta': 0.001,
 'dataset': 'mnist',
 'lr': 0.0001,
 'model': 'vdb/e1:1024|e2:1024|z:256',
 'strategy': 'seq/d:5|e:1',
 '_key': 'M=1::beta=1.0e-03::strategy=seq/d:5|e:1',
 'test:error_L1': 0.01905980110168457}

In [4]:
def get_models_from_dirs(dirs):
    files = []
    for d in dirs:
        files.extend(glob.glob(f"{d}/*/summary.yml"))
    
    files = list(map(lambda f: "/".join(f.split("/")[:-1]), files))
    print(f"Reading {','.join(dirs)}")
    print(f"we have {len(files)} models")
    return files

In [5]:
model_paths = get_models_from_dirs(["../artifacts-server/mnist--various-opts-M1"])

model_paths

Reading ../artifacts-server/mnist--various-opts-M1
we have 40 models


['../artifacts-server/mnist--various-opts-M1/vdb-mnist--2019-11-25--17-46-49',
 '../artifacts-server/mnist--various-opts-M1/vdb-mnist--2019-11-26--00-40-34',
 '../artifacts-server/mnist--various-opts-M1/vdb-mnist--2019-11-25--17-46-46',
 '../artifacts-server/mnist--various-opts-M1/vdb-mnist--2019-11-26--07-37-55',
 '../artifacts-server/mnist--various-opts-M1/vdb-mnist--2019-11-26--09-30-08-925070',
 '../artifacts-server/mnist--various-opts-M1/vdb-mnist--2019-11-26--12-10-41-823999',
 '../artifacts-server/mnist--various-opts-M1/vdb-mnist--2019-11-25--23-44-36',
 '../artifacts-server/mnist--various-opts-M1/vdb-mnist--2019-11-25--22-49-18',
 '../artifacts-server/mnist--various-opts-M1/vdb-mnist--2019-11-26--09-27-16-207667',
 '../artifacts-server/mnist--various-opts-M1/vdb-mnist--2019-11-26--12-10-36-984242',
 '../artifacts-server/mnist--various-opts-M1/vdb-mnist--2019-11-25--18-28-29',
 '../artifacts-server/mnist--various-opts-M1/vdb-mnist--2019-11-26--07-49-01',
 '../artifacts-server/mn

In [6]:
n_last = 5
metric = "test:error_L12"

In [7]:
def read_stats_from(dirs, metric, n=5):
    print(f"average over {n} last epochs")
    model_paths = get_models_from_dirs(dirs)
    
    df = pd.DataFrame([read_model_metrics(m, metric, average_n_last_epochs=n) for m in model_paths])
    return df

df_mnist_training_stats = read_stats_from(
    ["../artifacts-server/mnist--various-opts-M*"],
    metric=metric,
    n=n_last
)

average over 5 last epochs
Reading ../artifacts-server/mnist--various-opts-M*
we have 160 models


In [8]:
df_mnist_training_stats.groupby("_key")["M"].count()

_key
M=12::beta=1.0e-03::strategy=oneshot         5
M=12::beta=1.0e-03::strategy=seq/d:10|e:1    5
M=12::beta=1.0e-03::strategy=seq/d:20|e:1    5
M=12::beta=1.0e-03::strategy=seq/d:5|e:1     5
M=12::beta=1.0e-05::strategy=oneshot         5
M=12::beta=1.0e-05::strategy=seq/d:10|e:1    5
M=12::beta=1.0e-05::strategy=seq/d:20|e:1    5
M=12::beta=1.0e-05::strategy=seq/d:5|e:1     5
M=1::beta=1.0e-03::strategy=oneshot          5
M=1::beta=1.0e-03::strategy=seq/d:10|e:1     5
M=1::beta=1.0e-03::strategy=seq/d:20|e:1     5
M=1::beta=1.0e-03::strategy=seq/d:5|e:1      5
M=1::beta=1.0e-05::strategy=oneshot          5
M=1::beta=1.0e-05::strategy=seq/d:10|e:1     5
M=1::beta=1.0e-05::strategy=seq/d:20|e:1     5
M=1::beta=1.0e-05::strategy=seq/d:5|e:1      5
M=3::beta=1.0e-03::strategy=oneshot          5
M=3::beta=1.0e-03::strategy=seq/d:10|e:1     5
M=3::beta=1.0e-03::strategy=seq/d:20|e:1     5
M=3::beta=1.0e-03::strategy=seq/d:5|e:1      5
M=3::beta=1.0e-05::strategy=oneshot          5
M=3::bet

In [9]:
df_mnist_training_stats[:10]

Unnamed: 0,M,_key,beta,dataset,lr,model,strategy,test:error_L12
0,3,M=3::beta=1.0e-05::strategy=seq/d:20|e:1,1e-05,mnist,0.0001,vdb/e1:1024|e2:1024|z:256,seq/d:20|e:1,0.0144
1,3,M=3::beta=1.0e-05::strategy=oneshot,1e-05,mnist,0.0001,vdb/e1:1024|e2:1024|z:256,oneshot,0.0138
2,3,M=3::beta=1.0e-05::strategy=seq/d:5|e:1,1e-05,mnist,0.0001,vdb/e1:1024|e2:1024|z:256,seq/d:5|e:1,0.01434
3,3,M=3::beta=1.0e-03::strategy=seq/d:20|e:1,0.001,mnist,0.0001,vdb/e1:1024|e2:1024|z:256,seq/d:20|e:1,0.01384
4,3,M=3::beta=1.0e-03::strategy=seq/d:20|e:1,0.001,mnist,0.0001,vdb/e1:1024|e2:1024|z:256,seq/d:20|e:1,0.01452
5,3,M=3::beta=1.0e-03::strategy=seq/d:10|e:1,0.001,mnist,0.0001,vdb/e1:1024|e2:1024|z:256,seq/d:10|e:1,0.0135
6,3,M=3::beta=1.0e-05::strategy=seq/d:10|e:1,1e-05,mnist,0.0001,vdb/e1:1024|e2:1024|z:256,seq/d:10|e:1,0.01358
7,3,M=3::beta=1.0e-05::strategy=seq/d:20|e:1,1e-05,mnist,0.0001,vdb/e1:1024|e2:1024|z:256,seq/d:20|e:1,0.01336
8,3,M=3::beta=1.0e-03::strategy=seq/d:20|e:1,0.001,mnist,0.0001,vdb/e1:1024|e2:1024|z:256,seq/d:20|e:1,0.01456
9,3,M=3::beta=1.0e-03::strategy=seq/d:5|e:1,0.001,mnist,0.0001,vdb/e1:1024|e2:1024|z:256,seq/d:5|e:1,0.01398


In [10]:
strategies = ['oneshot', 'seq/d:5|e:1', 'seq/d:10|e:1', 'seq/d:20|e:1']
strategies_mapping = dict(zip(strategies, range(len(strategies))))

def print_table(df, beta, metric, should_print=True):
    
    df = df[df.beta == beta]\
        .groupby(["strategy", "M"])[metric]\
        .agg({metric: ['mean', 'std']})\
        .reset_index()
        
    df["temp"] = df[metric][['mean', 'std']].apply(lambda x: "%.2f±%.4f\%%" % (x[0]*100, x[1]*100), axis=1) 

    
    df = df.drop(columns=[metric])\
        .rename(columns={"temp": f"{metric} ($\\beta=10^{{%d}}$ and averaged over {n_last} epochs)" % np.log10(beta)}) \
        .pivot(index="strategy", columns="M")
    
    if should_print:
        print(df.to_latex(column_format="rcccc", escape=False))
    
    return df

    
print_table(df_mnist_training_stats, 0.001, metric)

\begin{tabular}{rcccc}
\toprule
{} & \multicolumn{4}{l}{test:error_L12 ($\beta=10^{-3}$ and averaged over 5 epochs)} \\
{} \\
M &                                                          1  &             3  &             6  &             12 \\
strategy     &                                                             &                &                &                \\
\midrule
oneshot      &                                      1.52±0.0746\% &  1.48±0.0491\% &  1.42±0.0652\% &  1.41±0.0767\% \\
seq/d:10|e:1 &                                      1.46±0.0276\% &  1.45±0.0800\% &  1.38±0.0498\% &  1.40±0.0513\% \\
seq/d:20|e:1 &                                      1.47±0.0686\% &  1.42±0.0499\% &  1.42±0.0540\% &  1.43±0.0687\% \\
seq/d:5|e:1  &                                      1.48±0.0355\% &  1.42±0.0540\% &  1.42±0.0471\% &  1.45±0.0428\% \\
\bottomrule
\end{tabular}



is deprecated and will be removed in a future version
  
  new_axis = axis.drop(labels, errors=errors)


Unnamed: 0_level_0,test:error_L12 ($\beta=10^{-3}$ and averaged over 5 epochs),test:error_L12 ($\beta=10^{-3}$ and averaged over 5 epochs),test:error_L12 ($\beta=10^{-3}$ and averaged over 5 epochs),test:error_L12 ($\beta=10^{-3}$ and averaged over 5 epochs)
M,1,3,6,12
strategy,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
oneshot,1.52±0.0746\%,1.48±0.0491\%,1.42±0.0652\%,1.41±0.0767\%
seq/d:10|e:1,1.46±0.0276\%,1.45±0.0800\%,1.38±0.0498\%,1.40±0.0513\%
seq/d:20|e:1,1.47±0.0686\%,1.42±0.0499\%,1.42±0.0540\%,1.43±0.0687\%
seq/d:5|e:1,1.48±0.0355\%,1.42±0.0540\%,1.42±0.0471\%,1.45±0.0428\%


In [222]:
# print_table(df_mnist_training_stats, 0.00001, metric)

In [276]:
df_mnist_training_stats_lr_decay = read_stats_from(
    ["../artifacts-server/mnist--lr-decay-various-opts-*"],
    metric=metric,
    n=n_last
)

average over 5 last epochs
Reading ../artifacts-server/mnist--lr-decay-various-opts-*
we have 16 models


In [277]:
print_table(df_mnist_training_stats_lr_decay, 0.001, metric, should_print=False)

is deprecated and will be removed in a future version
  


Unnamed: 0_level_0,test:error_L12 ($\beta=10^{-3}$ and averaged over 5 epochs),test:error_L12 ($\beta=10^{-3}$ and averaged over 5 epochs)
M,1,6
strategy,Unnamed: 1_level_2,Unnamed: 2_level_2
oneshot,1.40±0.0171\%,1.41±nan\%
seq/d:10|e:1,1.37±0.0727\%,1.44±nan\%
seq/d:20|e:1,1.32±0.0301\%,1.46±nan\%
seq/d:5|e:1,1.39±0.0857\%,1.40±nan\%


In [278]:
# print_table(df_mnist_training_stats_lr_decay, 0.00001, metric, should_print=False)

In [378]:
df_mnist_training_stats_lr_decay_algo2 = read_stats_from(
    ["../artifacts-server/train_algo2-/*", "../artifacts-server/train-/*"],
    metric=metric,
    n=n_last
)

average over 5 last epochs
Reading ../artifacts-server/train_algo2-/*,../artifacts-server/train-/*
we have 129 models


In [379]:
df_mnist_training_stats_lr_decay_algo2.groupby("_key")["M"].count()

_key
M=12::beta=1.0e-03::strategy=alt/e:10|d:1    3
M=12::beta=1.0e-03::strategy=alt/e:20|d:1    3
M=12::beta=1.0e-03::strategy=alt/e:5|d:1     3
M=12::beta=1.0e-03::strategy=oneshot         3
M=12::beta=1.0e-03::strategy=seq/d:10|e:1    3
M=12::beta=1.0e-03::strategy=seq/d:20|e:1    3
M=12::beta=1.0e-03::strategy=seq/d:5|e:1     3
M=12::beta=1.0e-05::strategy=alt/e:10|d:1    3
M=12::beta=1.0e-05::strategy=alt/e:20|d:1    3
M=12::beta=1.0e-05::strategy=alt/e:5|d:1     3
M=12::beta=1.0e-05::strategy=oneshot         3
M=12::beta=1.0e-05::strategy=seq/d:10|e:1    3
M=12::beta=1.0e-05::strategy=seq/d:20|e:1    3
M=12::beta=1.0e-05::strategy=seq/d:5|e:1     3
M=1::beta=1.0e-03::strategy=alt/e:10|d:1     3
M=1::beta=1.0e-03::strategy=alt/e:20|d:1     3
M=1::beta=1.0e-03::strategy=alt/e:5|d:1      3
M=1::beta=1.0e-03::strategy=oneshot          6
M=1::beta=1.0e-03::strategy=seq/d:10|e:1     3
M=1::beta=1.0e-03::strategy=seq/d:20|e:1     3
M=1::beta=1.0e-03::strategy=seq/d:5|e:1      3
M=1::bet

In [380]:
print_table(df_mnist_training_stats_lr_decay_algo2, 0.001, metric, should_print=False)

is deprecated and will be removed in a future version
  


Unnamed: 0_level_0,test:error_L12 ($\beta=10^{-3}$ and averaged over 5 epochs),test:error_L12 ($\beta=10^{-3}$ and averaged over 5 epochs),test:error_L12 ($\beta=10^{-3}$ and averaged over 5 epochs)
M,1,6,12
strategy,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
alt/e:10|d:1,1.34±0.0420\%,1.42±0.0620\%,1.47±0.0500\%
alt/e:20|d:1,1.44±0.0393\%,1.44±0.0397\%,1.46±0.0406\%
alt/e:5|d:1,1.36±0.0162\%,1.46±0.0671\%,1.47±0.0397\%
oneshot,1.33±0.0886\%,1.43±0.0496\%,1.36±0.0197\%
seq/d:10|e:1,1.37±0.0727\%,1.45±0.0352\%,1.45±0.0270\%
seq/d:20|e:1,1.32±0.0301\%,1.43±0.0580\%,1.39±0.0463\%
seq/d:5|e:1,1.39±0.0857\%,1.39±0.0481\%,1.38±0.0561\%


In [381]:
print_table(df_mnist_training_stats_lr_decay_algo2, 0.00001, metric, should_print=False)

is deprecated and will be removed in a future version
  


Unnamed: 0_level_0,test:error_L12 ($\beta=10^{-5}$ and averaged over 5 epochs),test:error_L12 ($\beta=10^{-5}$ and averaged over 5 epochs),test:error_L12 ($\beta=10^{-5}$ and averaged over 5 epochs)
M,1,6,12
strategy,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
alt/e:10|d:1,1.35±0.0462\%,1.34±0.0397\%,1.35±0.0257\%
alt/e:20|d:1,1.40±0.0721\%,1.30±0.0473\%,1.33±0.0322\%
alt/e:5|d:1,1.33±0.0423\%,1.34±0.0189\%,1.36±0.0531\%
oneshot,1.38±0.0400\%,1.38±0.0433\%,1.35±0.0280\%
seq/d:10|e:1,1.33±0.0635\%,1.36±0.0240\%,1.36±0.0223\%
seq/d:20|e:1,1.36±0.0987\%,1.34±0.0295\%,1.38±0.0436\%
seq/d:5|e:1,1.35±0.0950\%,1.41±0.0457\%,1.36±0.0450\%


In [324]:
# df_mnist_training_stats_lr_decay.groupby("_key")["M"].count()

In [377]:
df = read_stats_from(
    ["../artifacts-server/train-/fix-normalised*"],
    metric=metric,
    n=n_last
)

average over 5 last epochs
Reading ../artifacts-server/train-/fix-normalised*
we have 3 models


In [383]:
print_table(df, 0.001, metric, should_print=False)

is deprecated and will be removed in a future version
  


Unnamed: 0_level_0,test:error_L12 ($\beta=10^{-3}$ and averaged over 5 epochs)
M,1
strategy,Unnamed: 1_level_2
oneshot,1.25±0.0538\%


In [15]:
df2 = read_stats_from(
    ["../artifacts-server/train-/cherry-pick-fix-normalised*"],
    metric=metric,
    n=n_last
)

average over 5 last epochs
Reading ../artifacts-server/train-/cherry-pick-fix-normalised*
we have 6 models


In [16]:
print_table(df2, 0.001, metric, should_print=False)

is deprecated and will be removed in a future version
  


Unnamed: 0_level_0,test:error_L12 ($\beta=10^{-3}$ and averaged over 5 epochs)
M,1
strategy,Unnamed: 1_level_2
oneshot,1.28±0.0458\%


In [17]:
print_table( read_stats_from(
    ["../artifacts-server/train-/cherry-pick-after-merging-mnist--lr-decay-various-opts-M1*"],
    metric=metric,
    n=n_last
)
, 0.001, metric, should_print=False)

average over 5 last epochs
Reading ../artifacts-server/train-/cherry-pick-after-merging-mnist--lr-decay-various-opts-M1*
we have 3 models


is deprecated and will be removed in a future version
  


Unnamed: 0_level_0,test:error_L12 ($\beta=10^{-3}$ and averaged over 5 epochs)
M,1
strategy,Unnamed: 1_level_2
oneshot,1.22±0.0367\%
