In [272]:
%matplotlib inline
import pandas as pd
import glob
import os
import numpy as np
import json
from bokeh.plotting import show, output_notebook, figure, ColumnDataSource
from bokeh.charts import Scatter
from bokeh.models import HoverTool, BoxSelectTool
from lightjob.cli import load_db
from lightjob.db import SUCCESS

def moving(l):
    v = 0.
    lm = []
    for val in l:
        v = v * 0.999 + val * 0.001
        lm.append(v)
    return lm
output_notebook()

def get_df(folder, from_jobs=False):
    db = load_db('../students/.lightjob')
    rows = []
    job_stats  = {}
    for folder in glob.glob('../students/{}/*'.format(folder)):
        stats = os.path.join(folder, 'stats.csv')
        valid = os.path.join(folder, 'valid.csv')
        if not os.path.exists(stats):
            continue
        if not os.path.exists(valid):
            continue
                
        df_stats = pd.read_csv(stats)
        df_valid = pd.read_csv(valid)
        
        if from_jobs:
            job = db.get_job_with_summary(os.path.basename(folder))
            hypers = job['content']['hypers']
            start_time = job['life'][0]['dt']
            end_time = job['life'][0]['dt']
        else:
            result = os.path.join(folder, 'result.json')
            if not os.path.exists(result):
                continue
            with open(result) as fd:
                result_dict = json.load(fd)
            hypers = result_dict['params']
            start_time = result_dict['start_time']
            end_time = result_dict.get('end_time')
        
        acc = moving(df_stats['acc'])[-1]
        row = {}
        row.update(hypers)
        row['train_acc'] = acc
        row['valid_acc'] = df_valid.values.max()
        row['id'] = os.path.basename(folder)
        row['nb_updates'] = len(df_stats)
        row['start_time'] = start_time
        row['end_time'] = end_time
        row['n_epochs'] = (len(df_stats)*32)/40000
        job_stats[row['id']] = df_stats
        rows.append(row)
    df = pd.DataFrame(rows)
    df['train_val_ratio'] = df['train_acc'] / df['valid_acc']
    return df, job_stats


In [293]:
df, job_stats = get_df('jobs', from_jobs=True)

In [294]:
df.sort_values(by='valid_acc', ascending=False)

Unnamed: 0,algo,end_time,fc1,fc2,id,lr,model,momentum,n_epochs,nb_updates,nbf,sf,start_time,train_acc,valid_acc,train_val_ratio
1,nesterov,2017-04-12T01:34:06.590957,200,2300,450a1f40034efe1afc553f9be9d3d597,0.000655,convfc,0.835576,227.0,283750,800,3,2017-04-12T01:34:06.590957,0.900524,0.796625,1.130423
0,sgd,2017-04-12T01:34:06.683333,500,800,abb3abc12baf7dd6f51c2455ea45eafd,0.001322,convfc,,250.0,312500,256,3,2017-04-12T01:34:06.683333,0.871009,0.78105,1.115177
8,nesterov,2017-04-12T01:34:06.608787,100,700,657009e0990abca7488991bace4115ad,0.000825,convfc,0.602424,253.0,316250,700,5,2017-04-12T01:34:06.608787,0.844151,0.779353,1.083143
4,adam,2017-04-12T01:34:06.646083,500,1500,f518b1daf7f71a696ec3052e45a0c29a,1.1e-05,convfc,,246.0,307500,600,3,2017-04-12T01:34:06.646083,0.821085,0.751697,1.092308
7,nesterov,2017-04-12T01:34:06.702008,500,600,6d88fff72360f903c92f8bf366b85f0e,2.6e-05,convfc,0.745343,249.0,311250,700,3,2017-04-12T01:34:06.702008,0.794339,0.741613,1.071095
5,sgd,2017-04-12T01:34:06.627242,200,800,2574edd157e0c31bacae37cea1b05327,0.000205,convfc,,113.0,141250,700,5,2017-04-12T01:34:06.627242,0.750079,0.722444,1.038252
2,sgd,2017-04-12T01:09:35.559324,50,2300,dcc0640e6d41dec35c782e7367ea2c9c,1.6e-05,convfc,,627.0,783750,256,3,2017-04-12T01:09:35.559324,0.634608,0.629293,1.008446
3,adam,2017-04-12T02:05:12.094266,300,2300,90804440e918337ad573652feb621782,0.005073,convfc,,44.0,55000,700,3,2017-04-12T02:05:12.094266,0.105449,0.565395,0.186504
6,nesterov,2017-04-12T01:34:06.529413,300,2500,93cd09e442866325c4b4768153159b28,0.003358,convfc,0.838144,51.0,63750,256,5,2017-04-12T01:34:06.529413,0.094182,0.099042,0.950933


In [295]:
hover = HoverTool(
    tooltips=[
        ('lr', '@lr'),
        ('train_acc', '@train_acc'),
        ('valid_acc', '@valid_acc'),
        ('train_val_ratio', '@train_val_ratio'),
        ('algo', '@algo')
    ]
)
p = figure(tools=[hover])
p.circle('fc', 'valid_acc', source=ColumnDataSource(df))
show(p)

In [251]:
best = df.sort_values(by='valid_acc', ascending=False).iloc[0].to_dict()
s = job_stats[best['id']]
p = figure()
p.line(np.arange(len(s['acc'])), s['acc'], color='blue')
p.line(np.arange(len(s['acc'])), moving(s['acc']), color='orange')
show(p)

In [None]:
df_dataset, _ = get_df('opt_dataset')
df_generator, _ = get_df('opt_generator')

In [254]:
val_acc_dataset = df_dataset.sort_values(by='start_time')['valid_acc'].cummax()
val_acc_generator = df_generator.sort_values(by='start_time')['valid_acc'].cummax()
p = figure()
p.line(np.arange(len(val_acc_dataset)), val_acc_dataset, color='blue', legend='dataset')
p.line(np.arange(len(val_acc_generator)), val_acc_generator, color='orange', legend='generator')
p.legend.location = 'bottom_right'
show(p)