In [100]:
import math
import os
from datetime import datetime
from pprint import pprint

import numpy as np
import pandas as pd
import pymongo
import qgrid
from bokeh.io import push_notebook, show, output_notebook, export_svgs
from bokeh.layouts import column, row, gridplot
from bokeh.models import Band, Span, Range1d
from bokeh.models.formatters import NumeralTickFormatter
from bokeh.models.sources import ColumnDataSource
from bokeh.palettes import all_palettes
from bokeh.plotting import figure
from ipywidgets import interact

output_notebook()

In [232]:
def cross(key, *args):
    """Wrap every positional value in its own single-entry dict.

    Args:
        key: The dictionary key shared by all generated entries.
        *args: The values to wrap, one dict per value.

    Returns:
        A list ``[{key: args[0]}, {key: args[1]}, ...]`` in argument order;
        an empty list when no values are given.

    NOTE(review): presumably a helper for building the clause list of the
    commented-out ``"$or"`` filter in ``query`` -- confirm before removing.
    """
    clauses = []
    for value in args:
        clauses.append({key: value})
    return clauses

# MongoDB filter selecting the runs to analyse: runs labelled "lr_sweep"
# that started after the cutoff below.
#
# Other filters that have been used here (add them to the dict to enable):
#   "_id": {"$gt": 2000}
#   "config.user": "rockt"
#   "start_time": {"$lt": datetime.strptime("2018-01-24 00:00", "%Y-%m-%d %H:%M")}
#   "$or": [{"config.label": "fs10_baseline"}, {"config.label": "fs10_ae_fix"}]
start_time_format = "%Y-%m-%d %H:%M"
earliest_start = datetime.strptime("2018-03-20 00:00", start_time_format)

query = {
    "start_time": {"$gt": earliest_start},
    "config.label": "lr_sweep",
}

In [241]:
%%time
# Load the run documents matching `query` and group their y-series by a
# "<facet values> | <param values>" label.
x_key = "total_timesteps"  # key in doc["info"] holding the x-axis series
y_key = "rewards"          # key in doc["info"] holding the y-axis series
facets = ["env_id"]        # config keys whose values form the facet part of the label
params = ["name", "lr"]    # config keys whose values form the param part of the label
db_name = "sympl"

len_threshold = 5  # runs with this many x points or fewer are ignored

# Credentials must never be stored in the notebook. The complete connection
# URI (mongodb://<user>:<password>@gandalf.cs.ox.ac.uk:27017/sympl) is read
# from the environment instead.
mongo_uri = os.environ.get("SYMPL_MONGO_URI")
if not mongo_uri:
    raise RuntimeError(
        "Set the SYMPL_MONGO_URI environment variable to the MongoDB "
        "connection URI before running this cell."
    )

# NOTE: despite its name, `db` is the "runs" collection of the database.
db = pymongo.MongoClient(mongo_uri,
                         27017,
                         ssl=True)[db_name]["runs"]
xs = []    # longest x series seen so far
docs = db.find(query)
data = {}  # label -> list of y series, one entry per run

for doc in docs:
    config = doc["config"]
    info = doc.get("info")
    # Skip runs that have not logged both series yet instead of failing
    # with a KeyError on an incomplete document.
    if not info or x_key not in info or y_key not in info:
        continue

    x = info[x_key]
    if len(x) <= len_threshold:
        continue
    y = info[y_key]

    # Missing param keys are shown as "n/a"; facet keys are required.
    param = " ".join(str(config.get(name, "n/a")) for name in params)
    face = " ".join(str(config[name]) for name in facets)
    key = face + " | " + param

    if len(x) > len(xs):
        xs = x

    data.setdefault(key, []).append(y)

CPU times: user 1.08 s, sys: 2.12 s, total: 3.2 s
Wall time: 6.61 s


In [242]:
%%time
# Aggregate the runs of every configuration in `data` into per-timestep
# mean/std curves, stored as dfs[face][param], with summary statistics in
# meta[face][param].
smoothing = 0.03             # EWM span as a fraction of the series length; 0 disables smoothing
drop_unfinished_runs = True  # drop runs with missing values inside the kept window
max_timestep = 40e6          # x values above this are not counted into the kept window
max_repeats = 5              # keep at most this many runs per configuration
num_timesteps = len([x for x in xs if x <= max_timestep])
assert 0.0 <= smoothing <= 1.0

dfs = {}   # face -> param -> DataFrame with "mean", "std", "lower", "upper" and x_key columns
meta = {}  # face -> param -> {"max", "min", "runs"}, computed on the unsmoothed runs

for key in data:
    # One column per run, one row per logged step.
    runs = pd.DataFrame(data[key]).T
    runs = runs[:num_timesteps]

    if drop_unfinished_runs:
        finished = runs.notnull().all()
        runs = runs[runs.columns[finished]]

    if runs.shape[1] > max_repeats:
        runs = runs.iloc[:, 0:max_repeats]
    elif runs.shape[1] < max_repeats:
        print("Unfinished:", key, "[%dx]" % runs.shape[1])

    if runs.shape[1] == 0:
        # Nothing left to aggregate: min/max would be NaN and break the
        # integer axis limits computed when plotting.
        continue

    meta_info = {
        "max": runs.max().max(),
        "min": runs.min().min(),
        "runs": len(runs.columns)
    }

    if smoothing > 0:
        span = max(int(len(runs) * smoothing), 1)
        smoothed = runs.ewm(span=span, min_periods=0).mean()
    else:
        # Without this branch `smoothed` was undefined (or stale from the
        # previous configuration) whenever smoothing was disabled.
        smoothed = runs

    # Keep gaps of the raw data as gaps in the smoothed data.
    mask = runs.notnull()
    runs = smoothed.where(mask, np.nan)

    std = runs.std(1)
    mean = runs.mean(1)
    df = pd.concat([mean, std], axis=1)
    df.columns = ["mean", "std"]
    df["lower"] = df["mean"] - df["std"]
    df["upper"] = df["mean"] + df["std"]
    # NOTE(review): assumes every run was logged at the same x positions as
    # the longest series `xs` -- confirm.
    df[x_key] = xs[:len(df)]

    # Split once so a " | " inside a parameter value cannot break unpacking.
    face, param = key.split(" | ", 1)

    dfs.setdefault(face, {})[param] = df
    meta.setdefault(face, {})[param] = meta_info


Unfinished: Pong | default True True [2x]
Unfinished: Enduro | default True True [2x]
Unfinished: Pong | default True False [2x]
Unfinished: Enduro | default True False [2x]
CPU times: user 99.6 ms, sys: 3.34 ms, total: 103 ms
Wall time: 103 ms


In [244]:
%%time
# Draw one figure per facet: for every parameter setting a mean line with a
# mean +/- std band, then arrange the figures in a grid of up to 3 columns.
save_svg = False
assert dfs, "No runs to plot: `dfs` is empty (check the query and filters above)."
num_facets = len(dfs.keys())
plot_cols = num_facets if num_facets < 3 else 3
plot_rows = math.ceil(num_facets / plot_cols)
width = int(900 / plot_cols)
height = int(600 / plot_rows) if plot_rows <= 3 else 300

if save_svg:
    # export_svgs writes into ./svg, which has to exist beforehand.
    os.makedirs("./svg", exist_ok=True)

plots = []
for face in sorted(dfs.keys()):
    curves = dfs[face]  # param -> aggregated DataFrame

    p = figure(plot_width=width, plot_height=height, title=face)

    max_colors = 9
    # The y-range always contains 0 and grows to cover all curves of the facet.
    max_val = 0
    min_val = 0

    # Clamp the palette size to the 3..max_colors range.
    num_colors = min(max(len(curves), 3), max_colors)
    palette = all_palettes['Set1'][num_colors]

    for i, param in enumerate(curves.keys()):
        ds = ColumnDataSource(curves[param])
        color = palette[i % len(palette)]
        if save_svg:
            name = param
        else:
            name = "[%dx] %s" % (int(meta[face][param]["runs"]), param)
        # Round outwards: rounding to the nearest integer could shrink the
        # range below the data (e.g. a maximum of 0.4 became 0).
        max_val = max(max_val, math.ceil(meta[face][param]["max"]))
        min_val = min(min_val, math.floor(meta[face][param]["min"]))

        # Line and band share one data source.
        # NOTE(review): the one-element list presumably makes bokeh use the
        # label literally instead of looking it up as a column -- confirm.
        p.line(source=ds, x=x_key, y="mean", color=color, line_width=3, legend=[name])
        band = Band(base=x_key, lower='lower', upper='upper', source=ds, level='underlay', fill_color=color,
                fill_alpha=0.2, line_width=1, line_color=color)
        p.add_layout(band)

    p.title.align = "center"
    p.legend.location = "top_left"
    p.legend.click_policy="hide"
    p.legend.label_standoff = 5
    p.legend.spacing = 0
    p.legend.padding = 1
    p.legend.margin = 5
    if max_val == min_val:
        # All values are exactly 0: avoid an empty y-range.
        max_val = min_val + 1
    p.y_range = Range1d(min_val, max_val)
    if math.fabs(max_val) > 1:
        p.yaxis.formatter = NumeralTickFormatter(format="0.0a")
    p.xaxis.formatter = NumeralTickFormatter(format="0a")

    if save_svg:
        p.output_backend = "svg"
        export_svgs(p, filename="./svg/%s.svg" % face)

    plots.append(p)

# Chunk the figures into rows of `plot_cols` for the grid layout.
grid_rows = [plots[i:i+plot_cols] for i in range(0, len(plots), plot_cols)]
p = gridplot(grid_rows)

show(p, notebook_handle=True)

CPU times: user 744 ms, sys: 11.4 ms, total: 755 ms
Wall time: 758 ms


In [245]:
# Build a param x face table of the best (maximum) unsmoothed value per
# configuration, with published reference scores added as extra rows.
#
# NOTE(review): `published_results` is not defined in any cell shown in this
# notebook; from its use here it looks like a mapping model -> {face: score}.
# Define it in a cell above so the notebook survives Restart & Run All.
vals = []
for face in sorted(dfs):
    face_meta = meta[face]
    for param in dfs[face]:
        max_val = int(round(face_meta[param]["max"]))
        vals.append([face, param, max_val])
    for model in published_results:
        model_scores = published_results[model]
        if face in model_scores:
            vals.append([face, model, model_scores[face]])

records = pd.DataFrame(vals, columns=["face", "param", "val"])
df = records.pivot(index='param', columns='face', values='val')
# df.to_latex() can be used to export this table.
df

face,Enduro,Pong
param,Unnamed: 1_level_1,Unnamed: 2_level_1
VPN,382.0,
default True False,37.0,0.0
default True True,41.0,0.0


In [41]:
# NOTE(review): leftover inspection cell. Its execution count (41) is lower
# than that of the cells above, so the value shown below comes from an earlier
# kernel state and will differ after Restart & Run All. Consider deleting it.
max_val

36