## Import Data

In [1]:
import json
import os, glob
import pandas as pd
import pandas.io.json as pdjson
import seaborn as sns
import requests

In [3]:
def results_url(**kwargs):
    return "http://{host}/artifacts/{environment}/{repo_branch_name}/{commitid}/{variant}/{timestamp}/{timestamp}_{ocaml_version}.bench".format(**kwargs)

def load_bench_results(**kwargs):
    url = results_url(**kwargs)
    r = requests.get(url)
    xs = [json.loads(l) for l in r.text.splitlines()]
    df = pdjson.json_normalize(xs)
    return df

commit_a = {
    'host': 'bench2.ocamllabs.io:8083',
    'environment': 'bench2.ocamllabs.io',
    'repo_branch_name': 'ocaml_trunk__trunk',
    'commitid': '4ecd0e477677c4a16cc0f964d6fb85eb48fb1ff0',
    'variant': 'vanilla',
    'timestamp': '20191019_060219',
    'ocaml_version': '4.10.0',
    }

commit_b = {
    'host': 'bench2.ocamllabs.io:8083',
    'environment': 'bench2.ocamllabs.io',
    'repo_branch_name': 'closure_rep3__closure_rep3',
    'commitid': '7fd87287430596fbeb17e9cf74c8e5d5688112b6',
    'variant': 'vanilla',
    'timestamp': '20190928_025446',
    'ocaml_version': '4.10.0',
    }

def load_commit(conf):
    df = load_bench_results(**conf)
    df['commitid'] = conf['commitid'][0:8]
    return df
    

df = pd.concat([load_commit(commit_a), load_commit(commit_b)])
df.info()
df.describe()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 386 entries, 0 to 192
Data columns (total 25 columns):
name                               386 non-null object
command                            386 non-null object
time_secs                          386 non-null float64
user_time_secs                     386 non-null float64
sys_time_secs                      386 non-null float64
maxrss_kB                          386 non-null int64
ocaml_url                          386 non-null object
ocaml.version                      386 non-null object
ocaml.c_compiler                   386 non-null object
ocaml.architecture                 386 non-null object
ocaml.word_size                    386 non-null object
ocaml.system                       386 non-null object
ocaml.function_sections            386 non-null object
ocaml.supports_shared_libraries    386 non-null object
gc.allocated_words                 386 non-null int64
gc.minor_words                     386 non-null int64
gc.promoted_wor

Unnamed: 0,time_secs,user_time_secs,sys_time_secs,maxrss_kB,gc.allocated_words,gc.minor_words,gc.promoted_words,gc.major_words,gc.minor_collections,gc.major_collections,gc.heap_words,gc.heap_chunks,gc.top_heap_words,gc.compactions
count,386.0,386.0,386.0,386.0,386.0,386.0,386.0,386.0,386.0,386.0,386.0,386.0,386.0,386.0
mean,2.503011,2.455272,0.04708,27002.11399,278581500.0,267919200.0,12442710.0,23104960.0,1076.598446,84.689119,3094364.0,4.194301,3316968.0,7.670984
std,7.535295,7.466775,0.364199,78578.808961,1008237000.0,1000231000.0,44550100.0,94999590.0,3851.568193,349.478858,9718476.0,6.752475,10159710.0,50.969388
min,0.001129,0.0,0.0,2480.0,87.0,87.0,0.0,0.0,0.0,0.0,73728.0,1.0,491520.0,0.0
25%,0.381861,0.375533,0.0,4528.0,981802.0,981802.0,75.0,78.0,4.0,0.0,491520.0,1.0,491520.0,0.0
50%,0.975114,0.970249,0.0,5326.0,50000100.0,45900090.0,1372.5,40068.5,196.0,2.0,491520.0,1.0,491520.0,0.0
75%,1.968765,1.950511,0.007938,10213.0,90039130.0,87654510.0,3575816.0,7999584.0,369.0,37.0,565248.0,3.0,622848.0,2.0
max,92.215903,91.718258,4.750759,728548.0,9746759000.0,9746743000.0,435612800.0,1039655000.0,37210.0,3562.0,91529730.0,38.0,91529730.0,562.0


In [None]:
df.head().T

## Table based comparison

In [None]:
def calc_compare_table(df, topic):
    tmp = pd.pivot_table(df, values=topic, index='name', columns='commitid')
    tmp = tmp.reset_index().rename_axis(None, axis=1)
    id1 = df.commitid.unique()[0] 
    id2 = df.commitid.unique()[1]
    tmp = tmp[['name', id1, id2]]
    tmp['diff'] = tmp[id2] - tmp[id1]
    tmp['pct_diff'] = 100.*tmp['diff']/tmp[id1]
    return tmp
    
    
def display_compare_table(df, N=4):
    print('Top %d percentage increases'%N)
    display(df.sort_values('pct_diff', ascending=False).head(N))

    print('Top %d percentage decreases'%N)
    display(df.sort_values('pct_diff', ascending=True).head(N))


In [None]:
tmp = calc_compare_table(df, 'time_secs')
display_compare_table(tmp)

## Time

In [None]:
g = sns.catplot (x='name', y='time_secs', hue='commitid', data = df, kind ='bar', aspect=16)
g.set_xticklabels(rotation=90)

### Normalised

In [None]:
base_commitid = df.commitid.unique()[0]
print('For normalization using base_commitid: '+base_commitid)

In [None]:
def normalize(df, base_commitid, topic):
    idx = df['commitid'] == base_commitid
    normal_df = df[idx][['name', topic]].set_index('name')
    tmp = df[['name', topic, 'commitid']][~idx].set_index(['commitid','name'])
    tmp = tmp.div(normal_df)
    return tmp.rename(columns={topic:'n'+topic}).reset_index()

In [None]:
ndf = normalize(df,base_commitid,'time_secs')
ndf.sort_values(by=['ntime_secs'],inplace=True)
ndf

In [None]:
ndf['ntime_cumavg']=ndf.ntime_secs.expanding().mean()
ndf

In [None]:
sns.set_style("whitegrid")
g = sns.catplot (x='name', y='ntime_secs', hue='commitid', data = ndf, kind ='bar', aspect=16, orient="v")
g.set_xticklabels(rotation=90)

Save the figure as a pdf

In [None]:
g.savefig("no_infix_tag_normalised_runtime.pdf")

## Major words

In [None]:
g = sns.catplot (x='name', y='gc.major_words', hue='commitid', data = df, kind ='bar', aspect=4)
g.set_xticklabels(rotation=90)

### Normalised

In [None]:
ndf = normalize(df,base_commitid,'gc.major_words')
ndf.sort_values(by=['ngc.major_words'],inplace=True)
g = sns.catplot (x='name', y='ngc.major_words', hue='commitid', data = ndf, kind ='bar', aspect=4)
g.set_xticklabels(rotation=90)