# Throughput

## Import Data

In [None]:
import json
import os, glob
import pandas as pd
import pandas.io.json as pdjson
import seaborn as sns

# Apply seaborn's "whitegrid" style to the plots drawn below.
sns.set(style="whitegrid")

In [None]:
# Load every "<variant>_1.orun.summary.bench" file found in the current
# directory. Each file is read line by line as JSON; only records that carry a
# 'name' key are kept, and every record is tagged with the variant taken from
# the file name.
bench_suffix = "_1.orun.summary.bench"
data_frames = []

# sorted() makes the load order independent of the OS directory listing order.
for file in sorted(glob.glob("*" + bench_suffix)):
    with open(file) as f:
        data = []
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            temp = json.loads(line)
            if 'name' in temp:
                data.append(temp)
    df = pd.json_normalize(data)
    # The glob pattern guarantees the file name ends with the suffix, so slice
    # it off instead of replacing the text wherever it happens to occur.
    df['variant'] = file[:-len(bench_suffix)]
    data_frames.append(df)

if not data_frames:
    # pd.concat would fail with the less helpful "No objects to concatenate".
    raise ValueError(
        "no '*" + bench_suffix + "' files found in " + os.getcwd()
    )

df = pd.concat(data_frames, sort=False)
df = df.sort_values(['name'])
# Uncomment the following to display all the lines in pandas output
#pd.set_option('display.max_rows', df.shape[0]+1)
df

<hr>

### (USER INPUT)  Baseline variant name

Please choose the baseline variant name for normalized graphs:

In [None]:
# Variant that the normalised graphs divide by; it has to match one of the
# values in df['variant'] (checked in the next cell).
baseline = '5.0.0'

Ensure that the baseline is one of the variants. The next cell raises `ValueError` if the baseline is not present in the loaded data:

In [None]:
# Fail early, with a readable message, when the chosen baseline was not loaded.
loaded_variants = df["variant"].unique().tolist()
if baseline not in loaded_variants:
    raise ValueError(
        "baseline {!r} is not one of the loaded variants: {}".format(
            baseline, loaded_variants
        )
    )
# Position of the baseline among the distinct variants. The value is kept as
# an index for backward compatibility; note that 0 is a valid position, so do
# not use it as a truth value.
is_present = loaded_variants.index(baseline)

<hr>

### Selection example

```
select * from df where variant = baseline and time_secs > 10
```

In [None]:
# pandas form of the SQL above: baseline rows whose time_secs is greater than 10.
df.loc[df['variant'].eq(baseline) & df['time_secs'].gt(10)]

In [None]:
# All rows (one per loaded variant file that has it) for a single benchmark name.
df.loc[df['name'].eq('LU_decomposition.1024')]

### Projection example

```
select name, variant, time_secs from df
```

In [None]:
# pandas form of the SQL above: keep only the three listed columns.
df.filter(items=['name', 'variant', 'time_secs'])

## Time

In [None]:
# Bar chart of time_secs per benchmark name, with one hue per variant.
g = sns.catplot(data=df, kind='bar', x='name', y='time_secs', hue='variant', aspect=6)
g.set_xticklabels(rotation=90)

### Normalised

In [None]:
def create_column(df, variant, metric):
    """Map each benchmark name to its ``metric`` value for one variant.

    Args:
        df: DataFrame with at least the columns 'name', 'variant' and
            ``metric``. It is only read, never modified.
        variant: Value of the 'variant' column whose rows are selected.
        metric: Name of the column that supplies the dictionary values.

    Returns:
        A dict ``{name: metric value}`` built from the rows whose 'variant'
        equals ``variant``. It is empty when no row matches; when a name
        occurs more than once for the variant, the last row wins.

    Raises:
        KeyError: If 'name', 'variant' or ``metric`` is not a column of ``df``.
    """
    rows = zip(df[metric], df['variant'], df['name'])
    return {
        name: value
        for value, row_variant, name in rows
        if row_variant == variant
    }

def add_display_name(df, variant, metric):
    """Add a 'display_name' column of the form ``"<name> (<value>)"``.

    ``<value>`` is the ``metric`` value that ``create_column`` reports for the
    row's name under ``variant``, rounded to two decimals.

    Args:
        df: DataFrame with the columns 'name', 'variant' and ``metric``.
            The 'display_name' column is added to (or overwritten in) this
            very object.
        variant: Variant whose metric values are shown in the label.
        metric: Name of the column holding the value shown in the label.

    Returns:
        The same ``df`` object, now carrying the 'display_name' column.
    """
    # create_column only reads the frame, so no defensive copy is needed here.
    name_metric = create_column(df, variant, metric)
    # A benchmark that was not measured for `variant` has no entry; label it
    # "(nan)" -- the same text a NaN metric value produces -- instead of
    # failing with a KeyError.
    missing = float("nan")
    disp_name = [
        name + " (" + str(round(name_metric.get(name, missing), 2)) + ")"
        for name in df["name"]
    ]
    df["display_name"] = pd.Series(disp_name, index=df.index)
    return df

def normalise(df, baseline, topic, additionalTopics=None):
    """Normalise the different variant values against the baseline.

    The resultant dataframe contains the normalised topic values for all
    the variants other than the baseline. additionalTopics can be used to
    include columns other than the topic column in the returned dataframe.

    Args:
        df: DataFrame with the columns 'name', 'variant' and ``topic``.
            It is copied first, so the caller's frame is left unmodified.
        baseline: Variant whose ``topic`` values are the divisor.
        topic: Name of the column to normalise.
        additionalTopics: Optional iterable of extra column names to carry
            into the result. ``None`` (the default) means no extra columns.

    Returns:
        A DataFrame with one row per (name, non-baseline variant) holding the
        ratio in column ``'n' + topic``, merged on 'name' and 'variant' with
        the original ``topic``, 'display_name' and any additional columns.
    """
    extra_topics = [] if additionalTopics is None else list(additionalTopics)
    # add_display_name writes into the frame it is given; hand it a copy so
    # the caller's data does not silently gain a 'display_name' column.
    df = add_display_name(df.copy(), baseline, topic)
    items = ['name', topic, 'variant', 'display_name'] + extra_topics
    df_filtered = df.filter(items=items)
    df_pivot = df_filtered.pivot(index='name', columns='variant', values=[topic])
    baseline_column = (topic, baseline)
    select_columns = [c for c in df_pivot.columns if c != baseline_column]
    if not select_columns:
        # Report this before reshaping so the hint is shown even if the
        # operations below cannot cope with an empty column selection.
        print("need another variant to plot normalized graph")
    normalised = df_pivot.div(df_pivot[baseline_column], axis=0)[select_columns]
    # col_level=1 melts on the variant level of the (topic, variant) columns;
    # ignore_index=False keeps 'name' so reset_index can turn it into a column.
    normalised = normalised.melt(
        col_level=1, ignore_index=False, value_name='n' + topic
    ).reset_index()
    return pd.merge(normalised, df_filtered, on=['name', 'variant'])

def plot_normalised(df,variant,topic):
    """Draw ``topic`` as a bar chart whose bars start at 1.

    Args:
        df: DataFrame with the columns 'display_name', 'variant' and
            ``topic``. It is not modified.
        variant: Not used by this function.
        topic: Name of the column plotted on the y axis.

    Returns:
        The seaborn grid object returned by ``sns.catplot``.
    """
    # sort_values hands back a new frame, so the caller's data stays untouched.
    plot_data = df.sort_values(by=[topic])
    # Bars are drawn with bottom=1, so each height is the offset from 1.
    plot_data[topic] = plot_data[topic] - 1
    g = sns.catplot(
        data=plot_data,
        kind='bar',
        x="display_name",
        y=topic,
        hue='variant',
        aspect=8,
        bottom=1,
    )
    g.set_xticklabels(rotation=90)
    # Put a legend on the axes (loc=8 is matplotlib's code for 'lower center')
    # and drop the legend object stored on the grid itself.
    g.ax.legend(loc=8)
    g._legend.remove()
    g.ax.set_xlabel("Benchmarks")
    # g.ax.set_yscale('log')
    return g

In [None]:
# time_secs of each non-baseline variant divided by the baseline's value.
ndf = normalise(df, baseline, topic='time_secs')
plot_normalised(ndf, baseline, topic='ntime_secs')

## Top heap words

In [None]:
# Bar chart of gc.top_heap_words per benchmark name, with one hue per variant.
g = sns.catplot(data=df, kind='bar', x='name', y='gc.top_heap_words', hue='variant', aspect=6)
g.set_xticklabels(rotation=90)

### Normalised

In [None]:
# gc.top_heap_words of each non-baseline variant divided by the baseline's value.
ndf = normalise(df, baseline, topic='gc.top_heap_words')
plot_normalised(ndf, baseline, topic='ngc.top_heap_words')

## Max RSS (KB)

In [None]:
# Bar chart of maxrss_kB per benchmark name, with one hue per variant.
g = sns.catplot(data=df, kind='bar', x='name', y='maxrss_kB', hue='variant', aspect=6)
g.set_xticklabels(rotation=90)

### Normalised

In [None]:
# maxrss_kB of each non-baseline variant divided by the baseline's value.
ndf = normalise(df, baseline, topic='maxrss_kB')
plot_normalised(ndf, baseline, topic='nmaxrss_kB')

## Major Collections

In [None]:
# Bar chart of gc.major_collections per benchmark name, with one hue per variant.
g = sns.catplot(data=df, kind='bar', x='name', y='gc.major_collections', hue='variant', aspect=6)
g.set_xticklabels(rotation=90)

### Normalised

In [None]:
# gc.major_collections of each non-baseline variant divided by the baseline's
# value; the normalised table itself is shown as the cell output.
ndf = normalise(df, baseline, topic='gc.major_collections')
plot_normalised(ndf, baseline, topic='ngc.major_collections')
ndf

## Major words

In [None]:
# Bar chart of gc.major_words per benchmark name, with one hue per variant.
g = sns.catplot(data=df, kind='bar', x='name', y='gc.major_words', hue='variant', aspect=6)
g.set_xticklabels(rotation=90)

### Normalised

In [None]:
# gc.major_words of each non-baseline variant divided by the baseline's value.
ndf = normalise(df, baseline, topic='gc.major_words')
plot_normalised(ndf, baseline, topic='ngc.major_words')

## Minor Collections

In [None]:
# Bar chart of gc.minor_collections per benchmark name, with one hue per variant.
g = sns.catplot(data=df, kind='bar', x='name', y='gc.minor_collections', hue='variant', aspect=6)
g.set_xticklabels(rotation=90)

In [None]:
# gc.minor_collections of each non-baseline variant divided by the baseline's value.
ndf = normalise(df, baseline, topic='gc.minor_collections')
plot_normalised(ndf, baseline, topic='ngc.minor_collections')