In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

In [None]:
import glob
import re
import os

## Regex Matching

In [None]:
# Shape of one benchmark record in the log: a label, the data-file path (whose
# name carries two integers), the file size in bytes and the elapsed seconds.
LOG_REGEX = re.compile(r'INFO:root:([A-Za-z ]+): benchmarks\/data\/test-data-([0-9]+)-([0-9]+)\.csv; Size: ([0-9]+) bytes; time: ([0-9.]+)s')


def group_dict(matched):
    """Turn a successful ``LOG_REGEX`` match into a record with typed fields.

    ``matched`` must be a match object exposing capture groups 1-5.
    Returns a dict with keys ``name`` (str), ``rows``, ``cols`` and ``bytes``
    (int), and ``time`` (float).
    """
    label, n_rows, n_cols, n_bytes, seconds = matched.group(1, 2, 3, 4, 5)
    return {
        'name': label,
        'rows': int(n_rows),
        'cols': int(n_cols),
        'bytes': int(n_bytes),
        'time': float(seconds),
    }

## File Globbing

In [None]:
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the choice of the "first" log reproducible across runs and machines.
files = sorted(glob.glob('../benchmark-results/*.log'))
if not files:
    # Fail with an actionable message instead of an IndexError on files[0].
    raise FileNotFoundError("no .log files found in ../benchmark-results/")
log = files[0]
log

In [None]:
# Name used for the output directory: the log's file name without its
# directories, cut at the first '.'. os.path.basename honours the platform's
# path separators, so backslash-separated paths returned by glob on Windows
# are handled as well as '/'-separated ones.
script_name = os.path.basename(log).split('.')[0]
script_name

## Parsing

In [None]:
# Read the log through a context manager so the file handle is closed as soon
# as the lines are loaded; each line is stripped of surrounding whitespace
# (including the trailing newline).
with open(log) as log_file:
    raw = [line.strip() for line in log_file]

In [None]:
# Match every line exactly once. Lines that are not benchmark records (blank
# lines, other log messages) yield None and are skipped rather than crashing
# group_dict with an AttributeError.
matches = [LOG_REGEX.match(line) for line in raw]
# Match for the first line (None when the log is empty or the line does not
# match); retained so any cell that references `matched` keeps working.
matched = matches[0] if matches else None
data = [group_dict(m) for m in matches if m is not None]
# Fixing the columns keeps the frame groupable by 'name' even when no line
# matched and `data` is empty.
df = pd.DataFrame(data, columns=['name', 'rows', 'cols', 'bytes', 'time'])

## Plotting

In [None]:
# Map each distinct value of the 'name' column to the sub-frame of its rows.
func_group = {label: group for label, group in df.groupby('name')}

In [None]:
# Create the output directory named after the log; exist_ok=True makes
# re-running this cell harmless when the directory is already there.
os.makedirs(name=script_name, exist_ok=True)

### Heatmap

In [None]:
# Prefix used in the file names of the figures saved by the next cell.
plot_type = "heatmap"

In [None]:
# One annotated heatmap per benchmark label: a rows x cols grid of timings,
# saved as <script_name>/<plot_type>-<label-with-dashes>.png.
for name, df_ in func_group.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.set_title(name + " time (s)")
    # DataFrame.pivot takes keyword-only arguments in pandas >= 2.0; the
    # positional form raises TypeError there. Keywords work on older versions too.
    timings = df_.pivot(index='rows', columns='cols', values='time')
    sns.heatmap(timings, annot=True, linewidths=1, ax=ax)
    fig.savefig(f"{script_name}/{plot_type}-{name.replace(' ', '-')}.png")

### Scatter

In [None]:
# Prefix used in the file names of the figures saved by the next cell.
plot_type = "scatter"

In [None]:
# One scatter plot per benchmark label (file size against elapsed time),
# saved as <script_name>/<plot_type>-<label-with-dashes>.png.
for name, df_ in func_group.items():
    out_path = f"{script_name}/{plot_type}-{name.replace(' ', '-')}.png"
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(df_['bytes'], df_['time'])
    ax.set(title=name, xlabel="Bytes", ylabel="Time (s)")
    fig.savefig(out_path)