In [1]:
# Add "../analysis" (relative to the working directory) to the module search
# path so the project-local modules imported in the following cells can be
# resolved (presumably parsing/extraction/utils live there — confirm).
import sys
sys.path.append("../analysis")

In [2]:
import parsing
import extraction
import utils

In [3]:
from extraction import pull_spec_in, select_categories
from utils import fetch, split_spec_pretty

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
%matplotlib inline

In [6]:
# Each entry pairs a caching-strategy label (it becomes the 'Caching' value of
# the combined table) with the name of the result file handed to fetch().
STATE_CACHE_FILES = dict([
    ("Viable", '2020-06-29-cache-all-viable-with-ultimate-result'),
    ("Falsely viable", '2020-06-29-cache-false-on-viable-but-mistaken'),
    ("Truly viable", '2020-06-29-cache-on-viable-and-in-solution'),
    ("Nonviable", '2020-06-29-cache-only-when-nonviable'),
    ("All", '2020-06-29-cache-all-pruning-results'),
])

In [7]:
# For every caching strategy: fetch its result file, keep only the "search"
# category, and run the result through pull_spec_in. Keyed by strategy label.
data = {
    label: pull_spec_in(select_categories(fetch(cache_file), ["search"]))
    for label, cache_file in STATE_CACHE_FILES.items()
}

In [8]:
# Inspect which columns the loaded "search" records expose for one experiment.
data['Viable'].columns

Index(['category', 'key', 'success', 'mode', 'cache_reuse', 'prune_fuel',
       'time', 'spec'],
      dtype='object')

In [9]:
# Build one tidy frame per caching strategy: keep only the spec, timing and
# cache-reuse columns, give them presentation names, and tag every row with
# the strategy label it came from.
frames = []
for test, raw_frame in data.items():
    frame = (
        raw_frame[['spec', 'time', 'cache_reuse']]
        .rename(columns={'time': 'Time', 'spec': 'Spec',
                         'cache_reuse': "Cache provided"})
        .assign(Caching=test)
    )
    frames.append(frame)

# Stack all strategies and index by (Spec, Caching).
# A more sensible ordering is applied later; here the index is simply sorted.
df_all = (
    pd.concat(frames)
    .set_index(['Spec', 'Caching'])
    .sort_index()
)

In [10]:
# Times of the rows where no cache was provided, pivoted so that each
# 'Caching' label becomes a column and each Spec a row.
baselines = df_all.loc[df_all['Cache provided'].eq(False), 'Time'].unstack()

In [11]:
# Center each spec's no-cache timings on that spec's mean across the caching
# experiments, so the columns show per-experiment deviations.
baselines_centered = baselines.sub(baselines.mean(axis=1), axis=0)
baselines_centered

Caching,All,Falsely viable,Nonviable,Truly viable,Viable
Spec,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
l1/1d-conv,0.001595,-5.92048e-05,0.001004,-0.00013,-0.002409
l1/1d-stencil,0.001241,-0.0003361912,0.000929,-0.000435,-0.001399
l1/2d-stencil-3,-0.000283,0.0003482886,-0.000333,0.000479,-0.000211
l1/2d-stencil-5,-0.003335,0.003828324,-0.005357,0.007775,-0.002912
l1/2d-stencil-7,0.000187,-0.0001878894,0.000278,-8e-05,-0.000197
l1/mult-32-with-4,-0.001304,-0.001358929,0.002841,0.000956,-0.001134
l1/trove-cr_sum-1,-3.2e-05,1.884e-07,3.1e-05,1e-05,-9e-06
l1/trove-cr_sum-2,0.003248,-0.001047502,0.002645,-0.001438,-0.003409
l1/trove-cr_sum-3,0.003334,-0.001300992,0.003029,-0.001453,-0.00361
l1/trove-cr_sum-4,0.048966,-0.01942727,0.049,-0.021213,-0.057326


In [12]:
# Total of the centered baseline times per 'Caching' column (summed over specs).
baselines_centered.sum()

Caching
All               2.370547
Falsely viable   -1.531158
Nonviable         2.871635
Truly viable     -1.234956
Viable           -2.476068
dtype: float64

In [13]:
# Load the separately recorded baseline timings and reduce them to a Series of
# times indexed by spec, with the index named 'Spec' like the other tables.
# NOTE(review): "2020-06-31" is not a valid calendar date; the name is kept
# verbatim because it is the identifier passed to fetch().
new_baselines_raw = fetch("2020-06-31-baseline-timings-for-cache-experiments")
new_baselines_df = pull_spec_in(
    select_categories(new_baselines_raw, ["search"])
)
new_baselines = (
    new_baselines_df
    .set_index('spec')['time']
    .rename_axis('Spec')
)

In [14]:
# Per-spec mean difference between the no-cache timings of the caching
# experiments and the separately recorded baseline timings.
baselines.sub(new_baselines, axis=0).mean(axis=1)

Spec
l1/1d-conv           0.002704
l1/1d-stencil        0.001588
l1/2d-stencil-3      0.000663
l1/2d-stencil-5      0.004895
l1/2d-stencil-7      0.000302
l1/mult-32-with-4   -0.000730
l1/trove-cr_sum-1   -0.000014
l1/trove-cr_sum-2    0.003550
l1/trove-cr_sum-3    0.005207
l1/trove-cr_sum-4    0.065546
l1/trove-cr_sum-5    0.007651
l1/trove-cr_sum-7    0.010434
l1/trove-crc-1       0.000033
l1/trove-crc-2       0.003380
l1/trove-crc-3       0.005218
l1/trove-crc-4       0.018765
l1/trove-crc-5       0.007439
l1/trove-crc-7       0.014654
l1/trove-rcr-1       0.000510
l1/trove-rcr-2       0.051204
l1/trove-rcr-3       0.013289
l1/trove-rcr-4       0.094755
l1/trove-rcr-5       0.021660
l1/trove-rcr-7       0.057810
l2/1d-conv           0.004065
l2/1d-stencil        0.002034
l2/2d-stencil-3      0.001770
l2/2d-stencil-5      0.011065
l2/2d-stencil-7      0.129146
l2/mult-32-with-4   -0.000636
l3/1d-conv           0.006640
l3/1d-stencil        0.002965
l3/2d-stencil-3      0.002933
l3/2d

We can quickly conclude that creating the caches introduced overhead that we need to account for.

In [15]:
# Times of the rows where a cache was provided, one column per 'Caching' label.
cache_states = df_all.loc[df_all['Cache provided'], 'Time'].unstack()
# Put the separately recorded baseline timings in front as the first column.
cache_states.insert(loc=0, column="Baseline", value=new_baselines.sort_index())

In [16]:
# Order the specs from the largest baseline time to the smallest, then preview.
df = cache_states.sort_values("Baseline", ascending=False)
df.head()

Caching,Baseline,All,Falsely viable,Nonviable,Truly viable,Viable
Spec,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
l3/trove-rcr-4,2.555434,3.771795,2.540714,5.135037,2.938958,1.865731
l3/2d-stencil-7,2.413099,0.067489,0.989484,2.334861,1.710015,0.186668
l2/2d-stencil-7,2.39798,0.066822,0.990923,2.336182,1.689201,0.187467
l3/trove-rcr-2,1.111086,2.087196,1.398469,2.429911,1.377603,1.001386
l3/trove-rcr-7,0.299529,0.122987,0.101266,0.418696,0.269802,0.066527


In [17]:
# Speedup relative to baseline: the reciprocal of (time / baseline time),
# computed row by row, so the 'Baseline' column itself is 1.0.
speedups = df.div(df['Baseline'], axis='index').rdiv(1.0)

In [18]:
# Preview the first rows of the speedup table.
speedups.head()

Caching,Baseline,All,Falsely viable,Nonviable,Truly viable,Viable
Spec,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
l3/trove-rcr-4,1.0,0.677511,1.005793,0.497647,0.869503,1.369669
l3/2d-stencil-7,1.0,35.755353,2.438745,1.033509,1.411157,12.927239
l2/2d-stencil-7,1.0,35.886271,2.419945,1.026452,1.419594,12.791502
l3/trove-rcr-2,1.0,0.532334,0.794502,0.457254,0.806536,1.109548
l3/trove-rcr-7,1.0,2.435445,2.957827,0.715385,1.110181,4.502393


In [19]:
# Write the raw timing table and the derived speedup table to the results folder.
df.to_csv(
    path_or_buf='../results/2020-06-31-effects-of-caching-on-search-time.csv'
)
speedups.to_csv(
    path_or_buf='../results/2020-06-31-effects-of-caching-on-search-time-speedups.csv'
)