In [1]:
import sys

# Make the modules under ../analysis importable from this notebook
# (presumably where parsing/extraction/utils live - TODO confirm).
# The path is relative, so it resolves against the kernel's working directory.
# The membership check keeps sys.path free of duplicate entries when this
# cell is re-run in a live kernel.
if "../analysis" not in sys.path:
    sys.path.append("../analysis")

In [2]:
import parsing
import extraction
import utils

In [3]:
from extraction import pull_spec_in, select_categories
from utils import fetch, split_spec_pretty

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
%matplotlib inline

In [6]:
# Display label of each caching experiment -> name of the stored result set
# that is handed to fetch() when the data is loaded.
# NOTE(review): "2020-06-31" is not a real calendar date (June has 30 days)
# and "pasitives" looks like a typo for "positives". Both are kept verbatim
# because these strings presumably match the names of the stored results.
STATE_CACHE_FILES = {
    "Viable (ideal results)": "2020-06-29-cache-all-viable-with-ultimate-result",
    "Viable (actual results)": "2020-06-31-cache-continuing-on-viable",
    "Falsely viable (ideal results)": "2020-06-29-cache-false-on-viable-but-mistaken",
    "Falsely viable (actual results)": "2020-06-31-cache-continuing-with-false-pasitives",
    "Truly viable": "2020-06-29-cache-on-viable-and-in-solution",
    "Nonviable": "2020-06-29-cache-only-when-nonviable",
    "All (ideal results)": "2020-06-29-cache-all-pruning-results",
}

In [7]:
# One DataFrame per caching experiment, keyed by its display label.
# fetch(), select_categories() and pull_spec_in() are project helpers;
# presumably they load a stored result set, keep only the "search" category
# and expose the spec as a column - TODO confirm against their sources.
data = {
    label: pull_spec_in(select_categories(fetch(result_name), ["search"]))
    for label, result_name in STATE_CACHE_FILES.items()
}

In [8]:
# Peek at the columns available in one of the loaded frames.
data['Viable (ideal results)'].columns

Index(['category', 'key', 'success', 'mode', 'cache_reuse', 'prune_fuel',
       'time', 'spec'],
      dtype='object')

In [9]:
# Build one long-format table: from every experiment keep only the spec,
# time and cache_reuse columns, give them presentation names, and tag each
# row with the experiment label so the frames can be stacked.
frames = []
for test, raw_frame in data.items():
    # rename() and assign() return new frames, so the loaded data is never
    # mutated and neither inplace=True nor a defensive .copy() is needed.
    frame = (
        raw_frame[['spec', 'time', 'cache_reuse']]
        .rename(columns={'time': 'Time', 'spec': 'Spec',
                         'cache_reuse': "Cache provided"})
        .assign(Caching=test)
    )
    frames.append(frame)
# More sensible order will be applied later
df_all = pd.concat(frames, axis=0).set_index(['Spec', 'Caching']).sort_index()

In [10]:
# Times of the runs where no cache was provided, reshaped so that each spec
# is a row and each caching experiment is a column.
baselines = df_all.loc[df_all['Cache provided'].eq(False), 'Time'].unstack()

In [11]:
# Subtract each spec's mean no-cache time (taken across experiments) from
# its row, leaving the per-experiment deviation from that mean.
baselines_centered = baselines.sub(baselines.mean(axis=1), axis=0)
baselines_centered

Caching,All (ideal results),Falsely viable (actual results),Falsely viable (ideal results),Nonviable,Truly viable,Viable (actual results),Viable (ideal results)
Spec,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
l1/1d-conv,0.001968,1.279757e-05,0.0003140106,0.001377,0.000243,-0.001879,-0.002036
l1/1d-stencil,0.001473,-5.647329e-05,-0.0001047823,0.001161,-0.000204,-0.001101,-0.001168
l1/2d-stencil-3,-0.000276,0.0002536891,0.0003549531,-0.000326,0.000485,-0.000287,-0.000204
l1/2d-stencil-5,-0.006853,0.0255143,0.0003098156,-0.008876,0.004257,-0.007922,-0.006431
l1/2d-stencil-7,0.000255,-0.0001804221,-0.0001200011,0.000346,-1.2e-05,-0.000159,-0.000129
l1/mult-32-with-4,-0.001348,-6.331471e-05,-0.001402347,0.002798,0.000913,0.00028,-0.001178
l1/trove-cr_sum-1,-3.2e-05,-4.528571e-08,-1.652857e-07,3e-05,9e-06,2e-06,-9e-06
l1/trove-cr_sum-2,0.003861,-0.0007648513,-0.0004347993,0.003258,-0.000825,-0.002299,-0.002796
l1/trove-cr_sum-3,0.004204,-0.0009520413,-0.0004309043,0.003899,-0.000583,-0.003398,-0.002739
l1/trove-cr_sum-4,0.0579,-0.0169555,-0.01049274,0.057935,-0.012278,-0.027717,-0.048391


In [12]:
# Total deviation per experiment, summed over all specs. A positive total
# means that experiment's no-cache runs were slower than the per-spec mean
# overall.
baselines_centered.sum(axis=0)

Caching
All (ideal results)                2.909365
Falsely viable (actual results)   -0.908679
Falsely viable (ideal results)    -0.992340
Nonviable                          3.410452
Truly viable                      -0.696139
Viable (actual results)           -1.785407
Viable (ideal results)            -1.937251
dtype: float64

In [13]:
# Dedicated baseline run: loaded through the same helper pipeline as the
# cache experiments, then reduced to a Series of times indexed by spec.
new_baselines_raw = fetch("2020-06-31-baseline-timings-for-cache-experiments")
new_baselines_df = pull_spec_in(
    select_categories(new_baselines_raw, ["search"])
)
# Name the index 'Spec' to match the index name used by the other tables.
new_baselines = new_baselines_df.set_index('spec')['time'].rename_axis('Spec')

In [14]:
# Per spec: the mean, across experiments, of (no-cache time in the cache
# experiment - time in the dedicated baseline run). Positive values mean the
# cache experiments' no-cache runs were slower than the dedicated baseline.
baselines.sub(new_baselines, axis=0).mean(axis=1)

Spec
l1/1d-conv           0.002331
l1/1d-stencil        0.001356
l1/2d-stencil-3      0.000656
l1/2d-stencil-5      0.008414
l1/2d-stencil-7      0.000234
l1/mult-32-with-4   -0.000686
l1/trove-cr_sum-1   -0.000013
l1/trove-cr_sum-2    0.002937
l1/trove-cr_sum-3    0.004337
l1/trove-cr_sum-4    0.056611
l1/trove-cr_sum-5    0.006420
l1/trove-cr_sum-7    0.008966
l1/trove-crc-1       0.000030
l1/trove-crc-2       0.002828
l1/trove-crc-3       0.004845
l1/trove-crc-4       0.014920
l1/trove-crc-5       0.006034
l1/trove-crc-7       0.011177
l1/trove-rcr-1       0.000553
l1/trove-rcr-2       0.041531
l1/trove-rcr-3       0.011519
l1/trove-rcr-4       0.074158
l1/trove-rcr-5       0.017371
l1/trove-rcr-7       0.043231
l2/1d-conv           0.003279
l2/1d-stencil        0.001498
l2/2d-stencil-3      0.001713
l2/2d-stencil-5      0.010583
l2/2d-stencil-7      0.107227
l2/mult-32-with-4   -0.000429
l3/1d-conv           0.005831
l3/1d-stencil        0.002415
l3/2d-stencil-3      0.002882
l3/2d

We can quickly conclude that creating the caches added overhead we need to account for: for most specs, the no-cache runs from the cache experiments were slower than the dedicated baseline runs.

In [15]:
# Times of the runs where a cache was provided, one row per spec and one
# column per caching experiment.
cache_states = df_all.loc[df_all['Cache provided'], 'Time'].unstack()
# The reference column comes from the dedicated baseline run rather than
# from the no-cache rows of the cache experiments.
cache_states.insert(loc=0, column="Baseline", value=new_baselines.sort_index())

In [16]:
# Order the specs from slowest to fastest baseline time.
df = cache_states.sort_values("Baseline", ascending=False)
df.head(5)

Caching,Baseline,All (ideal results),Falsely viable (actual results),Falsely viable (ideal results),Nonviable,Truly viable,Viable (actual results),Viable (ideal results)
Spec,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
l3/trove-rcr-4,2.555434,3.771795,3.146353,2.540714,5.135037,2.938958,2.335259,1.865731
l3/2d-stencil-7,2.413099,0.067489,1.007857,0.989484,2.334861,1.710015,0.224646,0.186668
l2/2d-stencil-7,2.39798,0.066822,1.022351,0.990923,2.336182,1.689201,0.216408,0.187467
l3/trove-rcr-2,1.111086,2.087196,1.494358,1.398469,2.429911,1.377603,1.105856,1.001386
l3/trove-rcr-7,0.299529,0.122987,0.166214,0.101266,0.418696,0.269802,0.138052,0.066527


In [17]:
# Speedup relative to the baseline: first express every time as a fraction
# of the row's baseline time, then take the reciprocal. Values above 1 mean
# the run was faster than the baseline; the Baseline column itself is 1.
speedups = df.div(df['Baseline'], axis=0).rdiv(1.0)

In [18]:
# Show the speedups for the first few rows (the specs with the largest
# baseline times, given the sort order of df).
speedups.head()

Caching,Baseline,All (ideal results),Falsely viable (actual results),Falsely viable (ideal results),Nonviable,Truly viable,Viable (actual results),Viable (ideal results)
Spec,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
l3/trove-rcr-4,1.0,0.677511,0.812189,1.005793,0.497647,0.869503,1.094283,1.369669
l3/2d-stencil-7,1.0,35.755353,2.394286,2.438745,1.033509,1.411157,10.741767,12.927239
l2/2d-stencil-7,1.0,35.886271,2.345555,2.419945,1.026452,1.419594,11.080831,12.791502
l3/trove-rcr-2,1.0,0.532334,0.743521,0.794502,0.457254,0.806536,1.00473,1.109548
l3/trove-rcr-7,1.0,2.435445,1.802062,2.957827,0.715385,1.110181,2.169683,4.502393


In [19]:
# Persist the raw timings and the derived speedups next to the other results.
# NOTE(review): the "2020-06-31" prefix is not a real calendar date; it is
# kept so the output names stay consistent with the input result names.
df.to_csv(
    path_or_buf='../results/2020-06-31-effects-of-caching-on-search-time.csv'
)
speedups.to_csv(
    path_or_buf='../results/2020-06-31-effects-of-caching-on-search-time-speedups.csv'
)