In [None]:
import pandas as pd
import json
import matplotlib.pyplot as plt
import quantiphy
import seaborn as sb

import nest_asyncio
nest_asyncio.apply()

%matplotlib inline
%run util.ipynb
%run exp_plot.ipynb

In [None]:
# Parse the raw result log of the "vary RHS size" experiment into one record
# per line, then average every numeric metric per (algorithm, RHS size) at
# theta == 0.7. Later cells read the resulting module-level `df`.
target_alg_list = ['RSS-C', 'RSS-P', 'RSS-CP', 'RSS-CL', 'RSS-PL', 'RSS-CPL']

# Counters copied from the log as integers.
_INT_KEYS = ('Num_QS_Result', 'Num_TS_Result', 'Num_Result', 'Num_QS_Verified', 'Num_TS_Verified')
# Timings copied from the log and divided by 1000
# (presumably milliseconds -> seconds; confirm against the log writer).
_TIME_KEYS = ('Time_Total', 'Time_QS_Total', 'Time_TS_Total', 'Time_QS_IndexFilter',
              'Time_TS_IndexFilter', 'Time_SearchPerQuery_MEAN')


def _split_pairs(tokens):
    """Build a dict from 'key:value' strings, splitting on the first ':' only."""
    return dict(token.split(':', 1) for token in tokens)


def _parse_result_line(line):
    """Convert one tab-separated log line into a flat record.

    Returns None for blank lines and for algorithms outside `target_alg_list`.
    Raises KeyError / ValueError when an expected field is missing or malformed.
    """
    stripped = line.strip()
    if not stripped:
        # A stray empty line would otherwise fail the key/value unpacking.
        return None
    row = _split_pairs(stripped.split('\t'))
    # 'Param' wraps a ', '-separated list of key:value pairs; the slice drops
    # one leading and three trailing wrapper characters.
    # NOTE(review): the exact wrapper format is not visible here -- confirm.
    dict_param = _split_pairs(row['Param'][1:-3].split(', '))
    alg = get_setting_label_PrefixSearch(dict_param)
    if alg not in target_alg_list:
        return None
    output = {
        'theta': float(dict_param['theta']),
        'alg': alg,
        'data_name': row['Dataset_Name'],
        # The 8th '_'-separated token of the dataset name, minus its one-character
        # prefix, is read as the RHS length.
        'len_rhs': int(row['Dataset_Name'].split('_')[7][1:]),
    }
    for key in _INT_KEYS:
        output[key] = int(row[key])
    for key in _TIME_KEYS:
        output[key] = float(row[key]) / 1000
    # NOTE(review): 100 is presumably the number of queries per run -- confirm.
    output['Time_TS_MEAN'] = output['Time_TS_Total'] / 100
    return output


with open('exp_vary_rhs.txt') as f:
    output_list = [record for record in map(_parse_result_line, f) if record is not None]

df_all = pd.DataFrame(output_list)
df = (
    df_all[df_all.theta == 0.7]
    .groupby(by=['alg', 'len_rhs'], as_index=False)
    # numeric_only=True: the string column `data_name` cannot be averaged and
    # makes pandas >= 2.0 raise TypeError without it.
    .mean(numeric_only=True)
    # Ordered categorical so sorting and plot legends follow target_alg_list.
    .assign(alg=lambda d: pd.Categorical(d.alg, categories=target_alg_list, ordered=True))
    .sort_values(by=['alg', 'len_rhs'])
)
df

In [None]:
# Quick-look line plots of selected metrics against RHS size, one line per algorithm.
# The grid keeps five slots; slots 2 and 3 stay empty because the
# 'Time_QS_Total' and 'Time_TS_Total' panels are currently disabled.
fig, axes = plt.subplots(nrows=1, ncols=5, figsize=[25, 5])
panel_metrics = {
    0: 'Time_SearchPerQuery_MEAN',
    # 1: 'Time_QS_Total',
    # 2: 'Time_TS_Total',
    3: 'Time_TS_MEAN',
    4: 'Num_TS_Verified',
}
for slot, metric in panel_metrics.items():
    ax = sb.lineplot(x='len_rhs', y=metric, data=df, hue='alg', ax=axes[slot])

In [None]:
def _ratio_by_rhs(metric, numerator_alg, denominator_alg):
    """Return metric(numerator_alg) / metric(denominator_alg) as an array, one value per RHS size.

    Both operands are indexed by `len_rhs`, so the division pairs rows by RHS
    size rather than by position; a size missing for one algorithm yields NaN
    instead of a silently shifted ratio.
    """
    def per_rhs(alg):
        return df.loc[df.alg == alg].set_index('len_rhs')[metric]

    return (per_rhs(numerator_alg) / per_rhs(denominator_alg)).to_numpy()


# Each ratio is "larger value / smaller value", so numbers above 1 support the printed claim.
print('RSS-C is faster than RSS-P by:', _ratio_by_rhs('Time_TS_MEAN', 'RSS-P', 'RSS-C'))
print('RSS-CP is faster than RSS-C by:', _ratio_by_rhs('Time_TS_MEAN', 'RSS-C', 'RSS-CP'))
print('RSS-P prunes more than RSS-C by:', _ratio_by_rhs('Num_TS_Verified', 'RSS-C', 'RSS-P'))
print('RSS-CP prunes more than RSS-P by:', _ratio_by_rhs('Num_TS_Verified', 'RSS-P', 'RSS-CP'))

In [None]:
class TimeByRhsPlotGenerator(PlotGenerator):
    """Plot generator for one metric of `df` as a function of RHS size.

    Configures the x axis for RHS sizes 1..5 and writes one data section per
    algorithm into a `.result` file. `PlotGenerator` is defined outside this
    notebook; the attributes set in `__init__` are presumably read by its
    `run()` pipeline -- confirm against exp_plot.ipynb.
    """

    def __init__(self, data_info, theta, ymax_offset, y='Time_SearchPerQuery_MEAN', ylabel=r'Avg. exe. time \(sec\)'):
        """Store plot settings.

        Args:
            data_info: dataset descriptor forwarded to `PlotGenerator`.
            theta: threshold forwarded to `PlotGenerator`.
            ymax_offset: forwarded to `PlotGenerator`; also added to the fixed
                y-range upper bound in `additional_settings`.
            y: name of the `df` column to plot.
            ylabel: y-axis label. The default is a raw string so the literal
                backslashes before the parentheses are kept without relying on
                an invalid escape sequence.
        """
        super().__init__(data_info, theta, ymax_offset)
        self.goal = 'time_by_rhs'
        self.xlabel = 'RHS size'
        self.xrange = '[1:5]'
        self.xlog = False
        self.ylog = False
        self.y = y
        self.ylabel = ylabel
        self.tidx_offset = 1

    def load_df(self):
        """Use the notebook-level aggregated frame `df` as the data source."""
        self.df = df

    def parse_row(self, row):
        """Format one `itertuples()` row as '<len_rhs>\\t<metric>\\n'."""
        return '{}\t{}\n'.format(row.len_rhs, getattr(row, self.y))

    def additional_settings(self, f):
        """Write the plot-script settings for x ticks and the y range to `f`."""
        f.write('set xtics 1,1,5\n')
        f.write('set yrange [0:{}]\n'.format(5+self.ymax_offset))

    def write_result(self):
        """Write one '# <alg>' section per algorithm to result/<output_name>.result.

        Every row is also passed to `update_xmax` / `update_ymax`, and the
        algorithm name (or None when it has no rows) is appended to
        `self.title_list`.
        """
        self.path_result = join('result', '{}.result'.format(self.output_name))
        df = self.df
        alg_list = list(df.alg.unique())
        with open(self.path_result, 'w') as f:
            for alg in alg_list:
                f.write("# {}\n".format(alg))
                df_target = df[df.alg==alg]
                for row in df_target.itertuples():
                    self.update_xmax(row)
                    self.update_ymax(row)
                    f.write(self.parse_row(row))
                # Two blank lines end the section for this algorithm.
                f.write('\n\n')
                self.title_list.append(alg if df_target.size > 0 else None)

In [None]:
class NumByRhsPlotGenerator(TimeByRhsPlotGenerator):
    """Variant of `TimeByRhsPlotGenerator` for count metrics, written in units of 10^7."""

    def __init__(self, data_info, theta, ymax_offset, y='Time_SearchPerQuery_MEAN', ylabel=r'Avg. exe. time \(sec\)'):
        """Same arguments as the parent; `goal` is derived from the plotted column `y`.

        The default `ylabel` is a raw string so the literal backslashes are
        kept without relying on an invalid escape sequence.
        """
        super().__init__(data_info, theta, ymax_offset, y, ylabel)
        self.goal = "{}_by_rhs".format(y)

    def parse_row(self, row):
        """Format one `itertuples()` row as '<len_rhs>\\t<metric / 1e7>\\n'."""
        # Values are divided by 1e7, so the axis label should state that unit
        # (e.g. '# Verified Pairs (10^7)').
        return '{}\t{}\n'.format(row.len_rhs, getattr(row, self.y)/1e7)

    def additional_settings(self, f):
        """Write x ticks, the y range (2.1 + ymax_offset) and y ticks every 0.5 up to 2 to `f`."""
        f.write('set xtics 1,1,5\n')
        f.write('set yrange [0:{}]\n'.format(2.1+self.ymax_offset))
        f.write('set ytics 0,0.5,2\n')

In [None]:
# DataInfo = collections.namedtuple('DataInfo', ['name', 'size', 'nr', 'qlen', 'lr', 'nar'])

In [None]:
# Generate the "time by RHS size" plot for the synthetic dataset from the 'Time_TS_MEAN' column.
# NOTE(review): theta is passed as 0 although `df` was filtered to theta == 0.7 -- confirm this is intended.
# Earlier variant using the default metric:
# TimeByRhsPlotGenerator(DataInfo('SYN', '1000000', '100000', '5', '1.0', '-1'), 0, ymax_offset=-2.5).run()
syn_data_info = DataInfo('SYN', '1000000', '100000', '5', '1.0', '-1')
TimeByRhsPlotGenerator(syn_data_info, 0, ymax_offset=1, y='Time_TS_MEAN').run()

In [None]:
# NumByRhsPlotGenerator(DataInfo('SYN', '1000000', '100000', '5', '1.0', '-1'), 0, ymax_offset=0, y='Num_QS_Verified', ylabel='# Verified Pairs (Query-side)').run()

In [None]:
# Generate the "verified pairs by RHS size" plot for the synthetic dataset; values are written in units of 10^7.
# Earlier variant with a larger y-range offset and an unscaled label:
# NumByRhsPlotGenerator(DataInfo('SYN', '1000000', '100000', '5', '1.0', '-1'), 0, ymax_offset=5, y='Num_TS_Verified', ylabel='# Verified Pairs').run()
syn_data_info = DataInfo('SYN', '1000000', '100000', '5', '1.0', '-1')
NumByRhsPlotGenerator(
    syn_data_info,
    0,
    ymax_offset=0.5,
    y='Num_TS_Verified',
    ylabel='# Verified Pairs (10^7)',
).run()