In [None]:
import itertools
import os
import shutil
import time
from os.path import basename, join, splitext
from subprocess import call

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sb
from numpy import ceil, log10
%matplotlib inline
%run util.ipynb

In [None]:
class PlotGenerator:
    """Base class that turns one experiment data frame into a gnuplot line plot.

    ``run()`` drives the pipeline: load the data frame, dump one data block per
    setting into ``result/<name>.result``, write the gnuplot script
    ``script/<name>.plot`` and finally render and crop ``plot/<name>.pdf``.

    Subclasses are expected to
      * set ``goal``, ``xlabel``, ``ylabel`` and ``xrange`` in ``__init__``
        (they are read by ``run`` / ``output_script`` but not set here),
      * override ``load_df`` so that it assigns ``self.df``,
      * override ``parse_row`` so that it returns the text line for one row,
      * optionally override ``update_xmax`` / ``additional_settings``.

    The module-level names ``setting_list`` and ``dict_title`` are used by
    ``write_result``; they are not defined in this class.
    """

    # gnuplot point types, cycled through so every series gets its own marker
    pt_list = [1,2,8,4,6,12,3]

    def __init__(self, data_info, theta):
        """Remember the data set descriptor and the threshold to plot.

        data_info -- descriptor object; ``name`` is used for the output file
                     name and the object is handed to the subclass loader.
        theta     -- value matched against the ``theta`` column of the frame.
        """
        self.data_info = data_info
        self.theta = theta

        self.xmax = 0
        self.ymax = 0
        self.ymax_offset = 1   # extra decades of head-room above ymax on the y axis
        self.xlog = True       # whether the x axis is logarithmic

        self.title_list = []

    @staticmethod
    def execute(data_info, theta):
        """Run the generator matching whichever field of data_info is '*'.

        Nothing happens when none of ``size``, ``qlen`` or ``nr`` is '*'.
        Declared static so it works both as ``PlotGenerator.execute(...)`` and
        through an instance.
        """
        if data_info.size == '*':
            TimeBySizePlotGenerator(data_info, theta).run()
        elif data_info.qlen == '*':
            TimeByQlenPlotGenerator(data_info, theta).run()
        elif data_info.nr == '*':
            TimeByNrulePlotGenerator(data_info, theta).run()

    def run(self):
        """Execute the full pipeline: load, write data, write script, render."""
        # '.' is replaced because the name becomes a file stem (e.g. 0.8 -> 0_8)
        self.output_name = '{}__{}__{:.1f}'.format(self.goal, self.data_info.name, self.theta).replace('.', '_')
        self.load_df()
        self.write_result()
        self.output_script()
        self.output_plot()

    def load_df(self):
        """Hook: subclasses assign the data frame to ``self.df``."""
        pass

    def parse_row(self, row):
        """Hook: subclasses return the data-file line (a str) for one row."""
        pass

    def update_xmax(self, row):
        """Hook called for every written row; the base class does nothing."""
        pass

    def update_ymax(self, row):
        """Track the largest ``Time_SearchPerQuery_MEAN`` seen so far."""
        self.ymax = max(self.ymax, row.Time_SearchPerQuery_MEAN)

    def additional_settings(self, f):
        """Hook: subclasses may write extra gnuplot commands to the script file f."""
        pass

    def write_result(self):
        """Write one gnuplot data block per setting to ``result/<name>.result``.

        Blocks are separated by two blank lines so that the script can address
        them with ``index N``. ``title_list`` is rebuilt on every call so that
        running the same generator twice does not duplicate series.
        """
        os.makedirs('result', exist_ok=True)
        self.path_result = join('result', '{}.result'.format(self.output_name))
        self.title_list = []
        df = self.df
        with open(self.path_result, 'w') as f:
            for setting in setting_list:
                self.title_list.append(dict_title[setting])
                f.write("# {}\n".format(setting))
                # A single combined mask; chaining two [] filters would apply a
                # full-length mask to an already filtered frame.
                df_target = df[(df.theta == self.theta) & (df.setting == setting)]
                for row in df_target.itertuples():
                    self.update_xmax(row)
                    self.update_ymax(row)
                    f.write(self.parse_row(row))
                f.write('\n\n')

    def _yrange_upper(self):
        """Return the upper y limit for the script, or '*' when ymax is not positive.

        For a positive ymax this is the next power of ten at or above it,
        raised by ``ymax_offset`` further decades. Without any positive value
        log10 is undefined, so gnuplot is asked to autoscale instead.
        """
        if not self.ymax > 0:
            return '*'
        return 10**(ceil(log10(self.ymax))+self.ymax_offset)

    def output_script(self):
        """Write the gnuplot script that plots every block of the result file."""
        os.makedirs('script', exist_ok=True)
        os.makedirs('plot', exist_ok=True)
        self.path_script = join('script', '{}.plot'.format(self.output_name))
        self.path_plot = join('plot', '{}.pdf'.format(self.output_name))
        with open(self.path_script, 'w') as f:
            f.write('set xlabel "{}" font ",20"\n'.format(self.xlabel))
            f.write('set ylabel "{}" font ",20"\n'.format(self.ylabel))
            f.write('set xtics font ",16"\n')
            f.write('set ytics font ",16"\n')
            f.write('set xrange {}\n'.format(self.xrange))
            f.write('set yrange [*:{}]\n'.format(self._yrange_upper()))
            f.write('set key vertical maxrows 4\n')
            if self.xlog:
                f.write('set logscale x\n')
                f.write('set format x "10^{%L}"\n')
            f.write('set logscale y\n')
            f.write('set format y "10^{%L}"\n')
            f.write('set size 0.6,0.6\n')
            self.additional_settings(f)

            # PostScript output is piped through ps2pdf to obtain the PDF
            f.write('set term postscript\n')
            f.write('set output"| ps2pdf - {}"\n'.format(self.path_plot))

            cmd_list = []
            for idx, title in enumerate(self.title_list):
                # block idx of the result file; dash type idx+1; marker cycles through pt_list
                cmd = '"{}" index {} with linespoints lc "black" lw 2 ps 1.5 dt {} pt {} title "{}"'.format(
                    self.path_result, idx, idx+1,
                    PlotGenerator.pt_list[idx%len(PlotGenerator.pt_list)], title)
                cmd_list.append(cmd)
            # one plot command, continued over several lines with trailing backslashes
            f.write('plot\\\n'+',\\\n'.join(cmd_list))

    def output_plot(self):
        """Render the script with gnuplot and crop the resulting PDF in place."""
        call(['gnuplot', self.path_script])
        time.sleep(0.3)
        # Only replace the plot when pdfcrop actually produced a cropped file
        if call(['pdfcrop', self.path_plot, 'tmp_plot']) == 0:
            shutil.move('tmp_plot', self.path_plot)

In [None]:
class TimeBySizePlotGenerator(PlotGenerator):
    """Plot of mean query time against the number of records (log-scaled x axis)."""

    def __init__(self, data_info, theta):
        """Configure output name prefix, axis labels and the fixed x range."""
        super(TimeBySizePlotGenerator, self).__init__(data_info, theta)
        self.goal = 'time_by_size'
        self.xlabel = '#records'
        # The backslashes are doubled so the label keeps its literal \( and \)
        # without relying on an invalid Python escape sequence.
        self.ylabel = 'Mean query time \\(sec\\)'
        self.xrange = '[10000:1000000]'
        self.xlog = True

    def load_df(self):
        """Load the frame via df_time_by_size (defined outside this cell)."""
        self.df = df_time_by_size(self.data_info)

    def parse_row(self, row):
        """Return one tab-separated data line: record count, mean query time."""
        return '{}\t{}\n'.format(row.n, row.Time_SearchPerQuery_MEAN)

In [None]:
class TimeByQlenPlotGenerator(PlotGenerator):
    """Plot of mean query time against query length (linear x axis)."""

    def __init__(self, data_info, theta):
        """Configure output name prefix, axis labels and the fixed x range."""
        super(TimeByQlenPlotGenerator, self).__init__(data_info, theta)
        self.goal = 'time_by_qlen'
        self.xlabel = 'Query length'
        # The backslashes are doubled so the label keeps its literal \( and \)
        # without relying on an invalid Python escape sequence.
        self.ylabel = 'Mean query time \\(sec\\)'
        self.xrange = '[1:9]'
        self.xlog = False

    def load_df(self):
        """Load the frame via df_time_by_qlen (defined outside this cell)."""
        self.df = df_time_by_qlen(self.data_info)

    def parse_row(self, row):
        """Return one tab-separated data line: query length, mean query time."""
        return '{}\t{}\n'.format(row.qlen, row.Time_SearchPerQuery_MEAN)

    def additional_settings(self, f):
        """Place x tics at 1, 3, 5, 7, 9 to match the [1:9] range."""
        f.write('set xtics 1,2,9\n')

In [None]:
class TimeByNrulePlotGenerator(PlotGenerator):
    """Plot of mean query time against the number of rules (log-scaled x axis)."""

    def __init__(self, data_info, theta):
        """Configure output name prefix, axis labels, x range and y head-room."""
        super(TimeByNrulePlotGenerator, self).__init__(data_info, theta)
        self.goal = 'time_by_nr'
        self.xlabel = '#rules'
        # The backslashes are doubled so the label keeps its literal \( and \)
        # without relying on an invalid Python escape sequence.
        self.ylabel = 'Mean query time \\(sec\\)'
        self.xrange = '[1000:110000]'
        self.xlog = True
        # two decades above the data maximum instead of the base-class default of one
        self.ymax_offset = 2

    def load_df(self):
        """Load the frame via df_time_by_nr (defined outside this cell)."""
        self.df = df_time_by_nr(self.data_info)

    def parse_row(self, row):
        """Return one tab-separated data line: rule count, mean query time."""
        return '{}\t{}\n'.format(row.nr, row.Time_SearchPerQuery_MEAN)

In [None]:
def plot_verify_by_alg(data_info, theta):
    """Render a bar chart of the number of verified pairs per setting.

    The frame comes from df_time_by_filtering(data_info) and is restricted to
    rows whose ``theta`` equals the given value, ordered by ``setting``. One
    tab-separated line per row is written to ``result/<name>.result`` with the
    columns: position, title, Num_Verified, Num_QS_Verified, Num_TS_Verified.
    The gnuplot script ``script/<name>.plot`` draws one box per line from
    columns 1 and 3, and the PDF ends up cropped in ``plot/<name>.pdf``.

    Rows whose setting has no entry in ``dict_title`` are left out of the plot.
    Any other error (missing column, I/O failure) is raised to the caller
    instead of being silently ignored.

    data_info -- descriptor object; ``name`` is used for the output file name.
    theta     -- value matched against the ``theta`` column.
    """
    goal = 'verify_by_alg'
    df = df_time_by_filtering(data_info)
    output_name = '{}__{}__{:.1f}'.format(goal, data_info.name, theta).replace('.', '_')
    df_target = df[df.theta==theta].sort_values(['setting'])
    title_list = []

    for out_dir in ('result', 'script', 'plot'):
        os.makedirs(out_dir, exist_ok=True)

    path_result = join('result', '{}.result'.format(output_name))
    with open(path_result, 'w') as f:
        for idx, row in enumerate(df_target.itertuples()):
            try:
                title = dict_title[row.setting]
            except KeyError:
                # setting without a display title: not part of the plot
                continue
            # Title and data line are recorded together so that entry k of
            # title_list always corresponds to line k of the result file.
            # NOTE(review): gnuplot splits columns on whitespace by default, so
            # a title containing spaces would shift column 3 - confirm titles
            # are space-free.
            title_list.append(title)
            f.write('{}\t{}\t{}\t{}\t{}\n'.format(idx, title, row.Num_Verified, row.Num_QS_Verified, row.Num_TS_Verified))

    path_script = join('script', '{}.plot'.format(output_name))
    path_plot = join('plot', '{}.pdf'.format(output_name))
    with open(path_script, 'w') as f:
        f.write('set style fill solid border\n')
        f.write('set ylabel "#Verified pairs" font ",20"\n')
        f.write('set xtics font ",16"\n')
        f.write('set ytics font ",16"\n')

        f.write('set logscale y\n')
        f.write('set format y "10^{%L}"\n')
        f.write('set boxwidth 0.9\n')
        f.write('set xtics format ""\n')
        f.write('set size 0.6,0.6\n')

        # PostScript output is piped through ps2pdf to obtain the PDF
        f.write('set term postscript\n')
        f.write('set output"| ps2pdf - {}"\n'.format(path_plot))

        cmd_list = []
        for idx, title in enumerate(title_list):
            # "every ::k::k" selects exactly line k of the result file;
            # fill patterns start at 5 so that each box gets its own pattern
            cmd = '"{}" every ::{}::{} using 1:3 with boxes fs pattern {} lw 2 title "{}"'.format(
                path_result, idx, idx, idx+5, title)
            cmd_list.append(cmd)
        # one plot command, continued over several lines with trailing backslashes
        f.write('plot\\\n'+',\\\n'.join(cmd_list))

    call(['gnuplot', path_script])
    time.sleep(0.3)
    # Only replace the plot when pdfcrop actually produced a cropped file
    if call(['pdfcrop', path_plot, 'tmp_plot']) == 0:
        shutil.move('tmp_plot', path_plot)