# Experiment Results

## Create Figures and Tables for LaTeX from ExperimentResults.ipynb


In [None]:
import sys
!{sys.executable} -m pip install --upgrade pip
!{sys.executable} -m pip install pandas markdown matplotlib import-ipynb
from IPython.core.display import display, HTML

#display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import import_ipynb
import pandas as pd
import matplotlib.pyplot as plt
import random
from IPython.display import display, Markdown
from ExperimentResults import *;




In [None]:
def readCSV(loc, indexcol):
    """Read the CSV at ``loc`` using ``indexcol`` as index; column names are lower-cased."""
    frame = pd.read_csv(loc, index_col=indexcol)
    return frame.rename(str.lower, axis='columns')

def readCSVs(folder, indexcol):
    """Read the main and component result CSVs stored in ``folder``.

    Args:
        folder: directory containing ``experimentResultsMain.csv`` and
            ``experimentResultsComponents.csv``.
        indexcol: column used as index for both files (previously ignored in
            favour of a hard-coded ``'run_nr'``).

    Returns:
        Tuple ``(df_main, df_comp)``.
    """
    df_main=readCSV(folder+"/experimentResultsMain.csv", indexcol=indexcol)
    df_comp=readCSV(folder+"/experimentResultsComponents.csv", indexcol=indexcol)
    return (df_main, df_comp)

def aligned(df1, df1_comp, df2, df2_comp):
    """Cut the longer pair of frames down to the length of the shorter pair.

    The length of each pair is taken as ``index.max() + 1`` of its main frame;
    the main frame and its component frame are sliced with the same bound.
    Returns the four frames in the order they were passed in.
    """
    rows1 = df1.index.max() + 1
    rows2 = df2.index.max() + 1

    # Same length: nothing to trim.
    if rows1 == rows2:
        return (df1, df1_comp, df2, df2_comp)

    if rows1 > rows2:
        # first pair has more results than the second pair
        df1, df1_comp = df1[:rows2], df1_comp[:rows2]
    else:
        # second pair has more results than the first pair
        df2, df2_comp = df2[:rows1], df2_comp[:rows1]
    return (df1, df1_comp, df2, df2_comp)

def readAndAlignCSVs(folder1, folder2, indexcol):
    """Read the result CSVs of two folders and trim both pairs to a common length.

    Returns ``(df1, df1_comp, df2, df2_comp)`` as produced by ``aligned``.
    """
    return aligned(*readCSVs(folder1, indexcol), *readCSVs(folder2, indexcol))

In [None]:
# Collect, per parser (in sorted order), the comparison frame and the plot
# returned by compareGrammarResults.
# NOTE(review): ls_df and rfc_df are not assigned in this notebook before the
# "Basic Experiment Results" cell -- presumably they come from the star import
# of ExperimentResults or from running that cell first; verify.
dfs={}
plots=[]
for parser in sorted(parsers):
    frame, plot=compareGrammarResults(parser, ls_df, "Living Standard", rfc_df, "RFC")
    dfs[parser]=frame
    plots.append(plot)
    

# focusing on a range
#for parser in parsers:
#    dfs[parser]=compareGrammarResults(parser, ls_df, "Living Standard", rfc_df, "RFC", (0,10))
    



The cells below create LaTeX-friendly representations of the data (tables and figures).

In [None]:
def createMultiOverview(dfs, parsers, pdfname, includeOther=False):
    """Draw one dot plot per parser on a 6x2 grid and save the figure to ``pdfname``.

    Args:
        dfs: mapping from parser name to the DataFrame to plot; the frame's
            index is the x axis ("Run"), the y axis is fixed to 0-100 and
            labelled in percent.
        parsers: ordered parser names. Plotting stops at the first name that is
            not a key of ``dfs`` or when the grid runs out of axes.
        pdfname: output file name handed to ``plt.savefig``.
        includeOther: when true, the 'firefox' and 'chromium' plots get two
            dotted reference lines ('Existing Test Files' and 'WPT tests').

    Relies on the module-level names ``colors_comp`` and ``ms``.
    """
    # Reference percentages per browser: (existing test files, WPT tests).
    reference_lines = {
        'firefox': (74.7, 84.4),
        'chromium': (82.71, 64.29),
    }

    plt.rcParams['figure.constrained_layout.use'] = True
    plt.rcParams['xtick.minor.size'] = 0
    plt.rcParams['xtick.minor.width'] = 0

    figure, axs = plt.subplots(6,2, figsize=(40,60))
    axs[-1, -1].axis('off')

    # zip() ends the loop when either the axes or the parser names run out,
    # which replaces the former bare ``except: break``.
    for loc, parser in zip(axs.flat, parsers):
        if parser not in dfs:
            break
        new_df=dfs[parser]

        name=parser.replace('script', 'script ')

        plot=new_df.plot(title=name.capitalize()+"\n",ylim=(0,100),
                         ax=loc, style=".", color=colors_comp, ms=ms/2, legend=False, logx=True)
        try:
            plot.set_xscale('symlog', linthresh=1, linscale=0.08)
        except (TypeError, ValueError):
            # Older matplotlib releases only know the axis-suffixed keyword names.
            plot.set_xscale('symlog', linthreshx=1, linscalex=0.08)

        plot.margins(x=0.025)

        xlim=new_df.index.max()
        if includeOther and parser in reference_lines:
            existing_cov, wpt_cov = reference_lines[parser]
            plot.hlines(y=existing_cov, xmin=0, xmax=xlim, color='black', linestyle=':', lw=4, label='Existing Test Files')
            plot.hlines(y=wpt_cov, xmin=0, xmax=xlim, color='y', linestyle=':', lw=4, label='WPT tests')

        vals = loc.get_yticks()
        loc.set_yticklabels(['{:.0f}'.format(x)+"%" for x in vals])
        plot.grid(True)
        plot.set_xlabel("Run")
        #plot.legend(loc='best',markerscale=2.)

    # The two axes that keep their own x tick labels.
    keeps_x_only = axs[4][1]      # y tick labels are blanked on this one
    keeps_x_and_y = axs[5][0]

    use_labels=[]
    use_handles=[]
    for ax in figure.get_axes():
        # Remember the richest legend found on any axes.
        handles, labels = ax.get_legend_handles_labels()
        if len(labels)>len(use_labels):
            use_labels=labels
            use_handles=handles

        if ax is keeps_x_only:
            # Use this axes' own ticks instead of a value left over from the plotting loop.
            ax.set_yticklabels(['' for _ in ax.get_yticks()])
            valsx = ax.get_xticks()
            ax.set_xticklabels(['{:.0f}'.format(x) for x in valsx])
        elif ax is keeps_x_and_y:
            valsx = ax.get_xticks()
            ax.set_xticklabels(['{:.0f}'.format(x) for x in valsx])
        else:
            ax.label_outer()

    # Build the shared legend only after every axes has been inspected.
    figure.legend(use_handles, use_labels, loc=(0.6,0.1), markerscale=4.)

    plt.savefig(pdfname)
#'ov_detail.pdf'
    

In [None]:
def create_comp_table(comp_df, b_df, steps=100, components=['success', 'scheme', 'username', 
                                                            'password', 'host', 'port', 'path', 
                                                            'query', 'fragment', 'reject']):
    tab_df=comp_df
    col=b_df['nr-inputs']
    tab_df['nr_inputs_unique']=col
    
    runs=tab_df.index.max()
    f='l'
    #make sure table fits on latex page: 1st column p{3.1cm}, 12+1 entries
    while runs//steps>=11:
        f='p{3.1cm}'
        steps+=4
    tab_df=tab_df.reindex(sorted(tab_df.columns), axis=1)
    last_elem=tab_df.tail(1)
    tab_df=tab_df[::steps].append(last_elem).transpose()
    

    
    for c in tab_df.columns:
        f+='r'
    table=tab_df.to_latex(column_format=f, float_format="{:.0f}".format)
    for col in comp_df.columns:
        for comp in components:
            if col.count(comp)>1:
                tcol=col.replace('_', '\_')
                table=table.replace(tcol, '\\textbf{'+tcol+'}')
    return table

In [None]:
def fixCapitalization(text, find=(), replacement=()):
    """Apply substring replacements to every line of ``text`` and capitalize each line.

    Args:
        text: input string; it is split on ``'\\n'``.
        find: substrings to search for.
        replacement: substitutes, paired position-wise with ``find``.

    Returns:
        The processed lines, each one preceded by a newline (so the result
        starts with ``'\\n'``). ``str.capitalize`` upper-cases the first
        character of a line and lower-cases the rest.
    """
    pairs = list(zip(find, replacement))
    fixed_lines = []
    for line in text.split('\n'):
        for s, r in pairs:
            line = line.replace(s, r)
        fixed_lines.append('\n' + line.capitalize())
    return ''.join(fixed_lines)


In [None]:
def createMaxCovBarDiagram(dfs, pdfname):
    """Plot the last-run value per parser as a bar chart and return a summary frame.

    Args:
        dfs: mapping from parser name to a frame with a 'Living Standard'
            column and, optionally, an 'RFC' column.
        pdfname: file name the bar chart is saved to.

    Returns:
        A numeric DataFrame indexed by parser with the last-run value of each
        grammar column and the index label (``idxmax``) of its maximum
        ('ls max run' / 'rfc max run').

    Relies on the module-level names ``parsers``, ``colors_comp`` and ``display``.
    """
    plt.rcParams['font.size']='40'
    figx, figy=(35,20)
    labelrot=0

    columns=['Living Standard', "RFC"]
    columns2=['Living Standard', 'ls max run', "RFC", 'rfc max run']
    try:
        # Probe the first parser's frame for an 'RFC' value.
        dfs[parsers[0]].tail(1)['RFC'].iloc[0]
    except (KeyError, IndexError):
        # No RFC data: fall back to the single-grammar layout.
        columns=['Living Standard']
        columns2=['Living Standard', 'ls max run']
        figx=30
        figy=15
        #labelrot=15

    overview_df=pd.DataFrame(index=list(dfs), columns=columns)
    for p in dfs:
        last_row=dfs[p].tail(1)
        for c in columns:
            # Single .loc[row, col] write; chained ``.loc[p][c] = ...`` may assign to a copy.
            overview_df.loc[p, c]=last_row[c].iloc[0]

    fig, axs=plt.subplots(figsize=(figx,figy))

    overview_df=overview_df.sort_index()
    plot=overview_df.plot(kind='bar', title="",ylim=(0,100),
                          color=colors_comp, ax=axs)#, alpha=0.85
    plot.grid(True, alpha=0.5, zorder=2)

    display(overview_df)
    #axs.set_ylabel("Coverages", fontsize=40)
    vals = axs.get_yticks()
    axs.set_yticklabels(['{:.0f}'.format(x)+"%" for x in vals])

    labels = [item.get_text() for item in axs.get_xticklabels()]
    blabels=[]
    for label in labels:
        if len(columns)<2:
            label=label.replace('javascript', 'JS\n')
        blabels.append("\n"+label.replace('script', 'script\n').capitalize())
    axs.set_xticklabels(blabels)
    plt.xticks(rotation=labelrot, horizontalalignment="center")
    axs.set_axisbelow(True)

    for i in plot.patches:
        # Write each bar's height above it.
        # NOTE(review): ``get_width()//2`` is floor division, i.e. 0 for bar widths
        # below 2, so the x offset is effectively the constant 0.055. Kept unchanged
        # because the label placement looks tuned to it -- confirm whether ``/2`` was meant.
        plot.text(i.get_x()+i.get_width()//2+0.055, i.get_height()+0.5,
            str(i.get_height())+'%', fontsize=40,
                  horizontalalignment="left",
                  verticalalignment="bottom", rotation='vertical',
                  family='sans serif', color='black', zorder=3)

    plt.savefig(pdfname)#'ov_max_bar.pdf'
    plt.show()

    overview_df=pd.DataFrame(index=list(dfs), columns=columns2)
    for p in dfs:
        overview_df.loc[p, 'Living Standard']=dfs[p].tail(1)['Living Standard'].iloc[0]
        overview_df.loc[p, 'ls max run']=dfs[p]['Living Standard'].idxmax()
        if len(columns2) >2:
            overview_df.loc[p, 'RFC']=dfs[p].tail(1)['RFC'].iloc[0]
            overview_df.loc[p, 'rfc max run']=dfs[p]['RFC'].idxmax()

    # The frame was created with object dtype; convert so describe() yields statistics.
    for c in overview_df.columns:
        overview_df[c]=pd.to_numeric(overview_df[c])

    display(overview_df.describe())
    return overview_df
    
    
    

In [None]:
def createCovRunTable(overview_df):
    """Display ``overview_df`` sorted by each column, then print it as LaTeX.

    The printed table is sorted by 'Living Standard' (descending), formatted
    with two decimals and a percent sign, and post-processed by
    ``fixCapitalization``.
    """
    for sort_key in overview_df.columns:
        display(overview_df.sort_values(by=sort_key, ascending=False))

    by_living_standard=overview_df.sort_values(by='Living Standard', ascending=False)
    latex=by_living_standard.to_latex(column_format='lcccc', float_format="{:.2f}%".format)
    print(fixCapitalization(latex, ['script', '.00'], ['script ', '']))

In [None]:
def createComponentErrorPercentageTable(ls_df, ls_df_comp, rfc_df, rfc_df_comp):
    """Display and print (as LaTeX) the per-component results for firefox and chromium.

    Args:
        ls_df: Living Standard main results; its last row supplies 'nr-inputs'
            and the run number used in the caption.
        ls_df_comp: Living Standard component results passed to ``browserComponentDetail``.
        rfc_df: RFC main results, or ``None`` when only one grammar was run.
        rfc_df_comp: RFC component results, or ``None``.

    Prints the LaTeX table followed by a ``\\caption`` line. Relies on the
    module-level names ``browserComponentDetail`` and ``display``.
    """
    all_components=['success', 'scheme', 'username', 'password', 'host',
                    'port', 'path', 'query', 'fragment', 'reject']

    def last_nr_inputs(df):
        # Scalar 'nr-inputs' of the last row; int() on a one-element Series is deprecated.
        return int(df.tail(1)['nr-inputs'].max())

    err_df=browserComponentDetail('firefox', ls_df_comp, abs_inputs=last_nr_inputs(ls_df),
                                 components=list(all_components))
    if rfc_df_comp is not None:
        err_df3=browserComponentDetail('firefox', rfc_df_comp, abs_inputs=last_nr_inputs(rfc_df),
                                     components=['success', 'reject'])
        err_df['firefox rfc']=err_df3['firefox']

    err_df2=browserComponentDetail('chromium', ls_df_comp, abs_inputs=last_nr_inputs(ls_df),
                                 components=list(all_components))
    err_df['chromium']=err_df2['chromium']

    if rfc_df_comp is not None:
        err_df4=browserComponentDetail('chromium', rfc_df_comp, abs_inputs=last_nr_inputs(rfc_df),
                                     components=['success', 'reject'])
        err_df['chromium rfc']=err_df4['chromium']
    display(err_df)

    inputsls=last_nr_inputs(ls_df)
    last_ls_run=ls_df.tail(1).index.max()

    rfccap=' unique inputs'
    if rfc_df is not None:
        inputsrfc=last_nr_inputs(rfc_df)
        rfccap=' (Living Standard-based) and '+str(inputsrfc)+'(RFC-based) unique inputs'
        # Shows the last run number of both result sets side by side.
        print(str(last_ls_run)+" = "+str(rfc_df.tail(1).index.max()))
    caption='for run '+str(last_ls_run)+' with '+str(inputsls)+rfccap

    # Missing values appear as an escaped "nan\%" in the table; show them as "-".
    print(err_df.to_latex(column_format='lcccc',
        float_format="{:.2%}".format).replace("nan\\%", "-").replace('.00',''))
    print('\\caption[Browser component errors]{Browser component errors '+caption+'}')

In [None]:
def createErrorPercentageTable(ls_df, rfc_df):
    """Display and print (as LaTeX) the last-run parser error rates per grammar.

    Args:
        ls_df: Living Standard main results.
        rfc_df: RFC main results, or ``None`` when only one grammar was run.

    For each grammar the last row of the frame returned by
    ``errorsOverviewPlot`` becomes one column of the table. Prints the LaTeX
    table followed by a ``\\caption`` line. Relies on the module-level names
    ``errorsOverviewPlot``, ``fixCapitalization`` and ``display``.
    """
    e_f_df=pd.DataFrame()

    e_df=errorsOverviewPlot(ls_df, "Living Standard Exceptions Overview\n")
    last_ls_run=ls_df.tail(1).index.max()
    e_f_df['Living Standard']=e_df.tail(1).transpose()[last_ls_run]

    if rfc_df is not None:
        e_df2=errorsOverviewPlot(rfc_df, "RFC Exceptions Overview\n")
        e_f_df['RFC']=e_df2.tail(1).transpose()[rfc_df.tail(1).index.max()]

    display(e_f_df)

    inputsls=int(ls_df.tail(1)['nr-inputs'].max())

    rfccap=' unique inputs'
    if rfc_df is not None:
        inputsrfc=int(rfc_df.tail(1)['nr-inputs'].max())
        rfccap=' (Living Standard-based) and '+str(inputsrfc)+'(RFC-based) unique inputs'
        # Shows the last run number of both result sets side by side
        # (printed once; the earlier duplicate of this line was dropped).
        print(str(last_ls_run)+" = "+str(rfc_df.tail(1).index.max()))
    caption='for run '+str(last_ls_run)+' with '+str(inputsls)+rfccap

    latex=e_f_df.sort_values(by='Living Standard').to_latex(column_format='lcc',float_format="{:.2f}%".format)
    print(fixCapitalization(latex, ['-exceptions', 'script', '.00'], ['', 'script ', '']))
    print('\\caption[Parser error rates]{Parser error rates '+caption+'}')

In [None]:
def produceExperimentResults(ls_df, ls_df_comp, rfc_df, rfc_df_comp, pdfnamesuffix, stepsize):
    """Produce all figures and LaTeX tables for a two-grammar experiment.

    Compares Living Standard and RFC results per parser, saves the overview and
    bar-chart PDFs (file names end in ``pdfnamesuffix``), and prints the
    component tables (sampled every ``stepsize`` runs) and the error tables.
    """
    ordered_parsers=sorted(parsers)
    dfs={}
    plots=[]
    for parser in ordered_parsers:
        frame, plot=compareGrammarResults(parser, ls_df, "Living Standard", rfc_df, "RFC")
        dfs[parser]=frame
        plots.append(plot)

    detail_pdf="ov_detail"+pdfnamesuffix+".pdf"
    createMultiOverview(dfs, ordered_parsers, detail_pdf, True)

    # Component tables: RFC first, then Living Standard.
    for heading, comp_frame, main_frame in (('rfc comp table', rfc_df_comp, rfc_df),
                                            ('ls comp table', ls_df_comp, ls_df)):
        print(heading)
        print(create_comp_table(comp_frame, main_frame, stepsize))

    bar_pdf="ov_max_bar"+pdfnamesuffix+".pdf"
    res_df=createMaxCovBarDiagram(dfs, bar_pdf)
    createCovRunTable(res_df)

    createComponentErrorPercentageTable(ls_df, ls_df_comp, rfc_df, rfc_df_comp)

    createErrorPercentageTable(ls_df, rfc_df)
    

## Basic Experiment Results

In [None]:


# Load the Living Standard ('./ls') and RFC ('./rfc') results, trimmed to a common length.
ls_df, ls_df_comp, rfc_df, rfc_df_comp=readAndAlignCSVs('./ls', './rfc', indexcol='run_nr')


# Generate every figure/table; output PDFs carry the suffix "base".
produceExperimentResults(ls_df, ls_df_comp, rfc_df, rfc_df_comp,
                         pdfnamesuffix="base", stepsize=100)


## Small Test Set Size Experiment Results

In [None]:

# Same pipeline for the results stored under './smallExp'.
small_ls_df, small_ls_df_comp, small_rfc_df, small_rfc_df_comp=readAndAlignCSVs(
    './smallExp/ls', './smallExp/rfc', indexcol='run_nr')

produceExperimentResults(small_ls_df, small_ls_df_comp, small_rfc_df, small_rfc_df_comp,
                         pdfnamesuffix="small", stepsize=10)

## Large Test Set Size Experiment Results

In [None]:
def produceSingleGrammarExperimentResults(ls_df, ls_df_comp, stepsize, pdfnamesuffix):
    """Produce all figures and LaTeX tables for an experiment with Living Standard results only.

    Mirrors the two-grammar pipeline but passes ``None`` wherever RFC data
    would be expected. Output PDF names end in ``pdfnamesuffix``; the component
    table is sampled every ``stepsize`` runs.
    """
    ordered_parsers=sorted(parsers)
    dfs={}
    plots=[]

    for parser in ordered_parsers:
        frame, plot=compareGrammarResults(parser, ls_df, "Living Standard", None, "")
        dfs[parser]=frame
        plots.append(plot)

    detail_pdf="ov_detail"+pdfnamesuffix+".pdf"
    createMultiOverview(dfs, ordered_parsers, detail_pdf, True)

    print('comp table')
    print(create_comp_table(ls_df_comp, ls_df, stepsize))

    #don't use bar diagram but use res_df
    bar_pdf="ov_max_bar"+pdfnamesuffix+".pdf"
    res_df=createMaxCovBarDiagram(dfs, bar_pdf)
    createCovRunTable(res_df)

    createComponentErrorPercentageTable(ls_df, ls_df_comp, None, None)

    createErrorPercentageTable(ls_df, None)

In [None]:

# Single-grammar run: read the results under './largeTestSetSize' and report on them.
large_ls_df, large_ls_df_comp=readCSVs('./largeTestSetSize', indexcol='run_nr')
produceSingleGrammarExperimentResults(large_ls_df, large_ls_df_comp,
                                      stepsize=1, pdfnamesuffix="large")
