# Experiment Results

## Details

- generation of inputs with [tribble](https://github.com/havrikov/tribble/)
- inputs generated from two grammars, one based on the current [URL standard](https://url.spec.whatwg.org/), the other one based on the RFC documents [RFC 3986](https://tools.ietf.org/html/rfc3986#appendix-A) and [RFC 6874](https://tools.ietf.org/html/rfc6874)
- experiment executed for both grammars with identical docker images
- for each run: 10 inputs selected at random from the generated inputs and added to the tests to execute
- run x contains all inputs used in runs 0..x-1 and 10 newly selected inputs
- execution of tests for 11 URL parsers:


*Language* | *URL Parser* | *Coverage Tool* 
--- | --- | ---
Firefox | nsURLParsers.cpp | grcov/genhtml
Chromium | url_parse.cc | coverage.py script
C | uriparser | LCOV 
C++ | POCO |LCOV 
Go | Package net/url | Package testing with coverage flags 
Java | java.net URL Class | JCOV 
JavaScript <br> | urijs <br> jsdom/whatwg-url| nyc/istanbul <br>
PHP | League URI | PHPUnit  
Python 3| urllib.parse | Coverage.py  
Ruby | Module URI |SimpleCov 

In [None]:
# Install the notebook's third-party dependencies with the same Python
# interpreter that is executing this notebook (sys.executable).
import sys
!{sys.executable} -m pip install pandas markdown matplotlib
from IPython.core.display import display, HTML
# Optional (disabled): stretch the notebook container to the full page width.
#display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import random
from IPython.display import display, Markdown


def _read_results(path):
    """Read a result CSV indexed by 'run_nr' with all column names lower-cased."""
    return pd.read_csv(path, index_col='run_nr').rename(columns=str.lower)


# Main and per-component result tables for both grammars.
ls_df = _read_results("./ls/experimentResultsMain.csv")
ls_df_comp = _read_results("./ls/experimentResultsComponents.csv")
rfc_df = _read_results("./rfc/experimentResultsMain.csv")
rfc_df_comp = _read_results("./rfc/experimentResultsComponents.csv")

pd.set_option('display.float_format', lambda x: '%.3f' % x)

# Parser names are the coverage column names without their "-cov" suffix.
# Matching on the suffix (not on a substring) guarantees that
# parser + "-cov" is an existing column again when the plots look it up.
parsers = [c[:-len("-cov")] for c in ls_df.columns.values if c.endswith("-cov")]
colors_all = ['orange', 'darkblue', 'red', 'yellow', 'lightblue', 'purple',
              'beige', 'grey', 'black', 'pink', 'green']
chars = '0123456789ABCDEF'

# One fixed colour per parser; the palette wraps around instead of raising
# IndexError when there are more parsers than colours.
colors_map = {p: colors_all[i % len(colors_all)] for i, p in enumerate(parsers)}


def _random_hex_color(lead):
    """Return '#' + lead + five distinct random hex digits (a 6-digit colour)."""
    return '#' + lead + ''.join(random.sample(chars, 5))


# Random colour per component column; the leading hex digit depends on the
# kind of column so related columns get a similar red channel.
comp_colors = {}
for col in ls_df_comp.columns:
    if '_success-' in col:
        lead = '0'
    elif '_success' in col:
        lead = '8'
    else:
        lead = 'F'
    comp_colors[col] = _random_hex_color(lead)
# Columns that only occur in the RFC component table.
for col in rfc_df_comp.columns:
    if col not in comp_colors:
        comp_colors[col] = _random_hex_color('7')

# Colours for the two-grammar comparison plots and the base marker size.
colors_comp = ['darkgreen', 'blue']
ms = 22

def setTickDistances(n, axs):
    """Thin out the x-axis of *axs* so that only every n-th tick is kept.

    Args:
        n: Step between the ticks to keep. Values below 1 are treated as 1
            (keep every tick).
        axs: Axes-like object whose ``xaxis`` provides ``get_ticklocs``,
            ``get_ticklabels``, ``set_ticks`` and ``set_ticklabels``.

    Returns:
        None. The axis is modified in place.
    """
    # Callers derive n as "number of inputs // 10", which is 0 for fewer than
    # ten inputs; a zero step would raise ValueError in the slices below and
    # a negative one would reverse the axis.
    step = max(1, int(n))
    xaxis = axs.xaxis
    ticks = xaxis.get_ticklocs()
    ticklabels = [label.get_text() for label in xaxis.get_ticklabels()]
    xaxis.set_ticks(ticks[::step])
    xaxis.set_ticklabels(ticklabels[::step])


## Living Standard Results
### Tabular View

In [None]:
# First ten rows of the Living Standard main results table.
ls_df.head(10)

In [None]:
# Last ten rows of the Living Standard main results table.
ls_df.tail(10)

### Overview Plot 

In [None]:
def overviewPlot(df, title):
    """Plot the coverage of every parser over all runs in one figure.

    For each name in the global ``parsers`` list the column
    ``"<parser>-cov"`` of *df* is drawn as dots in the parser's colour from
    ``colors_map``; the y-axis is fixed to 0..100.

    Args:
        df: Result table containing one ``"<parser>-cov"`` column per parser.
        title: Title of the plot.
    """
    fig, axs = plt.subplots(figsize=(40, 20))
    plt.rcParams['font.size'] = '40'

    for p in parsers:
        df[p + "-cov"].plot(title=title,
                            ylim=(0, 100), ax=axs,
                            color=colors_map[p], style=".", ms=2 * ms,
                            label=p.capitalize())

    # Every series is drawn onto axs, so configure axs directly; relying on
    # the value returned inside the loop would fail with a NameError when
    # the parser list is empty.
    axs.legend(bbox_to_anchor=(1, -0.2), markerscale=2.)
    axs.grid(True)

    axs.set_ylabel("Coverages", fontsize=40)
    axs.set_xlabel("Run", fontsize=40)
    plt.show()

In [None]:
# Coverage of every parser per run for the Living Standard results.
overviewPlot(ls_df,"Living Standard Coverages Overview\n" )

### Errors and Exceptions

In [None]:
def errorsOverviewPlot(df, title):
    """Draw a bar chart of the exception counts in the last row of *df*.

    Every column whose name contains ``'exceptions'`` becomes one bar; each
    bar is annotated with its height.

    Args:
        df: Result table with the exception columns.
        title: Title of the plot.
    """
    fig, axs = plt.subplots(figsize=(7, 10))
    plt.rcParams['font.size'] = '30'

    # The previous version also copied these columns from the global ls_df
    # into an unused frame, which failed whenever df was a different table.
    exception_columns = [cn for cn in df.columns if 'exceptions' in cn]

    plot = df.tail(1).plot(kind='bar', title=title,
                           ax=axs, width=0.9,
                           y=exception_columns,
                           color=list(colors_map.values()), rot=0)

    plot.legend(markerscale=2., loc=(1.2, 0))
    plot.grid(True)

    # Write the value of each bar slightly above its top edge.
    for bar in plot.patches:
        plot.annotate(format(bar.get_height(), '.0f'),
                      (bar.get_x() + bar.get_width() / 2, bar.get_height()),
                      ha='center', xytext=(0, 10),
                      textcoords='offset points', color='dimgrey')

    axs.set_ylabel("Exceptions", fontsize=40)
    axs.set_xlabel("Run", fontsize=40)
    axs.set_axisbelow(True)
    plt.show()

In [None]:

# Exception counts of the last Living Standard run.
errorsOverviewPlot(ls_df, "Living Standard Exceptions Overview\n")

### Equal and Unequal Results

In [None]:
def equalResultsOverview(df, title):
    """Plot the result columns of *df* as stacked bars, one bar per row.

    Columns whose name contains ``'result'`` are split into two groups:
    those that also contain ``'b-'`` are returned untouched, all others are
    plotted.

    Args:
        df: Result table; must contain an ``'nr-inputs'`` column.
        title: Title of the plot.

    Returns:
        DataFrame with the ``'result'`` columns whose name contains ``'b-'``.
    """
    fig, axs = plt.subplots(figsize=(30, 15))
    plt.rcParams['font.size'] = '30'

    result_columns = [c for c in df.columns if 'result' in c]
    new_df = df[[c for c in result_columns if 'b-' not in c]].copy()
    b_new_df = df[[c for c in result_columns if 'b-' in c]].copy()

    plot = new_df.plot(kind='bar', stacked=True, ax=axs, rot=0,
                       color=['g', 'pink', 'r'], title=title)

    # Hatch the first two bar groups so they stay distinguishable in print.
    for container, hatch in zip(plot.containers, ("/", ".")):
        for patch in container.patches:
            patch.set_hatch(hatch)

    # Keep roughly one tick per ten inputs. The step must be a positive
    # integer, otherwise thinning the ticks fails for fewer than ten inputs.
    n = max(1, int(df['nr-inputs'].max()) // 10)
    setTickDistances(n, axs)

    plot.grid(True)
    plot.legend(loc='best')
    axs.set_ylabel("Inputs", fontsize=40)
    axs.set_axisbelow(True)
    plt.show()
    return b_new_df

In [None]:
# Plot the Living Standard result columns and keep the returned 'b-' result
# columns for the browser equality plot.
ls_b_new_df=equalResultsOverview(ls_df, "Living Standard Parser Equality\n")

In [None]:
def browserEquality(df, title):
    """Plot all columns of *df* as hatched, stacked bars (one bar per row).

    Args:
        df: Table of result counts; columns whose name contains ``'result'``
            are summed over the last row to derive the tick spacing.
        title: Title of the plot.
    """
    fig, axs = plt.subplots(figsize=(30, 15))
    plt.rcParams['font.size'] = '30'
    plot = df.plot(kind='bar', stacked=True, rot=0, ax=axs,
                   color=['g', 'pink', 'orange', 'lightblue', 'r'], title=title)
    axs.set_ylabel("Inputs", fontsize=40)
    axs.set_axisbelow(True)
    for container, hatch in zip(plot.containers, ("/", ".", "\\", "x")):
        for patch in container.patches:
            patch.set_hatch(hatch)

    # Sum the result counts of the last row as a scalar; calling int() on a
    # one-element Series is deprecated in recent pandas versions.
    result_columns = [col for col in df.columns if 'result' in col]
    nr_inputs = int(df[result_columns].iloc[-1].sum())
    # At least 1, so thinning the ticks also works for fewer than ten inputs.
    n = max(1, nr_inputs // 10)
    setTickDistances(n, axs)
    plot.grid(True)
    plot.legend(loc='best')
    plt.show()

In [None]:
# Stacked bars of the 'b-' result columns returned by equalResultsOverview.
browserEquality(ls_b_new_df, "Living Standard Browser Results Equality\n")

#### Browser Components

In [None]:
def browserComponentComparison(df_comp, title):
    """Plot the component columns of *df_comp* as stacked bars per row.

    All columns whose name does not contain ``'nr'`` are plotted, each in
    its colour from the global ``comp_colors`` mapping.

    Args:
        df_comp: Component result table.
        title: Title of the plot.
    """
    fig, axs = plt.subplots(figsize=(30, 15))
    plt.rcParams['font.size'] = '30'

    value_columns = [c for c in df_comp.columns if 'nr' not in c]
    new_df = df_comp[value_columns].copy()
    colors = [comp_colors[name] for name in value_columns]

    plot = new_df.plot(kind='bar', stacked=True, ax=axs, rot=0, color=colors,
                       title=title)
    axs.set_ylabel("Inputs", fontsize=40)

    # Total of the last row as a scalar; calling int() on a one-element
    # Series is deprecated in recent pandas versions.
    nr_inputs = int(new_df.iloc[-1].sum())
    # At least 1, so thinning the ticks also works for totals below ten.
    n = max(1, nr_inputs // 10)
    setTickDistances(n, axs)
    plot.grid(True)
    axs.set_axisbelow(True)
    plt.show()

In [None]:
# Stacked bars of all non-'nr' columns of the Living Standard component table.
browserComponentComparison(ls_df_comp, "Living Standard Browser Component Equality\n")

#### Separately for each Browser

Considering the latest available run for each figure.

In [None]:
def browserComponentDetail(browser, df, run_nr=-1):
    """Show, per URL component, the result counts of one browser in one run.

    For every URL component the columns whose name contains
    ``"<browser>_<component>"`` are collected; the difference between their
    sum and ``nr_inputs`` is added as ``'other'``. The resulting table is
    displayed and plotted as one stacked bar per component.

    Args:
        browser: Browser prefix used in the column names, e.g. ``'firefox'``.
        df: Component result table; must contain an ``'nr_inputs'`` column.
        run_nr: Position of the row to use; negative selects the last row.
    """
    # select the specified run, default: last run
    if run_nr < 0:
        df = df.tail(1)
    else:
        df = df.head(run_nr + 1).tail(1)
    fig, axs = plt.subplots(figsize=(25, 15))
    plt.rcParams['font.size'] = '30'

    # df holds exactly one row now; read scalars with .iloc[0] because
    # int() on a one-element Series is deprecated in recent pandas versions.
    inputs = int(df['nr_inputs'].iloc[0])

    frames = []
    for component in ['scheme', 'username', 'password', 'host', 'port', 'path', 'query', 'fragment']:
        matching = [c for c in df.columns if browser + '_' + component in c]
        new_df = df[matching].copy()
        sumcomponent = sum(int(df[c].iloc[0]) for c in matching)

        new_df['component'] = [component]
        new_df['other'] = [inputs - sumcomponent]
        frames.append(new_df)

    # Concatenate once instead of growing an initially empty frame.
    full_df = pd.concat(frames, ignore_index=True, sort=True)
    display(full_df.head(10))

    # Work on a copy so the extra 'other' entry does not leak into the
    # global colour mapping.
    comp_colors2 = dict(comp_colors)
    comp_colors2['other'] = 'dimgrey'

    colors = [comp_colors2[name] for name in full_df.columns if name != 'component']

    plot = full_df.plot(kind='bar', stacked=True, ax=axs, x='component',
                        color=colors,
                        rot=0, title=browser.capitalize() + " Details\n")
    plt.show()
    

View the full results of this run [here](./ls/lastRun/resultOverview.html).

In [None]:
# Per-component breakdown of the last Living Standard run for both browsers.
browserComponentDetail('firefox', ls_df_comp)
browserComponentDetail('chromium', ls_df_comp)

#### Parsing Success and Rejection

A parsing success means that the parser accepted the given input as a valid URL and all URL components had the expected content. A rejection means that the parser did not accept the given input as a valid URL and no component checks were performed. Between these outcomes lies a third possible outcome: the parser accepts the given input as a valid URL but at least one component did not have the expected content.

The following figures show which result one browser had when the other browser successfully parsed a URL or when it rejected the URL. 

In [None]:
def browserDetail(browser, df, run_nr=-1):
    """Draw two pie charts for the success and reject columns of one browser.

    For each of the outcomes ``'success'`` and ``'reject'`` the columns whose
    name contains ``"<browser>_<outcome>"`` are collected from the selected
    row. The combined table is displayed and each outcome is plotted as a
    pie chart in its own figure, followed by an explanatory note.

    Args:
        browser: Browser prefix used in the column names, e.g. ``'firefox'``.
        df: Component result table.
        run_nr: Position of the row to use; negative selects the last row.
    """
    # select the specified run, default: last run
    if run_nr < 0:
        df = df.tail(1)
    else:
        df = df.head(run_nr + 1).tail(1)
    fig, axs = plt.subplots(figsize=(10, 10))
    fig2, axs2 = plt.subplots(figsize=(10, 10))

    # Only the matching columns are needed here; the earlier input total and
    # per-column sum were never used and have been dropped.
    frames = []
    for outcome in ['success', 'reject']:
        matching = [c for c in df.columns if browser + '_' + outcome in c]
        new_df = df[matching].copy()
        new_df['result'] = [outcome]
        frames.append(new_df)

    # Concatenate once instead of growing an initially empty frame.
    full_df = pd.concat(frames, ignore_index=True, sort=True)
    full_df = full_df.set_index('result')
    display(full_df)

    # After transposing, each outcome is one column that can be drawn as a pie.
    per_outcome = full_df.transpose()
    plot = per_outcome.plot(kind='pie', ax=axs,
                            colormap='Greens', y='success',
                            title=browser.capitalize() + " Details\n",
                            legend=False)

    plot2 = per_outcome.plot(kind='pie', ax=axs2,
                             colormap='YlOrRd', y='reject',
                             legend=False)

    plt.show()
    display(Markdown("Note that these pie charts **only** show the other browsers results for \
    success or reject results of the specified browser. The different component combinations are \
    discussed above."))

In [None]:
# Success/reject pie charts for the 'firefox' columns of the last Living Standard run.
browserDetail('firefox', ls_df_comp)

In [None]:
# Success/reject pie charts for the 'chromium' columns of the last Living Standard run.
browserDetail('chromium', ls_df_comp)

## RFC Results

### Tabular View

In [None]:
# First ten rows of the RFC main results table.
rfc_df.head(10)

In [None]:
# Last ten rows of the RFC main results table.
rfc_df.tail(10)

### Coverages Overview Plot

In [None]:
# Coverage of every parser per run for the RFC results.
overviewPlot(rfc_df,"RFC Coverages Overview\n" )

### Errors and Exceptions Overview

In [None]:
# Exception counts of the last RFC run.
errorsOverviewPlot(rfc_df, "RFC Exceptions Overview\n")

### Equal and Unequal Results

In [None]:
# Plot the RFC result columns and keep the returned 'b-' result columns for
# the browser equality plot.
rfc_b_new_df=equalResultsOverview(rfc_df, "RFC Results Parser Equality\n")

In [None]:
# Stacked bars of the 'b-' result columns returned by equalResultsOverview.
browserEquality(rfc_b_new_df, "RFC Browser Results Equality\n") 

In [None]:
# Stacked bars of all non-'nr' columns of the RFC component table.
browserComponentComparison(rfc_df_comp, "RFC Browser Component Equality\n")

### Detailed Browser Comparison

Considering the last available run. View the full results of this run [here](./rfc/lastRun/resultoverview.html).

In [None]:
# Success/reject pie charts for the 'firefox' columns of the last RFC run.
browserDetail('firefox', rfc_df_comp)


In [None]:
# Success/reject pie charts for the 'chromium' columns of the last RFC run.
browserDetail('chromium', rfc_df_comp)

## Detailed per Parser Comparison of Grammars

In [None]:
from IPython.display import display, Markdown

# Hard-coded reference coverages (in percent) drawn as horizontal lines by
# compareGrammarResults: "ex" = existing test files, "wpt" = WPT tests.
# Firefox reference values:
ffexcov=74.4 # link to reports or extract from reports
ffwptcov=84.4
# Chromium reference values:
chrexcov=83.65
chrwptcov=64.29


def compareGrammarResults(parser, df1, newcolumn1, df2, newcolumn2):
    """Compare the coverage one parser reached with two result tables.

    Displays a heading, a dot plot of both ``"<parser>-cov"`` series, summary
    statistics and the maximum coverage of each series. For the parsers
    ``'firefox'`` and ``'chromium'`` the hard-coded reference coverages are
    added as dotted horizontal lines and listed below the plot.

    Args:
        parser: Parser name; ``"<parser>-cov"`` must be a column of both tables.
        df1: First result table (its report links point below ``./ls/``).
        newcolumn1: Column label for the series taken from *df1*.
        df2: Second result table (its report links point below ``./rfc/``).
        newcolumn2: Column label for the series taken from *df2*.

    Returns:
        DataFrame holding the two coverage series under the given labels.
    """
    heading = parser.capitalize()
    display(Markdown("### " + heading))
    fig, axs = plt.subplots(figsize=(40, 20))
    plt.rcParams['font.size'] = '40'

    cov_column = parser + "-cov"
    new_df = pd.DataFrame()
    new_df[newcolumn1] = df1[cov_column]
    new_df[newcolumn2] = df2[cov_column]

    plot = new_df.plot(title=heading + " Coverages\n", ylim=(0, 100), ax=axs,
                       style=".", color=colors_comp, ms=ms)
    plot.grid(True)

    axs.set_ylabel("Coverages", fontsize=40)
    axs.set_xlabel("Run", fontsize=40)

    # Only the two browsers have reference coverages.
    # TODO: add coverage reports for wpt and existing tests
    if parser == 'firefox':
        baselines = (ffexcov, ffwptcov)
    elif parser == 'chromium':
        baselines = (chrexcov, chrwptcov)
    else:
        baselines = None

    otherTests = ""
    if baselines is not None:
        existing_cov, wpt_cov = baselines
        plot.axhline(y=existing_cov, color='black', linestyle=':', lw=4, label='Existing Test Files')
        plot.axhline(y=wpt_cov, color='y', linestyle=':', lw=4, label='WPT tests')
        otherTests = ("Existing Test Files Coverage: " + str(existing_cov) + "%"
                      + "\n\nWeb platform tests Coverage: " + str(wpt_cov) + "%")

    plot.legend(loc='best', markerscale=2.)
    plt.show()
    display(new_df.describe())

    # One summary line per series, linking to the report folder of its grammar.
    for column, folder in ((newcolumn1, "ls"), (newcolumn2, "rfc")):
        display(Markdown(column+" Max Coverage: "+str(new_df[column].max())+"% \
                     reached in run "+str(new_df[column].idxmax())+" [full max run results](./"+folder+"/"+parser+"/resultoverview.html)"))
    display(Markdown(otherTests))

    return new_df



In [None]:
# Render the Living Standard vs. RFC coverage comparison for every parser and
# keep each returned comparison frame, keyed by parser name.
dfs={}

for parser in parsers:
    dfs[parser]=compareGrammarResults(parser, ls_df, "Living Standard", rfc_df, "RFC")

