In [1]:
import os
import sys
import warnings
from typing import Any, Dict, List, Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from pytablewriter import MarkdownTableWriter

warnings.filterwarnings("ignore", category=FutureWarning) 

%config Completer.use_jedi = False



KeyboardInterrupt



In [None]:

# Global look of every figure in this notebook: white background without grid,
# "paper" context with enlarged fonts, and thick black axes.
sns.set_style("whitegrid", rc={'axes.grid': False})
sns.set_context(context="paper", font_scale=1.5)

axes_overrides = {
    'axes.edgecolor': 'black',
    'axes.linewidth': 2,
    'axes.grid': False,
    'grid.linestyle': '--',
}
plt.rcParams.update(axes_overrides)

# Project colour cycle; palplot renders a preview strip of the swatches.
colors = ['#7a7ad8', '#6fcb9f', '#ffc663', '#ff7863', '#93d0fc']
sns.palplot(colors)
sns.set_palette(sns.color_palette(colors), n_colors=8, desat=.75)

# Font settings intended for sub-figure titles.
sub_figure_title = dict(fontweight=700, fontname='Arial', fontsize=18)
#plt.tight_layout()


In [None]:
# Fuzzer configurations to evaluate, named "<generator>-<guidance>".
# "afl" is listed as well (the previous list was the same without it).
ALGORITHM = [
    "semantic-zest",
    "syntactic-zest",
    "semantic-noguidance",
    "syntactic-noguidance",
    "afl",
]

# Number of repetitions per configuration; run ids are 0 .. RANGE-1.
RANGE = 20


In [None]:
# Experiment folder and the directory that holds its per-run result folders.
# NOTE(review): absolute, machine-specific Windows path; an earlier setup used
# "/home/alena/source/growe/exps8" instead.
folder = "expfinal3"
base_path = "c:\\Users\\Alena\\source\\repos\\growe\\" + folder + "\\"


In [None]:

def process_cov_data(path: str) -> List[str]:
    """Return every line of the text file at ``path``, line endings included."""
    with open(path) as cov_file:
        lines = list(cov_file)
    return lines

def add_rolling_mean(data: pd.DataFrame, rolling: int = 10) -> pd.DataFrame:
    """Add rolling-mean columns to ``data`` in place and return the same frame.

    Added columns:
      * ``valid_inputs_percent``: ``valid_inputs / total_inputs``
      * ``rolling_<n>_<column>`` for ``valid_covered_probes``,
        ``all_covered_probes`` and ``total_inputs``: mean over a window of
        ``rolling`` rows, following the current row order
      * ``rolling_algorithm``: the algorithm name suffixed with
        ``_rollingmean_<n>``

    Args:
        data: frame holding the columns read above plus ``algorithm``.
        rolling: window size in rows.
    """
    data['valid_inputs_percent'] = data['valid_inputs'] / data['total_inputs']

    smoothed_columns = ('valid_covered_probes', 'all_covered_probes', 'total_inputs')
    for column in smoothed_columns:
        window = data[column].rolling(rolling)
        data[f'rolling_{rolling}_{column}'] = window.mean()

    label_suffix = f"_rollingmean_{rolling}"
    data['rolling_algorithm'] = data['algorithm'].astype(str) + label_suffix

    return data

In [None]:
def read_plot_data(path: str, algorithm: str, idx: int) -> Optional[pd.DataFrame]:
    """Load the ``plot_data`` file of one fuzzing run and normalise it.

    The returned frame has time made relative to the first row, a
    ``total_inputs`` and ``valid_inputs_percent`` column, at most one row per
    timestamp, and the labels ``algorithm``, ``generator``, ``guidance`` and
    ``run``.

    Args:
        path: directory that contains the ``plot_data`` file.
        algorithm: configuration name, normally ``"<generator>-<guidance>"``.
        idx: run number, stored in the ``run`` column.

    Returns:
        The normalised frame, or ``None`` when the file holds no rows.
    """
    ps = os.path.join(path, 'plot_data')
    time_axis = "# unix_time"
    try:
        if algorithm == "afl":
            # NOTE(review): p2f is not defined anywhere in the visible notebook;
            # this branch raises NameError unless it is provided elsewhere.
            data = pd.read_csv(ps, sep=",", skipinitialspace=True,
                               converters={"valid_cov": p2f, "map_size": p2f})
        else:
            data = pd.read_csv(ps, sep=",", skipinitialspace=True)
    except pd.errors.EmptyDataError:
        # Zero-byte file (not even a header): treat like a run without data.
        return None

    if data.empty:
        return None

    # Time relative to the first logged row.
    data[time_axis] -= data[time_axis].iloc[0]
    data['total_inputs'] = data['valid_inputs'] + data['invalid_inputs']
    data['valid_inputs_percent'] = data['valid_inputs'] / data['total_inputs']

    # Keep only the first row logged for each timestamp.
    algo_data = data.drop_duplicates(keep='first', subset=[time_axis]).copy()

    algo_data['algorithm'] = algorithm
    # Split on the first dash only. Names without a dash (e.g. "afl") get no
    # guidance instead of failing on a column-count mismatch.
    generator, _, guidance = algorithm.partition('-')
    algo_data['generator'] = generator
    algo_data['guidance'] = guidance if guidance else None

    algo_data['run'] = idx

    return algo_data


## READ ORIGINAL DATA

In [None]:
# READ ORIGINAL DATA

# One normalised dataframe per run, ordered by algorithm and then run id.
original_plot_data = []

for algorithm in ALGORITHM:
    is_afl = algorithm == "afl"
    # AFL runs keep their results in a different sub-directory.
    result_subdir = "fuzz-results" if is_afl else "errorDir"

    for idx in range(RANGE):
        path = os.path.join(base_path, f"nextflow-{algorithm}-{idx}", result_subdir)
        if not os.path.exists(path):
            continue

        # plot_data from a jqf afl run differs from the others, so it is not read.
        if is_afl or not os.path.exists(os.path.join(path, "plot_data")):
            continue

        original_data = read_plot_data(path, algorithm, idx)
        if original_data is not None:
            original_plot_data.append(original_data)



In [None]:

def read_exception_log(path: str, algorithm: str) -> Optional[pd.DataFrame]:
    """Load ``exception_log.csv`` of one run and tag it with the algorithm.

    The file is read as ';'-separated without a header; columns are named
    ``exception``, ``unique``, ``count``, ``file`` and ``empty``.

    Args:
        path: directory that contains ``exception_log.csv``.
        algorithm: configuration name, stored in the ``algorithm`` column.

    Returns:
        The tagged frame, or ``None`` when there is nothing to read: AFL runs
        (no log is parsed for them) and empty files.
    """
    if algorithm == "afl":
        # Previously this path fell through to an unbound `data` variable.
        return None

    ps = os.path.join(path, 'exception_log.csv')
    columns = ["exception", "unique", "count", "file", "empty"]
    try:
        data = pd.read_csv(ps, sep=";", header=None, names=columns)
    except pd.errors.EmptyDataError:
        # Zero-byte log: the run recorded no exceptions.
        return None

    if data.empty:
        return None

    data['algorithm'] = algorithm

    return data


In [None]:
# READ Exceptions

exceptions_data = []
exceptions_per_algo = []  # one frame per run that has a non-empty exception log

for algorithm in ALGORITHM:
    for idx in range(RANGE):
        path = os.path.join(base_path, f"nextflow-{algorithm}-{idx}", "errorDir")
        has_log = os.path.exists(path) and os.path.exists(os.path.join(path, "exception_log.csv"))
        # AFL runs are never read here.
        if algorithm == "afl" or not has_log:
            continue

        exc = read_exception_log(path, algorithm)
        if exc is not None:
            exc['run'] = idx
            exceptions_per_algo.append(exc)

# Row labels of the individual logs are kept (no ignore_index).
exceptions_data = pd.concat(exceptions_per_algo)


In [None]:
# Exceptions whose name contains "Script", narrowed to the semantic-zest runs.
pd.set_option('display.max_rows', 30)

script_mask = exceptions_data['exception'].str.contains('Script')
a = exceptions_data[script_mask]
b = a[a['algorithm'].eq('semantic-zest')]
b

## interpolated data

In [None]:
# interpolated data
#
# Every run is put on a common grid (time and input count) so that seaborn can
# aggregate across runs at identical x positions.

reindexsteps = 5     # grid step for the time axis; the input-count axis uses 8x this
interpolate = True
time_range = 3600    # exclusive upper end of the time grid


def _resample_run(run_frame, axis_column, step, algorithm_name, stop=None):
    """Deduplicate one run on ``axis_column`` and, if enabled, regrid it.

    The grid is ``range(1, stop, step)``; ``stop`` defaults to the largest
    value of ``axis_column`` in the run.
    """
    resampled = run_frame.copy().drop_duplicates(keep='first', subset=[axis_column])
    if interpolate:
        if stop is None:
            stop = resampled[axis_column].max()
        # NOTE(review): reindex keeps only rows that already sit on a grid
        # value; interpolation then fills between those rows - confirm that
        # this is the intended resampling.
        resampled = (resampled.set_index(axis_column)
                              .reindex(range(1, stop, step))
                              .interpolate()
                              .reset_index())
        # resetting algo because it was overwritten by interpolation
        resampled['algorithm'] = [algorithm_name] * resampled.shape[0]
        resampled[['generator','guidance']] = resampled['algorithm'].str.split('-', expand=True)
    resampled['valid_inputs_percent'] = resampled['valid_inputs'] / resampled['total_inputs']
    return resampled


time_based_plot_data = []   # resampled runs, x axis = elapsed time
count_based_plot_data = []  # resampled runs, x axis = number of inputs

for df in original_plot_data:
    algorithm = df['algorithm'][0]

    time_based_data = _resample_run(df, "# unix_time", reindexsteps, algorithm, stop=time_range)
    time_based_plot_data.append(time_based_data)

    count_based_data = _resample_run(df, "total_inputs", reindexsteps*8, algorithm)
    count_based_plot_data.append(count_based_data)

# number of runs processed above
first_idx = len(original_plot_data)

time_based_plot_data = pd.concat(time_based_plot_data, ignore_index=True, sort=False)
count_based_plot_data = pd.concat(count_based_plot_data, ignore_index=True, sort=False)
 

In [None]:
# Preview of the resampled time-based frame (the notebook renders the last expression).
time_based_plot_data


## rolling mean data

In [None]:
# rolling mean data

ROLLING_MEAN = 50  # window size in rows

# All runs of all algorithms in one frame.
complete_data_frame = pd.concat(original_plot_data, ignore_index=True, sort=False)

rolling_time_based_plot_data = []   # rolling mean over all runs, ordered by time
rolling_count_based_plot_data = []  # rolling mean over all runs, ordered by input count

for algorithm in ALGORITHM:
    data_per_algo = complete_data_frame[complete_data_frame['algorithm'] == algorithm]
    if len(data_per_algo) == 0:
        continue

    # The window runs over the rows of all runs of this algorithm, in the
    # order given by the respective sort column.
    for sort_column, collected in (('# unix_time', rolling_time_based_plot_data),
                                   ('total_inputs', rolling_count_based_plot_data)):
        ordered = data_per_algo.sort_values(by=[sort_column])
        collected.append(add_rolling_mean(ordered, ROLLING_MEAN))

rolling_time_based_plot_data = pd.concat(rolling_time_based_plot_data, ignore_index=True, sort=True)
rolling_count_based_plot_data = pd.concat(rolling_count_based_plot_data, ignore_index=True, sort=True)



In [None]:
# Preview of the concatenated, non-resampled data of all runs.
complete_data_frame


In [None]:
def generate_plot_data_base(data: pd.DataFrame, rolling_data:pd.DataFrame, x_axis: str, y_axis: str, path: str, errorbarname: str = 'se'):
    """Draw one line per algorithm for ``y_axis`` over ``x_axis`` and save it.

    The figure title is the file name of ``path`` without its extension, with
    underscores replaced by spaces.

    Args:
        data: frame with the columns ``x_axis``, ``y_axis`` and ``algorithm``.
        rolling_data: accepted for compatibility; the rolling-mean overlay that
            used it is disabled, so it is currently not read.
        x_axis: column plotted on the x axis (also the axis label).
        y_axis: column plotted on the y axis (also the axis label).
        path: output file; the figure is saved there.
        errorbarname: ``'se'`` (standard error band) or ``'sd'`` (band of one
            standard deviation).

    Raises:
        ValueError: if ``errorbarname`` is neither ``'se'`` nor ``'sd'``.
    """
    # For "sd" seaborn reads a tuple's second element as a multiplier, so the
    # former ("sd", 95) meant +/-95 standard deviations; one sd is used instead.
    errorbar_specs = {'se': 'se', 'sd': 'sd'}
    if errorbarname not in errorbar_specs:
        raise ValueError(
            f"unknown errorbarname {errorbarname!r}; expected one of {sorted(errorbar_specs)}")

    print(x_axis, y_axis)
    fig, ax1 = plt.subplots(figsize=(8,6))

    lineplot1 = sns.lineplot(x=x_axis, y=y_axis, hue='algorithm',
                             errorbar=errorbar_specs[errorbarname],
                             hue_order=sorted(data['algorithm'].unique()),
                             data=data, ax=ax1)

    ax1.set_xlabel(x_axis)
    ax1.set_ylabel(y_axis)

    # File stem as title; works for any separator the OS accepts and any extension.
    title = os.path.splitext(os.path.basename(path))[0].replace("_", " ")
    lineplot1.set(title=title)

    fig.show()
    fig.savefig(path)


In [None]:
# Error-bar style for the optional plots and the output folder for all PDFs.
errorbrar = 'se'
pdf_dir = f'pdfs_{folder}/'

# Create the output folder next to the notebook on first use.
output_dir = os.path.join(".", pdf_dir)
if not os.path.exists(output_dir):
    os.mkdir(output_dir)

    

In [None]:
# Valid coverage over time, one line per algorithm, standard-error band.
generate_plot_data_base(
    data=time_based_plot_data,
    rolling_data=rolling_time_based_plot_data,
    x_axis="# unix_time",
    y_axis="valid_covered_probes",
    path=f"{pdf_dir}valid_cov_over_time.pdf",
    errorbarname='se',
)


In [None]:
# Two panels over elapsed time: generated inputs (left) and execution speed (right).
fig, ax1 = plt.subplots(1, 2, figsize=(16, 5))
x = '# unix_time'
algorithm_order = sorted(time_based_plot_data['algorithm'].unique())

# (y column, panel title, legend position) from left to right
panels = (
    ('total_inputs', "Inputs over time", 'upper left'),
    ('execs_per_sec', "Execution speed over time", 'upper right'),
)
for axis, (y_column, title, legend_loc) in zip(ax1, panels):
    panel = sns.lineplot(x=x, y=y_column, hue='algorithm', errorbar="se",
                         hue_order=algorithm_order, data=time_based_plot_data, ax=axis)
    panel.set_title(title)
    panel.legend(loc=legend_loc)

path = f"{pdf_dir}/inputs_over_time.pdf"
fig.savefig(path)


In [None]:
# Valid coverage against elapsed time (left) and against number of inputs (right).
fig, ax1 = plt.subplots(1, 2, figsize=(16, 5))
x = '# unix_time'
x2 = 'total_inputs'

# (x column, source frame, panel title) from left to right
panels = (
    (x, time_based_plot_data, "valid covered branches over time"),
    (x2, count_based_plot_data, "valid covered branches over inputs"),
)
for axis, (x_column, frame, title) in zip(ax1, panels):
    panel = sns.lineplot(x=x_column, y='valid_covered_probes', hue='algorithm',
                         errorbar="se", hue_order=sorted(frame['algorithm'].unique()),
                         data=frame, ax=axis)
    panel.set_title(title)
    panel.legend(loc='lower right')

path = f"{pdf_dir}/covered_probes_time_and_inputs.pdf"
fig.savefig(path)


In [None]:
#generate_plot_data_base(time_based_plot_data, rolling_time_based_plot_data, "# unix_time", "valid_inputs" , f"{pdf_dir}valid_inputs_over_time.pdf", errorbrar)

#generate_plot_data_base(time_based_plot_data, rolling_time_based_plot_data, "# unix_time", "invalid_inputs" , f"{pdf_dir}invalid_inputs_over_time.pdf", errorbrar)
#generate_plot_data_base(time_based_plot_data, rolling_time_based_plot_data, "# unix_time", "all_covered_probes", f"{pdf_dir}all_cov_over_time.pdf", errorbrar)

#generate_plot_data_base(time_based_plot_data, rolling_time_based_plot_data, "# unix_time", "total_inputs" , f"{pdf_dir}inputs_over_time.pdf", errorbrar)
#generate_plot_data_base(time_based_plot_data, rolling_time_based_plot_data, "# unix_time", "valid_inputs", f"{pdf_dir}all_cov_over_time.pdf", errorbrar)

#generate_plot_data_base(time_based_plot_data, rolling_time_based_plot_data, "# unix_time", "unique_crashes", f"{pdf_dir}crashes_over_time.pdf", errorbrar)


In [None]:
#sns.pairplot(rolling_time_based_plot_data)

In [None]:
# Share of valid inputs over time, one line per algorithm.
l, axes = plt.subplots(figsize=(8, 6))
algorithm_order = sorted(time_based_plot_data['algorithm'].unique())
plot = sns.lineplot(data=time_based_plot_data,
                    x='# unix_time',
                    y='valid_inputs_percent',
                    hue='algorithm',
                    hue_order=algorithm_order,
                    errorbar='se',
                    ax=axes)
# Disabled second panel (would need a two-axes figure):
# sns.lineplot(rolling_time_based_plot_data,
#              y='valid_inputs_percent',
#              x='# unix_time',
#              hue='algorithm',
#              errorbar='se',
#              hue_order=ALGORITHM,
#              ax=axes[1])
plot.get_figure().savefig(f"{pdf_dir}/valid_inputs_percent_over_time.pdf")


In [None]:
# Preview rows 10..99 (by position) of the collected exception logs.
exceptions_data[10:100]


In [None]:
# Number of logged rows per (exception, algorithm) pair. The former
# per-algorithm loop only built a filtered frame that was never used.
exceptions_count = exceptions_data.groupby(['exception','algorithm'])['algorithm'].count().to_frame()
print(exceptions_count)

# Same counts with the (exception, algorithm) pairs as columns.
exceptions_count.T

In [None]:
# NOTE(review): `unique` is computed but the plot below uses all logged rows -
# confirm which of the two is meant to be shown.
unique = exceptions_data[exceptions_data['unique'] == True]

# Grouped bars: how often each exception was logged, per algorithm.
plot = sns.displot(data=exceptions_data, x='exception', hue='algorithm',
                   multiple="dodge", height=4, aspect=1.5)
plot.tight_layout()

for ax in plot.axes.flat:
    # Slanted tick labels so long exception names stay readable.
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(40)
        tick_label.set_ha('right')
    # Write each bar's count slightly above the bar.
    for bar in ax.patches:
        bar_height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2,
                bar.get_y() + bar_height + 10,
                str(int(bar_height)),
                ha="center")

plot.savefig(f'{pdf_dir}/exceptions.pdf')

In [None]:
# Save whatever `plot` currently refers to (in run order: the exceptions
# figure of the previous cell) under a second file name.
plot.savefig((os.path.join( pdf_dir, "exceptions_logged.pdf" )))

In [None]:
# LaTeX table rows: inputs generated per algorithm, summed over all runs,
# with the valid / invalid split as absolute numbers and percentages.
for algo in ALGORITHM:
    algodata = complete_data_frame[complete_data_frame['algorithm'] == algo]
    if algodata.empty:
        continue

    # Final (maximum) counter values of every run that was actually loaded.
    # Grouping skips missing run ids; iterating over range(RANGE) turned a
    # single missing run into NaN totals.
    per_run_max = algodata.groupby('run')[['valid_inputs', 'invalid_inputs', 'total_inputs']].max()
    val1 = per_run_max['valid_inputs'].sum()
    val2 = per_run_max['invalid_inputs'].sum()
    vt = per_run_max['total_inputs'].sum()

    val3 = (val1 / vt) * 100
    val4 = (val2 / vt) * 100
    # Escaped backslashes: "\%" was an invalid escape sequence, and the row
    # now ends with the booktabs command \addlinespace.
    print(f" {algo} & {vt} & {val1} ({val3:.2f}\\%) & {val2} ({val4:.2f}\\%) \\\\ \\addlinespace")
    print("")

In [None]:
# Unique crashes over time, one line per run, one panel per algorithm (2x2 grid).
l, axes = plt.subplots(2, 2, figsize=(10,10), sharey=False)
cols = sns.color_palette("husl", 20)

# algorithm -> (row, column) of its panel
panel_layout = (
    ('semantic-zest', (0, 0)),
    ('semantic-noguidance', (0, 1)),
    ('syntactic-zest', (1, 0)),
    ('syntactic-noguidance', (1, 1)),
)
for algo, (grid_row, grid_col) in panel_layout:
    unique = rolling_time_based_plot_data[rolling_time_based_plot_data['algorithm'] == algo]
    if unique.empty:
        continue
    plot = sns.lineplot(data=unique,
                        x='# unix_time',
                        y='unique_crashes',
                        hue='run',
                        hue_order=sorted(unique['run'].unique()),
                        ax=axes[grid_row, grid_col],
                        palette=cols)
    plot.get_legend().set_visible(False)
    plot.set_title(algo)



In [None]:
# Save the figure currently bound to `l` (in run order: the 2x2 crash plots).
l.savefig(os.path.join( pdf_dir, "unique_crashes_per_run.pdf" ))

In [None]:
# Valid coverage over time, one line per run, one panel per algorithm.
l, axes = plt.subplots(1, 4, figsize=(25,5), sharey=True)

# Panels follow the order of ALGORITHM. zip stops at the last panel, so an
# extra entry (ALGORITHM has five names, the figure four panels) cannot index
# past `axes`.
for ax, algo in zip(axes, ALGORITHM):
    unique = rolling_time_based_plot_data[rolling_time_based_plot_data['algorithm'] == algo]
    if unique.empty:
        continue
    runs = sorted(unique['run'].unique())
    plot = sns.lineplot(data=unique,
                        x='# unix_time',
                        y='valid_covered_probes',
                        hue='run',
                        hue_order=runs,
                        ax=ax,
                        # one distinct colour per run; the default "husl"
                        # palette has only six and would repeat
                        palette=sns.color_palette("husl", len(runs)))
    plot.set_title(algo)
    plot.get_legend().set_visible(False)

# sharey hides the tick labels of all but the first panel; show them everywhere.
for ax in l.axes:
    ax.tick_params(axis='y', labelleft=True)

l.savefig(os.path.join( pdf_dir, "cov_per_run.pdf" ))


In [None]:
# Valid coverage over number of inputs, one line per run, one panel per algorithm.
l, axes = plt.subplots(1, 4, figsize=(25,5), sharey=True)

# Panels follow the order of ALGORITHM. zip stops at the last panel, so an
# extra entry (ALGORITHM has five names, the figure four panels) cannot index
# past `axes`.
for ax, algo in zip(axes, ALGORITHM):
    unique = rolling_count_based_plot_data[rolling_count_based_plot_data['algorithm'] == algo]
    if unique.empty:
        continue
    runs = sorted(unique['run'].unique())
    plot = sns.lineplot(data=unique,
                        x='total_inputs',
                        y='valid_covered_probes',
                        hue='run',
                        hue_order=runs,
                        ax=ax,
                        # one distinct colour per run; the default "husl"
                        # palette has only six and would repeat
                        palette=sns.color_palette("husl", len(runs)))
    plot.set_title(algo)
    plot.get_legend().set_visible(False)

# sharey hides the tick labels of all but the first panel; show them everywhere.
for ax in l.axes:
    ax.tick_params(axis='y', labelleft=True)

l.savefig(os.path.join( pdf_dir, "cov_per_run_over_inputs.pdf" ))


In [None]:
# Per algorithm: best coverage of each run, then summary statistics of the
# coverage and of the number of valid inputs.
for algo in ALGORITHM:
    df = rolling_time_based_plot_data[rolling_time_based_plot_data['algorithm']==algo]
    # Filter the count-based frame with its own mask; it used to be indexed
    # with a mask built from the time-based frame.
    count_df = rolling_count_based_plot_data[rolling_count_based_plot_data['algorithm']==algo]

    print(df.groupby(['run'])['valid_covered_probes'].max())
    des1 = df['valid_covered_probes'].describe()
    des2 = count_df['valid_inputs'].describe()
    print(f"-----{algo}----probes")
    print(des1)
    print(f"-----{algo}----inputs")
    print(des2)



In [None]:
# Mann-Whitney U test: final share of valid inputs, semantic vs. syntactic generator.
# One observation per (algorithm, run). Grouping by (run, generator) merged the
# zest and noguidance runs that share a run id and kept only one of them.
df1 = (rolling_time_based_plot_data[rolling_time_based_plot_data['generator'].str.match('semantic')]
       .groupby(['algorithm', 'run'])['valid_inputs_percent'].last())
df2 = (rolling_time_based_plot_data[rolling_time_based_plot_data['generator'].str.match('syntactic')]
       .groupby(['algorithm', 'run'])['valid_inputs_percent'].last())

# two-sided, both one-sided alternatives, exact p-value, and scipy's defaults
for test_options in ({'alternative': 'two-sided'},
                     {'alternative': 'greater'},
                     {'alternative': 'less'},
                     {'method': 'exact'},
                     {}):
    print(stats.mannwhitneyu(x=df1, y=df2, **test_options))

print(df1.describe())
print(df2.describe())



In [None]:
# Highest valid-input share per (run, algorithm) for the algorithms whose name starts with 'semantic'.
rolling_time_based_plot_data[rolling_time_based_plot_data['algorithm'].str.match('semantic')].groupby(['run','algorithm'])['valid_inputs_percent'].max()

In [None]:
# Mann-Whitney U test (scipy defaults) between the two unguided configurations,
# on the last logged value of each run, for two metrics.
d = complete_data_frame

semantic_runs = d[d['algorithm'].str.match('semantic-noguidance')].groupby(['run'])
syntactic_runs = d[d['algorithm'].str.match('syntactic-noguidance')].groupby(['run'])

for metric in ('valid_inputs_percent', 'valid_covered_probes'):
    df1 = semantic_runs[metric].last()
    df2 = syntactic_runs[metric].last()
    print(stats.mannwhitneyu(x=df1, y=df2))
    print(df1.describe())
    print(df2.describe())

In [None]:
# Effect of guidance on the final share of valid inputs, tested separately for
# each generator (zest vs. noguidance, last logged value per run).
d = complete_data_frame

for generator_name in ('semantic', 'syntactic'):
    guided = d[d['algorithm'].str.match(f'{generator_name}-zest')]
    unguided = d[d['algorithm'].str.match(f'{generator_name}-noguidance')]
    df1 = guided.groupby(['run'])['valid_inputs_percent'].last()
    df2 = unguided.groupby(['run'])['valid_inputs_percent'].last()
    print(stats.mannwhitneyu(x=df1, y=df2))
    print(df1.describe())
    print(df2.describe())


In [None]:

# Effect of guidance on the final valid coverage, tested separately for each
# generator (zest vs. noguidance, last logged value per run).
d = complete_data_frame

for generator_name in ('semantic', 'syntactic'):
    guided = d[d['algorithm'].str.match(f'{generator_name}-zest')]
    unguided = d[d['algorithm'].str.match(f'{generator_name}-noguidance')]
    df1 = guided.groupby(['run'])['valid_covered_probes'].last()
    df2 = unguided.groupby(['run'])['valid_covered_probes'].last()
    print(stats.mannwhitneyu(x=df1, y=df2))
    print(df1.describe())
    print(df2.describe())

In [None]:
# Per-algorithm subsets of the complete data:
#   a = semantic-noguidance, b = syntactic-noguidance,
#   c = semantic-zest,       d = syntactic-zest
a, b, c, d = (
    complete_data_frame[complete_data_frame['algorithm'].str.match(name)]
    for name in ('semantic-noguidance', 'syntactic-noguidance', 'semantic-zest', 'syntactic-zest')
)

# Spread of the valid-input share for the two unguided configurations:
# over all logged rows, then over the final value of each run.
for subset in (a, b):
    print(subset['valid_inputs_percent'].std())
    print(subset.groupby(['run'])['valid_inputs_percent'].last().describe())






In [None]:
# Standard deviation of the valid-input share over all rows of syntactic-noguidance.
# Note: `d` refers to the complete data frame again from here on.
d = complete_data_frame
is_syntactic_noguidance = d['algorithm'].str.match('syntactic-noguidance')
d[is_syntactic_noguidance]['valid_inputs_percent'].std()


In [None]:
def cv(x):
    """Coefficient of variation in percent: sample std (ddof=1) / mean * 100."""
    return np.std(x, ddof=1) / np.mean(x) * 100


# Variation of the final input count across runs, per algorithm. The subsets are
# rebuilt from the complete frame here: the names a/b/c/d of earlier cells are
# not reliable (`d` is rebound to the complete frame by a later cell).
for algorithm_name in ('semantic-zest', 'syntactic-zest', 'semantic-noguidance', 'syntactic-noguidance'):
    runs_of_algorithm = complete_data_frame[complete_data_frame['algorithm'] == algorithm_name]
    print(cv(runs_of_algorithm.groupby(['run'])['total_inputs'].last()))
