In [1]:
%matplotlib notebook
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
FILE_NAME = '../tests/data/stats.csv'

### Create a DataFrame from the stats csv file

In [3]:
df = pd.read_csv(FILE_NAME)

### Peeks into the data frame

In [4]:
df.info

<bound method DataFrame.info of       test_case  branching_factor              method_name      time
0          text                 2  frequency_table_to_heap  0.000249
1          text                 2             heap_to_tree  0.000763
2          text                 2                 _heapify  0.000176
3          text                 2  frequency_table_to_heap  0.000200
4          text                 2             heap_to_tree  0.000661
5          text                 2                 _heapify  0.000108
6          text                 2  frequency_table_to_heap  0.000237
7          text                 2             heap_to_tree  0.000819
8          text                 2                 _heapify  0.000120
9          text                 2  frequency_table_to_heap  0.000107
10         text                 2             heap_to_tree  0.000481
11         text                 2                 _heapify  0.000076
12         text                 2  frequency_table_to_heap  0.000109
13

### Retrieve all test cases

In [5]:
test_cases = df['test_case'].unique()
print(test_cases)

['text' 'image']


## Plot Stats per method, per test case

In [6]:
def plot_test_case_stats(df: pd.DataFrame, test_case: str):
    df_test_case = df[df['test_case']==test_case]
    method_names = df_test_case['method_name'].unique()
    
    for method_name in method_names:
        plot_method_stats(df_test_case, method_name)

In [7]:
def plot_method_stats(df: pd.DataFrame, method_name: str, time_field: str = 'total_time'):
    df_method: pd.DataFrame = df[df['method_name']==method_name]
    
    branching_factors = df_method['branching_factor'].unique()
    data = [df_method[df_method['branching_factor']==b][time_field] for b in branching_factors]
    
    fig, axe = plt.subplots()

    axe.set_title(method_name, fontsize=16)
    axe.set_xlabel('branching factor')
    axe.set_ylabel('running time')

    plt.boxplot(data, labels=branching_factors, showfliers=False)
    plt.show()

### Text files

In [8]:
plot_test_case_stats(df, 'text')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Bitmaps

In [9]:
plot_test_case_stats(df, 'image')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>