In [1]:
%matplotlib notebook
import pandas as pd
import matplotlib.pyplot as plt
import math

In [2]:
HUFFMAN_STATS = '../tests/data/stats_huffman.csv'
HEAP_STATS = '../tests/data/stats_heap.csv'
HEAP_MIXED_STATS = '../tests/data/stats_heap_mixed.csv'
HEAPIFY_STATS = '../tests/data/stats_heapify.csv'

## Plot Stats per method, per test case

In [3]:
def plot_test_case_stats(df: pd.DataFrame, test_case: str, time_field: str = 'cumulative_time'):
    df_test_case = df[df['test_case']==test_case]
    method_names = df_test_case['method_name'].unique()
    
    for method_name in method_names:
        plot_method_stats(df_test_case, method_name, time_field)

In [4]:
def plot_method_stats(df: pd.DataFrame, method_name: str, time_field: str):
    df_method: pd.DataFrame = df[df['method_name']==method_name]
    
    branching_factors = df_method['branching_factor'].unique()
    data = [df_method[df_method['branching_factor']==b][time_field] for b in branching_factors]
    
    fig, axe = plt.subplots()

    axe.set_title(method_name, fontsize=16)
    axe.set_xlabel('branching factor')
    axe.set_ylabel('running time')

    plt.boxplot(x=data, labels=branching_factors, showfliers=False)
    plt.show()

In [5]:
def plot_test_case_means(df: pd.DataFrame, test_case: str, time_field: str = 'cumulative_time'):
    df_test_case = df[df['test_case']==test_case]
    method_names = df_test_case['method_name'].unique()
    
    for method_name in method_names:
        plot_method_mean(df_test_case, method_name, time_field)

In [6]:
def plot_method_mean(df: pd.DataFrame, method_name: str, time_field: str):
    df_method: pd.DataFrame = df[df['method_name']==method_name]
    
    branching_factors = df_method['branching_factor'].unique()
    data = df_method.groupby('branching_factor').mean().reset_index()
    
    fig, axe = plt.subplots()

    axe.set_title(method_name, fontsize=16)
    axe.set_xlabel('branching factor')
    axe.set_ylabel('running time')

    plt.plot(branching_factors, data[time_field])
    plt.show()

### Create a DataFrame from the stats csv file

In [7]:
df = pd.read_csv(HUFFMAN_STATS)

### Peeks into the data frame

In [8]:
df.info

<bound method DataFrame.info of Empty DataFrame
Columns: [test_case, branching_factor, method_name, total_time, cumulative_time, per_call_time]
Index: []>

### Retrieve all test cases

In [9]:
test_cases = df['test_case'].unique()
print(test_cases)

['text' 'image']


### Text files

In [10]:
plot_test_case_stats(df, 'text', 'per_call_time')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [11]:
plot_test_case_means(df, 'text', 'per_call_time')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [12]:
plot_test_case_stats(df, 'text', 'cumulative_time')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

In [13]:
plot_test_case_means(df, 'text')

  import sys


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [14]:
plot_test_case_means(df, 'text', 'total_time')

  import sys


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Bitmaps

In [15]:
plot_test_case_stats(df, 'image', 'per_call_time')

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

In [16]:
plot_test_case_stats(df, 'image', 'cumulative_time')

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

## Heap

In [17]:
dfh = pd.read_csv(HEAP_STATS)

In [18]:
plot_test_case_stats(dfh, 'heap')

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

  import sys


<IPython.core.display.Javascript object>

In [19]:
plot_test_case_means(dfh, 'heap')

  import sys


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [20]:
dfh_mix = pd.read_csv(HEAP_MIXED_STATS)

In [21]:
plot_test_case_means(dfh_mix, 'heap')

  import sys


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [22]:
dfhy = pd.read_csv(HEAPIFY_STATS)

In [23]:
plot_test_case_means(dfhy, 'heap', 'per_call_time')

  import sys


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Heapify Number of Swaps

In [24]:
def heapify_swaps(n: int, D: int) -> float:
    return n / D * sum([math.ceil(h / (D**h)) for h in range(int(math.log(n, D)))])

In [25]:
heapify_swaps_data = [ ]

In [26]:
xs = range(2, 23)
fig, axe = plt.subplots()

axe.set_title('heapify swaps', fontsize=16)
axe.set_xlabel('branching factor')
axe.set_ylabel('swaps')
axe.set_yscale('log')

for n in [100, 1000, 10000, 100000]:
    plt.plot(xs, [heapify_swaps(n, D) for D in xs])
axe.legend()

plt.show(fig)

  


<IPython.core.display.Javascript object>

No handles with labels found to put in legend.
  del sys.path[0]
