# Levenshtein Compilation Perf Comparison

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
# Apply seaborn's "whitegrid" style to the plots drawn in this file.
sns.set(style="whitegrid")

# Two bare perf CSV names; nothing else in the visible part of this file
# refers to them.
file_0, file_1 = 'perf_stats0.csv', 'perf_stats1.csv'

# Directory that the analysis code joins onto every CSV name below.
DATA_DIR = 'evaluations'

# Naming scheme: '<metric>_stats<N>_<first input>_<second input>.csv'.
# N = 0 files are plotted with the '-O0' label and N = 1 files with '-O1'.
# Each paired assignment binds the N = 0 name first, then the N = 1 name.

# Time
time_stats0_long1_long1, time_stats1_long1_long1 = (
    'time_stats0_long1.txt_long1.txt.csv',
    'time_stats1_long1.txt_long1.txt.csv',
)
time_stats0_long1_long2, time_stats1_long1_long2 = (
    'time_stats0_long1.txt_long2.txt.csv',
    'time_stats1_long1.txt_long2.txt.csv',
)
time_stats0_long1_short, time_stats1_long1_short = (
    'time_stats0_long1.txt_short.txt.csv',
    'time_stats1_long1.txt_short.txt.csv',
)

# The two lists are index-aligned: position i refers to the same input pair.
time_stats_0_list = [
    time_stats0_long1_long1,
    time_stats0_long1_long2,
    time_stats0_long1_short,
]
time_stats_1_list = [
    time_stats1_long1_long1,
    time_stats1_long1_long2,
    time_stats1_long1_short,
]


# Perf
perf_stats0_long1_long1, perf_stats1_long1_long1 = (
    'perf_stats0_long1.txt_long1.txt.csv',
    'perf_stats1_long1.txt_long1.txt.csv',
)
perf_stats0_long1_long2, perf_stats1_long1_long2 = (
    'perf_stats0_long1.txt_long2.txt.csv',
    'perf_stats1_long1.txt_long2.txt.csv',
)
perf_stats0_long1_short, perf_stats1_long1_short = (
    'perf_stats0_long1.txt_short.txt.csv',
    'perf_stats1_long1.txt_short.txt.csv',
)

# Index-aligned in the same way as the timing lists above.
perf_stats_0_list = [
    perf_stats0_long1_long1,
    perf_stats0_long1_long2,
    perf_stats0_long1_short,
]
perf_stats_1_list = [
    perf_stats1_long1_long1,
    perf_stats1_long1_long2,
    perf_stats1_long1_short,
]

# Time Comparison

In [None]:
# Titles for the timing files. A title names only the input pair, so the O0
# and O1 files of a pair share it; the O0/O1 distinction is carried by the
# print prefixes and plot legend labels instead. (An earlier variant used
# per-file titles such as 'O0 Long1 Long1' / 'O1 Long1 Long1'.)
file_title_mapping = {
    stats_file: title
    for title, stats_files in (
        ('Long-Long (Identical)',
         (time_stats0_long1_long1, time_stats1_long1_long1)),
        ('Long-Long (Different)',
         (time_stats0_long1_long2, time_stats1_long1_long2)),
        ('Long-Short (Identical Prefix)',
         (time_stats0_long1_short, time_stats1_long1_short)),
    )
    for stats_file in stats_files
}

# Print the mean CPU time of the O0 and O1 runs for every input pair.
# Entry i of each list refers to the same input pair.
for time0, time1 in zip(time_stats_0_list, time_stats_1_list):
    # Load both CSVs of the pair from DATA_DIR (O0 file first, then O1).
    data0, data1 = (
        pd.read_csv(os.path.join(DATA_DIR, csv_name))
        for csv_name in (time0, time1)
    )

    # Both files of a pair map to the same title, so the O0 key suffices.
    print(f"Time: {file_title_mapping[time0]}")
    # O1 is printed before O0.
    print(f"O1 Time: {data1['CPU_Time_Used'].mean()}")
    print(f"O0 Time: {data0['CPU_Time_Used'].mean()}")

# One figure per input pair: the O0 and O1 CPU-time histograms overlaid.
for time0, time1 in zip(time_stats_0_list, time_stats_1_list):
    data0, data1 = (
        pd.read_csv(os.path.join(DATA_DIR, csv_name))
        for csv_name in (time0, time1)
    )

    # Plot Time
    plt.figure(figsize=(12, 12))
    # Identical binning and styling for both runs; only the colour and the
    # legend label differ.
    for run_data, run_color, run_label in (
        (data0, 'blue', '-O0'),
        (data1, 'green', '-O1'),
    ):
        sns.histplot(
            run_data['CPU_Time_Used'],
            bins=100,
            kde=True,
            color=run_color,
            label=run_label,
            alpha=0.5,
        )
    plt.xlabel('Time')
    plt.ylabel('Frequency')
    plt.title(f'Distribution of Time: {file_title_mapping[time0]}')
    plt.legend()
    plt.show()

# Perf Comparison

In [None]:
# Titles for the perf files; this rebinds file_title_mapping, replacing the
# timing version. The O0 and O1 files of an input pair share one title.
file_title_mapping = {
    stats_file: title
    for title, stats_files in (
        ('Long-Long (Identical)',
         (perf_stats0_long1_long1, perf_stats1_long1_long1)),
        ('Long-Long (Different)',
         (perf_stats0_long1_long2, perf_stats1_long1_long2)),
        ('Long-Short (Identical Prefix)',
         (perf_stats0_long1_short, perf_stats1_long1_short)),
    )
    for stats_file in stats_files
}
# Print the mean of every perf counter for the O0 and O1 runs of each pair.
for perf0, perf1 in zip(perf_stats_0_list, perf_stats_1_list):
    # Load both CSVs of the pair from DATA_DIR (O0 file first, then O1).
    data0, data1 = (
        pd.read_csv(os.path.join(DATA_DIR, csv_name))
        for csv_name in (perf0, perf1)
    )

    # Perf Evaluations
    print(f"Perf: {file_title_mapping[perf0]}")

    # Derive the per-row miss rate, in percent, on both frames.
    for perf_frame in (data0, data1):
        perf_frame['Branch_Misses_%'] = (
            perf_frame['Branch_Misses'] / perf_frame['Branches'] * 100
        )

    print(f"O0 Branch Misses: {data0['Branch_Misses'].mean()}")
    print(f"O1 Branch Misses: {data1['Branch_Misses'].mean()}")

    print(f"O0 Branches: {data0['Branches'].mean()}")
    print(f"O1 Branches: {data1['Branches'].mean()}")

    # Mean of the per-row percentages, not total misses over total branches.
    print(f"O0 Branch Misses %: {data0['Branch_Misses_%'].mean()}")
    print(f"O1 Branch Misses %: {data1['Branch_Misses_%'].mean()}")

    print(f"O0 Instructions: {data0['Instructions'].mean()}")
    print(f"O1 Instructions: {data1['Instructions'].mean()}")
    # Blank separator between input pairs.
    print('\n')


# One figure per input pair: the O0 and O1 'Branches' histograms overlaid.
# Only the 'Branches' column is plotted here, so no derived columns are
# computed in this loop.
for perf0, perf1 in zip(perf_stats_0_list, perf_stats_1_list):
    data0 = pd.read_csv(os.path.join(DATA_DIR, perf0))
    data1 = pd.read_csv(os.path.join(DATA_DIR, perf1))

    # Plot Branches
    plt.figure(figsize=(12, 12))
    sns.histplot(data0['Branches'], bins=100, kde=True, color='blue', label='-O0', alpha=0.5)
    sns.histplot(data1['Branches'], bins=100, kde=True, color='green', label='-O1', alpha=0.5)
    # Both files of a pair share one title, so the O0 key suffices.
    plt.title(f'Distribution of Branches: {file_title_mapping[perf0]}')
    plt.xlabel('Branches')
    plt.ylabel('Frequency')
    plt.legend()
    # Optional x-axis zoom, currently disabled:
    # plt.xlim(3e5, 4e5)
    plt.show()

# One figure per input pair: the O0 and O1 'Branch_Misses' histograms overlaid.
# Only the raw 'Branch_Misses' column is plotted here, so the percentage
# column is not derived in this loop.
for perf0, perf1 in zip(perf_stats_0_list, perf_stats_1_list):
    data0 = pd.read_csv(os.path.join(DATA_DIR, perf0))
    data1 = pd.read_csv(os.path.join(DATA_DIR, perf1))

    # Plot Branch Misses
    plt.figure(figsize=(12, 12))
    sns.histplot(data0['Branch_Misses'], bins=100, kde=True, color='blue', label='-O0', alpha=0.5)
    sns.histplot(data1['Branch_Misses'], bins=100, kde=True, color='green', label='-O1', alpha=0.5)
    # Both files of a pair share one title, so the O0 key suffices.
    plt.title(f'Distribution of Branch Misses: {file_title_mapping[perf0]}')
    plt.xlabel('Branch Misses')
    plt.ylabel('Frequency')
    plt.legend()
    # Optional x-axis zoom, currently disabled:
    # plt.xlim(5000,10000)
    # Display each figure inside the loop, as the other plotting loops do
    # (a bare `plt` expression does nothing).
    plt.show()

# One figure per input pair: the O0 and O1 branch-miss-rate histograms overlaid.
for perf0, perf1 in zip(perf_stats_0_list, perf_stats_1_list):
    data0, data1 = (
        pd.read_csv(os.path.join(DATA_DIR, csv_name))
        for csv_name in (perf0, perf1)
    )

    # Perf Evaluations
    # Derive the per-row miss rate, in percent, on both frames.
    for perf_frame in (data0, data1):
        perf_frame['Branch_Misses_%'] = (
            perf_frame['Branch_Misses'] / perf_frame['Branches'] * 100
        )

    # Plot Branch Misses %
    plt.figure(figsize=(12, 12))
    # Identical binning and styling for both runs; only the colour and the
    # legend label differ.
    for run_data, run_color, run_label in (
        (data0, 'blue', '-O0'),
        (data1, 'green', '-O1'),
    ):
        sns.histplot(
            run_data['Branch_Misses_%'],
            bins=100,
            kde=True,
            color=run_color,
            label=run_label,
            alpha=0.5,
        )
    plt.xlabel('Branch Misses %')
    plt.ylabel('Frequency')
    plt.title(f'Distribution of Branch Misses %: {file_title_mapping[perf0]}')
    plt.legend()
    # Fixed x-range, so every pair's figure uses the same horizontal scale.
    plt.xlim(0,3.6)
    plt.show()