In [16]:
import glob

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

import plotly.express as px
import plotly.graph_objects as go

In [17]:
# List the CSV files matching results/*.csv so the one to load can be chosen;
# nothing is read here, the matching paths are only displayed as the cell output.
glob.glob('results/*.csv')

['results/results_final.csv',
 'results/results.csv',
 'results/results_latest.csv']

In [24]:
# Column labels are supplied explicitly; because `names` is given, pandas
# treats every line of the file (including the first one) as a data row.
columns = [
    'Algorithm',
    'Construct',
    'Threads',
    'Size',
    'Cutoff',
    'Duration',
]
results = pd.read_csv('results/results_final.csv', header=None, names=columns)

### With cutoff (hybrid parallelism)

In [35]:
def prepare_runtime_frame(results, cutoff, max_size=1_000_000):
    """Build the tidy table of mean runtimes that the plots consume.

    Parameters
    ----------
    results : DataFrame
        Raw trials with the columns 'Algorithm', 'Construct', 'Threads',
        'Size', 'Cutoff' and 'Duration'.
    cutoff : int
        Only parallel runs recorded with this 'Cutoff' value are kept;
        serial runs are kept whatever their 'Cutoff' is.
    max_size : int
        Rows with a larger 'Size' are discarded.

    Returns
    -------
    DataFrame
        Columns 'Algorithm', 'Construct', 'Threads', 'Size', 'Duration',
        sorted by 'Threads' then 'Size', without duplicated rows.

    Raises
    ------
    AssertionError
        If the number of duplicated rows is not exactly the number of serial
        enumeration rows (see the comment above the assertion).
    """
    keys = ['Algorithm', 'Construct', 'Threads', 'Size', 'Cutoff']

    # take average over trials
    mean_runtime = results.groupby(keys)['Duration'].mean().to_frame('Duration')

    construct = mean_runtime.index.get_level_values('Construct')
    run_cutoff = mean_runtime.index.get_level_values('Cutoff')
    size = mean_runtime.index.get_level_values('Size')

    # keep every serial run, but only the parallel runs recorded with `cutoff`
    keep = ((construct == 'serial') | (run_cutoff == cutoff)) & (size <= max_size)
    runtimes = mean_runtime.loc[keep].reset_index('Cutoff', drop=True).reset_index()

    is_serial = runtimes['Construct'] == 'serial'
    serial = runtimes.loc[is_serial]
    parallel = runtimes.loc[~is_serial]

    # copy single threaded "serial" to one each for tasks and sections for visualisation;
    # enumeration rows keep the 'serial' label in the tasks copy for now
    as_sections = serial.assign(Construct='sections')
    as_tasks = serial.assign(
        Construct=serial['Construct'].where(serial['Algorithm'] == 'enumeration', 'tasks')
    )

    # merge copies together
    tidy = (
        pd.concat([parallel, as_sections, as_tasks], ignore_index=True)
        .sort_values(['Threads', 'Size'])
    )

    # every enumeration row is shown under a single 'parallel' construct label
    tidy.loc[tidy['Algorithm'] == 'enumeration', 'Construct'] = 'parallel'

    # Each serial enumeration row was copied twice above (sections + tasks) and
    # both copies now carry the same label, so exactly one duplicate per such
    # row is expected. Anything else means the input is not shaped as assumed.
    n_expected = int((serial['Algorithm'] == 'enumeration').sum())
    n_duplicates = int(tidy.duplicated().sum())
    assert n_duplicates == n_expected, (
        f'expected {n_expected} duplicated serial enumeration rows, found {n_duplicates}'
    )

    return tidy.drop_duplicates()


cutoff = 100
if_cutoff = 'No Hybrid Parallelism' if cutoff == 0 else 'Hybrid Parallelism'

df = prepare_runtime_frame(results, cutoff)

In [41]:
def runtime_figure(data, variant, *, log_y=False, facet_row=None, facet_col_wrap=3, height=400):
    """Build a faceted runtime-vs-size line plot.

    Parameters
    ----------
    data : DataFrame
        Needs the columns 'Size', 'Duration', 'Threads' and 'Algorithm'
        (plus the `facet_row` column when one is given).
    variant : str
        Text shown in brackets at the end of the figure title.
    log_y : bool
        Plot Duration on a logarithmic axis (Size is always logarithmic).
    facet_row : str or None
        Optional column used to split the facets into rows.
    facet_col_wrap, height : int
        Passed through to `px.line`.

    Returns
    -------
    The plotly figure, with every facet's y axis un-linked from the others
    and tick labels enabled on all of them.
    """
    figure = px.line(
        data,
        x='Size', y='Duration', color='Threads',
        facet_col='Algorithm', facet_col_wrap=facet_col_wrap,
        facet_col_spacing=0.05,
        facet_row=facet_row,
        template='seaborn',
        log_x=True,
        log_y=log_y,
        title=f'Algorithm Runtime Comparisons in C / OpenMP ({variant})',
        height=height,
        labels={
            "Duration": "Duration (s)",
            "Size": "Array Size",
        },
    )

    # Each facet has its own y range once the axes are un-linked, so every
    # column needs its own tick labels. `col=` accepts a single column index
    # only (a list such as [2, 3] selects nothing), hence no `col` filter here.
    figure.update_yaxes(matches=None, showticklabels=True)
    return figure


# all algorithms, without the 'sections' construct, sizes up to 100000
overview = df.loc[
    (df['Construct'] != 'sections') &
    (df['Size'] <= 100000)
]

# log-log plot for all algorithms
fig = runtime_figure(overview, if_cutoff, log_y=True)
fig.show()

# log-linear plot for all algorithms
fig = runtime_figure(overview, if_cutoff)
fig.show()

# plots for sections/tasks for quick/merge sort
fig = runtime_figure(
    df.loc[df.Algorithm != 'enumeration'],
    if_cutoff,
    facet_row='Construct',
    facet_col_wrap=2,
    height=600,
)
fig.show()

### Without cutoff (no hybrid parallelism)

In [42]:
# take average over trials
df = results.groupby(['Algorithm', 'Construct', 'Threads', 'Size', 'Cutoff'])['Duration'].mean().to_frame('Duration')

cutoff = 0
if_cutoff = 'No Hybrid Parallelism' if cutoff == 0 else 'Hybrid Parallelism'

# keep every serial run plus the parallel runs recorded with Cutoff == cutoff
# (with cutoff = 0 those are the runs without hybrid parallelism), up to Size 1000000
keep = (
    (
        (df.index.get_level_values('Construct') == 'serial')
        | (df.index.get_level_values('Cutoff') == cutoff)
    )
    & (df.index.get_level_values('Size') <= 1000000)
)

# the Cutoff level is no longer needed; the remaining levels become plain columns
df = df.loc[keep].reset_index('Cutoff', drop=True).reset_index()

# copy single threaded "serial" to one each for tasks and sections for visualisation
serial = df.loc[df['Construct'] == 'serial']
sections = serial.assign(Construct='sections')
tasks = serial.copy()
# enumeration rows keep the 'serial' label in this copy until they are relabelled below
tasks.loc[tasks['Algorithm'] != 'enumeration', 'Construct'] = 'tasks'

# merge copies together
df = pd.concat([df.loc[df['Construct'] != 'serial'], sections, tasks], ignore_index=True)
df = df.sort_values(['Threads', 'Size'])

# every enumeration row is shown under a single 'parallel' construct label
df.loc[df['Algorithm'] == 'enumeration', 'Construct'] = 'parallel'

# Each serial enumeration row was copied twice above (sections + tasks) and both
# copies now carry the same label, so exactly one duplicate per such row is
# expected. Anything else means the input is not shaped as assumed.
n_serial_enumeration = int((serial['Algorithm'] == 'enumeration').sum())
n_duplicates = int(df.duplicated().sum())
assert n_duplicates == n_serial_enumeration, (
    f'expected {n_serial_enumeration} duplicated serial enumeration rows, found {n_duplicates}'
)

df = df.drop_duplicates()

In [44]:
# arguments shared by every figure in this cell
line_kwargs = dict(
    x='Size', y='Duration', color='Threads',
    facet_col='Algorithm',
    facet_col_spacing=0.05,
    template='seaborn',
    log_x=True,
    title=f'Algorithm Runtime Comparisons in C / OpenMP ({if_cutoff})',
    labels={
        "Duration": "Duration (s)",
        "Size": "Array Size",
    },
)

# all algorithms, without the 'sections' construct, sizes up to 100000
all_algorithms = df.loc[
    (df['Construct'] != 'sections') &
    (df['Size'] <= 100000)
]

# log-log plot for all algorithms
fig = px.line(all_algorithms, facet_col_wrap=3, log_y=True, height=400, **line_kwargs)
fig.update_yaxes(matches=None)
fig.update_yaxes(showticklabels=True, col=2) # assuming second facet
fig.update_yaxes(showticklabels=True, col=3) # assuming third facet
fig.show()

# log-linear plot for all algorithms
fig = px.line(all_algorithms, facet_col_wrap=3, height=400, **line_kwargs)
fig.update_yaxes(matches=None)
fig.update_yaxes(showticklabels=True, col=2) # assuming second facet
fig.update_yaxes(showticklabels=True, col=3) # assuming third facet
fig.show()

# plots for sections/tasks for quick/merge sort
fig = px.line(
    df.loc[df.Algorithm != 'enumeration'],
    facet_col_wrap=2,
    facet_row='Construct',
    height=600,
    **line_kwargs,
)

# `col=` accepts a single column index only (a list such as [2, 3] selects
# nothing), so tick labels are switched on for every un-linked y axis instead.
fig.update_yaxes(matches=None, showticklabels=True)
fig.show()

### Miscellaneous plots

In [43]:
# log-log plot for all algorithms ('sections' construct excluded, sizes up to 100000)
fig = px.line(
    df.loc[
        (df['Construct'] != 'sections') &
        (df['Size'] <= 100000)
    ],
    x='Size', y='Duration', color='Threads',
    facet_col='Algorithm', facet_col_wrap=3,
    facet_col_spacing=0.05,
    template='seaborn',
    log_x=True,
    log_y=True,
    # Label the figure from `if_cutoff` rather than a fixed string, so the title
    # always follows the cutoff that the current `df` was prepared with.
    title=f'Algorithm Runtime Comparisons in C / OpenMP ({if_cutoff})',
    height=400,
    labels={
        "Duration": "Duration (s)",
        "Size": "Array Size",
     },
)

# un-link the y axes so each facet gets its own range, then show its tick labels
fig.update_yaxes(matches=None)
fig.update_yaxes(showticklabels=True, col=2) # assuming second facet
fig.update_yaxes(showticklabels=True, col=3) # assuming third facet
fig.show()

In [33]:
# log-linear plots per construct (facet rows) for every algorithm except enumeration
fig = px.line(
    df.loc[df.Algorithm != 'enumeration'],
    x='Size', y='Duration', color='Threads',
    facet_col='Algorithm', facet_col_wrap=2,
    facet_col_spacing=0.05,
    facet_row='Construct',
    template='seaborn',
    log_x=True,
    title=f'Algorithm Runtime Comparisons in C / OpenMP ({if_cutoff})',
    height=600,
    labels={
        "Duration": "Duration (s)",
        "Size": "Array Size",
     },
)

# `col=` accepts a single column index only (a list such as [2, 3] selects
# nothing), so tick labels are switched on for every un-linked y axis instead.
fig.update_yaxes(matches=None, showticklabels=True)
fig.show()

In [58]:
# log-linear plots for every algorithm (facet columns) and construct (facet rows)
fig = px.line(
    df,
    x='Size', y='Duration', color='Threads',
    facet_col='Algorithm', facet_col_wrap=2,
    facet_col_spacing=0.05,
    facet_row='Construct',
    template='seaborn',
    log_x=True,
    # Label the figure from `if_cutoff` rather than a fixed string, so the title
    # always follows the cutoff that the current `df` was prepared with.
    title=f'Algorithm Runtime Comparisons in C / OpenMP ({if_cutoff})',
    height=600,
    labels={
        "Duration": "Duration (s)",
        "Size": "Array Size",
     },
)

# All of `df` is plotted, so there is one facet column per algorithm. With the
# y axes un-linked, every column needs its own tick labels, not only the second.
fig.update_yaxes(matches=None, showticklabels=True)
fig.show()

In [40]:
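# Deprecated matplotlib visualisation, kept commented out for reference only.
# It will not run against the current `df` as-is: it selects a 'Cutoff' column and
# 'serial' construct rows, both of which the preparation cells above remove from `df`.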

# # fig = plt.figure(figsize=(20, 12))
# nrows, ncols = 3, 2
# fig, axes = plt.subplots(nrows, ncols, figsize=(16,5*nrows), sharex=True, sharey=True)

# data_df = (
#     df.loc[
#         (df.Construct.isin(('serial','sections'))),
#         ['Algorithm', 'Threads', 'Cutoff', 'Size', 'Duration']
#     ]
#     .groupby(['Algorithm','Threads', 'Cutoff', 'Size'])['Duration']
#     .agg({'mean', 'std'})
# )

# for i, algorithm in enumerate(('quick', 'merge')):
#     for j, cutoff in enumerate(data_df.index.get_level_values('Cutoff').unique()):
#         ax = axes[j, i]
#         ax.set_title(f'{algorithm.title()} Sort Runtime vs. Array Size (Cutoff={cutoff})')

#         # plot data
#         for k, n_threads in enumerate(data_df.index.get_level_values('Threads').unique()):
#             data = data_df.loc[
#                 (data_df.index.get_level_values('Algorithm') == algorithm) &
#                 (data_df.index.get_level_values('Threads') == n_threads) &
#                 (data_df.index.get_level_values('Cutoff') == cutoff)
#             ].reset_index(['Algorithm','Threads','Cutoff'], drop=True)

#             array_sizes = data_df.index.get_level_values('Size').unique()

#             color=list(mcolors.TABLEAU_COLORS)[k]
#             label = f'{n_threads} Threads' if n_threads != 1 else 'Serial'
#             ax.plot(data.index, data['mean'], label=label, color=color)
#             ax.errorbar(data.index, data['mean'], yerr=data['std'] , fmt='o', markersize=4, color=color)

#         if j == 0:
#             ax.set_ylabel('Runtime (s)')
#         ax.legend(loc='upper left')

#         ax.set_xlabel('Array Size')
#         ax.set_xscale('log')
# #         ax.set_yscale('log')
#         ax.grid(axis='y')

# fig.suptitle('Sorting Algorithm Runtimes using Tasks in C/OpenMP (Log-Log Scale)', y=0.99, fontsize=14)
# fig.tight_layout()
# fig.show()