In [42]:
import glob

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

import plotly.express as px
import plotly.graph_objects as go

In [87]:
# Column names for the benchmark CSV, supplied explicitly in file order.
columns = [
    'approach', 'sort', 'world_size', 'construct',
    'threads', 'size', 'duration',
]
results = pd.read_csv(
    '../results/final/mpi_sort_results.csv',
    header=None,  # what pandas already assumes when `names` is given
    names=columns,
)

In [88]:
# Render the raw benchmark table exactly as loaded from the CSV.
results

Unnamed: 0,approach,sort,world_size,construct,threads,size,duration
0,merge,enumeration_sort,16,single,1,10000,0.013800
1,partition,enumeration_sort,16,single,1,10000,0.033126
2,merge,merge_sort,16,single,1,10000,0.001347
3,partition,merge_sort,16,single,1,10000,0.001445
4,merge,quick_sort,16,single,1,10000,0.001191
...,...,...,...,...,...,...,...
1291,partition,quick_sort,2,tasks,8,100000000,15.529089
1292,merge,merge_sort,2,tasks,8,100000000,14.271214
1293,partition,merge_sort,2,tasks,8,100000000,14.898220
1294,merge,quick_sort,2,tasks,8,100000000,15.227301


In [89]:
# List the distinct values of every experiment factor; 'duration' is the
# measurement itself, so it is skipped.
for col in results.columns:
    if col == 'duration':
        continue
    print(f'{col}: {results[col].unique()}')

approach: ['merge' 'partition' 'serial']
sort: ['enumeration_sort' 'merge_sort' 'quick_sort']
world_size: [16  8  4  1  2]
construct: ['single' 'parallel' 'tasks']
threads: [1 2 4 8]
size: [    10000    100000   1000000  10000000 100000000]


In [90]:
# Average the repeated measurements: one mean duration per unique
# (approach, sort, world_size, construct, threads, size) combination.
group_keys = ['approach', 'sort', 'world_size', 'construct', 'threads', 'size']
df = (
    results
    .groupby(group_keys)['duration']
    .mean()
    .reset_index(name='duration')
)
df

Unnamed: 0,approach,sort,world_size,construct,threads,size,duration
0,merge,enumeration_sort,2,parallel,2,10000,0.066418
1,merge,enumeration_sort,2,parallel,2,100000,6.140236
2,merge,enumeration_sort,2,parallel,4,10000,0.035189
3,merge,enumeration_sort,2,parallel,4,100000,3.110686
4,merge,enumeration_sort,2,parallel,8,10000,0.021299
...,...,...,...,...,...,...,...
427,serial,quick_sort,1,tasks,8,10000,0.001542
428,serial,quick_sort,1,tasks,8,100000,0.018815
429,serial,quick_sort,1,tasks,8,1000000,0.220739
430,serial,quick_sort,1,tasks,8,10000000,2.501303


In [96]:
# Lift the row limit so frames are rendered in full rather than truncated.
pd.options.display.max_rows = None

In [288]:
# Wide table for a single array size: rows are (world_size, threads, size),
# columns are (approach, sort), values are the mean durations.
size_mask = df['size'] == 100000
summary_table = (
    df[size_mask]
    .drop(columns=['construct'])
    .pivot(index=['world_size', 'threads', 'size'], columns=['approach', 'sort'])
    ['duration']
)

# Split by process count: world_size == 1 rows carry the 'serial' columns,
# every other world_size carries the 'merge' / 'partition' columns.
world_sizes = summary_table.index.get_level_values('world_size')
parallel = summary_table.loc[world_sizes != 1, ['merge', 'partition']]
serial = (
    summary_table
    .loc[world_sizes == 1, ['serial']]
    .reset_index('world_size', drop=True)
)

In [291]:
# Keep only the merge_sort / quick_sort columns (any approach) and reshape to
# long form: one row per (world_size, threads, size, approach, sort).
fast_sorts = (slice(None), ['merge_sort', 'quick_sort'])
parallel_df = (
    parallel
    .loc[:, fast_sorts]
    .reset_index()
    .melt(id_vars=['world_size', 'threads', 'size'], value_name='duration')
)

In [301]:
# Runtime against process count for the 'merge' approach:
# one line per thread count, one facet per sorting algorithm.
merge_runs = parallel_df[parallel_df['approach'] == 'merge']

fig = px.line(
    merge_runs,
    x='world_size',
    y='duration',
    color='threads',
    facet_col='sort',
    log_x=True,
    title='Performance Comparison of MPI Merge: Processes vs. Runtime (Array Size = 100000)',
)
fig.show()

In [302]:
# Runtime against process count for the 'partition' approach:
# one line per thread count, one facet per sorting algorithm.
partition_runs = parallel_df[parallel_df['approach'] == 'partition']

fig = px.line(
    partition_runs,
    x='world_size',
    y='duration',
    color='threads',
    facet_col='sort',
    log_x=True,
    title='Performance Comparison of MPI Partition: Processes vs. Runtime (Array Size = 100000)',
)
fig.show()

In [233]:
# Serial baseline table first, then the merge / partition table.
display(serial)
display(parallel)

Unnamed: 0_level_0,approach,serial,serial,serial
Unnamed: 0_level_1,sort,enumeration_sort,merge_sort,quick_sort
threads,size,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,10000,0.2368,0.0013,0.0015
1,100000,24.0071,0.0137,0.0158
1,1000000,,0.1674,0.2033
1,10000000,,1.9961,2.2887
1,100000000,,22.7555,26.6629
2,10000,0.2453,0.0009,0.0015
2,100000,24.3789,0.0111,0.0185
2,1000000,,0.1216,0.2215
2,10000000,,1.2654,2.4834
2,100000000,,13.8979,28.5926


Unnamed: 0_level_0,Unnamed: 1_level_0,approach,merge,merge,merge,partition,partition,partition
Unnamed: 0_level_1,Unnamed: 1_level_1,sort,enumeration_sort,merge_sort,quick_sort,enumeration_sort,merge_sort,quick_sort
world_size,threads,size,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
2,1,10000,0.06,0.0009,0.0009,0.0948,0.0011,0.0011
2,1,100000,6.0387,0.0083,0.0089,8.6578,0.0091,0.0105
2,1,1000000,,0.093,0.1114,,0.1681,0.1981
2,1,10000000,,1.0982,1.2184,,1.3993,1.5689
2,1,100000000,,12.4001,14.111,,12.7311,14.3739
2,2,10000,0.0664,0.0012,0.001,0.1051,0.0013,0.0013
2,2,100000,6.1402,0.0103,0.0098,8.8102,0.0113,0.0118
2,2,1000000,,0.109,0.1201,,0.2033,0.2152
2,2,10000000,,1.2773,1.3176,,1.6306,1.7104
2,2,100000000,,14.2537,15.168,,14.7826,15.4453


In [219]:
# Long-format, presentation-ready table of the serial baseline.
#
# `serial` is already indexed by (threads, size) only: its 'world_size' level
# was dropped when it was sliced out of `summary_table`, so asking to drop that
# level again here raises KeyError on a fresh run. 'size' is constant after the
# size filter applied to `summary_table`, so it is dropped as well; otherwise
# melt() would treat it as one more measured column.
#
# Built as a single chain (no `inplace=True`, no attribute-style column
# assignment) so that re-running the cell always gives the same result.
serial_df = (
    serial
    .reset_index('size', drop=True)
    .reset_index()
    .melt(id_vars=['threads'], value_name='duration')
    .drop(columns='approach')
    # Bracket access: `frame.sort` can collide with DataFrame attributes.
    .assign(sort=lambda frame: frame['sort'].str.replace('_sort', ''))
    .rename(columns={'duration': 'Duration (s)', 'threads': 'OpenMP Threads', 'sort': 'Sorting Algorithm'})
)
serial_df

Unnamed: 0,OpenMP Threads,Sorting Algorithm,Duration (s)
0,1,enumeration,24.0071
1,2,enumeration,24.3789
2,4,enumeration,12.2966
3,8,enumeration,6.218
4,1,merge,0.0137
5,2,merge,0.0111
6,4,merge,0.0083
7,8,merge,0.0065
8,1,quick,0.0158
9,2,quick,0.0185


### IO Experiments

In [144]:
# Load the serial and parallel I/O timing files and stack them into one frame.
io_columns = ['io', 'approach', 'world_size', 'size', 'duration']
io_frames = [
    pd.read_csv(f'../results/final/{approach}_io_results.csv', names=io_columns)
    for approach in ('serial', 'parallel')
]
io = pd.concat(io_frames)

In [165]:
# Render floats with thousands separators and four decimal places.
pd.set_option('display.float_format', '{:,.4f}'.format)

In [166]:
# Wide I/O table: one row per array size, columns keyed by (io, world_size).
io_sorted = io.drop(columns='approach').sort_values(['io', 'world_size'])
io_summary = io_sorted.pivot(index=['size'], columns=['io', 'world_size'])['duration']

display(io_summary)

io,read,read,read,read,read,read,write,write,write,write,write,write
world_size,1,2,4,8,16,24,1,2,4,8,16,24
size,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
1000,0.0,0.0002,0.0004,0.0006,0.0013,0.1044,0.0001,0.0013,0.0018,0.0032,0.0032,0.0832
10000,0.0,0.0002,0.0003,0.0005,0.0009,0.0505,0.0001,0.0002,0.0004,0.0006,0.001,0.0587
100000,0.0003,0.0004,0.0004,0.0006,0.001,0.0459,0.0005,0.0005,0.0009,0.0009,0.0018,0.0612
1000000,0.0036,0.0025,0.0015,0.0014,0.0016,0.0609,0.0075,0.0035,0.0051,0.0048,0.0053,0.0895
10000000,0.0378,0.0206,0.0127,0.0098,0.008,0.0442,0.086,0.0303,0.0293,0.0355,0.0374,0.1366
100000000,0.3518,0.2037,0.1122,0.0843,0.0694,0.1439,0.928,0.2622,0.2761,0.2923,0.313,0.4672
1000000000,3.0032,1.722,0.9151,0.6454,0.5662,0.5831,7.2551,2.3322,2.5684,2.5032,2.8186,3.228


In [None]:

    # NOTE(review): this cell is an orphaned tail of a plotting call: keyword
    # arguments with no enclosing call, and `if_cutoff` is not defined anywhere
    # in the visible notebook. As written it fails to parse, which stops
    # "Restart & Run All". The fragment is preserved below as comments until
    # the missing call is restored or the cell is deleted.
    #
    #     facet_col_spacing=0.05,
    #     facet_row='Construct',
    #     template='seaborn',
    #     log_x=True,
    # #     log_y=True,
    #     title=f'Algorithm Runtime Comparisons in C / OpenMP ({if_cutoff})',
    #     height=600,
    #     labels={
    #         "Duration": "Duration (s)",
    #         "Size": "Array Size",
    #      },

In [186]:
# Read timings in long form: one row per (size, world_size) pair.
read_long = (
    io_summary['read']
    .reset_index()
    .melt(id_vars='size', value_name='duration')
    .rename(columns={'world_size': 'World Size'})
)

# Read duration against array size, one line per world size.
px.line(
    read_long,
    x='size',
    y='duration',
    color='World Size',
    height=500,
    width=700,
    template='seaborn',
    log_x=True,
    labels={'duration': 'Duration (s)', 'size': 'Array Size'},
    title='Duration for IO Read Time vs. Array Size',
)
    

In [187]:
# Write timings in long form: one row per (size, world_size) pair.
write_long = (
    io_summary['write']
    .reset_index()
    .melt(id_vars='size', value_name='duration')
    .rename(columns={'world_size': 'World Size'})
)

# Write duration against array size, one line per world size.
px.line(
    write_long,
    x='size',
    y='duration',
    color='World Size',
    height=500,
    width=700,
    template='seaborn',
    log_x=True,
    labels={'duration': 'Duration (s)', 'size': 'Array Size'},
    title='Duration for IO Write Time vs. Array Size',
)
    

In [210]:
# Read and write timings together in long form, with display-friendly
# names for the two melted column levels.
io_long = (
    io_summary
    .reset_index()
    .melt(id_vars='size', value_name='duration')
    .rename(columns={'world_size': 'World Size', 'io': "IO"})
)

# One facet per IO kind, one line per world size.
fig = px.line(
    io_long,
    x='size',
    y='duration',
    color='World Size',
    facet_col='IO',
    facet_col_spacing=0.03,
    height=450,
    width=900,
    template='seaborn',
    log_x=True,
    labels={'duration': 'Duration (s)', 'size': 'Array Size'},
    title='Performance Chart for IO Time vs. Array Size',
)

# Unlink the facets' y-axes, then turn tick labels on for facet column 2
# (assumed to be the second facet).
fig.update_yaxes(matches=None)
fig.update_yaxes(showticklabels=True, col=2)
fig.show()