In [None]:
import matplotlib
import matplotlib.pyplot as plt
from nbmetalog import nbmetalog as nbm
import numpy as np
import pandas as pd
import seaborn as sns
import teeplot as tp

# Emit Type 42 (TrueType) fonts instead of matplotlib's default Type 3 in
# both PDF and PostScript output.
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

# set to True if you want all graphs saved as PDFs
save_graphs = True


In [None]:
# Print nbmetalog's metadata report for this notebook session
# (presumably for reproducibility; exact contents are defined by nbmetalog).
nbm.print_metadata()


## Import data


In [None]:
# Download the dataset used below for the multithread plots (needs network access).
ds_thread = pd.read_csv("https://osf.io/t8wrf/download")
# nvp_expr turns the variable name into an expression string; it is evaluated
# here and unpacked into the summary printer (presumably a name/value pair —
# confirm against the nbmetalog documentation).
nbm.print_dataframe_summary(*eval(nbm.nvp_expr(
    'ds_thread'
)))


In [None]:
# Download the dataset used below for the multiprocess plots (needs network access).
ds_proc = pd.read_csv("https://osf.io/dkj9n/download")
# nvp_expr turns the variable name into an expression string; it is evaluated
# here and unpacked into the summary printer (presumably a name/value pair —
# confirm against the nbmetalog documentation).
nbm.print_dataframe_summary(*eval(nbm.nvp_expr(
    'ds_proc'
)))


In [None]:
# Download the control dataset that is later merged with the multithread
# data for the digital evolution plot (needs network access).
ds_control = pd.read_csv("https://osf.io/3jz4w/download")
# nvp_expr turns the variable name into an expression string; it is evaluated
# here and unpacked into the summary printer (presumably a name/value pair —
# confirm against the nbmetalog documentation).
nbm.print_dataframe_summary(*eval(nbm.nvp_expr(
    'ds_control'
)))


### Concatenate data


In [None]:
# Preview the three datasets stacked together. The result is only displayed,
# not assigned; the cells below keep working on the individual frames.
pd.concat([ds_control, ds_thread, ds_proc])


# Graphing


### Determine number of CPUs (threads * processes) and per-CPU metrics


In [None]:
# Total CPUs used by a run is its thread count times its process count.
ds_thread["ncpus"] = ds_thread["nthreads"].mul(ds_thread["nprocs"])

# Normalize the run totals by CPU count (and by wall time for the update rate).
ds_thread["conflicts per cpu"] = ds_thread["conflicts total"].div(ds_thread["ncpus"])
ds_thread["updates per cpu-second"] = (
    ds_thread["updates total"]
    .div(ds_thread["ncpus"])
    .div(ds_thread["seconds"])
)


In [None]:
# Total CPUs used by a run is its thread count times its process count.
ds_proc["ncpus"] = ds_proc["nthreads"].mul(ds_proc["nprocs"])

# Normalize the run totals by CPU count (and by wall time for the update rate).
ds_proc["conflicts per cpu"] = ds_proc["conflicts total"].div(ds_proc["ncpus"])
ds_proc["updates per cpu-second"] = (
    ds_proc["updates total"]
    .div(ds_proc["ncpus"])
    .div(ds_proc["seconds"])
)


In [None]:
# Total CPUs used by a run is its thread count times its process count.
ds_control["ncpus"] = ds_control["nthreads"].mul(ds_control["nprocs"])

# Update rate normalized by CPU count and wall time
# (no per-CPU conflict column is derived for the control data).
ds_control["updates per cpu-second"] = (
    ds_control["updates total"]
    .div(ds_control["ncpus"])
    .div(ds_control["seconds"])
)


## Graph coloring problem


#### This problem is described in [_WLAN channel selection without communication_](https://dl.acm.org/doi/abs/10.1016/j.comnet.2011.12.015). In basic terms, it consists of assigning a color to each node in a graph so that no two neighboring nodes share the same color.


We first select only the data corresponding to the multithread graph coloring runs.


In [None]:
# Keep only single-process graph coloring runs, i.e. the pure multithread ones.
filtered_threads = ds_thread.loc[
    ds_thread["executable"].eq("channel_selection")
    & ds_thread["nprocs"].eq(1)
]


Next, we graph it as a bar plot.


In [None]:
# Per-CPU update rate by CPU count, one bar group per asynchronicity mode.
g = sns.barplot(
    data=filtered_threads,
    x="ncpus",
    y="updates per cpu-second",
    hue="asynchronicity mode",
)
g.set(title='Multithread Graph Coloring')

if save_graphs:
    plt.savefig(
        'multithread-graph-coloring.pdf',
        bbox_inches='tight',
    )


Now, we proceed to graph the number of conflicts per CPU. They are inversely related to the solution quality: fewer conflicts mean a better solution. Note that the y-axis is logarithmic.


In [None]:
# Conflicts per CPU by CPU count, one bar group per asynchronicity mode.
g = sns.barplot(
    data=filtered_threads,
    x="ncpus",
    y="conflicts per cpu",
    hue="asynchronicity mode",
)
# Title first, then switch the y-axis to a log scale.
g.set(title='Multithread Graph Coloring Solution Quality', yscale='log')

if save_graphs:
    plt.savefig(
        'multithread-graph-coloring-solution-quality.pdf',
        bbox_inches='tight',
    )


We now select only the multiprocess graph coloring data...


In [None]:
# Keep only single-thread graph coloring runs, i.e. the pure multiprocess ones.
filtered_procs = ds_proc.loc[
    ds_proc["executable"].eq("channel_selection")
    & ds_proc["nthreads"].eq(1)
]


...and graph it the same way.


In [None]:
# Per-CPU update rate by CPU count, one bar group per asynchronicity mode.
g = sns.barplot(
    data=filtered_procs,
    x="ncpus",
    y="updates per cpu-second",
    hue="asynchronicity mode",
)
g.set(title='Multiprocess Graph Coloring')

if save_graphs:
    plt.savefig(
        'multiprocess-graph-coloring.pdf',
        bbox_inches='tight',
    )


Now, we proceed to graph the number of conflicts per CPU. They are inversely related to the solution quality: fewer conflicts mean a better solution. Note that the y-axis is logarithmic.


In [None]:
# Conflicts per CPU by CPU count, one bar group per asynchronicity mode.
g = sns.barplot(
    data=filtered_procs,
    x="ncpus",
    y="conflicts per cpu",
    hue="asynchronicity mode",
)
# Title first, then switch the y-axis to a log scale.
g.set(title='Multiprocess Graph Coloring Solution Quality', yscale='log')

if save_graphs:
    plt.savefig(
        'multiprocess-graph-coloring-solution-quality.pdf',
        bbox_inches='tight',
    )


## Digital evolution problem


#### This problem consists of evolving a system of digital cells. You can read more about dishtiny [here](https://dishtiny.readthedocs.io/en/latest/).


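Once again, we first select only the multithreaded data corresponding to this problem. This time it is combined with the control runs, whose process counts are relabeled as thread counts so that both datasets can share one plot.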


In [None]:
# Relabel the control runs so they line up with the multithread data: the
# process count becomes the thread count and nprocs is pinned to 1.
#
# The relabeling mutates ds_control in place, so it is guarded. Once it has
# been applied every nprocs value is 1; re-running this cell without the guard
# would overwrite nthreads with those 1s and silently corrupt both this plot
# and the later ratio that looks up ds_control['nthreads'] == 64.
# NOTE(review): if the raw control data ever contained only nprocs == 1 rows,
# the relabeling would be skipped and nthreads left as loaded.
if not ds_control["nprocs"].eq(1).all():
    ds_control["nthreads"] = ds_control["nprocs"]
    ds_control["nprocs"] = 1

# Stack the multithread runs and the relabeled control runs into one frame.
ds_both = pd.concat([ds_thread, ds_control])

# Single-process dishtiny runs only.
filtered_threads_dish = ds_both[
    (ds_both["executable"] == "dishtiny") &
    (ds_both["nprocs"] == 1) &
    (ds_both["ncpus"] != 32) # we remove this because we only have control data for it
]


And we graph it.


# Per-CPU update rate by CPU count, one bar group per asynchronicity mode.
f = sns.barplot(
    data=filtered_threads_dish,
    x="ncpus",
    y="updates per cpu-second",
    hue="asynchronicity mode",
)
f.set(title='Multithread Digital Evolution')

if save_graphs:
    plt.savefig(
        'multithread-digital-evolution.pdf',
        bbox_inches='tight',
    )


Finally, we select and graph the multiprocess digital evolution data.


In [None]:
# Keep only single-thread dishtiny runs, i.e. the pure multiprocess ones.
filtered_procs_dish = ds_proc.loc[
    ds_proc["executable"].eq("dishtiny")
    & ds_proc["nthreads"].eq(1)
]


In [None]:
# Per-CPU update rate by CPU count, one bar group per asynchronicity mode.
g = sns.barplot(
    data=filtered_procs_dish,
    x="ncpus",
    y="updates per cpu-second",
    hue="asynchronicity mode",
)
g.set(title='Multiprocess Digital Evolution')

if save_graphs:
    plt.savefig(
        'multiprocess-digital-evolution.pdf',
        bbox_inches='tight',
    )


# Computing relative performance


In [None]:
# Fractional drop in mean per-CPU update rate for graph coloring in
# asynchronicity mode 4, comparing 4-CPU runs against 1-CPU runs. Only rows
# where every CPU is a thread (ncpus == nthreads) are considered.
1.0 - (
    ds_thread.loc[
        ds_thread['asynchronicity mode'].eq(4)
        & ds_thread['ncpus'].eq(4)
        & ds_thread['ncpus'].eq(ds_thread['nthreads'])
        & ds_thread['executable'].eq('channel_selection'),
        'updates per cpu-second',
    ].mean()
    / ds_thread.loc[
        ds_thread['asynchronicity mode'].eq(4)
        & ds_thread['ncpus'].eq(1)
        & ds_thread['ncpus'].eq(ds_thread['nthreads'])
        & ds_thread['executable'].eq('channel_selection'),
        'updates per cpu-second',
    ].mean()
)


In [None]:
# Fractional drop in mean per-CPU update rate for graph coloring in
# asynchronicity mode 4, comparing 64-CPU runs against 4-CPU runs. Only rows
# where every CPU is a thread (ncpus == nthreads) are considered.
1.0 - (
    ds_thread.loc[
        ds_thread['asynchronicity mode'].eq(4)
        & ds_thread['ncpus'].eq(64)
        & ds_thread['ncpus'].eq(ds_thread['nthreads'])
        & ds_thread['executable'].eq('channel_selection'),
        'updates per cpu-second',
    ].mean()
    / ds_thread.loc[
        ds_thread['asynchronicity mode'].eq(4)
        & ds_thread['ncpus'].eq(4)
        & ds_thread['ncpus'].eq(ds_thread['nthreads'])
        & ds_thread['executable'].eq('channel_selection'),
        'updates per cpu-second',
    ].mean()
)


In [None]:
# Ratio of mean per-CPU update rate for the dishtiny control runs in
# asynchronicity mode 4: nthreads == 64 over nthreads == 1.
# This relies on ds_control['nthreads'] having been overwritten with the
# process count in the digital evolution selection cell earlier on.
(
    ds_control.loc[
        ds_control['asynchronicity mode'].eq(4)
        & ds_control['nthreads'].eq(64)
        & ds_control['executable'].eq('dishtiny'),
        'updates per cpu-second',
    ].mean()
    / ds_control.loc[
        ds_control['asynchronicity mode'].eq(4)
        & ds_control['nthreads'].eq(1)
        & ds_control['executable'].eq('dishtiny'),
        'updates per cpu-second',
    ].mean()
)


In [None]:
# Ratio of mean per-CPU update rate for dishtiny in asynchronicity mode 3:
# 64-CPU runs over 1-CPU runs. Only rows where every CPU is a thread
# (ncpus == nthreads) are considered.
(
    ds_thread.loc[
        ds_thread['asynchronicity mode'].eq(3)
        & ds_thread['ncpus'].eq(64)
        & ds_thread['ncpus'].eq(ds_thread['nthreads'])
        & ds_thread['executable'].eq('dishtiny'),
        'updates per cpu-second',
    ].mean()
    / ds_thread.loc[
        ds_thread['asynchronicity mode'].eq(3)
        & ds_thread['ncpus'].eq(1)
        & ds_thread['ncpus'].eq(ds_thread['nthreads'])
        & ds_thread['executable'].eq('dishtiny'),
        'updates per cpu-second',
    ].mean()
)


In [None]:
# Ratio of mean per-CPU update rate for dishtiny at 64 CPUs:
# asynchronicity mode 3 over asynchronicity mode 0. Only rows where every
# CPU is a thread (ncpus == nthreads) are considered.
(
    ds_thread.loc[
        ds_thread['asynchronicity mode'].eq(3)
        & ds_thread['ncpus'].eq(64)
        & ds_thread['ncpus'].eq(ds_thread['nthreads'])
        & ds_thread['executable'].eq('dishtiny'),
        'updates per cpu-second',
    ].mean()
    / ds_thread.loc[
        ds_thread['asynchronicity mode'].eq(0)
        & ds_thread['ncpus'].eq(64)
        & ds_thread['ncpus'].eq(ds_thread['nthreads'])
        & ds_thread['executable'].eq('dishtiny'),
        'updates per cpu-second',
    ].mean()
)


In [None]:
# Ratio of mean per-CPU update rate for graph coloring in the multiprocess
# dataset, asynchronicity mode 3: 64-CPU runs over 1-CPU runs.
(
    ds_proc.loc[
        ds_proc['asynchronicity mode'].eq(3)
        & ds_proc['ncpus'].eq(64)
        & ds_proc['executable'].eq('channel_selection'),
        'updates per cpu-second',
    ].mean()
    / ds_proc.loc[
        ds_proc['asynchronicity mode'].eq(3)
        & ds_proc['ncpus'].eq(1)
        & ds_proc['executable'].eq('channel_selection'),
        'updates per cpu-second',
    ].mean()
)


In [None]:
# Ratio of mean per-CPU update rate for graph coloring in the multiprocess
# dataset at 64 CPUs: asynchronicity mode 3 over asynchronicity mode 0.
(
    ds_proc.loc[
        ds_proc['asynchronicity mode'].eq(3)
        & ds_proc['ncpus'].eq(64)
        & ds_proc['executable'].eq('channel_selection'),
        'updates per cpu-second',
    ].mean()
    / ds_proc.loc[
        ds_proc['asynchronicity mode'].eq(0)
        & ds_proc['ncpus'].eq(64)
        & ds_proc['executable'].eq('channel_selection'),
        'updates per cpu-second',
    ].mean()
)


In [None]:
# Ratio of mean per-CPU update rate for dishtiny in the multiprocess
# dataset, asynchronicity mode 3: 64-CPU runs over 1-CPU runs.
(
    ds_proc.loc[
        ds_proc['asynchronicity mode'].eq(3)
        & ds_proc['ncpus'].eq(64)
        & ds_proc['executable'].eq('dishtiny'),
        'updates per cpu-second',
    ].mean()
    / ds_proc.loc[
        ds_proc['asynchronicity mode'].eq(3)
        & ds_proc['ncpus'].eq(1)
        & ds_proc['executable'].eq('dishtiny'),
        'updates per cpu-second',
    ].mean()
)


In [None]:
# Ratio of mean per-CPU update rate for dishtiny in the multiprocess
# dataset at 64 CPUs: asynchronicity mode 3 over asynchronicity mode 0.
(
    ds_proc.loc[
        ds_proc['asynchronicity mode'].eq(3)
        & ds_proc['ncpus'].eq(64)
        & ds_proc['executable'].eq('dishtiny'),
        'updates per cpu-second',
    ].mean()
    / ds_proc.loc[
        ds_proc['asynchronicity mode'].eq(0)
        & ds_proc['ncpus'].eq(64)
        & ds_proc['executable'].eq('dishtiny'),
        'updates per cpu-second',
    ].mean()
)
