In [5]:
import sqlite3
from IPython.display import HTML, display

# Open the benchmark database through a SQLite URI in read-only mode
# (`mode=ro`), so no cell of this notebook can modify the stored results.
db = 'file:results.db?mode=ro'
con = sqlite3.connect(db, uri=True)

# `Connection.execute` creates a fresh cursor, runs the statement on it and
# returns that cursor. `cur` stays around as the notebook-wide cursor.
cur = con.execute('SELECT * FROM runs ORDER BY date DESC LIMIT 15')

def display_result(cur, data=None):
    """Render a query result as an HTML table in the notebook output.

    Parameters
    ----------
    cur : cursor
        DB-API cursor. The first element of every entry in
        ``cur.description`` is used as a column heading.
    data : iterable of rows, optional
        Rows to render. When omitted (``None``), the rows are fetched from
        ``cur`` with ``cur.fetchall()`` at call time.

    Returns
    -------
    None
        The table is handed to ``display(HTML(...))``.
    """
    from html import escape

    # Fetch lazily. A ``data=cur.fetchall()`` default would be evaluated only
    # once, when the ``def`` statement runs: it would drain whatever cursor
    # happens to be bound to the global name at that moment and reuse those
    # rows for every later call that omits ``data``.
    if data is None:
        data = cur.fetchall()

    # ``description`` is None when no statement producing rows has been run.
    headers = [column[0] for column in (cur.description or ())]

    parts = ["<table>", "<thead><tr>"]
    parts.extend("<td>" + escape(str(name)) + "</td>" for name in headers)
    parts.append("</tr></thead>")
    for row in data:
        parts.append("<tr>")
        # Escape cell text so values containing <, > or & cannot break (or
        # inject) markup in the rendered table.
        parts.extend("<td>%s</td>" % escape(str(field)) for field in row)
        parts.append("</tr>")
    parts.append("</table>")
    display(HTML("".join(parts)))
# Show the result of the `runs` query executed on `cur` above as an HTML table.
display_result(cur)

id,date,hostname,revision,cluster_size,description,flags
76,2022-02-16 12:55:05,uc2n009.localdomain,01305bb08623e2d18215de50dd6af4bfa65c0b41,640,bwUniCluster job 20474365,--verbose
75,2022-02-08 09:42:03,i10pc138,313b2fac4ce413bcb2f91365298886912750bb1b,256,static accumulation buffer,--verbose --workload=0
74,2022-02-02 10:15:57,i10pc138,812f1f2ffdd2655ddd09838ca1e5ca06c1a9424f,256,strong scaling,
73,2022-02-02 10:13:58,i10pc138,812f1f2ffdd2655ddd09838ca1e5ca06c1a9424f,256,strong scaling,
72,2022-01-26 22:19:25,uc2n041.localdomain,5f08b0aa82371bb8cfd33a8bcf74f5b7820e1b80,1280,bwUniCluster job 20413365 INCOMPLETE,--verbose
71,2022-01-26 15:29:05,i10pc138,fb02e8cae63e23e8445bbe7abad940a3cbdaa43f,32,low core count (64),--verbose
70,2022-01-26 15:13:37,i10pc138,8cd9287c68401e48d103fdee2334342d65a438cc,256,workload loop iterations = 1,--verbose --workload=1
69,2022-01-26 15:12:02,i10pc138,8cd9287c68401e48d103fdee2334342d65a438cc,256,workload loop iterations = 0,--verbose
68,2022-01-24 16:06:58,uc2n067.localdomain,795ec9d5161d57e1170bec08444dd23fe6bd71b5,640,bwUniCluster,--verbose
67,2022-01-24 15:43:09,uc2n087.localdomain,795ec9d5161d57e1170bec08444dd23fe6bd71b5,640,bwUniCluster,--verbose


In [6]:
# Selects which rows of `results` the cells below read (they filter on
# `run_id = ?`). Presumably an `id` from the `runs` table listed above —
# TODO confirm against the schema.
run_id = 70

In [7]:
%matplotlib notebook
import sqlite3
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import EngFormatter
from scipy.stats import linregress

def fetch_data(mode):
    """Run the per-mode results query on the notebook's shared cursor.

    Executes a ``SELECT`` on the global ``cur`` that reads ``n_summands``,
    ``datafile``, ``time_ns``, ``stddev`` and ``repetitions`` from
    ``results`` for the given ``mode`` and the global ``run_id``, ordered by
    ``n_summands``.

    Parameters
    ----------
    mode : str
        Value compared against the ``mode`` column.

    Returns
    -------
    None
        The rows stay on ``cur``; read them with ``cur.fetchall()``.

    Notes
    -----
    ``mode`` and ``run_id`` are bound as ``?`` parameters, so neither value
    is ever spliced into the SQL text.
    """
    # Adjacent literals are concatenated verbatim, so every fragment carries
    # its own trailing space (the old text read "mode = ?AND run_id = ?").
    cur.execute(
        "SELECT n_summands, datafile, time_ns, stddev, repetitions "
        "FROM results WHERE mode = ? AND run_id = ? "
        "ORDER BY n_summands",
        (mode, run_id),
    )

def datasets():
    """Open a read-only connection to ``results.db`` and close it again.

    NOTE(review): this looks like an unfinished stub — it creates a cursor
    but runs no query and returns nothing (implicit ``None``). Confirm
    whether it is still needed.
    """
    from contextlib import closing

    # Using a sqlite3 connection directly as a context manager only commits
    # or rolls back the transaction; it does not close the connection.
    # ``closing`` guarantees the handle is released when the block exits.
    with closing(sqlite3.connect('file:results.db?mode=ro', uri=True)) as con:
        cur = con.cursor()  # local name; does not touch the notebook-level `cur`

def xye(data):
    """Split result rows into x, y and error lists for plotting.

    Rows are indexed by position: element 0 is taken unchanged as the x
    value, elements 2 and 3 are each divided by 1e9. With the column order
    selected by ``fetch_data`` these are ``n_summands``, ``time_ns`` and
    ``stddev``, i.e. the two time columns go from nanoseconds to seconds.

    Returns
    -------
    tuple of three lists
        ``(x, y, e)``, each with one entry per row.
    """
    summands = [row[0] for row in data]
    seconds = [row[2] / 1e9 for row in data]
    spread = [row[3] / 1e9 for row in data]
    return summands, seconds, spread

# Plot configuration per benchmark mode. The key is the value passed to
# fetch_data(); per entry:
#   fmt     - matplotlib format string handed to ax.errorbar
#   label   - legend label of the series
#   include - modes with a false value are skipped by the plotting loop
# Insertion order is the order in which the modes are plotted.
settings = dict(
    allreduce=dict(fmt="bo", label="MPI_Allreduce", include=True),
    tree=dict(fmt="ro", label="Tree Reduce", include=True),
    reproblas=dict(fmt="co", label="ReproBLAS", include=True),
    baseline=dict(fmt="go", label="Sequential Baseline", include=False),
)

# When true, a linear regression is fitted and drawn for every plotted mode.
linreg = False

# One figure holding a single Axes for the runtime-vs-size plot.
f, ax = plt.subplots(nrows=1)

# y axis: time values, shown in engineering notation with unit "s".
formatter0 = EngFormatter(unit='s', places=2)
ax.set_ylabel('Accumulate Time')
ax.yaxis.set_major_formatter(formatter0)

# x axis: plain counts, shown in engineering notation without a unit.
formatter1 = EngFormatter(places=2)
ax.set_xlabel('number of summands')
ax.xaxis.set_major_formatter(formatter1)

# For every enabled mode: query its rows, show them as a table and add an
# error-bar series (plus, optionally, a fitted line) to `ax`.
for mode, v in settings.items():
    if not v["include"]:
        continue

    # fetch_data() leaves its result set on the shared cursor `cur`.
    fetch_data(mode)
    data = cur.fetchall()
    display(HTML(f"<h4>{mode}</h4>"))
    display_result(cur, data)

    ar_x, ar_y, ar_error = xye(data)
    ax.errorbar(ar_x, ar_y, yerr=ar_error, fmt=v["fmt"], label=v["label"], capsize=4.0)

    # Thin out the x ticks: walk the x values in query order and keep one
    # only if it lies more than 2e6 beyond the previously kept tick, so
    # labels of closely spaced sizes do not overlap.
    ticks = []
    for x in ar_x:
        if not ticks or x - ticks[-1] > 2e6:
            ticks.append(x)
    # A mode without rows must not wipe the ticks set for an earlier mode.
    if ticks:
        ax.xaxis.set_ticks(ticks)

    if linreg:
        # Keep the fit on the settings entry; later cells read it from there.
        settings[mode]["linreg"] = linregress(ar_x, ar_y)
        gradient, intercept, r_value, p_value, std_err = settings[mode]["linreg"]
        x1 = np.linspace(min(ar_x), max(ar_x), 50)
        y1 = gradient * x1 + intercept
        # Draw on `ax` explicitly: plt.plot() targets pyplot's *current*
        # axes, which is not guaranteed to be the one holding the error bars.
        # v["fmt"][0] is the colour letter of the mode's format string.
        ax.plot(x1, y1, v["fmt"][0])

ax.legend(loc='upper left')

ax.set_xlim(left=0)
#ax.set_ylim(bottom=0)

# Write the file before showing, and through the Figure handle: once
# plt.show() has run, pyplot's "current figure" may be closed or replaced
# (depending on the backend), in which case plt.savefig() writes an empty
# canvas instead of this plot.
f.savefig(fname="figure.svg")
plt.show()



<IPython.core.display.Javascript object>

n_summands,datafile,time_ns,stddev,repetitions
460,data/354.binpsllh,19214.0,29627.9,100
767,data/multi100.binpsllh,21788.1,28203.4,100
898,data/prim.binpsllh,19347.7,22192.0,100
1602,data/fusob.binpsllh,16322.5,19938.9,100
239763,data/dna_rokasD4.binpsllh,23311.2,21516.0,100
504850,data/aa_rokasA8.binpsllh,28802.4,21391.0,100
1327505,data/dna_rokasD1.binpsllh,47451.3,20072.9,100
1806035,data/aa_rokasA4.binpsllh,59622.3,25752.4,100
3011099,data/dna_PeteD8.binpsllh,83797.8,24114.8,100
21410970,data/dna_rokasD7.binpsllh,566374.0,31909.5,100


n_summands,datafile,time_ns,stddev,repetitions
460,data/354.binpsllh,15569.4,10202.1,100
767,data/multi100.binpsllh,17930.9,9931.46,100
898,data/prim.binpsllh,21685.6,14025.0,100
1602,data/fusob.binpsllh,20697.5,13657.7,100
239763,data/dna_rokasD4.binpsllh,28948.9,11530.0,100
504850,data/aa_rokasA8.binpsllh,35039.0,8509.62,100
1327505,data/dna_rokasD1.binpsllh,53575.5,11146.1,100
1806035,data/aa_rokasA4.binpsllh,63081.9,11217.8,100
3011099,data/dna_PeteD8.binpsllh,87245.3,13649.4,100
21410970,data/dna_rokasD7.binpsllh,594597.0,15749.3,100


n_summands,datafile,time_ns,stddev,repetitions
460,data/354.binpsllh,13730.7,11303.1,100
767,data/multi100.binpsllh,14554.5,11697.7,100
898,data/prim.binpsllh,14729.7,13225.3,100
1602,data/fusob.binpsllh,13548.4,11634.3,100
239763,data/dna_rokasD4.binpsllh,20413.1,13742.4,100
504850,data/aa_rokasA8.binpsllh,26978.6,16016.0,100
1327505,data/dna_rokasD1.binpsllh,45239.9,14937.9,100
1806035,data/aa_rokasA4.binpsllh,56350.8,19678.1,100
3011099,data/dna_PeteD8.binpsllh,83197.5,25480.3,100
21410970,data/dna_rokasD7.binpsllh,590265.0,19607.1,100


In [8]:
def display_table2(data, header=None):
    """Render rows (and an optional header row) as an HTML table.

    Parameters
    ----------
    data : iterable of iterables
        One inner iterable per table row; every element becomes a ``<td>``.
    header : sequence, optional
        Column headings, rendered as ``<th>`` cells in a leading row. The
        row is omitted when ``header`` is ``None`` or empty.

    Returns
    -------
    None
        The table is handed to ``display(HTML(...))``.
    """
    from html import escape

    parts = ["<table>"]
    if header is not None and len(header) > 0:
        parts.append("<tr>")
        parts.extend("<th>" + escape(str(h)) + "</th>" for h in header)
        # Close the header row; leaving it open produces malformed markup.
        parts.append("</tr>")
    for row in data:
        parts.append("<tr>")
        # Escape cell text so values containing <, > or & render literally.
        parts.extend("<td>%s</td>" % escape(str(field)) for field in row)
        parts.append("</tr>")
    parts.append("</table>")
    display(HTML("".join(parts)))

if linreg:
    header = ["Mode", "Gradient", "R"]
    # Only modes with a true "include" flag carry a "linreg" entry, so filter
    # on that flag instead of emitting an empty placeholder row for the
    # others. Index 0 of the regression result is the slope and index 2 the
    # correlation coefficient.
    t = [
        (mode, cfg["linreg"][0], cfg["linreg"][2])
        for mode, cfg in settings.items()
        if cfg["include"]
    ]
    display_table2(t, header=header)
    

In [9]:
if linreg:
    # Ratio of the fitted slopes (index 0 of each stored regression result):
    # how much faster the tree reduction's time grows per summand than the
    # other mode's.
    slowdown = settings["tree"]["linreg"][0] / settings["allreduce"]["linreg"][0]
    slodown2 = settings["tree"]["linreg"][0] / settings["reproblas"]["linreg"][0]
    # The headings are now closed with </h3>; they used to be left open.
    display(HTML(f"<h3>Tree Reduce slowdown compared to Allreduce: {round(slowdown,2)}</h3>"))
    display(HTML(f"<h3>Tree Reduce slowdown compared to ReproBLAS: {round(slodown2,2)}</h3>"))

## Violin plot

In [10]:
from statistics import median

def fetch_durations(run_id):
    """Collect the individual timings of a run, grouped by data file.

    Reads the ``results`` rows of ``run_id`` through the global cursor
    ``cur`` (ordered by ``n_summands``, ``datafile``, ``mode``) and, for each
    row whose mode is not ``"baseline"``, loads ``time_ns / 1e9`` of all
    matching ``durations`` rows.

    Returns
    -------
    list of list of dict
        One inner list per run of consecutive rows sharing a ``datafile``.
        Each dict has the keys ``mode``, ``datafile``, ``n`` (n_summands),
        ``m`` (ranks) and ``durations`` (list of the converted timings).
        If no row qualifies the result is ``[[]]``.
    """
    # Materialise the result rows first: the same cursor is reused for the
    # per-result queries inside the loop.
    result_rows = cur.execute(
        "SELECT id, mode, n_summands, datafile, ranks FROM results"
        " WHERE run_id = ? ORDER BY n_summands, datafile, mode",
        (run_id,),
    ).fetchall()

    groups = [[]]
    last_datafile = None  # data file of the most recent non-baseline row

    for result_id, mode, n_summands, datafile, ranks in result_rows:
        if mode != "baseline":
            starts_new_group = last_datafile is not None and last_datafile != datafile
            if starts_new_group:
                groups.append([])

            cur.execute("SELECT time_ns / 1e9 FROM durations WHERE result_id = ?", (result_id,))
            timings = [record[0] for record in cur.fetchall()]

            groups[-1].append(dict(
                mode=mode,
                datafile=datafile,
                n=n_summands,
                m=ranks,
                durations=timings,
            ))
            last_datafile = datafile

    return groups

# One violin plot per data file (stacked vertically), with one violin per
# mode showing the distribution of that mode's individual timings.
plot_data = fetch_durations(run_id)
rows = len(plot_data)
cols = max(len(group) for group in plot_data)  # largest number of modes per data file
formatter0 = EngFormatter(unit='s', places=2)

f, axs = plt.subplots(nrows=rows, figsize=(7, 4 * rows))

colorpalette = ["#034670ff", "#2f779dff", "#887006ff", "#ac360bff"]

for i, dataset in enumerate(plot_data):
    # plt.subplots returns a bare Axes (not an array) for a single row.
    ax = axs[i] if rows > 1 else axs

    # fetch_durations() yields an empty group when the run has no usable
    # rows; there is nothing to draw or to label in that case.
    if not dataset:
        continue

    mode_durations = [entry["durations"] for entry in dataset]

    ax.set_ylabel("Time")
    ax.yaxis.set_major_formatter(formatter0)
    ax.xaxis.can_zoom = False

    ax.get_xaxis().set_visible(True)

    # One tick per mode; violinplot places its bodies at positions 1..k.
    ax.get_xaxis().set_ticks(range(1, len(dataset) + 1))

    # Compute each median once and reuse it for the tick labels. The labels
    # are built as a list because set_ticklabels expects a sequence.
    medians = [median(durations) for durations in mode_durations]
    labels = [
        entry["mode"] + f" ({formatter0.format_eng(mid)}s)"
        for entry, mid in zip(dataset, medians)
    ]
    ax.get_xaxis().set_ticklabels(labels)

    # Title: file name without its first five characters (the "data/"
    # prefix in the stored paths), summand count with spaces as thousands
    # separators, and the rank count.
    n = format(dataset[0]["n"], ",d").replace(",", " ")
    m = dataset[0]["m"]
    filename = dataset[0]["datafile"][5:]
    ax.set_title(f"{filename}, N={n}, p={m}")

    violins = ax.violinplot(mode_durations, showmedians=True, widths=0.5)
    # zip stops at the shorter sequence, so at most four bodies are coloured.
    for body, color in zip(violins["bodies"], colorpalette):
        body.set_color(color)

# Scale the outer margins with the number of stacked plots.
margin_y = 0.08 / rows
plt.subplots_adjust(left=0.15, bottom=0.0 + margin_y, right=0.98, top=1.0 - margin_y)
# Save through the Figure handle so the file always contains this figure.
f.savefig("violin_runtime.svg")

<IPython.core.display.Javascript object>

# Slowdown compared to ReproBLAS

In [11]:
# Scatter plot: for each data file, median tree-reduction time divided by
# median ReproBLAS time, plotted against the number of summands.
plot_data = fetch_durations(run_id)
formatter0 = EngFormatter(places=2)

X = []
slowdowns = []

# Rank count, taken from the first entry of the first group.
n_cores = plot_data[0][0]['m']
for p in plot_data:
    # First entry of each mode in this group, or None when it is missing.
    tree_data = next((entry for entry in p if entry['mode'] == 'tree'), None)
    reproblas_data = next((entry for entry in p if entry['mode'] == 'reproblas'), None)
    if tree_data is None or reproblas_data is None:
        # Without both modes there is no ratio to plot for this data file.
        continue

    n = p[0]['n']
    tree_median = median(tree_data['durations'])
    reproblas_median = median(reproblas_data['durations'])

    slowdown = tree_median / reproblas_median
    X.append(n)
    slowdowns.append(slowdown)

f, ax = plt.subplots(1)

ax.set_title(f"Slowdown of Tree compared to ReproBLAS with p={n_cores} cores")
# Axes.set_ylabel / set_xlabel set the visible axis captions.
# Axis.set_label (used before) only sets the artist's legend label, so the
# axes stayed unlabelled.
ax.set_ylabel("Slowdown")
ax.set_xlabel("number of summands")
ax.xaxis.set_major_formatter(formatter0)

ax.scatter(X, slowdowns)

plt.show()

<IPython.core.display.Javascript object>