In [1]:
%matplotlib

Using matplotlib backend: TkAgg


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import itertools

### Plot

In [133]:
# Peak performance
# Per-platform hardware limits used to normalise every measurement below.
# "peak_flop" is the theoretical rate, "peak_flop_linpack" the rate achieved by
# LINPACK, and "peak_bw" the memory bandwidth (presumably FLOP/s and bytes/s --
# later cells divide these by 1e9 before plotting; confirm against the source
# of the measurements).
cpu = {
    "haswell": {
        # clock x cores x simd x fma x port
        "peak_flop": 2.6e9 * 8 * 4 * 2 * 2,
        "peak_flop_linpack": 262.5e9,  # 1 core = 37.4
        "peak_bw": 38.48e9,
    },
    "skylake": {
        # clock x cores x simd x fma x port
        "peak_flop": 2.1e9 * 16 * 8 * 2 * 2,
        "peak_flop_linpack": 678.8e9,  # 1 core = 62.8
        "peak_bw": 36.6e9,
    },
}

# Fraction of the theoretical peak that LINPACK reaches, one line per platform
# (dict insertion order: haswell, then skylake).
print(
    *(spec["peak_flop_linpack"] / spec["peak_flop"] for spec in cpu.values()),
    sep="\n"
)

0.7887620192307693
0.6313244047619048


In [134]:
# Show the evaluated peak table so the products in the "peak_flop" entries can be checked by eye.
cpu

{'haswell': {'peak_flop': 332800000000.0,
  'peak_flop_linpack': 262500000000.0,
  'peak_bw': 38480000000.0},
 'skylake': {'peak_flop': 1075200000000.0,
  'peak_flop_linpack': 678800000000.0,
  'peak_bw': 36600000000.0}}

In [135]:
def compute(df, platform, max_rows=6):
    """Derive performance metrics from one raw benchmark table.

    The input frame is left untouched: the metrics are added to an independent
    copy of its leading rows, so callers can attach further columns to the
    result without writing into a slice of the original frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw measurements. Must provide the columns ``add``, ``sub``, ``mul``,
        ``div``, ``byte``, ``time``, ``mem``, ``cell`` and ``dof``.
    platform : str
        Key into the module-level ``cpu`` table; its ``peak_bw`` and
        ``peak_flop`` entries are used for normalisation.
    max_rows : int, optional
        Number of leading rows to keep. Defaults to 6 (the notebook's tables
        iterate over ``p`` = 1..6).

    Returns
    -------
    pandas.DataFrame
        Copy of the first ``max_rows`` rows with these columns appended, in
        order: ``flop``, ``bw / peak_l``, ``bw / peak_u``, ``flop / s``,
        ``flop / peak``, ``ai``, ``time / cell``, ``time / dof``.

    Raises
    ------
    KeyError
        If ``platform`` is not in ``cpu`` or a required column is missing.
    """
    peaks = cpu[platform]

    # Truncate first, then copy: the metrics are purely row-wise, so this gives
    # the same values as computing on the full frame and slicing afterwards.
    out = df.iloc[:max_rows].copy()

    # The operation counts are multiplied by `cell` below, so they appear to be
    # per-cell counts -- TODO confirm against the benchmark writer.
    out["flop"] = out["add"] + out["sub"] + out["mul"] + out["div"]
    total_flop = out["flop"] * out["cell"]

    # Two bandwidth estimates relative to peak: one from the `byte` column, one
    # from `mem` values per cell times 8 (presumably bytes per double -- TODO
    # confirm).
    out["bw / peak_l"] = out["byte"] / out["time"] / peaks["peak_bw"]
    out["bw / peak_u"] = out["mem"] * out["cell"] * 8 / out["time"] / peaks["peak_bw"]

    out["flop / s"] = total_flop / out["time"]
    out["flop / peak"] = out["flop / s"] / peaks["peak_flop"]
    out["ai"] = total_flop / out["byte"]
    out["time / cell"] = out["time"] / out["cell"]
    out["time / dof"] = out["time"] / out["dof"]
    return out

In [155]:
# forms by meshes
# Draws a grid of subplots (one row per form, one column per mesh) showing the
# achieved fraction of peak FLOP/s against polynomial degree for each compiler
# plus a baseline run, then saves the figure as "<platform>-<vec>.pdf".
# Relies on `cpu` and `compute` from earlier cells and on CSV files under "csv/".
plt.close('all')
plt.figure(figsize=(12, 6))

# Full list of forms, kept for reference; only the two forms below are plotted.
# forms = ["mass", "helmholtz", "laplacian", "elasticity", "hyperelasticity"]
forms = ["helmholtz", "elasticity"]
meshes = ["tri", "quad", "tet", "hex"]
platform = "skylake"  # haswell or skylake
hyperthreading = True
vec = "omp"  # omp or ve

# `simd` and `threads` are strings because they are only used as components
# of the CSV file names below.
if platform == "haswell":
    simd = "4"
    if hyperthreading:
        threads = "16"
    else:
        threads = "8"
else:
    simd = "8"
    threads = "32" if hyperthreading else "16"
    
compilers = ["icc", "gcc", "clang"]
x = "p"
y = "flop / peak"
# Upper limit of the right-hand axis: with this limit, 1.0 on the right axis
# lines up with the LINPACK fraction of peak on the left axis (which spans 0..1).
linpack_scale = cpu[platform]['peak_flop'] / cpu[platform]['peak_flop_linpack']

_color = ("red", "blue", "goldenrod", "black")

for form_id, form in enumerate(forms):
    for mesh_id, mesh in enumerate(meshes):
        dfs = []
        # The baseline always comes from the simd="1" / "omp" / "gcc" run,
        # independent of the `vec` and compiler settings above.
        filename = "_".join([platform, form, mesh, threads, "1", "omp", "gcc"]) + ".csv"
        base_df = pd.read_csv("csv/" + filename)
        base_df = compute(base_df, platform)
        for compiler in compilers:
            filename = "_".join([platform, form, mesh, threads, simd, vec, compiler]) + ".csv"
            df = pd.read_csv("csv/" + filename)
            df = compute(df, platform)
            # Stored on the frame but not plotted in this cell.
            df["speed up"] = base_df["time"] / df["time"]
            dfs.append(df)
        
        # Baseline goes last so that `dfs` lines up with `names` below.
        dfs.append(base_df)
        # Subplot indices are 1-based and run row by row.
        ax1 = plt.subplot(len(forms), len(meshes), mesh_id + form_id*len(meshes) + 1)
        # Fresh style cycles per subplot so every panel uses the same
        # marker/colour/linestyle for the same series.
        marker = itertools.cycle(('o', 's', '*', '^'))
        color = itertools.cycle(_color)
        linestyle = itertools.cycle(('-', '--', '-.', ':',))
        names = compilers + ["baseline"]
        plots = []
        for df, n in zip(dfs, names):
            plot, = ax1.plot(df[x], df[y], marker=next(marker), color=next(color), linestyle=next(linestyle),
                            label=n, linewidth=2, markersize=5)
            # Legend handles are collected from the last (bottom-right) panel only.
            if form_id == len(forms) - 1 and mesh_id == len(meshes) - 1:
                plots.append(plot)

        ax1.set_xticks(dfs[0][x])
        ax1.set_ylim(bottom=0, top=1.0)
        ax1.set_yticks([0.25, 0.5, 0.75, 1.0])
        ax1.set_title(form + " - " + mesh)
        # Horizontal reference line at the LINPACK fraction of peak, drawn from x=1 to x=6.
        plot = ax1.hlines(cpu[platform]["peak_flop_linpack"]/cpu[platform]["peak_flop"], 1, 6, 
                           color="grey", linestyle=":")
        
        # Secondary y-axis expressing the same data relative to LINPACK instead of peak.
        ax2 = ax1.twinx()
        ax2.set_ylim(bottom=0, top=linpack_scale)
        ax2.set_yticks([0.25, 0.5, 0.75, 1])
        # The LINPACK line handle is appended after the series handles, matching
        # the label order passed to figlegend at the end of the cell.
        if form_id == len(forms) - 1 and mesh_id == len(meshes) - 1:
            plots.append(plot)
        
        # Left-axis label and tick labels only on the first column.
        if mesh_id == 0:
            ax1.set_ylabel("FLOP/s / Peak FLOP/s")
        else:
            plt.setp(ax1.get_yticklabels(), visible=False)
        
        # Right-axis label and tick labels only on the last column.
        if mesh_id == len(meshes) - 1:
            ax2.set_ylabel("FLOP/s / LINPACK FLOP/s")
        else:
            plt.setp(ax2.get_yticklabels(), visible=False)
            
        # x-axis label and tick labels only on the bottom row.
        if form_id == len(forms) - 1:
            ax1.set_xlabel("Polynomial degree")
        else:
            plt.setp(ax1.get_xticklabels(), visible=False)

# One shared legend for the whole figure; the labels are hard-coded and must
# stay in the same order as `compilers` + baseline + LINPACK line.
plt.figlegend(plots, ["ICC", "GCC", "CLANG", "baseline", "LINPACK"], ncol=5,
              loc = "center", bbox_to_anchor=[0.5, 0.04], frameon=True)

# tight_layout first, then reserve room at the bottom for the figure legend.
plt.tight_layout()
plt.subplots_adjust(bottom=0.15)
plt.savefig(platform + "-" + vec + ".pdf", format="pdf")

In [98]:
# roofline
# Draws one log-log roofline panel per SIMD setting of the chosen platform:
# the bandwidth/compute roof, the LINPACK ceiling, and one point series per
# (form, mesh) pair, then saves the figure as "roofline-<platform>.pdf".
# Relies on `cpu` and `compute` from earlier cells and on CSV files under "csv/".
plt.close('all')
plt.figure(figsize=(16, 5))

platform = "haswell"  # haswell or skylake
forms = ["mass", "helmholtz", "laplacian", "elasticity", "hyperelasticity"]
meshes = ["tri", "quad", "tet", "hex"]
compiler = "gcc"
vec = "ve"

# Per-platform figure configuration: which SIMD settings to show (one panel
# each), the process-count token used in the CSV file names, and axis limits
# and ticks. The y values are in the same 1e9-scaled units as the plotted data.
setting = {
    "haswell": {
        "simds": ["1", "4"],
        "proc": "16",
        "yticks": [5, 10, 20, 50, 100, 200, 300, 500],
        "ytop": 500,
        "ybottom": 3,
        "xleft": 0.1,
    },
    "skylake": {
        "simds": ["1",  "8"],
        "proc": "32",
        "yticks": [10, 20, 50, 100, 200, 500, 1000, 2000],
        "ytop": 2000,
        "ybottom": 10,
        "xleft": 0.15,
    }
}

x = "ai"
y = "flop / s"

cfg = setting[platform]
simds = cfg["simds"]

# Hardware limits scaled by 1e9; these do not change between panels.
rate = cpu[platform]['peak_bw'] / 1e9
peak = cpu[platform]['peak_flop'] / 1e9
peak_linpack = cpu[platform]['peak_flop_linpack'] / 1e9

# Legend handles and labels, taken from the last panel only so that every
# series appears exactly once in the shared legends.
plots = []
names = []
linpack = []

for idx, simd in enumerate(simds):
    is_legend_panel = idx == len(simds) - 1

    ax = plt.subplot(1, len(simds), idx + 1)
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_yticks(cfg["yticks"])
    # Plain numbers instead of powers of ten on the log-scaled y axis.
    ax.get_yaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())

    # Roof: bandwidth-bound slope up to the ridge point (peak / rate), then
    # flat at the theoretical peak.
    ax.plot([0.1, peak / rate, 3000],
            [rate*0.1, peak, peak], linewidth=2)
    # LINPACK ceiling, drawn from where it meets the bandwidth slope.
    plot, = ax.plot([peak_linpack / rate, 3000],
                    [peak_linpack, peak_linpack],
                    linestyle=':', color='grey')
    if is_legend_panel:
        linpack = [plot]

    # Marker encodes the form, colour encodes the mesh: the colour cycle has
    # four entries and advances once per mesh, so it realigns for every form.
    markers = itertools.cycle(('o', 's', '*', '^', 'v'))
    colors = itertools.cycle(("red", "blue", "goldenrod", "green"))
    for form in forms:
        marker = next(markers)
        for mesh in meshes:
            color = next(colors)
            label = form + " - " + mesh
            filename = "_".join([platform, form, mesh, cfg["proc"], simd, vec, compiler]) + ".csv"
            df = pd.read_csv("csv/" + filename)
            df = compute(df, platform)
            plot, = ax.plot(df[x], df[y]/1e9, label=label, markersize=7, marker=marker, color=color,
                            linestyle='None')
            if is_legend_panel:
                plots.append(plot)
                names.append(label)

    ax.set_ylim(bottom=cfg["ybottom"], top=cfg["ytop"])
    ax.set_xlim(left=cfg["xleft"], right=3000)
    ax.set_title(platform.capitalize() + (" baseline" if simd == "1" else " cross-element vectorization"))
    # The plotted values are FLOP/s divided by 1e9, i.e. GFLOP/s.
    ax.set_ylabel("GFLOP/s")
    ax.set_xlabel("Arithmetic intensity")

# Leave room below the panels for the two figure-level legends.
plt.subplots_adjust(bottom=0.3)
lgd = plt.figlegend(plots, names, ncol=5, 
                    loc = "center", bbox_to_anchor=[0.35, 0.1], frameon=False)
plt.figlegend(linpack, ["LINPACK"], loc = "center", bbox_to_anchor=[0.7, 0.1], frameon=False)
plt.savefig("roofline-" + platform + ".pdf", format="pdf", bbox_extra_artists=(lgd,), bbox_inches='tight')

In [105]:
# populate table in paper
# Collects, per (form, mesh, polynomial degree), the formatted values that the
# next cell renders as LaTeX rows. Relies on `compute` from an earlier cell
# and on CSV files under "csv/".
from collections import defaultdict

forms = ["mass", "helmholtz", "laplacian", "elasticity", "hyperelasticity"]
meshes = ["tri", "quad", "tet", "hex"]
compiler = "gcc"
vec = "ve"

# File-name tokens for the vectorised run on each platform.
setting = {
    "haswell": {"simd": "4", "proc": "16"},
    "skylake": {"simd": "8", "proc": "32"},
}

# (form, mesh, p) -> {column name -> already formatted string}
result = defaultdict(dict)

for form, mesh, platform in itertools.product(forms, meshes, ["haswell", "skylake"]):
    proc = setting[platform]["proc"]
    speedup_col = "speed up " + platform

    # baseline
    baseline_csv = "_".join([platform, form, mesh, proc, "1", vec, compiler]) + ".csv"
    df = pd.read_csv("csv/" + baseline_csv)
    df = compute(df, platform)

    # vectorised run of the same configuration
    vectorised_csv = "_".join([platform, form, mesh, proc, setting[platform]["simd"], vec, compiler]) + ".csv"
    df_speed = pd.read_csv("csv/" + vectorised_csv)
    df_speed = compute(df_speed, platform)
    df[speedup_col] = df["time"] / df_speed["time"]

    for _, row in df.iterrows():
        entry = result[(form, mesh, int(row["p"]))]
        # These three keys are rewritten for every platform, so the values
        # from the last platform processed are the ones that remain.
        entry['ai'] = "{0:.1f}".format(row["ai"])
        entry['extend_dof'] = "{0:d}".format(int(row["extend_dof"]))
        entry['extend_quad'] = "{0:d}".format(int(row["extend_quad"]))
        entry[speedup_col] = "{0:.1f}".format(row[speedup_col])

In [106]:
# Render `result` as LaTeX table rows: one row per (form, degree) holding five
# values for each mesh, with an \hline after each form.
_columns = ('ai', 'extend_dof', 'extend_quad', 'speed up haswell', 'speed up skylake')

_pieces = []
for form in forms:
    for degree in range(1, 7):
        # The leading empty cell leaves the first table column blank.
        cells = ["", str(degree)]
        for mesh in meshes:
            entry = result[(form, mesh, degree)]
            cells.extend(entry[key] for key in _columns)
        _pieces.append(" & ".join(cells))
        _pieces.append("\\\\\n")
    _pieces.append("\\hline\n")

string = "".join(_pieces)
print(string)

 & 1 & 1.2 & 3 & 3 & 1.0 & 1.0 & 4.7 & 2 & 3 & 1.1 & 1.5 & 2.7 & 4 & 4 & 1.2 & 0.7 & 16.9 & 2 & 3 & 1.8 & 2.8\\
 & 2 & 1.7 & 6 & 6 & 1.3 & 1.0 & 3.9 & 3 & 4 & 0.8 & 1.0 & 5.9 & 10 & 14 & 1.7 & 2.4 & 10.8 & 3 & 4 & 1.1 & 1.5\\
 & 3 & 3.0 & 10 & 12 & 2.0 & 1.3 & 3.9 & 4 & 5 & 0.8 & 1.0 & 8.7 & 20 & 24 & 0.9 & 1.8 & 8.5 & 4 & 5 & 1.8 & 2.5\\
 & 4 & 5.6 & 15 & 25 & 2.4 & 2.6 & 3.9 & 5 & 6 & 2.2 & 1.9 & 39.2 & 35 & 125 & 1.0 & 1.6 & 7.4 & 5 & 6 & 2.1 & 2.8\\
 & 5 & 7.5 & 21 & 36 & 1.1 & 2.0 & 3.9 & 6 & 7 & 2.3 & 1.5 & 55.9 & 56 & 216 & 0.7 & 1.0 & 7.0 & 6 & 7 & 2.0 & 2.7\\
 & 6 & 9.7 & 28 & 49 & 0.8 & 1.6 & 4.1 & 7 & 8 & 2.5 & 1.9 & 81.2 & 84 & 343 & 1.1 & 1.9 & 6.9 & 7 & 8 & 2.2 & 2.7\\
\hline
 & 1 & 1.8 & 3 & 3 & 1.2 & 1.0 & 10.7 & 2 & 3 & 2.0 & 2.9 & 3.9 & 4 & 4 & 1.6 & 1.6 & 45.5 & 2 & 3 & 2.5 & 3.5\\
 & 2 & 5.7 & 6 & 6 & 2.2 & 1.7 & 10.6 & 3 & 4 & 1.0 & 1.3 & 27.3 & 10 & 14 & 2.3 & 5.5 & 34.9 & 3 & 4 & 1.8 & 3.3\\
 & 3 & 9.6 & 10 & 12 & 2.3 & 5.6 & 10.5 & 4 & 5 & 1.3 & 2.1 & 37.5 & 20 