In [None]:
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import MO, TU, WE, TH, FR, SA, SU
from matplotlib.gridspec import GridSpec
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.ticker as ticker
import matplotlib
matplotlib.rcParams.update(matplotlib.rcParamsDefault)
import seaborn as sns

### Which setting to use? MPI-only vs hybrid (8 processes per node, 32 threads per process)
Only the C++ implementation of the general matmul algorithm is used to answer this question.

In [None]:
# Stacked-bar comparison of the C++ general matmul under the two launch
# settings: every node count gets one bar per setting, split by timing metric.
csv_file = 'matmul-results.csv'  # Replace with your actual CSV file path
data = pd.read_csv(csv_file)
# Only the general matmul rows of the C++ implementation are plotted here.
data = data[(data["alg"] == "matmul") & (data["impl"] == "cpp")]

width = 0.3  # Bar width

xticks = np.array([1, 2, 3, 4])
nnodes = np.array([1, 2, 4, 8])
settings = ["mpionly", "hybrid"]
# Number of processes launched on each node under each setting.
procs_per_node = {"mpionly": 128, "hybrid": 8}

metrics = ["gather_a_time", "gen_b_time", "gather_b_time", "local_multiply_time", "scatter_reduce_time"]
colors = {
    "gather_a_time": "tab:blue",
    "gen_b_time": "tab:brown",
    "gather_b_time": "tab:orange",
    "local_multiply_time": "tab:green",
    "scatter_reduce_time": "tab:red"
}

fig = plt.figure(figsize=(6, 4))
gs = GridSpec(nrows=1, ncols=1)
ax = fig.add_subplot(gs[0, 0])

for pos, setting in enumerate(settings):
    # Rows of the two settings are told apart only by their total process count.
    nproc_per_node = procs_per_node[setting]
    nprocs = nnodes * nproc_per_node

    # x coordinates of this setting's bars inside every tick group.
    x = xticks - width / 2 + (pos * width)

    # Label a single setting so each metric appears only once in the legend.
    label_metrics = (setting == "mpionly")

    # Running top of every stack; each setting starts again from zero.
    bottom = np.zeros(xticks.shape[0])
    for metric in metrics:
        # Value of the first matching row for every process count.
        y = np.array([data[data["nproc"] == nproc][metric].to_numpy()[0] for nproc in nprocs])
        label_kwargs = {"label": metric} if label_metrics else {}
        ax.bar(x, y, width=width, bottom=bottom, edgecolor="black", linewidth=0.5,
               color=colors[metric], **label_kwargs)
        bottom += y  # The next metric is stacked on top of this one.

ax.legend()
ax.set_title("matmul (cpp): mpionly vs hybrid")
ax.set_xticks(xticks)
ax.set_xticklabels(nnodes)

ax.minorticks_on()
ax.grid(axis='y', which='both')
ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')  # Major gridlines
ax.grid(which='minor', linestyle=':', linewidth='0.5', color='gray')  # Minor gridlines

ax.set_xlabel("number of nodes (Perlmutter CPU)")
ax.set_ylabel("sec")

plt.tight_layout()
plt.savefig("matmul-cpp-mpionly-v-hybrid.pdf")


### For the hybrid setting, how do the two general matmul implementations compare? C++ vs Python

In [None]:
# Stacked-bar comparison of the two general matmul implementations (cpp vs
# python) with 8 processes per node, one bar group per node count.
csv_file = 'matmul-results.csv'  # Replace with your actual CSV file path
data = pd.read_csv(csv_file)
data = data[data["alg"] == "matmul"]

width = 0.3  # Bar width

nproc_per_node = 8 # Hybrid parallelism
xticks = np.array([1, 2, 3, 4])
nnodes = np.array([1, 2, 4, 8])
nprocs = nnodes * nproc_per_node

metrics = ["gather_a_time", "gen_b_time", "gather_b_time", "local_multiply_time", "scatter_reduce_time"]
colors = {
    "gather_a_time": "tab:blue",
    "gen_b_time": "tab:brown",
    "gather_b_time": "tab:orange",
    "local_multiply_time": "tab:green",
    "scatter_reduce_time": "tab:red"
}

fig = plt.figure(figsize=(6, 4))
gs = GridSpec(nrows=1, ncols=1)
ax = fig.add_subplot(gs[0, 0])

for pos, impl in enumerate(["cpp", "python"]):
    target_data = data[data["impl"] == impl]

    # x coordinates of this implementation's bars inside every tick group.
    x = xticks - width / 2 + (pos * width)

    # Label a single implementation so each metric appears only once in the legend.
    label_metrics = (impl == "cpp")

    # Running top of every stack; each implementation starts again from zero.
    bottom = np.zeros(xticks.shape[0])
    for metric in metrics:
        # Value of the first matching row for every process count.
        y = np.array([
            target_data[target_data["nproc"] == nproc][metric].to_numpy()[0]
            for nproc in nprocs
        ])
        label_kwargs = {"label": metric} if label_metrics else {}
        ax.bar(x, y, width=width, bottom=bottom, edgecolor="black", linewidth=0.5,
               color=colors[metric], **label_kwargs)
        bottom += y  # The next metric is stacked on top of this one.

ax.legend()
ax.set_title("matmul: cpp vs python")
ax.set_xticks(xticks)
ax.set_xticklabels(nnodes)

ax.minorticks_on()
ax.grid(axis='y', which='both')
ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')  # Major gridlines
ax.grid(which='minor', linestyle=':', linewidth='0.5', color='gray')  # Minor gridlines

ax.set_xlabel("number of nodes (Perlmutter CPU)")
ax.set_ylabel("sec")

plt.tight_layout()
plt.savefig("matmul-cpp-v-py.pdf")

### For the hybrid setting, how do the four Nystrom matmul1 implementations compare? (C++ vs Python) x (matmul1gen vs matmul1comm)

In [None]:
# Two stacked panels comparing the matmul1gen and matmul1comm algorithms:
# the top panel uses the Python rows, the bottom panel the C++ rows.
# Every process count gets one stacked bar per algorithm, split by timing metric.
csv_file = 'matmul-results.csv'  # Replace with your actual CSV file path
# Read the results once; each panel filters its own implementation below.
all_results = pd.read_csv(csv_file)

fig = plt.figure(figsize=(6, 8))
gs = GridSpec(nrows=2, ncols=1)

width = 0.3  # Bar width

nproc_per_node = 8 # Hybrid parallelism
xticks = np.array([1, 2, 3, 4])
nnodes = np.array([1, 2, 4, 8])
nprocs = nnodes * nproc_per_node

# NOTE(review): grid_3d / grid_1d are not referenced anywhere in this cell;
# kept in case another cell reads them -- confirm and delete if unused.
grid_3d = ["8x4x4", "8x8x4", "8x8x8", "16x8x8"]
grid_1d = ["128", "256", "512", "1024"]
metrics = ["gather_a_time", "gen_b_time", "gather_b_time", "local_multiply_time", "scatter_reduce_time"]
colors = {
    "gather_a_time": "tab:blue",
    "gen_b_time": "tab:brown",
    "gather_b_time": "tab:orange",
    "local_multiply_time": "tab:green",
    "scatter_reduce_time": "tab:red"
}

# (grid row, impl value in the CSV, panel title, number of legend columns)
panels = [
    (0, "python", "matmul1: Python, gen vs comm Omega", 1),
    (1, "cpp", "matmul1: C++, gen vs comm Omega", 2),
]

for row, impl, title, legend_ncol in panels:
    ax = fig.add_subplot(gs[row, 0])
    data = all_results[(all_results["alg"] != "matmul") & (all_results["impl"] == impl)]

    for pos, alg in enumerate(["matmul1gen", "matmul1comm"]):
        target_data = data[data["alg"] == alg]

        # x coordinates of this algorithm's bars inside every tick group.
        x = xticks - width / 2 + (pos * width)

        # Label a single algorithm so each metric appears only once in the legend.
        label_metrics = (alg == "matmul1gen")

        # Running top of every stack; each algorithm starts again from zero.
        bottom = np.zeros(xticks.shape[0])
        for metric in metrics:
            # Value of the first matching row for every process count.
            y = np.array([
                target_data[target_data["nproc"] == nproc][metric].to_numpy()[0]
                for nproc in nprocs
            ])
            label_kwargs = {"label": metric} if label_metrics else {}
            ax.bar(x, y, width=width, bottom=bottom, edgecolor="black", linewidth=0.5,
                   color=colors[metric], **label_kwargs)
            bottom += y  # The next metric is stacked on top of this one.

    ax.legend(ncol=legend_ncol)
    ax.set_title(title)
    ax.set_xticks(xticks)
    ax.set_xticklabels(nprocs)

    ax.minorticks_on()
    ax.grid(axis='y', which='both')
    ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')  # Major gridlines
    ax.grid(which='minor', linestyle=':', linewidth='0.5', color='gray')  # Minor gridlines

    ax.set_xlabel("nprocs")
    ax.set_ylabel("sec")

plt.tight_layout()
plt.savefig("matmul1-gen-v-comm.pdf")

# Nystrom 1D-1D performance analysis

### Comparison between the noredist and redist variants of the C++ 1D-1D implementation

In [None]:
# Side-by-side panels (one per r value) comparing the noredist and redist
# variants of the C++ 1D-1D Nystrom implementation as stacked timing bars.
csv_file = 'nystrom-results.csv'  # Replace with your actual CSV file path
data = pd.read_csv(csv_file)

fig = plt.figure(figsize=(6*2, 4))
gs = GridSpec(nrows=1, ncols=2)
axes = [fig.add_subplot(gs[0, gc]) for gc in range(2)]

width = 0.3  # Bar width

nproc_per_node = 8 # Hybrid parallelism
xticks = np.array([1, 2, 3, 4])
nnodes = np.array([1, 2, 4, 8])
nprocs = nnodes * nproc_per_node
metrics = ["gen_omega_time", "first_dgemm_time", "redistrib_y_time", "second_dgemm_time", "reduce_scatter_z_time"]
algs = ["nystrom-1d-noredist-1d", "nystrom-1d-redist-1d"]
n = 50000
rs = [5000, 500]  # One panel per r value, in this order

colors = {
    "gen_omega_time": "tab:brown",
    "first_dgemm_time": "tab:blue",
    "redistrib_y_time": "tab:pink",
    "second_dgemm_time": "tab:orange",
    "reduce_scatter_z_time": "tab:red"
}

for gc in range(2):
    ax = axes[gc]
    r = rs[gc]

    # True once one algorithm's bars carry the legend labels for this panel.
    labelled = False
    for pos, alg in enumerate(algs):  # pos: bar position inside the group
        target_data = data[(data["alg"] == alg) & (data["impl"] == "cpp") & (data["n"] == n) & (data["r"] == r)]
        if target_data.empty:
            # Nothing to draw for this combination: say so and leave its slot
            # empty instead of failing later with an opaque IndexError.
            print(f"No rows for alg={alg} impl=cpp n={n} r={r}; skipping its bars.")
            continue

        # x coordinates of this algorithm's bars inside every tick group.
        x = xticks - width / 2 + (pos * width)

        # Running top of every stack; each algorithm starts again from zero.
        bottom = np.zeros(xticks.shape[0])
        for metric in metrics:
            # Value of the first matching row for every process count.
            y = np.array([
                target_data[target_data["nproc"] == nproc][metric].to_numpy()[0]
                for nproc in nprocs
            ])
            # Only the first plotted algorithm is labelled, so each metric
            # appears once in the legend.
            label_kwargs = {} if labelled else {"label": metric}
            ax.bar(x, y, width=width, bottom=bottom, edgecolor="black", linewidth=0.5,
                   color=colors[metric], **label_kwargs)
            bottom += y  # The next metric is stacked on top of this one.
        labelled = True

    if labelled:
        ax.legend(ncol=2)
    title = "noredist vs redist" + " (n=" + str(n) + " r=" + str(r) + ")"
    ax.set_title(title)
    ax.set_xticks(xticks)
    ax.set_xticklabels(nprocs)

    ax.minorticks_on()
    ax.grid(axis='y', which='both')
    ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')  # Major gridlines
    ax.grid(which='minor', linestyle=':', linewidth='0.5', color='gray')  # Minor gridlines

    ax.set_xlabel("nprocs")
    ax.set_ylabel("runtime(sec)")

plt.suptitle("1D-1D (Impl: C++)")
plt.tight_layout()
plt.savefig("nystrom-1d-1d-cpp-noredist-v-redist.pdf")

### Comparison between the C++ and Python implementations of the `noredist` variant

In [None]:
# Side-by-side panels (one per r value) comparing the cpp and python
# implementations of the nystrom-1d-noredist-1d algorithm as stacked timing bars.
csv_file = 'nystrom-results.csv'  # Replace with your actual CSV file path
data = pd.read_csv(csv_file)

fig = plt.figure(figsize=(6*2, 4))
gs = GridSpec(nrows=1, ncols=2)
axes = [fig.add_subplot(gs[0, gc]) for gc in range(2)]

width = 0.3  # Bar width

nproc_per_node = 8 # Hybrid parallelism
xticks = np.array([1, 2, 3, 4])
nnodes = np.array([1, 2, 4, 8])
nprocs = nnodes * nproc_per_node
# redistrib_y_time is not part of the stack plotted for this variant.
metrics = ["gen_omega_time", "first_dgemm_time", "second_dgemm_time", "reduce_scatter_z_time"]
impls = ["cpp", "python"]
n = 50000
rs = [5000, 500]  # One panel per r value, in this order

colors = {
    "gen_omega_time": "tab:brown",
    "first_dgemm_time": "tab:blue",
    "redistrib_y_time": "tab:pink",
    "second_dgemm_time": "tab:orange",
    "reduce_scatter_z_time": "tab:red"
}

for gc in range(2):
    ax = axes[gc]
    r = rs[gc]

    # True once one implementation's bars carry the legend labels for this panel.
    labelled = False
    for pos, impl in enumerate(impls):  # pos: bar position inside the group
        target_data = data[(data["alg"] == "nystrom-1d-noredist-1d") & (data["impl"] == impl) & (data["n"] == n) & (data["r"] == r)]
        if target_data.empty:
            # Nothing to draw for this combination: say so and leave its slot
            # empty instead of failing later with an opaque IndexError.
            print(f"No rows for alg=nystrom-1d-noredist-1d impl={impl} n={n} r={r}; skipping its bars.")
            continue

        # x coordinates of this implementation's bars inside every tick group.
        x = xticks - width / 2 + (pos * width)

        # Running top of every stack; each implementation starts again from zero.
        bottom = np.zeros(xticks.shape[0])
        for metric in metrics:
            # Value of the first matching row for every process count.
            y = np.array([
                target_data[target_data["nproc"] == nproc][metric].to_numpy()[0]
                for nproc in nprocs
            ])
            # Only the first plotted implementation is labelled, so each metric
            # appears once in the legend (colours are shared across implementations).
            label_kwargs = {} if labelled else {"label": metric}
            ax.bar(x, y, width=width, bottom=bottom, edgecolor="black", linewidth=0.5,
                   color=colors[metric], **label_kwargs)
            bottom += y  # The next metric is stacked on top of this one.
        labelled = True

    if labelled:
        ax.legend(ncol=2)
    title = "cpp vs python" + " (n=" + str(n) + " r=" + str(r) + ")"
    ax.set_title(title)
    ax.set_xticks(xticks)
    ax.set_xticklabels(nprocs)

    ax.minorticks_on()
    ax.grid(axis='y', which='both')
    ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')  # Major gridlines
    ax.grid(which='minor', linestyle=':', linewidth='0.5', color='gray')  # Minor gridlines

    ax.set_xlabel("nprocs")
    ax.set_ylabel("runtime(sec)")

plt.suptitle("1d-noredist-1d")
plt.tight_layout()
plt.savefig("nystrom-1d-noredist-1d-cpp-v-python.pdf")

### Comparison between the C++ and Python implementations of the `redist` variant

In [None]:
# Side-by-side panels (one per r value) comparing the cpp and python
# implementations of the nystrom-1d-redist-1d algorithm as stacked timing bars.
csv_file = 'nystrom-results.csv'  # Replace with your actual CSV file path
data = pd.read_csv(csv_file)

fig = plt.figure(figsize=(6*2, 4))
gs = GridSpec(nrows=1, ncols=2)
axes = [fig.add_subplot(gs[0, gc]) for gc in range(2)]

width = 0.3  # Bar width

nproc_per_node = 8 # Hybrid parallelism
xticks = np.array([1, 2, 3, 4])
nnodes = np.array([1, 2, 4, 8])
nprocs = nnodes * nproc_per_node
# reduce_scatter_z_time is not part of the stack plotted for this variant.
metrics = ["gen_omega_time", "first_dgemm_time", "redistrib_y_time", "second_dgemm_time"]
impls = ["cpp", "python"]
n = 50000
rs = [5000, 500]  # One panel per r value, in this order

colors = {
    "gen_omega_time": "tab:brown",
    "first_dgemm_time": "tab:blue",
    "redistrib_y_time": "tab:pink",
    "second_dgemm_time": "tab:orange",
    "reduce_scatter_z_time": "tab:red"
}

for gc in range(2):
    ax = axes[gc]
    r = rs[gc]

    # True once one implementation's bars carry the legend labels for this panel.
    labelled = False
    for pos, impl in enumerate(impls):  # pos: bar position inside the group
        target_data = data[(data["alg"] == "nystrom-1d-redist-1d") & (data["impl"] == impl) & (data["n"] == n) & (data["r"] == r)]
        if target_data.empty:
            # Nothing to draw for this combination: say so and leave its slot
            # empty instead of failing later with an opaque IndexError.
            print(f"No rows for alg=nystrom-1d-redist-1d impl={impl} n={n} r={r}; skipping its bars.")
            continue

        # x coordinates of this implementation's bars inside every tick group.
        x = xticks - width / 2 + (pos * width)

        # Running top of every stack; each implementation starts again from zero.
        bottom = np.zeros(xticks.shape[0])
        for metric in metrics:
            # Value of the first matching row for every process count.
            y = np.array([
                target_data[target_data["nproc"] == nproc][metric].to_numpy()[0]
                for nproc in nprocs
            ])
            # Only the first plotted implementation is labelled, so each metric
            # appears once in the legend (colours are shared across implementations).
            label_kwargs = {} if labelled else {"label": metric}
            ax.bar(x, y, width=width, bottom=bottom, edgecolor="black", linewidth=0.5,
                   color=colors[metric], **label_kwargs)
            bottom += y  # The next metric is stacked on top of this one.
        labelled = True

    if labelled:
        ax.legend(ncol=2)
    title = "cpp vs python" + " (n=" + str(n) + " r=" + str(r) + ")"
    ax.set_title(title)
    ax.set_xticks(xticks)
    ax.set_xticklabels(nprocs)

    ax.minorticks_on()
    ax.grid(axis='y', which='both')
    ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')  # Major gridlines
    ax.grid(which='minor', linestyle=':', linewidth='0.5', color='gray')  # Minor gridlines

    ax.set_xlabel("nprocs")
    ax.set_ylabel("runtime(sec)")

plt.suptitle("1d-redist-1d")
plt.tight_layout()
plt.savefig("nystrom-1d-redist-1d-cpp-v-python.pdf")