In [None]:
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import MO, TU, WE, TH, FR, SA, SU
from matplotlib.gridspec import GridSpec
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.ticker as ticker
import matplotlib
matplotlib.rcParams.update(matplotlib.rcParamsDefault)
import seaborn as sns

### Comparison between matmul1 variants when GPU is used (comm vs gen variant)

In [None]:
# Stacked-bar comparison of the matmul1gen and matmul1comm variants (C++ runs
# on "perlmutter-gpu"), one panel per value of r (the "b_cols" column).
# Within each node-count group the left bar is matmul1gen and the right bar is
# matmul1comm; each bar stacks the timing columns listed in `metrics`.
n = 50000  # only used in the panel titles; the data is not filtered on n here
rs = [500, 5000]

# Read the results once: the per-panel filter below only differs in b_cols.
csv_file = 'matmul-results.csv'  # Replace with your actual CSV file path
all_results = pd.read_csv(csv_file)

fig = plt.figure(figsize=(2 * 6, 4))
gs = GridSpec(nrows=1, ncols=2)

width = 0.3

nproc_per_node = 4  # because 4 GPU per node
xticks = np.array([1, 2, 3, 4, 5, 6])      # bar-group positions on the x axis
nnodes = np.array([1, 2, 4, 8, 16, 32])    # tick labels, one per group
nprocs = nnodes * nproc_per_node           # values matched against "nproc"

metrics = ["gen_b_time", "gather_b_time", "local_multiply_time"]
colors = {
    "gather_a_time": "tab:green",
    "gen_b_time": "tab:brown",
    "gather_b_time": "tab:pink",
    "local_multiply_time": "tab:blue",
    "scatter_reduce_time": "tab:red",
    "cpu_gpu_data_move_time": "tab:purple"
}

for panel, r in enumerate(rs):
    data = all_results[
        (all_results["alg"] != "matmul")
        & (all_results["impl"] == "cpp")
        & (all_results["system"] == "perlmutter-gpu")
        & (all_results["b_cols"] == r)
    ]

    ax = fig.add_subplot(gs[0, panel])

    for pos, alg in enumerate(["matmul1gen", "matmul1comm"]):
        target_data = data[data["alg"] == alg]

        # Shift each algorithm's bar sideways so the two sit next to each other.
        x = xticks - width/2 + (pos*width)
        bottom = np.zeros((xticks.shape[0]))

        for metric in metrics:
            y = []
            for nproc in nprocs:
                values = target_data.loc[target_data["nproc"] == nproc, metric]
                # A missing configuration is drawn as a zero-height segment;
                # when several rows match, the first one is used.
                y.append(0 if values.empty else values.to_numpy()[0])
            y = np.array(y)

            bar_kwargs = dict(width=width, bottom=bottom, edgecolor="black",
                              linewidth=0.5, color=colors[metric])
            if alg == "matmul1gen":
                # Label only one algorithm so each metric appears once in the legend.
                bar_kwargs["label"] = metric
            ax.bar(x, y, **bar_kwargs)
            bottom += y

    ax.legend()
    ax.set_title(f"n={n}, r={r}")
    ax.set_xticks(xticks)
    ax.set_xticklabels(nnodes)

    ax.minorticks_on()
    ax.grid(axis='y', which='both')
    ax.grid(which='major', linestyle='-', linewidth=0.5, color='black')  # Major gridlines
    ax.grid(which='minor', linestyle=':', linewidth=0.5, color='gray')

    ax.set_xlabel("nnodes")
    ax.set_ylabel("sec")

fig.suptitle("matmul1 on GPU, gen vs comm Omega")
fig.tight_layout()
fig.savefig("matmul1-gpu-gen-v-comm.pdf")

### Comparison between matmul1 variants when CPU is used (comm vs gen variant)

In [None]:
# Stacked-bar comparison of the matmul1gen and matmul1comm variants (C++ runs
# on "perlmutter-gpu-cpu"), one panel per value of r (the "b_cols" column).
# Within each node-count group the left bar is matmul1gen and the right bar is
# matmul1comm; each bar stacks the timing columns listed in `metrics`.
n = 50000  # only used in the panel titles; the data is not filtered on n here
rs = [500, 5000]

# Read the results once: the per-panel filter below only differs in b_cols.
csv_file = 'matmul-results.csv'  # Replace with your actual CSV file path
all_results = pd.read_csv(csv_file)

fig = plt.figure(figsize=(2 * 6, 4))
gs = GridSpec(nrows=1, ncols=2)

width = 0.3

nproc_per_node = 4  # because 4 GPU per node
xticks = np.array([1, 2, 3, 4, 5, 6])      # bar-group positions on the x axis
nnodes = np.array([1, 2, 4, 8, 16, 32])    # tick labels, one per group
nprocs = nnodes * nproc_per_node           # values matched against "nproc"

# local_multiply_time is deliberately left out of this figure.
metrics = ["gen_b_time", "gather_b_time"]
colors = {
    "gather_a_time": "tab:green",
    "gen_b_time": "tab:brown",
    "gather_b_time": "tab:pink",
    "local_multiply_time": "tab:blue",
    "scatter_reduce_time": "tab:red",
    "cpu_gpu_data_move_time": "tab:purple"
}

for panel, r in enumerate(rs):
    data = all_results[
        (all_results["alg"] != "matmul")
        & (all_results["impl"] == "cpp")
        & (all_results["system"] == "perlmutter-gpu-cpu")
        & (all_results["b_cols"] == r)
    ]

    ax = fig.add_subplot(gs[0, panel])

    for pos, alg in enumerate(["matmul1gen", "matmul1comm"]):
        target_data = data[data["alg"] == alg]

        # Shift each algorithm's bar sideways so the two sit next to each other.
        x = xticks - width/2 + (pos*width)
        bottom = np.zeros((xticks.shape[0]))

        for metric in metrics:
            y = []
            for nproc in nprocs:
                values = target_data.loc[target_data["nproc"] == nproc, metric]
                # A missing configuration is drawn as a zero-height segment;
                # when several rows match, the first one is used.
                y.append(0 if values.empty else values.to_numpy()[0])
            y = np.array(y)

            bar_kwargs = dict(width=width, bottom=bottom, edgecolor="black",
                              linewidth=0.5, color=colors[metric])
            if alg == "matmul1gen":
                # Label only one algorithm so each metric appears once in the legend.
                bar_kwargs["label"] = metric
            ax.bar(x, y, **bar_kwargs)
            bottom += y

    ax.legend()
    ax.set_title(f"n={n}, r={r}")
    ax.set_xticks(xticks)
    ax.set_xticklabels(nnodes)

    ax.minorticks_on()
    ax.grid(axis='y', which='both')
    ax.grid(which='major', linestyle='-', linewidth=0.5, color='black')  # Major gridlines
    ax.grid(which='minor', linestyle=':', linewidth=0.5, color='gray')

    ax.set_xlabel("nnodes")
    ax.set_ylabel("sec")

fig.suptitle("matmul1 on CPU, gen vs comm Omega")
fig.tight_layout()
fig.savefig("matmul1-cpu-gen-v-comm.pdf")

### Hybrid setting: how do the four Nystrom matmul1 implementations compare? (cpp vs python) x (matmul1gen vs matmul1comm)

In [None]:
# Two stacked-bar panels comparing matmul1gen (left bar) and matmul1comm
# (right bar): the top panel uses the rows with impl == "python", the bottom
# panel the rows with impl == "cpp". Each bar stacks the columns in `metrics`.
#
# NOTE(review): rows are filtered on "alg" and "impl" only. If the CSV holds
# several rows for the same (alg, impl, nproc) -- e.g. different systems or
# b_cols values -- the first one is plotted. Confirm whether an additional
# filter is needed here.
csv_file = 'matmul-results.csv'  # Replace with your actual CSV file path
all_results = pd.read_csv(csv_file)  # read once, filtered per panel below

fig = plt.figure(figsize=(6, 8))
gs = GridSpec(nrows=2, ncols=1)

width = 0.3

nproc_per_node = 8  # Hybrid parallelism
xticks = np.array([1, 2, 3, 4])    # bar-group positions on the x axis
nnodes = np.array([1, 2, 4, 8])
nprocs = nnodes * nproc_per_node   # values matched against "nproc" and used as tick labels

# Not referenced by the plotting code in this cell.
grid_3d = ["8x4x4", "8x8x4", "8x8x8", "16x8x8"]
grid_1d = ["128", "256", "512", "1024"]

metrics = ["gather_a_time", "gen_b_time", "gather_b_time", "local_multiply_time", "scatter_reduce_time"]
colors = {
    "gather_a_time": "tab:blue",
    "gen_b_time": "tab:brown",
    "gather_b_time": "tab:orange",
    "local_multiply_time": "tab:green",
    "scatter_reduce_time": "tab:red"
}

# (grid row, impl value, panel title, extra legend keyword arguments)
panels = [
    (0, "python", "matmul1: Python, gen vs comm Omega", {}),
    (1, "cpp", "matmul1: C++, gen vs comm Omega", {"ncol": 2}),
]

for row, impl, title, legend_kwargs in panels:
    data = all_results[(all_results["alg"] != "matmul") & (all_results["impl"] == impl)]
    ax = fig.add_subplot(gs[row, 0])

    for pos, alg in enumerate(["matmul1gen", "matmul1comm"]):
        target_data = data[data["alg"] == alg]

        # Shift each algorithm's bar sideways so the two sit next to each other.
        x = xticks - width/2 + (pos*width)
        bottom = np.zeros((xticks.shape[0]))

        for metric in metrics:
            y = []
            for nproc in nprocs:
                values = target_data.loc[target_data["nproc"] == nproc, metric]
                # Draw a zero-height segment instead of raising IndexError
                # when a configuration has no row in the CSV.
                y.append(0 if values.empty else values.to_numpy()[0])
            y = np.array(y)

            bar_kwargs = dict(width=width, bottom=bottom, edgecolor="black",
                              linewidth=0.5, color=colors[metric])
            if alg == "matmul1gen":
                # Label only one algorithm so each metric appears once in the legend.
                bar_kwargs["label"] = metric
            ax.bar(x, y, **bar_kwargs)
            bottom += y

    ax.legend(**legend_kwargs)
    ax.set_title(title)
    ax.set_xticks(xticks)
    ax.set_xticklabels(nprocs)

    ax.minorticks_on()
    ax.grid(axis='y', which='both')
    ax.grid(which='major', linestyle='-', linewidth=0.5, color='black')  # Major gridlines
    ax.grid(which='minor', linestyle=':', linewidth=0.5, color='gray')

    ax.set_xlabel("nprocs")
    ax.set_ylabel("sec")

fig.tight_layout()
fig.savefig("matmul1-gen-v-comm.pdf")

# Nystrom 1D-1D performance analysis

### Comparison between nystrom variants on CPU 

In [None]:
# Load the Nystrom results and average the rows that share the same run
# configuration (the `gb` key columns), then show the resulting columns.
csv_file = 'nystrom-results.csv'
gb = ['alg', 'impl', 'system', 'nnode', 'nproc', 'nthread', 'n', 'r', 'matmul1p1', 'matmul1p2', 'matmul1p3', 'matmul2p1', 'matmul2p2', 'matmul2p3']
data = (
    pd.read_csv(csv_file)
    .groupby(gb, as_index=False)   # as_index=False keeps the grouping columns as regular columns
    # numeric_only=True restricts the average to numeric columns; without it
    # pandas >= 2.0 raises on any non-numeric column outside `gb`.
    .mean(numeric_only=True)
)
print(data.columns)

In [None]:
# Stacked-bar comparison of the two Nystrom variants in `algs` for the C++
# runs on "perlmutter-gpu-cpu", one panel per r. Within each group the left
# bar is nystrom-1d-noredist-1d and the right bar is nystrom-1d-redist-1d;
# each bar stacks the timing columns listed in `metrics`.
colors = {
    "gen_omega_time": "tab:brown",
    "dgemm1_time": "tab:blue",
    "all2all_time": "tab:pink",
    "dgemm2_time": "tab:orange",
    "reduce_scatter_time": "tab:red",
    "unpack_time": "tab:cyan",
    "pack_time": "tab:olive"
}

csv_file = 'nystrom-results.csv'
gb = ['alg', 'impl', 'system', 'nnode', 'nproc', 'nthread', 'n', 'r', 'matmul1p1', 'matmul1p2', 'matmul1p3', 'matmul2p1', 'matmul2p2', 'matmul2p3']
data = (
    pd.read_csv(csv_file)
    .groupby(gb, as_index=False)   # as_index=False keeps the grouping columns as regular columns
    # numeric_only=True restricts the average to numeric columns; without it
    # pandas >= 2.0 raises on any non-numeric column outside `gb`.
    .mean(numeric_only=True)
)

fig = plt.figure(figsize=(6*2, 4))
gs = GridSpec(nrows=1, ncols=2)
axes = [fig.add_subplot(gs[0, gc]) for gc in range(2)]

width = 0.3

nproc_per_node = 4
xticks = np.array([1, 2, 3, 4, 5, 6])      # bar-group positions on the x axis
nnodes = np.array([1, 2, 4, 8, 16, 32])
nprocs = nnodes * nproc_per_node           # values matched against "nproc" and used as tick labels
# Only these three timing columns are stacked in this figure.
metrics = ["all2all_time", "unpack_time", "reduce_scatter_time"]
algs = ["nystrom-1d-noredist-1d", "nystrom-1d-redist-1d"]

n = 50000
rs = [5000, 500]

for gc in range(2):
    ax = axes[gc]
    r = rs[gc]

    # pos is the bar position inside each group.
    for pos, alg in enumerate(algs):
        target_data = data[(data["alg"] == alg) &
            (data["impl"] == "cpp") &
            (data["n"] == n) &
            (data["r"] == r) &
            (data["system"] == "perlmutter-gpu-cpu")]

        x = xticks - width/2 + (pos*width)
        bottom = np.zeros((xticks.shape[0]))

        for metric in metrics:
            y = []
            for nproc in nprocs:
                values = target_data.loc[target_data["nproc"] == nproc, metric]
                # Draw a zero-height segment instead of raising IndexError
                # when a configuration has no row in the results.
                y.append(0 if values.empty else values.to_numpy()[0])
            y = np.array(y)

            bar_kwargs = dict(width=width, bottom=bottom, edgecolor="black",
                              linewidth=0.5, color=colors[metric])
            if alg == "nystrom-1d-noredist-1d":
                # Label only one algorithm so each metric appears once in the legend.
                bar_kwargs["label"] = metric
            ax.bar(x, y, **bar_kwargs)
            bottom += y

    ax.legend(ncol=2)
    ax.set_title(f"n={n} r={r}")
    ax.set_xticks(xticks)
    ax.set_xticklabels(nprocs)

    ax.minorticks_on()
    ax.grid(axis='y', which='both')
    ax.grid(which='major', linestyle='-', linewidth=0.5, color='black')  # Major gridlines
    ax.grid(which='minor', linestyle=':', linewidth=0.5, color='gray')

    ax.set_xlabel("nprocs")
    ax.set_ylabel("runtime(sec)")

fig.suptitle("nystrom on CPU, 1d-noredist-1d vs 1d-redist-1d")
fig.tight_layout()
fig.savefig("nystrom-cpu.pdf")

### Comparison between nystrom variants on GPU 

In [None]:
# Stacked-bar comparison of the two Nystrom variants in `algs` for the C++
# runs on "perlmutter-gpu", one panel per r. Within each group the left bar is
# nystrom-1d-noredist-1d and the right bar is nystrom-1d-redist-1d; each bar
# stacks the timing columns listed in `metrics`.
colors = {
    "gen_omega_time": "tab:brown",
    "dgemm1_time": "tab:blue",
    "all2all_time": "tab:pink",
    "dgemm2_time": "tab:orange",
    "reduce_scatter_time": "tab:red",
    "unpack_time": "tab:cyan",
    "pack_time": "tab:olive"
}

csv_file = 'nystrom-results.csv'
gb = ['alg', 'impl', 'system', 'nnode', 'nproc', 'nthread', 'n', 'r', 'matmul1p1', 'matmul1p2', 'matmul1p3', 'matmul2p1', 'matmul2p2', 'matmul2p3']
data = (
    pd.read_csv(csv_file)
    .groupby(gb, as_index=False)   # as_index=False keeps the grouping columns as regular columns
    # numeric_only=True restricts the average to numeric columns; without it
    # pandas >= 2.0 raises on any non-numeric column outside `gb`.
    .mean(numeric_only=True)
)

fig = plt.figure(figsize=(6*2, 4))
gs = GridSpec(nrows=1, ncols=2)
axes = [fig.add_subplot(gs[0, gc]) for gc in range(2)]

width = 0.3

nproc_per_node = 4
xticks = np.array([1, 2, 3, 4, 5, 6])      # bar-group positions on the x axis
nnodes = np.array([1, 2, 4, 8, 16, 32])
nprocs = nnodes * nproc_per_node           # values matched against "nproc" and used as tick labels
# Only these three timing columns are stacked in this figure.
metrics = ["all2all_time", "unpack_time", "reduce_scatter_time"]
algs = ["nystrom-1d-noredist-1d", "nystrom-1d-redist-1d"]
n = 50000
rs = [5000, 500]

for gc in range(2):
    ax = axes[gc]
    r = rs[gc]

    # pos is the bar position inside each group.
    for pos, alg in enumerate(algs):
        target_data = data[(data["alg"] == alg) &
            (data["impl"] == "cpp") &
            (data["n"] == n) &
            (data["r"] == r) &
            (data["system"] == "perlmutter-gpu")]

        x = xticks - width/2 + (pos*width)
        bottom = np.zeros((xticks.shape[0]))

        for metric in metrics:
            y = []
            for nproc in nprocs:
                values = target_data.loc[target_data["nproc"] == nproc, metric]
                # Draw a zero-height segment instead of raising IndexError
                # when a configuration has no row in the results.
                y.append(0 if values.empty else values.to_numpy()[0])
            y = np.array(y)

            bar_kwargs = dict(width=width, bottom=bottom, edgecolor="black",
                              linewidth=0.5, color=colors[metric])
            if alg == "nystrom-1d-noredist-1d":
                # Label only one algorithm so each metric appears once in the legend.
                bar_kwargs["label"] = metric
            ax.bar(x, y, **bar_kwargs)
            bottom += y

    ax.legend(ncol=2)
    ax.set_title(f"n={n} r={r}")
    ax.set_xticks(xticks)
    ax.set_xticklabels(nprocs)

    ax.minorticks_on()
    ax.grid(axis='y', which='both')
    ax.grid(which='major', linestyle='-', linewidth=0.5, color='black')  # Major gridlines
    ax.grid(which='minor', linestyle=':', linewidth=0.5, color='gray')

    ax.set_xlabel("nprocs")
    ax.set_ylabel("runtime(sec)")

fig.suptitle("nystrom on GPU, 1d-noredist-1d vs 1d-redist-1d")
fig.tight_layout()
fig.savefig("nystrom-gpu.pdf")

### Line plot comparing Nystrom variants implemented for two systems (CPU and GPU)

In [None]:
# Log-log line plot of the summed per-phase time ("breakdown_total_time")
# versus nproc for every (algorithm, system) pair, one panel per r.
csv_file = 'nystrom-results.csv'
data = pd.read_csv(csv_file)
col_to_sum = ['gen_omega_time', 'dgemm1_time',
       'dgemm2_time', 'all2all_time', 'reduce_scatter_time',
       'reduce_scatter_1_time', 'reduce_scatter_2_time', 'unpack_time',
       'pack_time']
# Per-row total of the timing columns, computed before averaging.
data["breakdown_total_time"] = data[col_to_sum].sum(axis=1)
gb = ['alg', 'impl', 'system', 'nnode', 'nproc', 'nthread', 'n', 'r', 'matmul1p1', 'matmul1p2', 'matmul1p3', 'matmul2p1', 'matmul2p2', 'matmul2p3']
data = (
    data
    .groupby(gb, as_index=False)   # as_index=False keeps the grouping columns as regular columns
    # numeric_only=True restricts the average to numeric columns; without it
    # pandas >= 2.0 raises on any non-numeric column outside `gb`.
    .mean(numeric_only=True)
)
data = data[data["nnode"] <= 64]

fig = plt.figure(figsize=(6*2, 4))
gs = GridSpec(nrows=1, ncols=2)
axes = [fig.add_subplot(gs[0, gc]) for gc in range(2)]

nproc_per_node = 4
nnodes = np.array([1, 2, 4, 8, 16, 32])
nprocs = nnodes * nproc_per_node   # fallback tick positions when a panel has no data

algs = ["nystrom-1d-noredist-1d", "nystrom-1d-redist-1d"]
systems = ["perlmutter-gpu", "perlmutter-gpu-cpu"]
n = 50000
rs = [5000, 500]

for gc in range(2):
    ax = axes[gc]
    r = rs[gc]

    plotted_nprocs = set()  # x values actually drawn in this panel
    for alg in algs:
        for system in systems:
            label = alg + "," + system
            target_data = data[(data["alg"] == alg) &
                (data["impl"] == "cpp") &
                (data["n"] == n) &
                (data["r"] == r) &
                (data["system"] == system)]
            if target_data.empty:
                # Nothing measured for this combination: skip it so it does
                # not add an empty entry to the legend.
                continue
            target_data = target_data.sort_values("nproc")  # left-to-right line
            ax.plot(target_data["nproc"], target_data["breakdown_total_time"], label=label, marker="o")
            plotted_nprocs.update(target_data["nproc"].tolist())

    ax.set_title(f"n={n} r={r}")

    # Choose the scale first: set_xscale installs the scale's default tick
    # locator/formatter, which would discard ticks set before it.
    ax.set_xscale('log', base=2)
    ax.set_yscale('log', base=2)

    # The lines are drawn at real nproc values, so the ticks must sit on those
    # values (not on bar-group positions 1..6).
    tick_values = sorted(plotted_nprocs) if plotted_nprocs else nprocs.tolist()
    ax.set_xticks(tick_values)
    ax.set_xticklabels([str(int(v)) for v in tick_values])

    ax.minorticks_on()
    ax.grid(axis='y', which='both')
    ax.grid(which='major', linestyle='-', linewidth=0.5, color='black')  # Major gridlines
    ax.grid(which='minor', linestyle=':', linewidth=0.5, color='gray')

    ax.set_xlabel("nprocs")
    ax.set_ylabel("runtime(sec)")

    ax.legend()

fig.suptitle("Comparison of different algorithms on different systems")
fig.tight_layout()
fig.savefig("nystrom-allfour-comparison.pdf")