# Header

In [1]:
import numpy as np
from numpy import array, newaxis, expand_dims
import matplotlib.pyplot as plt
import pandas as pd
from copy import deepcopy as copy
import matplotlib.backends.backend_pdf as pdflib
import scipy as sp
import scipy.stats as stats
import math
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_predict
# plotly.tools.set_credentials_file(username='mhossa10', api_key='CSWpOV3xHUVRzimjfHSB')
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

# Color Lists

In [2]:
color_list1 = ['#1b9e77', '#d95f02', '#7570b3', '#e7298a', '#66a61e', '#e6ab02']
color_list2 = ['#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00', '#ffff33']
color_list3 = ['#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c']

# Test Matrices

In [3]:
test_matrices = [
    "delaunay_n20"
    , "NACA0015"
#     , "belgium.osm"
    , "AS365"
#     , "roadNet-TX"
    , "road_central"
    , "NLR"
    , "hugetrace-00010"
    , "nlpkkt200"
#     , "uk-2002"
]

rmat_test_matrices = [
    "rmat_100M2M"
    , "rmat_100M3M"
    , "rmat_200M4M"
    , "rmat_500M3M"
    , "rmat_500M4M"
]

# Dynamic CSR Local SpMV(LK-SpMV)

In [6]:
csr_dyn_lk_spmv_pred = pd.DataFrame()
mat = [] 
node = []
prcs = []
rows = []
nnz_r = []
a_time = []
p_time = []
err = []
spmv_model = []
m_size = []
csr_dyn_lk_spmv = pd.read_csv("../../SpMV_Model/src/SkylakeResults/MPI_Local_KWAY_CSR_SpMV.csv")
csr_dyn_lk_spmv = csr_dyn_lk_spmv[~csr_dyn_lk_spmv['Name'].str.contains("rmat")].reset_index(drop=True)
csr_dyn_lk_spmv = csr_dyn_lk_spmv.groupby(["Name", "Nodes", "nProcess", "DataType"]).mean().reset_index().sort_values(by="Name").reset_index()
csr_dyn_lk_spmv.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)
csr_dyn_lk_spmv = csr_dyn_lk_spmv.reset_index()
csr_dyn_lk_spmv = csr_dyn_lk_spmv.sort_values(by=["AvgRows", "AvgNPR"]).reset_index(drop=True)

density = []
min_time_factor = 1.0
if min(csr_dyn_lk_spmv["AvgTime"]) < 1.0:
    min_time_factor = 1/min(csr_dyn_lk_spmv["AvgTime"])
for i,r in csr_dyn_lk_spmv.iterrows():
    csr_dyn_lk_spmv.at[i, "AvgTime"] = min_time_factor*r["AvgTime"]
    density.append(float(r["AvgNNZ"])/float(r["AvgRows"] * r["AvgRows"]))
csr_dyn_lk_spmv["Density"] = density

column = "AvgTime"
csr_dyn_lk_spmv_train_data = csr_dyn_lk_spmv[~csr_dyn_lk_spmv["Name"].isin(test_matrices)].reset_index(drop=True)
csr_dyn_lk_spmv_test_data = csr_dyn_lk_spmv[csr_dyn_lk_spmv["Name"].isin(test_matrices)].reset_index(drop=True)

#     csr_local_kway_train = csr_local_kway_train_data[["AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()
#     csr_local_kway_test = csr_local_kway_test_data[["AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()

csr_dyn_lk_spmv_train = csr_dyn_lk_spmv_train_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgLocalNNZ", "AvgGlobalNNZ", "AvgInterProcessCall", "AvgDataSend", "AvgNPRSD", "Density"]].to_numpy()
csr_dyn_lk_spmv_test = csr_dyn_lk_spmv_test_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgLocalNNZ", "AvgGlobalNNZ", "AvgInterProcessCall", "AvgDataSend", "AvgNPRSD", "Density"]].to_numpy()

csr_dyn_lk_spmv_X = np.concatenate((csr_dyn_lk_spmv_train, csr_dyn_lk_spmv_test), axis=0)
scaler = MinMaxScaler()  # Default behavior is to scale to [0,1]
csr_dyn_lk_spmv_X = scaler.fit_transform(csr_dyn_lk_spmv_X)
X = csr_dyn_lk_spmv_X[0:len(csr_dyn_lk_spmv_train)]
csr_dyn_lk_spmv_test_X = csr_dyn_lk_spmv_X[len(csr_dyn_lk_spmv_train):]
train_y = np.array(csr_dyn_lk_spmv_train_data[column])
test_y = np.array(csr_dyn_lk_spmv_test_data[column])

scores = []
best = 0
# c=20e4
# e=0.001
for c in np.linspace(2e1,6e4, num=50):
    for e in np.linspace(0.0001, 0.9, 20):
        best_svr = SVR(kernel='poly', C=c, gamma='auto', degree=3, epsilon=e, coef0=1)
        cv = KFold(n_splits=5, random_state=42, shuffle=True)
        for train_index, test_index in cv.split(X):
            X_train, X_test, y_train, y_test = X[train_index], X[test_index], train_y[train_index], train_y[test_index]
            best_svr.fit(X_train, y_train)
            scores.append(best_svr.score(X_test, np.array(y_test)))
            if best < np.mean(scores):
                best = np.mean(scores)
                best_param = {"C": c, "epsilon": e}
svr = SVR(kernel='poly', C=best_param['C'], gamma='auto', degree=3, epsilon=best_param['epsilon'], coef0=1)
svr.fit(X, train_y)
y_pred = svr.predict(csr_dyn_lk_spmv_test_X)

max_err = 0
avg_err = 0
for i,row in csr_dyn_lk_spmv_test_data.iterrows():
#     print(y[index])
#     g = row["Name"].split("_")
    print(row["Name"], '{0:.5g}'.format(test_y[i]/min_time_factor),"&",'{0:.5g}'.format(y_pred[i]/min_time_factor), "&", 
          '{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])+"\\%", "\\\ \hline")           
    if max_err < abs(test_y[i]-y_pred[i])*100/test_y[i]:
        max_err = abs(test_y[i]-y_pred[i])*100/test_y[i]
    avg_err += abs(test_y[i]-y_pred[i])*100/test_y[i]
    mat.append(row["Name"])
    node.append(row["Nodes"])
    prcs.append(row["nProcess"])
    rows.append(row["AvgRows"])
    nnz_r.append(row["AvgNPR"])
    a_time.append(float('{0:.5g}'.format(test_y[i]/min_time_factor)))
    p_time.append(float('{0:.5g}'.format(y_pred[i]/min_time_factor)))
    err.append(float('{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])))
    spmv_model.append("CSR LK-SPMV")
    m_size.append(row["MatrixSize"])
# omp_data.groupby("Threads").count()
print("best param: ", best_param)
print("max error: ", max_err, " avg error: ", avg_err/csr_dyn_lk_spmv_test_data.shape[0])
    
csr_dyn_lk_spmv_pred["Name"] = mat
csr_dyn_lk_spmv_pred["Model"] = spmv_model
csr_dyn_lk_spmv_pred["Nodes"] = node
csr_dyn_lk_spmv_pred["Process"] = prcs
csr_dyn_lk_spmv_pred["Matrix Size"] = m_size
csr_dyn_lk_spmv_pred["Avg Row"] = rows
csr_dyn_lk_spmv_pred["Nonzero per Row"] = nnz_r
csr_dyn_lk_spmv_pred["Actual Time"] = a_time
csr_dyn_lk_spmv_pred["Predicted Time"] = p_time
csr_dyn_lk_spmv_pred["Error"] = err

NACA0015 0.30221 & 0.24576 & 18.68\% \\ \hline
delaunay_n20 0.31753 & 0.24183 & 23.84\% \\ \hline
NACA0015 0.31422 & 0.23726 & 24.49\% \\ \hline
delaunay_n20 0.32653 & 0.23238 & 28.83\% \\ \hline
NACA0015 0.34119 & 0.28642 & 16.05\% \\ \hline
delaunay_n20 0.3412 & 0.28099 & 17.65\% \\ \hline
NACA0015 0.38137 & 0.32512 & 14.75\% \\ \hline
delaunay_n20 0.41961 & 0.31857 & 24.08\% \\ \hline
AS365 0.8264 & 0.69037 & 16.46\% \\ \hline
NLR 0.93805 & 0.74977 & 20.07\% \\ \hline
AS365 0.90991 & 0.73445 & 19.28\% \\ \hline
NLR 0.99397 & 0.79966 & 19.55\% \\ \hline
AS365 1.1269 & 0.92885 & 17.58\% \\ \hline
NLR 1.263 & 1.0125 & 19.84\% \\ \hline
AS365 1.3116 & 1.0661 & 18.72\% \\ \hline
NLR 1.4049 & 1.1679 & 16.86\% \\ \hline
hugetrace-00010 1.6499 & 1.5191 & 7.926\% \\ \hline
hugetrace-00010 1.802 & 1.6708 & 7.28\% \\ \hline
road_central 1.7711 & 1.5841 & 10.55\% \\ \hline
road_central 1.902 & 1.7518 & 7.896\% \\ \hline
nlpkkt200 5.5953 & 5.8932 & 5.325\% \\ \hline
hugetrace-00010 2.2245 & 2.16

# Dynamic COO Local SpMV(LK-SpMV)

In [7]:
coo_dyn_lk_spmv_pred = pd.DataFrame()
mat = [] 
node = []
prcs = []
rows = []
nnz_r = []
a_time = []
p_time = []
err = []
spmv_model = []
m_size = []
coo_dyn_lk_spmv = pd.read_csv("../../SpMV_Model/src/SkylakeResults/MPI_Local_KWAY_COO_SpMV.csv")
coo_dyn_lk_spmv = coo_dyn_lk_spmv[~coo_dyn_lk_spmv['Name'].str.contains("rmat")].reset_index(drop=True)
coo_dyn_lk_spmv = coo_dyn_lk_spmv.groupby(["Name", "Nodes", "nProcess", "DataType"]).mean().reset_index().sort_values(by="Name").reset_index()
coo_dyn_lk_spmv.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)
coo_dyn_lk_spmv = coo_dyn_lk_spmv.reset_index()
coo_dyn_lk_spmv = coo_dyn_lk_spmv.sort_values(by=["AvgRows", "AvgNPR"]).reset_index(drop=True)

density = []
min_time_factor = 1.0
if min(coo_dyn_lk_spmv["AvgTime"]) < 1.0:
    min_time_factor = 1/min(coo_dyn_lk_spmv["AvgTime"])
for i,r in coo_dyn_lk_spmv.iterrows():
    coo_dyn_lk_spmv.at[i, "AvgTime"] = min_time_factor*r["AvgTime"]
    density.append(float(r["AvgNNZ"])/float(r["AvgRows"] * r["AvgRows"]))
coo_dyn_lk_spmv["Density"] = density

column = "AvgTime"
coo_dyn_lk_spmv_train_data = coo_dyn_lk_spmv[~coo_dyn_lk_spmv["Name"].isin(test_matrices)].reset_index(drop=True)
coo_dyn_lk_spmv_test_data = coo_dyn_lk_spmv[coo_dyn_lk_spmv["Name"].isin(test_matrices)].reset_index(drop=True)

#     coo_local_kway_train = coo_local_kway_train_data[["AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()
#     coo_local_kway_test = coo_local_kway_test_data[["AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()

coo_dyn_lk_spmv_train = coo_dyn_lk_spmv_train_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgLocalNNZ", "AvgGlobalNNZ", "AvgInterProcessCall", "AvgDataSend", "AvgNPRSD", "Density"]].to_numpy()
coo_dyn_lk_spmv_test = coo_dyn_lk_spmv_test_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgLocalNNZ", "AvgGlobalNNZ", "AvgInterProcessCall", "AvgDataSend", "AvgNPRSD", "Density"]].to_numpy()

coo_dyn_lk_spmv_X = np.concatenate((coo_dyn_lk_spmv_train, coo_dyn_lk_spmv_test), axis=0)
scaler = MinMaxScaler()  # Default behavior is to scale to [0,1]
coo_dyn_lk_spmv_X = scaler.fit_transform(coo_dyn_lk_spmv_X)
X = coo_dyn_lk_spmv_X[0:len(coo_dyn_lk_spmv_train)]
coo_dyn_lk_spmv_test_X = coo_dyn_lk_spmv_X[len(coo_dyn_lk_spmv_train):]
train_y = np.array(coo_dyn_lk_spmv_train_data[column])
test_y = np.array(coo_dyn_lk_spmv_test_data[column])

scores = []
best = 0
# c=20e4
# e=0.001
for c in np.linspace(2e1,6e4, num=50):
    for e in np.linspace(0.0001, 0.9, 20):
        best_svr = SVR(kernel='poly', C=c, gamma='auto', degree=3, epsilon=e, coef0=1)
        cv = KFold(n_splits=5, random_state=42, shuffle=True)
        for train_index, test_index in cv.split(X):
            X_train, X_test, y_train, y_test = X[train_index], X[test_index], train_y[train_index], train_y[test_index]
            best_svr.fit(X_train, y_train)
            scores.append(best_svr.score(X_test, np.array(y_test)))
            if best < np.mean(scores):
                best = np.mean(scores)
                best_param = {"C": c, "epsilon": e}
svr = SVR(kernel='poly', C=best_param['C'], gamma='auto', degree=3, epsilon=best_param['epsilon'], coef0=1)
svr.fit(X, train_y)
y_pred = svr.predict(coo_dyn_lk_spmv_test_X)

max_err = 0
avg_err = 0
for i,row in coo_dyn_lk_spmv_test_data.iterrows():
#     print(y[index])
#     g = row["Name"].split("_")
    print(row["Name"], '{0:.5g}'.format(test_y[i]/min_time_factor),"&",'{0:.5g}'.format(y_pred[i]/min_time_factor), "&", 
          '{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])+"\\%", "\\\ \hline")           
    if max_err < abs(test_y[i]-y_pred[i])*100/test_y[i]:
        max_err = abs(test_y[i]-y_pred[i])*100/test_y[i]
    avg_err += abs(test_y[i]-y_pred[i])*100/test_y[i]
    mat.append(row["Name"])
    node.append(row["Nodes"])
    prcs.append(row["nProcess"])
    rows.append(row["AvgRows"])
    nnz_r.append(row["AvgNPR"])
    a_time.append(float('{0:.5g}'.format(test_y[i]/min_time_factor)))
    p_time.append(float('{0:.5g}'.format(y_pred[i]/min_time_factor)))
    err.append(float('{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])))
    spmv_model.append("COO LK-SPMV")
    m_size.append(row["MatrixSize"])
# omp_data.groupby("Threads").count()
print("best param: ", best_param)
print("max error: ", max_err, " avg error: ", avg_err/coo_dyn_lk_spmv_test_data.shape[0])
    
coo_dyn_lk_spmv_pred["Name"] = mat
coo_dyn_lk_spmv_pred["Model"] = spmv_model
coo_dyn_lk_spmv_pred["Nodes"] = node
coo_dyn_lk_spmv_pred["Process"] = prcs
coo_dyn_lk_spmv_pred["Matrix Size"] = m_size
coo_dyn_lk_spmv_pred["Avg Row"] = rows
coo_dyn_lk_spmv_pred["Nonzero per Row"] = nnz_r
coo_dyn_lk_spmv_pred["Actual Time"] = a_time
coo_dyn_lk_spmv_pred["Predicted Time"] = p_time
coo_dyn_lk_spmv_pred["Error"] = err

NACA0015 0.3051 & 0.24753 & 18.87\% \\ \hline
delaunay_n20 0.32949 & 0.24388 & 25.98\% \\ \hline
NACA0015 0.28833 & 0.25147 & 12.78\% \\ \hline
delaunay_n20 0.31899 & 0.24689 & 22.6\% \\ \hline
NACA0015 0.36068 & 0.30633 & 15.07\% \\ \hline
delaunay_n20 0.34245 & 0.30155 & 11.94\% \\ \hline
NACA0015 0.36924 & 0.34524 & 6.498\% \\ \hline
delaunay_n20 0.3902 & 0.33896 & 13.13\% \\ \hline
AS365 0.8581 & 0.70689 & 17.62\% \\ \hline
NLR 0.95214 & 0.76902 & 19.23\% \\ \hline
AS365 0.99845 & 0.76637 & 23.24\% \\ \hline
NLR 1.0139 & 0.83405 & 17.74\% \\ \hline
AS365 1.2087 & 0.97968 & 18.95\% \\ \hline
NLR 1.2947 & 1.0672 & 17.57\% \\ \hline
AS365 1.3544 & 1.1268 & 16.81\% \\ \hline
NLR 1.4252 & 1.237 & 13.2\% \\ \hline
hugetrace-00010 1.6301 & 1.4947 & 8.304\% \\ \hline
hugetrace-00010 1.766 & 1.6556 & 6.25\% \\ \hline
road_central 1.6068 & 1.5101 & 6.02\% \\ \hline
road_central 1.7504 & 1.6827 & 3.871\% \\ \hline
nlpkkt200 6.5395 & 6.8055 & 4.068\% \\ \hline
hugetrace-00010 2.2118 & 2.1472 &

# Dynamic CSR GK-SpMV

In [8]:
csr_dyn_gk_spmv_pred = pd.DataFrame()
mat = [] 
node = []
prcs = []
rows = []
m_size = []
nnz_r = []
a_time = []
p_time = []
err = []
spmv_model = []
csr_kway_spmv = pd.read_csv("../../SpMV_Model/src/SkylakeResults/MPI_KWAY_SpMV.csv")
csr_kway_spmv = csr_kway_spmv[~csr_kway_spmv['Name'].str.contains("rmat")].reset_index()
csr_kway_spmv = csr_kway_spmv.groupby(["Name", "Nodes", "nProcess", "DataType"]).mean().reset_index().sort_values(by="Name").reset_index(drop=True)

csr_kway_spmv.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)

csr_kway_spmv = csr_kway_spmv.sort_values(by=["AvgRows", "AvgNPR"]).reset_index(drop=True)
density = []
min_time_factor = 1.0
if min(csr_kway_spmv["AvgTime"]) < 1.0:
    min_time_factor = 1/min(csr_kway_spmv["AvgTime"])
for i,r in csr_kway_spmv.iterrows():
    csr_kway_spmv.at[i, "AvgTime"] = min_time_factor*r["AvgTime"]
    density.append(float(r["AvgNNZ"])/float(r["AvgRows"] * r["AvgRows"]))
csr_kway_spmv["Density"] = density
column = "AvgTime"
csr_kway_train_data = csr_kway_spmv[~csr_kway_spmv["Name"].isin(test_matrices)].reset_index(drop=True)
csr_kway_test_data = csr_kway_spmv[csr_kway_spmv["Name"].isin(test_matrices)].reset_index(drop=True)

csr_kway_train = csr_kway_train_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()

csr_kway_test = csr_kway_test_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()

csr_kway_X = np.concatenate((csr_kway_train, csr_kway_test), axis=0)
scaler = MinMaxScaler()  # Default behavior is to scale to [0,1]
csr_kway_X = scaler.fit_transform(csr_kway_X)
X = csr_kway_X[0:len(csr_kway_train)]
csr_kway_test_X = csr_kway_X[len(csr_kway_train):]
train_y = np.array(csr_kway_train_data[column])
test_y = np.array(csr_kway_test_data[column])

scores = []
best = 0
# c=20e4
# e=0.001
for c in np.linspace(2e1,6e4, num=10):
    for e in np.linspace(0.0001, 0.1, 20):
        best_svr = SVR(kernel='poly', C=c, gamma='auto', degree=3, epsilon=e, coef0=1)
        cv = KFold(n_splits=5, random_state=42, shuffle=True)
        for train_index, test_index in cv.split(X):
            X_train, X_test, y_train, y_test = X[train_index], X[test_index], train_y[train_index], train_y[test_index]
            best_svr.fit(X_train, y_train)
            scores.append(best_svr.score(X_test, np.array(y_test)))
            if best < np.mean(scores):
                best = np.mean(scores)
                best_param = {"C": c, "epsilon": e}
svr = SVR(kernel='poly', C=best_param['C'], gamma='auto', degree=3, epsilon=best_param['epsilon'], coef0=1)
svr.fit(X, train_y)
y_pred = svr.predict(csr_kway_test_X)

max_err = 0
avg_err = 0
for i,row in csr_kway_test_data.iterrows():
#     print(y[index])
#     g = row["Name"].split("_")
    print(row["Name"], '{0:.5g}'.format(test_y[i]/min_time_factor),"&",'{0:.5g}'.format(y_pred[i]/min_time_factor), "&", 
          '{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])+"\\%", "\\\ \hline")    
    if max_err < abs(test_y[i]-y_pred[i])*100/test_y[i]:
        max_err = abs(test_y[i]-y_pred[i])*100/test_y[i]
    avg_err += abs(test_y[i]-y_pred[i])*100/test_y[i]
    mat.append(row["Name"])
    node.append(row["Nodes"])
    prcs.append(row["nProcess"])
    rows.append(row["AvgRows"])
    nnz_r.append(row["AvgNPR"])
    a_time.append(float('{0:.5g}'.format(test_y[i]/min_time_factor)))
    p_time.append(float('{0:.5g}'.format(y_pred[i]/min_time_factor)))
    err.append(float('{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])))
    spmv_model.append("CSR GK-SPMV")
    m_size.append(row["MatrixSize"])
# omp_data.groupby("Threads").count()
print("best param: ", best_param)
print("max error: ", max_err, " avg error: ", avg_err/csr_kway_test_data.shape[0])
csr_dyn_gk_spmv_pred["Name"] = mat
csr_dyn_gk_spmv_pred["Model"] = spmv_model
csr_dyn_gk_spmv_pred["Nodes"] = node
csr_dyn_gk_spmv_pred["Process"] = prcs
csr_dyn_gk_spmv_pred["Matrix Size"] = m_size
csr_dyn_gk_spmv_pred["Avg Row"] = rows
csr_dyn_gk_spmv_pred["Nonzero per Row"] = nnz_r
csr_dyn_gk_spmv_pred["Actual Time"] = a_time
csr_dyn_gk_spmv_pred["Predicted Time"] = p_time
csr_dyn_gk_spmv_pred["Error"] = err

NACA0015 6.7073 & 6.6663 & 0.6105\% \\ \hline
delaunay_n20 6.7706 & 6.7238 & 0.691\% \\ \hline
NACA0015 5.4022 & 5.7147 & 5.785\% \\ \hline
delaunay_n20 5.5189 & 5.7738 & 4.619\% \\ \hline
NACA0015 5.6271 & 6.0164 & 6.918\% \\ \hline
delaunay_n20 5.7361 & 6.0779 & 5.96\% \\ \hline
NACA0015 7.5914 & 7.2731 & 4.193\% \\ \hline
delaunay_n20 7.6589 & 7.3385 & 4.184\% \\ \hline
AS365 24.177 & 22.798 & 5.703\% \\ \hline
NLR 26.413 & 24.932 & 5.61\% \\ \hline
AS365 19.748 & 21.529 & 9.017\% \\ \hline
NLR 21.457 & 23.621 & 10.08\% \\ \hline
AS365 20.554 & 22.881 & 11.32\% \\ \hline
NLR 22.428 & 25.114 & 11.98\% \\ \hline
AS365 26.738 & 25.937 & 2.995\% \\ \hline
NLR 29.247 & 28.41 & 2.864\% \\ \hline
hugetrace-00010 73.336 & 70.712 & 3.577\% \\ \hline
hugetrace-00010 63.688 & 68.72 & 7.902\% \\ \hline
road_central 85.719 & 82.413 & 3.857\% \\ \hline
road_central 73.991 & 80.293 & 8.517\% \\ \hline
nlpkkt200 101.35 & 100.46 & 0.8775\% \\ \hline
hugetrace-00010 61.924 & 73.37 & 18.48\% \\ \hline

# Dynamic COO GK-SpMV

In [9]:
coo_dyn_gk_spmv_pred = pd.DataFrame()
mat = [] 
node = []
prcs = []
rows = []
nnz_r = []
a_time = []
p_time = []
err = []
spmv_model = []
m_size = []

coo_kway_spmv = pd.read_csv("../../SpMV_Model/src/SkylakeResults/MPI_KWAY_COO_SpMV.csv")
coo_kway_spmv = coo_kway_spmv[~coo_kway_spmv['Name'].str.contains("rmat")].reset_index(drop=True)
coo_kway_spmv = coo_kway_spmv.groupby(["Name", "Nodes", "nProcess", "DataType"]).mean().reset_index().sort_values(by="Name").reset_index()
coo_kway_spmv.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)
coo_kway_spmv = coo_kway_spmv.reset_index()
coo_kway_spmv = coo_kway_spmv.sort_values(by=["AvgRows", "AvgNPR"]).reset_index(drop=True)

density = []
min_time_factor = 1.0
if min(coo_kway_spmv["AvgTime"]) < 1.0:
    min_time_factor = 1/min(coo_kway_spmv["AvgTime"])
for i,r in coo_kway_spmv.iterrows():
    coo_kway_spmv.at[i, "AvgTime"] = min_time_factor*r["AvgTime"]
    density.append(float(r["AvgNNZ"])/float(r["AvgRows"] * r["AvgRows"]))
coo_kway_spmv["Density"] = density

column = "AvgTime"
coo_kway_train_data = coo_kway_spmv[~coo_kway_spmv["Name"].isin(test_matrices)].reset_index(drop=True)
coo_kway_test_data = coo_kway_spmv[coo_kway_spmv["Name"].isin(test_matrices)].reset_index(drop=True)

coo_kway_train = coo_kway_train_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()

coo_kway_test = coo_kway_test_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()

coo_kway_X = np.concatenate((coo_kway_train, coo_kway_test), axis=0)
scaler = MinMaxScaler()  # Default behavior is to scale to [0,1]
coo_kway_X = scaler.fit_transform(coo_kway_X)
X = coo_kway_X[0:len(coo_kway_train)]
coo_kway_test_X = coo_kway_X[len(coo_kway_train):]
train_y = np.array(coo_kway_train_data[column])
test_y = np.array(coo_kway_test_data[column])

scores = []
best = 0
# c=20e4
# e=0.001
for c in np.linspace(2e1,6e4, num=50):
    for e in np.linspace(0.0001, 0.9, 20):
        best_svr = SVR(kernel='poly', C=c, gamma='auto', degree=3, epsilon=e, coef0=1)
        cv = KFold(n_splits=5, random_state=42, shuffle=True)
        for train_index, test_index in cv.split(X):
            X_train, X_test, y_train, y_test = X[train_index], X[test_index], train_y[train_index], train_y[test_index]
            best_svr.fit(X_train, y_train)
            scores.append(best_svr.score(X_test, np.array(y_test)))
            if best < np.mean(scores):
                best = np.mean(scores)
                best_param = {"C": c, "epsilon": e}
svr = SVR(kernel='poly', C=best_param['C'], gamma='auto', degree=3, epsilon=best_param['epsilon'], coef0=1)
svr.fit(X, train_y)
y_pred = svr.predict(coo_kway_test_X)

max_err = 0
avg_err = 0
for i,row in coo_kway_test_data.iterrows():
#     print(y[index])
#     g = row["Name"].split("_")
    print(row["Name"], '{0:.5g}'.format(test_y[i]/min_time_factor),"&",'{0:.5g}'.format(y_pred[i]/min_time_factor), "&", 
          '{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])+"\\%", "\\\ \hline")           
    if max_err < abs(test_y[i]-y_pred[i])*100/test_y[i]:
        max_err = abs(test_y[i]-y_pred[i])*100/test_y[i]
    avg_err += abs(test_y[i]-y_pred[i])*100/test_y[i]
    mat.append(row["Name"])
    node.append(row["Nodes"])
    prcs.append(row["nProcess"])
    rows.append(row["AvgRows"])
    nnz_r.append(row["AvgNPR"])
    a_time.append(float('{0:.5g}'.format(test_y[i]/min_time_factor)))
    p_time.append(float('{0:.5g}'.format(y_pred[i]/min_time_factor)))
    err.append(float('{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])))
    spmv_model.append("COO GK-SPMV")
    m_size.append(row["MatrixSize"])
# omp_data.groupby("Threads").count()
print("best param: ", best_param)
print("max error: ", max_err, " avg error: ", avg_err/coo_kway_test_data.shape[0])
coo_dyn_gk_spmv_pred["Name"] = mat
coo_dyn_gk_spmv_pred["Model"] = spmv_model
coo_dyn_gk_spmv_pred["Nodes"] = node
coo_dyn_gk_spmv_pred["Process"] = prcs
coo_dyn_gk_spmv_pred["Matrix Size"] = m_size
coo_dyn_gk_spmv_pred["Avg Row"] = rows
coo_dyn_gk_spmv_pred["Nonzero per Row"] = nnz_r
coo_dyn_gk_spmv_pred["Actual Time"] = a_time
coo_dyn_gk_spmv_pred["Predicted Time"] = p_time
coo_dyn_gk_spmv_pred["Error"] = err

NACA0015 7.696 & 7.8167 & 1.567\% \\ \hline
delaunay_n20 7.7865 & 7.896 & 1.407\% \\ \hline
NACA0015 6.8483 & 7.0063 & 2.307\% \\ \hline
delaunay_n20 6.9288 & 7.096 & 2.412\% \\ \hline
NACA0015 7.5423 & 7.6873 & 1.922\% \\ \hline
delaunay_n20 7.6419 & 7.7934 & 1.982\% \\ \hline
NACA0015 9.3246 & 9.1332 & 2.053\% \\ \hline
delaunay_n20 9.4156 & 9.2485 & 1.775\% \\ \hline
AS365 27.8 & 27.115 & 2.466\% \\ \hline
NLR 30.396 & 29.676 & 2.371\% \\ \hline
AS365 25.025 & 26.439 & 5.649\% \\ \hline
NLR 27.352 & 29.018 & 6.089\% \\ \hline
AS365 27.348 & 29.014 & 6.094\% \\ \hline
NLR 29.959 & 31.849 & 6.308\% \\ \hline
AS365 33.306 & 32.625 & 2.043\% \\ \hline
NLR 36.468 & 35.75 & 1.967\% \\ \hline
hugetrace-00010 79.21 & 78.443 & 0.9694\% \\ \hline
hugetrace-00010 70.32 & 77.43 & 10.11\% \\ \hline
road_central 91.204 & 90.127 & 1.181\% \\ \hline
road_central 79.774 & 88.956 & 11.51\% \\ \hline
nlpkkt200 174.69 & 174.88 & 0.1062\% \\ \hline
hugetrace-00010 66.842 & 83.428 & 24.81\% \\ \hline
nlp

# Dynamic CSR 2D-Partitioning SpMV

In [10]:
csr_dyn_2d_pred = pd.DataFrame()
mat = [] 
node = []
prcs = []
rows = []
nnz_r = []
a_time = []
p_time = []
err = []
spmv_model = []
m_size = []

csr_2d_spmv = pd.read_csv("../../SpMV_Model/src/SkylakeResults/MPI_CSR_2D_SpMV.csv")
csr_2d_spmv = csr_2d_spmv[~csr_2d_spmv['Name'].str.contains("rmat")].reset_index(drop=True)
csr_2d_spmv = csr_2d_spmv.groupby(["Name", "Nodes", "nProcess", "DataType"]).mean().reset_index().sort_values(by="Name").reset_index()
csr_2d_spmv.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)
csr_2d_spmv = csr_2d_spmv.reset_index(drop=True)
csr_2d_spmv = csr_2d_spmv.sort_values(by=["AvgRow", "NonZeroPerRow"]).reset_index(drop=True)

density = []
min_time_factor = 1.0
if min(csr_2d_spmv["AvgTime"]) < 1.0:
    min_time_factor = 1/min(csr_2d_spmv["AvgTime"])
for i,r in csr_2d_spmv.iterrows():
    csr_2d_spmv.at[i, "AvgTime"] = min_time_factor*r["AvgTime"]
    density.append(float(r["NNZ"])/float(r["AvgRow"] * r["AvgRow"]))
csr_2d_spmv["Density"] = density

column = "AvgTime"
csr_2d_train_data = csr_2d_spmv[~csr_2d_spmv["Name"].isin(test_matrices)].reset_index(drop=True)
csr_2d_test_data = csr_2d_spmv[csr_2d_spmv["Name"].isin(test_matrices)].reset_index(drop=True)

csr_2d_train = csr_2d_train_data[["Nodes", "nProcess", "AvgRow", "NNZ", "NonZeroPerRow", "AvgNPRSD", "Density"]].to_numpy()

csr_2d_test = csr_2d_test_data[["Nodes", "nProcess", "AvgRow", "NNZ", "NonZeroPerRow", "AvgNPRSD", "Density"]].to_numpy()

csr_2d_X = np.concatenate((csr_2d_train, csr_2d_test), axis=0)
scaler = MinMaxScaler()  # Default behavior is to scale to [0,1]
csr_2d_X = scaler.fit_transform(csr_2d_X)
X = csr_2d_X[0:len(csr_2d_train)]
csr_2d_test_X = csr_2d_X[len(csr_2d_train):]
train_y = np.array(csr_2d_train_data[column])
test_y = np.array(csr_2d_test_data[column])

scores = []
best = 0
# c=20e4
# e=0.001
for c in np.linspace(2e1,6e4, num=50):
    for e in np.linspace(0.0001, 0.9, 20):
        best_svr = SVR(kernel='poly', C=c, gamma='auto', degree=3, epsilon=e, coef0=1)
        cv = KFold(n_splits=5, random_state=42, shuffle=True)
        for train_index, test_index in cv.split(X):
            X_train, X_test, y_train, y_test = X[train_index], X[test_index], train_y[train_index], train_y[test_index]
            best_svr.fit(X_train, y_train)
            scores.append(best_svr.score(X_test, np.array(y_test)))
            if best < np.mean(scores):
                best = np.mean(scores)
                best_param = {"C": c, "epsilon": e}
svr = SVR(kernel='poly', C=best_param['C'], gamma='auto', degree=3, epsilon=best_param['epsilon'], coef0=1)
svr.fit(X, train_y)
y_pred = svr.predict(csr_2d_test_X)
max_err = 0
avg_err = 0
for i,row in csr_2d_test_data.iterrows():
#     print(y[index])
#     g = row["Name"].split("_")
    print(row["Name"], "&", '{0:.5g}'.format(test_y[i]/min_time_factor),"&",'{0:.5g}'.format(y_pred[i]/min_time_factor), "&", 
          '{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])+"\\%", "\\\ \hline")           
    if max_err < abs(test_y[i]-y_pred[i])*100/test_y[i]:
        max_err = abs(test_y[i]-y_pred[i])*100/test_y[i]
    avg_err += abs(test_y[i]-y_pred[i])*100/test_y[i]
    mat.append(row["Name"])
    node.append(row["Nodes"])
    prcs.append(row["nProcess"])
    rows.append(row["AvgRow"])
    nnz_r.append(row["NonZeroPerRow"])
    a_time.append(float('{0:.5g}'.format(test_y[i]/min_time_factor)))
    p_time.append(float('{0:.5g}'.format(y_pred[i]/min_time_factor)))
    err.append(float('{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])))
    spmv_model.append("CSR 2D Partition")
    m_size.append(row["MatrixSize"])
# omp_data.groupby("Threads").count()
print("best param: ", best_param)
print("max error: ", max_err, " avg error: ", avg_err/csr_2d_test_data.shape[0])
csr_dyn_2d_pred["Name"] = mat
csr_dyn_2d_pred["Model"] = spmv_model
csr_dyn_2d_pred["Nodes"] = node
csr_dyn_2d_pred["Process"] = prcs
csr_dyn_2d_pred["Matrix Size"] = m_size
csr_dyn_2d_pred["Avg Row"] = rows
csr_dyn_2d_pred["Nonzero per Row"] = nnz_r
csr_dyn_2d_pred["Actual Time"] = a_time
csr_dyn_2d_pred["Predicted Time"] = p_time
csr_dyn_2d_pred["Error"] = err

NACA0015 & 1.2435 & 1.3503 & 8.586\% \\ \hline
delaunay_n20 & 1.2573 & 1.3626 & 8.375\% \\ \hline
NACA0015 & 1.3627 & 1.5947 & 17.03\% \\ \hline
delaunay_n20 & 1.3828 & 1.6088 & 16.34\% \\ \hline
NACA0015 & 2.0708 & 1.887 & 8.875\% \\ \hline
delaunay_n20 & 1.7642 & 1.904 & 7.926\% \\ \hline
NACA0015 & 1.9453 & 2.0184 & 3.758\% \\ \hline
delaunay_n20 & 1.9676 & 2.0365 & 3.505\% \\ \hline
AS365 & 4.3713 & 4.8751 & 11.53\% \\ \hline
AS365 & 4.6825 & 5.6381 & 20.41\% \\ \hline
NLR & 4.8133 & 5.3469 & 11.09\% \\ \hline
NLR & 5.4101 & 6.1787 & 14.21\% \\ \hline
AS365 & 5.8434 & 6.7825 & 16.07\% \\ \hline
AS365 & 7.4091 & 7.2422 & 2.252\% \\ \hline
NLR & 6.4835 & 7.4363 & 14.7\% \\ \hline
NLR & 8.2244 & 7.9396 & 3.464\% \\ \hline
hugetrace-00010 & 13.59 & 13.862 & 2.002\% \\ \hline
hugetrace-00010 & 17.388 & 15.849 & 8.851\% \\ \hline
road_central & 15.307 & 15.794 & 3.18\% \\ \hline
hugetrace-00010 & 20.399 & 18.816 & 7.759\% \\ \hline
road_central & 19.163 & 18.022 & 5.956\% \\ \hline
huget

# Dynamic COO 2D-Partitioning SpMV

In [11]:
coo_dyn_2d_pred = pd.DataFrame()
mat = [] 
node = []
prcs = []
rows = []
nnz_r = []
a_time = []
p_time = []
err = []
spmv_model = []
m_size = []
coo_2d_spmv = pd.read_csv("../../SpMV_Model/src/SkylakeResults/MPI_COO_2D_SpMV.csv")
coo_2d_spmv = coo_2d_spmv[~coo_2d_spmv['Name'].str.contains("rmat")].reset_index(drop=True)
coo_2d_spmv = coo_2d_spmv.groupby(["Name", "Nodes", "nProcess", "DataType"]).mean().reset_index().sort_values(by="Name").reset_index()
coo_2d_spmv.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)
coo_2d_spmv = coo_2d_spmv.reset_index(drop=True)
coo_2d_spmv = coo_2d_spmv.sort_values(by=["AvgRow", "NonZeroPerRow"]).reset_index(drop=True)

density = []
min_time_factor = 1.0
if min(coo_2d_spmv["AvgTime"]) < 1.0:
    min_time_factor = 1/min(coo_2d_spmv["AvgTime"])
for i,r in coo_2d_spmv.iterrows():
    coo_2d_spmv.at[i, "AvgTime"] = min_time_factor*r["AvgTime"]
    density.append(float(r["NNZ"])/float(r["AvgRow"] * r["AvgRow"]))
coo_2d_spmv["Density"] = density

column = "AvgTime"
coo_2d_train_data = coo_2d_spmv[~coo_2d_spmv["Name"].isin(test_matrices)].reset_index(drop=True)
coo_2d_test_data = coo_2d_spmv[coo_2d_spmv["Name"].isin(test_matrices)].reset_index(drop=True)

coo_2d_train = coo_2d_train_data[["Nodes", "nProcess", "AvgRow", "NNZ", "NonZeroPerRow", "AvgNPRSD", "Density"]].to_numpy()

coo_2d_test = coo_2d_test_data[["Nodes", "nProcess", "AvgRow", "NNZ", "NonZeroPerRow", "AvgNPRSD", "Density"]].to_numpy()

coo_2d_X = np.concatenate((coo_2d_train, coo_2d_test), axis=0)
scaler = MinMaxScaler()  # Default behavior is to scale to [0,1]
coo_2d_X = scaler.fit_transform(coo_2d_X)
X = coo_2d_X[0:len(coo_2d_train)]
coo_2d_test_X = coo_2d_X[len(coo_2d_train):]
train_y = np.array(coo_2d_train_data[column])
test_y = np.array(coo_2d_test_data[column])

scores = []
best = 0
# c=20e4
# e=0.001
for c in np.linspace(2e1,5e5, num=50):
    for e in np.linspace(0.0001, 0.1, 20):
        best_svr = SVR(kernel='poly', C=c, gamma='auto', degree=3, epsilon=e, coef0=1)
        cv = KFold(n_splits=5, random_state=42, shuffle=True)
        for train_index, test_index in cv.split(X):
            X_train, X_test, y_train, y_test = X[train_index], X[test_index], train_y[train_index], train_y[test_index]
            best_svr.fit(X_train, y_train)
            scores.append(best_svr.score(X_test, np.array(y_test)))
            if best < np.mean(scores):
                best = np.mean(scores)
                best_param = {"C": c, "epsilon": e}
svr = SVR(kernel='poly', C=best_param['C'], gamma='auto', degree=3, epsilon=best_param['epsilon'], coef0=1)
svr.fit(X, train_y)
y_pred = svr.predict(coo_2d_test_X)

max_err = 0
avg_err = 0
for i,row in coo_2d_test_data.iterrows():
#     print(y[index])
#     g = row["Name"].split("_")
    print(row["Name"], "&", '{0:.5g}'.format(test_y[i]/min_time_factor),"&",'{0:.5g}'.format(y_pred[i]/min_time_factor), "&", 
          '{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])+"\\%", "\\\ \hline")           
    if max_err < abs(test_y[i]-y_pred[i])*100/test_y[i]:
        max_err = abs(test_y[i]-y_pred[i])*100/test_y[i]
    avg_err += abs(test_y[i]-y_pred[i])*100/test_y[i]
    mat.append(row["Name"])
    node.append(row["Nodes"])
    prcs.append(row["nProcess"])
    rows.append(row["AvgRow"])
    nnz_r.append(row["NonZeroPerRow"])
    a_time.append(float('{0:.5g}'.format(test_y[i]/min_time_factor)))
    p_time.append(float('{0:.5g}'.format(y_pred[i]/min_time_factor)))
    err.append(float('{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])))
    spmv_model.append("COO 2D Partition")
    m_size.append(row["MatrixSize"])
# omp_data.groupby("Threads").count()
print("best param: ", best_param)
print("max error: ", max_err, " avg error: ", avg_err/coo_2d_test_data.shape[0])
coo_dyn_2d_pred["Name"] = mat
coo_dyn_2d_pred["Model"] = spmv_model
coo_dyn_2d_pred["Nodes"] = node
coo_dyn_2d_pred["Process"] = prcs
coo_dyn_2d_pred["Matrix Size"] = m_size
coo_dyn_2d_pred["Avg Row"] = rows
coo_dyn_2d_pred["Nonzero per Row"] = nnz_r
coo_dyn_2d_pred["Actual Time"] = a_time
coo_dyn_2d_pred["Predicted Time"] = p_time
coo_dyn_2d_pred["Error"] = err

NACA0015 & 1.9234 & 0.98418 & 48.83\% \\ \hline
delaunay_n20 & 1.8458 & 0.99222 & 46.24\% \\ \hline
NACA0015 & 1.8469 & 1.0506 & 43.12\% \\ \hline
delaunay_n20 & 1.7502 & 1.0598 & 39.45\% \\ \hline
NACA0015 & 1.255 & 1.2518 & 0.2525\% \\ \hline
delaunay_n20 & 1.2699 & 1.2635 & 0.5059\% \\ \hline
NACA0015 & 1.3104 & 1.4196 & 8.331\% \\ \hline
delaunay_n20 & 1.3355 & 1.4328 & 7.282\% \\ \hline
AS365 & 3.6177 & 3.3734 & 6.752\% \\ \hline
AS365 & 3.6481 & 3.7656 & 3.22\% \\ \hline
NLR & 3.9329 & 3.7001 & 5.921\% \\ \hline
NLR & 4.0115 & 4.136 & 3.105\% \\ \hline
AS365 & 4.2777 & 4.6911 & 9.665\% \\ \hline
AS365 & 5.5348 & 5.2844 & 4.523\% \\ \hline
NLR & 4.7804 & 5.1592 & 7.924\% \\ \hline
NLR & 6.1182 & 5.8102 & 5.035\% \\ \hline
hugetrace-00010 & 10.213 & 10.13 & 0.8161\% \\ \hline
hugetrace-00010 & 11.378 & 11.247 & 1.154\% \\ \hline
road_central & 11.586 & 11.664 & 0.6716\% \\ \hline
hugetrace-00010 & 13.165 & 13.663 & 3.778\% \\ \hline
road_central & 12.835 & 12.903 & 0.5287\% \\ \hli

# Overall Evalution

In [12]:
pred = pd.concat([csr_dyn_lk_spmv_pred, coo_dyn_lk_spmv_pred, csr_dyn_gk_spmv_pred, coo_dyn_gk_spmv_pred, csr_dyn_2d_pred])
pred = pred.reset_index(drop=True)

In [13]:
pred = pred.sort_values(by=["Name"]).reset_index(drop=True)
models = ["CSR LK-SPMV", "COO LK-SPMV", "CSR GK-SPMV", "COO GK-SPMV", "CSR 2D Partition"]
print("\multirow{2}{*}{Name} & \multirow{2}{*}{Nodes} & \multirow{2}{*}{Processes} & \multicolumn{5}{c |} {Error\%} \\\\ \cline{4-8}")
print(" &  &  & CSR L1DR & COO L1DRV & CSR G1DR & COO G1DR & CSR 2DU \\\\ \hline")
for g,g_data in pred.groupby("Name"):
    for n,n_data in g_data.groupby("Nodes"):
        for p,p_data in n_data.groupby("Process"):
            p_min = p_data["Predicted Time"].min()
            pr_best_a = p_data[p_data["Predicted Time"] == p_min]["Model"].iloc[0]
            a_min = p_data["Actual Time"].min()
            ac_best_a = p_data[p_data["Actual Time"] == a_min]["Model"].iloc[0]
#             print(pr_best_a, " ", ac_best_a)
            print(g, ' & ', n, ' & ', p, end=' & ')
            i=1
            for m in models: #,m_data in p_data.groupby("Model"):
                m_data = p_data[p_data["Model"] == m].reset_index(drop=True)
                if m==ac_best_a:
                    if pr_best_a==ac_best_a and pr_best_a == m:
                        print("\cellcolor{green!25} ", float('{0:.3g}'.format(m_data["Error"].iloc[0])), end=' & ' if i<len(p_data) else ' \\\\ \hline')
                    else:
                        print("\cellcolor{blue!25} ", float('{0:.3g}'.format(m_data["Error"].iloc[0])), end=' & ' if i<len(p_data) else ' \\\\ \hline')
                elif m==pr_best_a:
                    print("\cellcolor{red!25} ", float('{0:.3g}'.format(m_data["Error"].iloc[0])), end=' & ' if i<len(p_data) else ' \\\\ \hline')
                else:
                    print(float('{0:.3g}'.format(m_data["Error"].iloc[0])), end=' & ' if i<len(p_data) else ' \\\\ \hline')
                i+=1
            print()
    
# pred

\multirow{2}{*}{Name} & \multirow{2}{*}{Nodes} & \multirow{2}{*}{Processes} & \multicolumn{5}{c |} {Error\%} \\ \cline{4-8}
 &  &  & CSR L1DR & COO L1DRV & CSR G1DR & COO G1DR & CSR 2DU \\ \hline
AS365  &  4  &  144 & \cellcolor{green!25}  18.7 & 16.8 & 3.0 & 2.04 & 2.25 \\ \hline
AS365  &  5  &  169 & \cellcolor{green!25}  17.6 & 18.9 & 11.3 & 6.09 & 16.1 \\ \hline
AS365  &  7  &  225 & \cellcolor{green!25}  19.3 & 23.2 & 9.02 & 5.65 & 20.4 \\ \hline
AS365  &  8  &  256 & \cellcolor{green!25}  16.5 & 17.6 & 5.7 & 2.47 & 11.5 \\ \hline
NACA0015  &  4  &  144 & \cellcolor{red!25}  14.8 & \cellcolor{blue!25}  6.5 & 4.19 & 2.05 & 3.76 \\ \hline
NACA0015  &  5  &  169 & \cellcolor{green!25}  16.1 & 15.1 & 6.92 & 1.92 & 8.88 \\ \hline
NACA0015  &  7  &  225 & \cellcolor{red!25}  24.5 & \cellcolor{blue!25}  12.8 & 5.79 & 2.31 & 17.0 \\ \hline
NACA0015  &  8  &  256 & \cellcolor{green!25}  18.7 & 18.9 & 0.611 & 1.57 & 8.59 \\ \hline
NLR  &  4  &  144 & \cellcolor{green!25}  16.9 & 13.2 & 2.86

In [14]:
for g,g_data in pred.groupby("Name"):
    for n,n_data in g_data.groupby("Nodes"):
        for p,p_data in n_data.groupby("Process"):
            print(g, p, end=' ')
            for m,m_data in p_data.groupby("Model"):
                print(m, m_data["Actual Time"].iloc[0], end=" ")
            print()

AS365 144 COO GK-SPMV 33.306 COO LK-SPMV 1.3544 CSR 2D Partition 7.4091 CSR GK-SPMV 26.738 CSR LK-SPMV 1.3116 
AS365 169 COO GK-SPMV 27.348 COO LK-SPMV 1.2087 CSR 2D Partition 5.8434 CSR GK-SPMV 20.554 CSR LK-SPMV 1.1269 
AS365 225 COO GK-SPMV 25.025 COO LK-SPMV 0.99845 CSR 2D Partition 4.6825 CSR GK-SPMV 19.748 CSR LK-SPMV 0.90991 
AS365 256 COO GK-SPMV 27.8 COO LK-SPMV 0.8581 CSR 2D Partition 4.3713 CSR GK-SPMV 24.177 CSR LK-SPMV 0.8264 
NACA0015 144 COO GK-SPMV 9.3246 COO LK-SPMV 0.36924 CSR 2D Partition 1.9453 CSR GK-SPMV 7.5914 CSR LK-SPMV 0.38137 
NACA0015 169 COO GK-SPMV 7.5423 COO LK-SPMV 0.36068 CSR 2D Partition 2.0708 CSR GK-SPMV 5.6271 CSR LK-SPMV 0.34119 
NACA0015 225 COO GK-SPMV 6.8483 COO LK-SPMV 0.28833 CSR 2D Partition 1.3627 CSR GK-SPMV 5.4022 CSR LK-SPMV 0.31422 
NACA0015 256 COO GK-SPMV 7.696 COO LK-SPMV 0.3051 CSR 2D Partition 1.2435 CSR GK-SPMV 6.7073 CSR LK-SPMV 0.30221 
NLR 144 COO GK-SPMV 36.468 COO LK-SPMV 1.4252 CSR 2D Partition 8.2244 CSR GK-SPMV 29.247 CSR L

In [15]:
def get_optimal(_test_matrices, _csr_kway_pred, _coo_kway_pred, _csr_local_kway_pred, _coo_local_kway_pred, _csr_2d_pred):
    _mat = []
    _p = []
    _algo = []
    _ralgo = []
    _csr_g1dr = []
    _coo_g1dr = []
    _csr_l1dr = []
    _coo_l1dr = []
    _csr_2d = []
    _coo_2d = []
    _pr_csr_g1dr = []
    _pr_coo_g1dr = []
    _pr_csr_l1dr = []
    _pr_coo_l1dr = []
    _pr_csr_2d = []
    _pr_coo_2d = []
    _right = []
    _ac = []
    _pr = []
    _optimal = pd.DataFrame()
    for m in _test_matrices:
        for p in [144, 169, 225]:
            _p.append(p)
            _mat.append(m)
#             print(p, m)
            al1 = _csr_kway_pred[(_csr_kway_pred["Name"] == m) & (_csr_kway_pred["Process"] == p)].reset_index(drop=True).iloc[0]
            csr_gk = al1["Predicted Time"]
            min_t = csr_gk;
            min_a = "CSR G1DR SpMV"
            r_a = "CSR G1DR SpMV"
            r_t = al1["Actual Time"]
            _csr_g1dr.append(al1["Actual Time"])
            _pr_csr_g1dr.append(al1["Predicted Time"])
            al5 = _csr_2d_pred[(_csr_2d_pred["Name"] == m) & (_csr_2d_pred["Process"] == p)].reset_index(drop=True).iloc[0]
            csr_2d = al5["Predicted Time"]
            _csr_2d.append(al5["Actual Time"])
            _pr_csr_2d.append(al5["Predicted Time"])
            if min_t > csr_2d:
                min_t = csr_2d
                min_a = "CSR 2DU"
            if r_t > al5["Actual Time"]:
                r_t = al5["Actual Time"]
                r_a = "CSR 2DU"
            al2 = _coo_kway_pred[(_coo_kway_pred["Name"] == m) & (_coo_kway_pred["Process"] == p)].reset_index(drop=True).iloc[0]
            coo_gk = al2["Predicted Time"]
            _coo_g1dr.append(al2["Actual Time"])
            _pr_coo_g1dr.append(al2["Predicted Time"])
            if min_t > coo_gk:
                min_t = coo_gk
                min_a = "COO G1DR SpMV"
            if r_t > al2["Actual Time"]:
                r_t = al2["Actual Time"]
                r_a = "COO G1DR SpMV"
            al3 = _csr_local_kway_pred[(_csr_local_kway_pred["Name"] == m) & (_csr_local_kway_pred["Process"] == p)].reset_index(drop=True).iloc[0]
            csr_lk = al3["Predicted Time"]
            _csr_l1dr.append(al3["Actual Time"])
            _pr_csr_l1dr.append(al3["Predicted Time"])
            if min_t > csr_lk:
                min_t = csr_lk
                min_a = "CSR L1DR SpMV"
            if r_t > al3["Actual Time"]:
                r_t = al3["Actual Time"]
                r_a = "CSR L1DR SpMV"
            al4 = _coo_local_kway_pred[(_coo_local_kway_pred["Name"] == m) & (_coo_local_kway_pred["Process"] == p)].reset_index(drop=True).iloc[0]
            coo_lk = al4["Predicted Time"]
            _coo_l1dr.append(al4["Actual Time"])
            _pr_coo_l1dr.append(al4["Predicted Time"])
            if min_t > coo_lk:
                min_t = coo_lk
                min_a = "COO L1DR SpMV"
            if r_t > al4["Actual Time"]:
                r_t = al4["Actual Time"]
                r_a = "COO L1DR SpMV"
            _algo.append(min_a)
            _ralgo.append(r_a)
            _ac.append(r_t)
            _pr.append(min_t)
            if min_a == r_a:
                _right.append(1)
            else:
                _right.append(0)
    #         print(p, " algo: ", min_a, " time:", min_t, " csr lk:", csr_lk, " csr gk:", csr_gk, " csr 2d:", csr_2d, " coo lk:", coo_lk, " coo gk:", coo_gk, " coo 2d:", coo_2d)
    _optimal["Matrices"] = _mat
    _optimal["Processes"] = _p
    _optimal["Predicted Best Strategy"] = _algo
    _optimal["Actual Best Strategy"] = _ralgo
    _optimal["R/W"] = _right
    _optimal["CSR L1DR SpMV"] = _csr_l1dr
    _optimal["CSR L1DR SpMV Predicted"] = _pr_csr_l1dr
    _optimal["COO L1DR SpMV"] = _coo_l1dr
    _optimal["COO L1DR SpMV Predicted"] = _pr_coo_l1dr
    _optimal["CSR G1DR SpMV"] = _csr_g1dr
    _optimal["CSR G1DR SpMV Predicted"] = _pr_csr_g1dr
    _optimal["COO G1DR SpMV"] = _coo_g1dr
    _optimal["COO G1DR SpMV Predicted"] = _pr_coo_g1dr
    _optimal["CSR 2DU"] = _csr_2d
    _optimal["CSR 2DU Predicted"] = _pr_csr_2d
    _optimal["Predicted Time"] = _ac
    _optimal["Actual Time"] = _pr
    _optimal[_optimal["Processes"] == 225]
    return _optimal

In [16]:
optimal = get_optimal(test_matrices, csr_dyn_gk_spmv_pred, coo_dyn_gk_spmv_pred, csr_dyn_lk_spmv_pred, coo_dyn_lk_spmv_pred, csr_dyn_2d_pred)
optimal

Unnamed: 0,Matrices,Processes,Predicted Best Strategy,Actual Best Strategy,R/W,CSR L1DR SpMV,CSR L1DR SpMV Predicted,COO L1DR SpMV,COO L1DR SpMV Predicted,CSR G1DR SpMV,CSR G1DR SpMV Predicted,COO G1DR SpMV,COO G1DR SpMV Predicted,CSR 2DU,CSR 2DU Predicted,Predicted Time,Actual Time
0,delaunay_n20,144,CSR L1DR SpMV,COO L1DR SpMV,0,0.41961,0.31857,0.3902,0.33896,7.6589,7.3385,9.4156,9.2485,1.9676,2.0365,0.3902,0.31857
1,delaunay_n20,169,CSR L1DR SpMV,CSR L1DR SpMV,1,0.3412,0.28099,0.34245,0.30155,5.7361,6.0779,7.6419,7.7934,1.7642,1.904,0.3412,0.28099
2,delaunay_n20,225,CSR L1DR SpMV,COO L1DR SpMV,0,0.32653,0.23238,0.31899,0.24689,5.5189,5.7738,6.9288,7.096,1.3828,1.6088,0.31899,0.23238
3,NACA0015,144,CSR L1DR SpMV,COO L1DR SpMV,0,0.38137,0.32512,0.36924,0.34524,7.5914,7.2731,9.3246,9.1332,1.9453,2.0184,0.36924,0.32512
4,NACA0015,169,CSR L1DR SpMV,CSR L1DR SpMV,1,0.34119,0.28642,0.36068,0.30633,5.6271,6.0164,7.5423,7.6873,2.0708,1.887,0.34119,0.28642
5,NACA0015,225,CSR L1DR SpMV,COO L1DR SpMV,0,0.31422,0.23726,0.28833,0.25147,5.4022,5.7147,6.8483,7.0063,1.3627,1.5947,0.28833,0.23726
6,AS365,144,CSR L1DR SpMV,CSR L1DR SpMV,1,1.3116,1.0661,1.3544,1.1268,26.738,25.937,33.306,32.625,7.4091,7.2422,1.3116,1.0661
7,AS365,169,CSR L1DR SpMV,CSR L1DR SpMV,1,1.1269,0.92885,1.2087,0.97968,20.554,22.881,27.348,29.014,5.8434,6.7825,1.1269,0.92885
8,AS365,225,CSR L1DR SpMV,CSR L1DR SpMV,1,0.90991,0.73445,0.99845,0.76637,19.748,21.529,25.025,26.439,4.6825,5.6381,0.90991,0.73445
9,road_central,144,COO L1DR SpMV,COO L1DR SpMV,1,2.6338,2.6794,2.4087,2.5767,95.798,95.304,105.35,104.81,25.038,22.395,2.4087,2.5767


In [17]:
print("performance of all")
# for i,r in optimal.iterrows():
#     print(r["Matrices"], " & \\backslashbox{", round(r["CSR L1DR SpMV"], 2),"}{",round(r["CSR L1DR SpMV Predicted"], 2), "} & \\backslashbox{", round(r["COO L1DR SpMV"], 2), "}{", round(r["COO L1DR SpMV Predicted"], 2), "} & \\backslashbox{", round(r["CSR G1DR SpMV"], 2), "}{", round(r["CSR G1DR SpMV Predicted"], 2), "} & \\backslashbox{", round(r["COO G1DR SpMV"], 2), "}{", round(r["COO G1DR SpMV Predicted"], 2), "} & \\backslashbox{", round(r["CSR 2DU"], 2), "}{", round(r["CSR 2DU Predicted"], 2), "} & \\backslashbox{", round(r["COO 2DU"], 2), "}{", round(r["COO 2DU Predicted"], 2), "} \\\ \hline")
for i,r in optimal.sort_values(by="Matrices").reset_index().iterrows():
    print(r["Matrices"], " & ", r["Processes"], " & ", round(r["CSR L1DR SpMV"], 2)," & ",round(r["CSR L1DR SpMV Predicted"], 2), " & ", round(r["COO L1DR SpMV"], 2), " & ", round(r["COO L1DR SpMV Predicted"], 2), " & ", round(r["CSR G1DR SpMV"], 2), " & ", round(r["CSR G1DR SpMV Predicted"], 2), " & ", round(r["COO G1DR SpMV"], 2), " & ", round(r["COO G1DR SpMV Predicted"], 2), " & ", round(r["CSR 2DU"], 2), " & ", round(r["CSR 2DU Predicted"], 2), " \\\ \hline")

print("Best Alforithm")
for i,r in optimal.iterrows():
    print(r["Matrices"], " & ", r["Predicted Best Strategy"], " & ", r["Actual Best Strategy"], " & ", r["Predicted Time"], " & ", r["Actual Time"], " & ", r["R/W"], " \\\ \hline")

performance of all
AS365  &  144  &  1.31  &  1.07  &  1.35  &  1.13  &  26.74  &  25.94  &  33.31  &  32.62  &  7.41  &  7.24  \\ \hline
AS365  &  169  &  1.13  &  0.93  &  1.21  &  0.98  &  20.55  &  22.88  &  27.35  &  29.01  &  5.84  &  6.78  \\ \hline
AS365  &  225  &  0.91  &  0.73  &  1.0  &  0.77  &  19.75  &  21.53  &  25.02  &  26.44  &  4.68  &  5.64  \\ \hline
NACA0015  &  144  &  0.38  &  0.33  &  0.37  &  0.35  &  7.59  &  7.27  &  9.32  &  9.13  &  1.95  &  2.02  \\ \hline
NACA0015  &  169  &  0.34  &  0.29  &  0.36  &  0.31  &  5.63  &  6.02  &  7.54  &  7.69  &  2.07  &  1.89  \\ \hline
NACA0015  &  225  &  0.31  &  0.24  &  0.29  &  0.25  &  5.4  &  5.71  &  6.85  &  7.01  &  1.36  &  1.59  \\ \hline
NLR  &  144  &  1.4  &  1.17  &  1.43  &  1.24  &  29.25  &  28.41  &  36.47  &  35.75  &  8.22  &  7.94  \\ \hline
NLR  &  169  &  1.26  &  1.01  &  1.29  &  1.07  &  22.43  &  25.11  &  29.96  &  31.85  &  6.48  &  7.44  \\ \hline
NLR  &  225  &  0.99  &  0.8  &  1.01  

In [158]:
csr_2d_spmv[csr_2d_spmv["Name"] == "AS365"]
csr_dyn_2d_pred
min_time_factor = 1.0
if min(csr_dyn_lk_spmv["AvgTime"]) < 1.0:
    min_time_factor = 1/min(csr_dyn_lk_spmv["AvgTime"])
for i,r in csr_dyn_lk_spmv.iterrows():
    csr_dyn_lk_spmv.at[i, "AvgTime"] = min_time_factor*r["AvgTime"]

Unnamed: 0,Name,Model,Nodes,Process,Matrix Size,Avg Row,Nonzero per Row,Actual Time,Predicted Time,Error
0,NACA0015,CSR 2D Partition,5,225,1039183,69279,0.399649,0.053592,0.049382,7.856
1,delaunay_n20,CSR 2D Partition,5,225,1048576,69906,0.399989,0.054246,0.04989,8.029
2,NACA0015,CSR 2D Partition,4,169,1039183,79938,0.461129,0.061411,0.040047,34.79
3,delaunay_n20,CSR 2D Partition,4,169,1048576,80660,0.461531,0.061718,0.040716,34.03
4,NACA0015,CSR 2D Partition,3,144,1039183,86599,0.49956,0.067617,0.052022,23.06
5,delaunay_n20,CSR 2D Partition,3,144,1048576,87382,0.49999,0.068403,0.052821,22.78
6,AS365,CSR 2D Partition,5,225,3799275,253285,0.398956,0.16538,0.19259,16.46
7,NLR,CSR 2D Partition,5,225,4163763,277585,0.399893,0.19505,0.21184,8.608
8,AS365,CSR 2D Partition,4,169,3799275,292252,0.460334,0.21273,0.22293,4.792
9,AS365,CSR 2D Partition,3,144,3799275,316607,0.498694,0.26972,0.26994,0.08134
