# Header

In [1]:
import numpy as np
from numpy import array, newaxis, expand_dims
import matplotlib.pyplot as plt
import pandas as pd
from copy import deepcopy as copy
import matplotlib.backends.backend_pdf as pdflib
import scipy as sp
import scipy.stats as stats
import math
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_predict
# plotly.tools.set_credentials_file(username='mhossa10', api_key='CSWpOV3xHUVRzimjfHSB')
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

# Color Lists

In [2]:
color_list1 = ['#1b9e77', '#d95f02', '#7570b3', '#e7298a', '#66a61e', '#e6ab02']
color_list2 = ['#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00', '#ffff33']
color_list3 = ['#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c']

# Test Matrices

In [3]:
test_matrices = [
    "delaunay_n20"
    , "NACA0015"
#     , "belgium.osm"
    , "AS365"
#     , "roadNet-TX"
    , "road_central"
    , "NLR"
    , "hugetrace-00010"
    , "nlpkkt200"
#     , "uk-2002"
]

rmat_test_matrices = [
    "rmat_100M2M"
    , "rmat_100M3M"
    , "rmat_200M4M"
    , "rmat_500M3M"
    , "rmat_500M4M"
]

# Dynamic CSR Local SpMV(LK-SpMV)

In [None]:
csr_dyn_lk_spmv_pred = pd.DataFrame()
mat = [] 
node = []
prcs = []
rows = []
nnz_r = []
a_time = []
p_time = []
err = []
spmv_model = []
m_size = []
csr_dyn_lk_spmv = pd.read_csv("../../SpMV_Model/src/SkylakeResults/MPI_Local_KWAY_CSR_SpMV.csv")
csr_dyn_lk_spmv = csr_dyn_lk_spmv[~csr_dyn_lk_spmv['Name'].str.contains("rmat")].reset_index(drop=True)
csr_dyn_lk_spmv = csr_dyn_lk_spmv.groupby(["Name", "Nodes", "nProcess", "DataType"]).mean().reset_index().sort_values(by="Name").reset_index()
csr_dyn_lk_spmv.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)
csr_dyn_lk_spmv = csr_dyn_lk_spmv.reset_index()
csr_dyn_lk_spmv = csr_dyn_lk_spmv.sort_values(by=["AvgRows", "AvgNPR"]).reset_index(drop=True)

density = []
min_time_factor = 1.0
if min(csr_dyn_lk_spmv["AvgTime"]) < 1.0:
    min_time_factor = 1/min(csr_dyn_lk_spmv["AvgTime"])
for i,r in csr_dyn_lk_spmv.iterrows():
    csr_dyn_lk_spmv.at[i, "AvgTime"] = min_time_factor*r["AvgTime"]
    density.append(float(r["AvgNNZ"])/float(r["AvgRows"] * r["AvgRows"]))
csr_dyn_lk_spmv["Density"] = density

column = "AvgTime"
csr_dyn_lk_spmv_train_data = csr_dyn_lk_spmv[~csr_dyn_lk_spmv["Name"].isin(test_matrices)].reset_index(drop=True)
csr_dyn_lk_spmv_test_data = csr_dyn_lk_spmv[csr_dyn_lk_spmv["Name"].isin(test_matrices)].reset_index(drop=True)

#     csr_local_kway_train = csr_local_kway_train_data[["AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()
#     csr_local_kway_test = csr_local_kway_test_data[["AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()

csr_dyn_lk_spmv_train = csr_dyn_lk_spmv_train_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgLocalNNZ", "AvgGlobalNNZ", "AvgInterProcessCall", "AvgDataSend", "AvgNPRSD", "Density"]].to_numpy()
csr_dyn_lk_spmv_test = csr_dyn_lk_spmv_test_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgLocalNNZ", "AvgGlobalNNZ", "AvgInterProcessCall", "AvgDataSend", "AvgNPRSD", "Density"]].to_numpy()

csr_dyn_lk_spmv_X = np.concatenate((csr_dyn_lk_spmv_train, csr_dyn_lk_spmv_test), axis=0)
scaler = MinMaxScaler()  # Default behavior is to scale to [0,1]
csr_dyn_lk_spmv_X = scaler.fit_transform(csr_dyn_lk_spmv_X)
X = csr_dyn_lk_spmv_X[0:len(csr_dyn_lk_spmv_train)]
csr_dyn_lk_spmv_test_X = csr_dyn_lk_spmv_X[len(csr_dyn_lk_spmv_train):]
train_y = np.array(csr_dyn_lk_spmv_train_data[column])
test_y = np.array(csr_dyn_lk_spmv_test_data[column])

scores = []
best = 0
# c=20e4
# e=0.001
for c in np.linspace(2e1,6e4, num=50):
    for e in np.linspace(0.0001, 0.9, 20):
        best_svr = SVR(kernel='poly', C=c, gamma='auto', degree=3, epsilon=e, coef0=1)
        cv = KFold(n_splits=5, random_state=42, shuffle=True)
        for train_index, test_index in cv.split(X):
            X_train, X_test, y_train, y_test = X[train_index], X[test_index], train_y[train_index], train_y[test_index]
            best_svr.fit(X_train, y_train)
            scores.append(best_svr.score(X_test, np.array(y_test)))
            if best < np.mean(scores):
                best = np.mean(scores)
                best_param = {"C": c, "epsilon": e}
svr = SVR(kernel='poly', C=best_param['C'], gamma='auto', degree=3, epsilon=best_param['epsilon'], coef0=1)
svr.fit(X, train_y)
y_pred = svr.predict(csr_dyn_lk_spmv_test_X)

max_err = 0
avg_err = 0
for i,row in csr_dyn_lk_spmv_test_data.iterrows():
#     print(y[index])
#     g = row["Name"].split("_")
    print(row["Name"], '{0:.5g}'.format(test_y[i]/min_time_factor),"&",'{0:.5g}'.format(y_pred[i]/min_time_factor), "&", 
          '{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])+"\\%", "\\\ \hline")           
    if max_err < abs(test_y[i]-y_pred[i])*100/test_y[i]:
        max_err = abs(test_y[i]-y_pred[i])*100/test_y[i]
    avg_err += abs(test_y[i]-y_pred[i])*100/test_y[i]
    mat.append(row["Name"])
    node.append(row["Nodes"])
    prcs.append(row["nProcess"])
    rows.append(row["AvgRows"])
    nnz_r.append(row["AvgNPR"])
    a_time.append(float('{0:.5g}'.format(test_y[i]/min_time_factor)))
    p_time.append(float('{0:.5g}'.format(y_pred[i]/min_time_factor)))
    err.append(float('{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])))
    spmv_model.append("CSR LK-SPMV")
    m_size.append(row["MatrixSize"])
# omp_data.groupby("Threads").count()
print("best param: ", best_param)
print("max error: ", max_err, " avg error: ", avg_err/csr_dyn_lk_spmv_test_data.shape[0])
    
csr_dyn_lk_spmv_pred["Name"] = mat
csr_dyn_lk_spmv_pred["Model"] = spmv_model
csr_dyn_lk_spmv_pred["Nodes"] = node
csr_dyn_lk_spmv_pred["Process"] = prcs
csr_dyn_lk_spmv_pred["Matrix Size"] = m_size
csr_dyn_lk_spmv_pred["Avg Row"] = rows
csr_dyn_lk_spmv_pred["Nonzero per Row"] = nnz_r
csr_dyn_lk_spmv_pred["Actual Time"] = a_time
csr_dyn_lk_spmv_pred["Predicted Time"] = p_time
csr_dyn_lk_spmv_pred["Error"] = err

# Dynamic COO Local SpMV(LK-SpMV)

In [5]:
coo_dyn_lk_spmv_pred = pd.DataFrame()
mat = [] 
node = []
prcs = []
rows = []
nnz_r = []
a_time = []
p_time = []
err = []
spmv_model = []
m_size = []
coo_dyn_lk_spmv = pd.read_csv("../../SpMV_Model/src/SkylakeResults/MPI_Local_KWAY_COO_SpMV.csv")
coo_dyn_lk_spmv = coo_dyn_lk_spmv[~coo_dyn_lk_spmv['Name'].str.contains("rmat")].reset_index(drop=True)
coo_dyn_lk_spmv = coo_dyn_lk_spmv.groupby(["Name", "Nodes", "nProcess", "DataType"]).mean().reset_index().sort_values(by="Name").reset_index()
coo_dyn_lk_spmv.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)
coo_dyn_lk_spmv = coo_dyn_lk_spmv.reset_index()
coo_dyn_lk_spmv = coo_dyn_lk_spmv.sort_values(by=["AvgRows", "AvgNPR"]).reset_index(drop=True)

density = []
min_time_factor = 1.0
if min(coo_dyn_lk_spmv["AvgTime"]) < 1.0:
    min_time_factor = 1/min(coo_dyn_lk_spmv["AvgTime"])
for i,r in coo_dyn_lk_spmv.iterrows():
    coo_dyn_lk_spmv.at[i, "AvgTime"] = min_time_factor*r["AvgTime"]
    density.append(float(r["AvgNNZ"])/float(r["AvgRows"] * r["AvgRows"]))
coo_dyn_lk_spmv["Density"] = density

column = "AvgTime"
coo_dyn_lk_spmv_train_data = coo_dyn_lk_spmv[~coo_dyn_lk_spmv["Name"].isin(test_matrices)].reset_index(drop=True)
coo_dyn_lk_spmv_test_data = coo_dyn_lk_spmv[coo_dyn_lk_spmv["Name"].isin(test_matrices)].reset_index(drop=True)

#     coo_local_kway_train = coo_local_kway_train_data[["AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()
#     coo_local_kway_test = coo_local_kway_test_data[["AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()

coo_dyn_lk_spmv_train = coo_dyn_lk_spmv_train_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgLocalNNZ", "AvgGlobalNNZ", "AvgInterProcessCall", "AvgDataSend", "AvgNPRSD", "Density"]].to_numpy()
coo_dyn_lk_spmv_test = coo_dyn_lk_spmv_test_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgLocalNNZ", "AvgGlobalNNZ", "AvgInterProcessCall", "AvgDataSend", "AvgNPRSD", "Density"]].to_numpy()

coo_dyn_lk_spmv_X = np.concatenate((coo_dyn_lk_spmv_train, coo_dyn_lk_spmv_test), axis=0)
scaler = MinMaxScaler()  # Default behavior is to scale to [0,1]
coo_dyn_lk_spmv_X = scaler.fit_transform(coo_dyn_lk_spmv_X)
X = coo_dyn_lk_spmv_X[0:len(coo_dyn_lk_spmv_train)]
coo_dyn_lk_spmv_test_X = coo_dyn_lk_spmv_X[len(coo_dyn_lk_spmv_train):]
train_y = np.array(coo_dyn_lk_spmv_train_data[column])
test_y = np.array(coo_dyn_lk_spmv_test_data[column])

scores = []
best = 0
# c=20e4
# e=0.001
for c in np.linspace(2e1,6e4, num=50):
    for e in np.linspace(0.0001, 0.9, 20):
        best_svr = SVR(kernel='poly', C=c, gamma='auto', degree=3, epsilon=e, coef0=1)
        cv = KFold(n_splits=5, random_state=42, shuffle=True)
        for train_index, test_index in cv.split(X):
            X_train, X_test, y_train, y_test = X[train_index], X[test_index], train_y[train_index], train_y[test_index]
            best_svr.fit(X_train, y_train)
            scores.append(best_svr.score(X_test, np.array(y_test)))
            if best < np.mean(scores):
                best = np.mean(scores)
                best_param = {"C": c, "epsilon": e}
svr = SVR(kernel='poly', C=best_param['C'], gamma='auto', degree=3, epsilon=best_param['epsilon'], coef0=1)
svr.fit(X, train_y)
y_pred = svr.predict(coo_dyn_lk_spmv_test_X)

max_err = 0
avg_err = 0
for i,row in coo_dyn_lk_spmv_test_data.iterrows():
#     print(y[index])
#     g = row["Name"].split("_")
    print(row["Name"], '{0:.5g}'.format(test_y[i]/min_time_factor),"&",'{0:.5g}'.format(y_pred[i]/min_time_factor), "&", 
          '{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])+"\\%", "\\\ \hline")           
    if max_err < abs(test_y[i]-y_pred[i])*100/test_y[i]:
        max_err = abs(test_y[i]-y_pred[i])*100/test_y[i]
    avg_err += abs(test_y[i]-y_pred[i])*100/test_y[i]
    mat.append(row["Name"])
    node.append(row["Nodes"])
    prcs.append(row["nProcess"])
    rows.append(row["AvgRows"])
    nnz_r.append(row["AvgNPR"])
    a_time.append(float('{0:.5g}'.format(test_y[i]/min_time_factor)))
    p_time.append(float('{0:.5g}'.format(y_pred[i]/min_time_factor)))
    err.append(float('{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])))
    spmv_model.append("COO LK-SPMV")
    m_size.append(row["MatrixSize"])
# omp_data.groupby("Threads").count()
print("best param: ", best_param)
print("max error: ", max_err, " avg error: ", avg_err/coo_dyn_lk_spmv_test_data.shape[0])
    
coo_dyn_lk_spmv_pred["Name"] = mat
coo_dyn_lk_spmv_pred["Model"] = spmv_model
coo_dyn_lk_spmv_pred["Nodes"] = node
coo_dyn_lk_spmv_pred["Process"] = prcs
coo_dyn_lk_spmv_pred["Matrix Size"] = m_size
coo_dyn_lk_spmv_pred["Avg Row"] = rows
coo_dyn_lk_spmv_pred["Nonzero per Row"] = nnz_r
coo_dyn_lk_spmv_pred["Actual Time"] = a_time
coo_dyn_lk_spmv_pred["Predicted Time"] = p_time
coo_dyn_lk_spmv_pred["Error"] = err

NACA0015 0.30284 & 0.27636 & 8.744\% \\ \hline
delaunay_n20 0.36322 & 0.26905 & 25.93\% \\ \hline
NACA0015 0.28619 & 0.31731 & 10.87\% \\ \hline
delaunay_n20 0.33376 & 0.30824 & 7.647\% \\ \hline
NACA0015 0.40619 & 0.37756 & 7.049\% \\ \hline
delaunay_n20 0.42821 & 0.36615 & 14.49\% \\ \hline
AS365 1.0329 & 0.79383 & 23.15\% \\ \hline
NLR 1.2148 & 0.86317 & 28.94\% \\ \hline
AS365 1.233 & 0.98733 & 19.92\% \\ \hline
NLR 1.3698 & 1.0738 & 21.61\% \\ \hline
AS365 1.6173 & 1.1505 & 28.86\% \\ \hline
NLR 1.6422 & 1.2641 & 23.02\% \\ \hline
hugetrace-00010 2.1274 & 1.8595 & 12.59\% \\ \hline
road_central 2.1259 & 1.8997 & 10.64\% \\ \hline
hugetrace-00010 2.4622 & 2.3305 & 5.351\% \\ \hline
nlpkkt200 8.4518 & 8.4681 & 0.1934\% \\ \hline
road_central 2.4631 & 2.4236 & 1.603\% \\ \hline
hugetrace-00010 3.0982 & 2.7132 & 12.43\% \\ \hline
nlpkkt200 10.38 & 10.946 & 5.455\% \\ \hline
road_central 2.884 & 2.8289 & 1.913\% \\ \hline
nlpkkt200 12.528 & 12.752 & 1.781\% \\ \hline
best param:  {'C':

# Dynamic CSR GK-SpMV

In [6]:
csr_dyn_gk_spmv_pred = pd.DataFrame()
mat = [] 
node = []
prcs = []
rows = []
m_size = []
nnz_r = []
a_time = []
p_time = []
err = []
spmv_model = []
csr_kway_spmv = pd.read_csv("../../SpMV_Model/src/SkylakeResults/MPI_KWAY_SpMV.csv")
csr_kway_spmv = csr_kway_spmv[~csr_kway_spmv['Name'].str.contains("rmat")].reset_index()
csr_kway_spmv = csr_kway_spmv.groupby(["Name", "Nodes", "nProcess", "DataType"]).mean().reset_index().sort_values(by="Name").reset_index(drop=True)

csr_kway_spmv.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)

csr_kway_spmv = csr_kway_spmv.sort_values(by=["AvgRows", "AvgNPR"]).reset_index(drop=True)
density = []
min_time_factor = 1.0
if min(csr_kway_spmv["AvgTime"]) < 1.0:
    min_time_factor = 1/min(csr_kway_spmv["AvgTime"])
for i,r in csr_kway_spmv.iterrows():
    csr_kway_spmv.at[i, "AvgTime"] = min_time_factor*r["AvgTime"]
    density.append(float(r["AvgNNZ"])/float(r["AvgRows"] * r["AvgRows"]))
csr_kway_spmv["Density"] = density
column = "AvgTime"
csr_kway_train_data = csr_kway_spmv[~csr_kway_spmv["Name"].isin(test_matrices)].reset_index(drop=True)
csr_kway_test_data = csr_kway_spmv[csr_kway_spmv["Name"].isin(test_matrices)].reset_index(drop=True)

csr_kway_train = csr_kway_train_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()

csr_kway_test = csr_kway_test_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()

csr_kway_X = np.concatenate((csr_kway_train, csr_kway_test), axis=0)
scaler = MinMaxScaler()  # Default behavior is to scale to [0,1]
csr_kway_X = scaler.fit_transform(csr_kway_X)
X = csr_kway_X[0:len(csr_kway_train)]
csr_kway_test_X = csr_kway_X[len(csr_kway_train):]
train_y = np.array(csr_kway_train_data[column])
test_y = np.array(csr_kway_test_data[column])

scores = []
best = 0
# c=20e4
# e=0.001
for c in np.linspace(2e1,6e4, num=10):
    for e in np.linspace(0.0001, 0.1, 20):
        best_svr = SVR(kernel='poly', C=c, gamma='auto', degree=3, epsilon=e, coef0=1)
        cv = KFold(n_splits=5, random_state=42, shuffle=True)
        for train_index, test_index in cv.split(X):
            X_train, X_test, y_train, y_test = X[train_index], X[test_index], train_y[train_index], train_y[test_index]
            best_svr.fit(X_train, y_train)
            scores.append(best_svr.score(X_test, np.array(y_test)))
            if best < np.mean(scores):
                best = np.mean(scores)
                best_param = {"C": c, "epsilon": e}
svr = SVR(kernel='poly', C=best_param['C'], gamma='auto', degree=3, epsilon=best_param['epsilon'], coef0=1)
svr.fit(X, train_y)
y_pred = svr.predict(csr_kway_test_X)

max_err = 0
avg_err = 0
for i,row in csr_kway_test_data.iterrows():
#     print(y[index])
#     g = row["Name"].split("_")
    print(row["Name"], '{0:.5g}'.format(test_y[i]/min_time_factor),"&",'{0:.5g}'.format(y_pred[i]/min_time_factor), "&", 
          '{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])+"\\%", "\\\ \hline")    
    if max_err < abs(test_y[i]-y_pred[i])*100/test_y[i]:
        max_err = abs(test_y[i]-y_pred[i])*100/test_y[i]
    avg_err += abs(test_y[i]-y_pred[i])*100/test_y[i]
    mat.append(row["Name"])
    node.append(row["Nodes"])
    prcs.append(row["nProcess"])
    rows.append(row["AvgRows"])
    nnz_r.append(row["AvgNPR"])
    a_time.append(float('{0:.5g}'.format(test_y[i]/min_time_factor)))
    p_time.append(float('{0:.5g}'.format(y_pred[i]/min_time_factor)))
    err.append(float('{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])))
    spmv_model.append("CSR GK-SPMV")
    m_size.append(row["MatrixSize"])
# omp_data.groupby("Threads").count()
print("best param: ", best_param)
print("max error: ", max_err, " avg error: ", avg_err/csr_kway_test_data.shape[0])
csr_dyn_gk_spmv_pred["Name"] = mat
csr_dyn_gk_spmv_pred["Model"] = spmv_model
csr_dyn_gk_spmv_pred["Nodes"] = node
csr_dyn_gk_spmv_pred["Process"] = prcs
csr_dyn_gk_spmv_pred["Matrix Size"] = m_size
csr_dyn_gk_spmv_pred["Avg Row"] = rows
csr_dyn_gk_spmv_pred["Nonzero per Row"] = nnz_r
csr_dyn_gk_spmv_pred["Actual Time"] = a_time
csr_dyn_gk_spmv_pred["Predicted Time"] = p_time
csr_dyn_gk_spmv_pred["Error"] = err

NACA0015 2.7777 & 2.4513 & 11.75\% \\ \hline
delaunay_n20 2.5088 & 2.476 & 1.309\% \\ \hline
NACA0015 2.4091 & 2.2635 & 6.044\% \\ \hline
delaunay_n20 2.473 & 2.2867 & 7.532\% \\ \hline
NACA0015 2.3541 & 2.762 & 17.33\% \\ \hline
delaunay_n20 2.3872 & 2.7926 & 16.98\% \\ \hline
AS365 8.7472 & 8.6934 & 0.6152\% \\ \hline
NLR 9.5694 & 9.5186 & 0.5308\% \\ \hline
AS365 8.4277 & 8.3564 & 0.8457\% \\ \hline
NLR 9.3346 & 9.1646 & 1.821\% \\ \hline
AS365 9.6471 & 10.832 & 12.28\% \\ \hline
NLR 10.807 & 11.905 & 10.16\% \\ \hline
hugetrace-00010 28.331 & 26.781 & 5.472\% \\ \hline
road_central 32.657 & 31.153 & 4.606\% \\ \hline
hugetrace-00010 24.097 & 26.22 & 8.813\% \\ \hline
nlpkkt200 37.653 & 39.691 & 5.413\% \\ \hline
road_central 27.895 & 30.588 & 9.653\% \\ \hline
hugetrace-00010 37.824 & 34.545 & 8.669\% \\ \hline
nlpkkt200 37.447 & 38.379 & 2.489\% \\ \hline
road_central 45.002 & 40.341 & 10.36\% \\ \hline
nlpkkt200 55.888 & 54.257 & 2.918\% \\ \hline
best param:  {'C': 60000.0, 'eps

# Dynamic COO GK-SpMV

In [7]:
coo_dyn_gk_spmv_pred = pd.DataFrame()
mat = [] 
node = []
prcs = []
rows = []
nnz_r = []
a_time = []
p_time = []
err = []
spmv_model = []
m_size = []

coo_kway_spmv = pd.read_csv("../../SpMV_Model/src/SkylakeResults/MPI_KWAY_COO_SpMV.csv")
coo_kway_spmv = coo_kway_spmv[~coo_kway_spmv['Name'].str.contains("rmat")].reset_index(drop=True)
coo_kway_spmv = coo_kway_spmv.groupby(["Name", "Nodes", "nProcess", "DataType"]).mean().reset_index().sort_values(by="Name").reset_index()
coo_kway_spmv.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)
coo_kway_spmv = coo_kway_spmv.reset_index()
coo_kway_spmv = coo_kway_spmv.sort_values(by=["AvgRows", "AvgNPR"]).reset_index(drop=True)

density = []
min_time_factor = 1.0
if min(coo_kway_spmv["AvgTime"]) < 1.0:
    min_time_factor = 1/min(coo_kway_spmv["AvgTime"])
for i,r in coo_kway_spmv.iterrows():
    coo_kway_spmv.at[i, "AvgTime"] = min_time_factor*r["AvgTime"]
    density.append(float(r["AvgNNZ"])/float(r["AvgRows"] * r["AvgRows"]))
coo_kway_spmv["Density"] = density

column = "AvgTime"
coo_kway_train_data = coo_kway_spmv[~coo_kway_spmv["Name"].isin(test_matrices)].reset_index(drop=True)
coo_kway_test_data = coo_kway_spmv[coo_kway_spmv["Name"].isin(test_matrices)].reset_index(drop=True)

coo_kway_train = coo_kway_train_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()

coo_kway_test = coo_kway_test_data[["Nodes", "nProcess", "AvgRows", "AvgNNZ", "AvgNPR", "AvgNPRSD", "Density"]].to_numpy()

coo_kway_X = np.concatenate((coo_kway_train, coo_kway_test), axis=0)
scaler = MinMaxScaler()  # Default behavior is to scale to [0,1]
coo_kway_X = scaler.fit_transform(coo_kway_X)
X = coo_kway_X[0:len(coo_kway_train)]
coo_kway_test_X = coo_kway_X[len(coo_kway_train):]
train_y = np.array(coo_kway_train_data[column])
test_y = np.array(coo_kway_test_data[column])

scores = []
best = 0
# c=20e4
# e=0.001
for c in np.linspace(2e1,6e4, num=50):
    for e in np.linspace(0.0001, 0.9, 20):
        best_svr = SVR(kernel='poly', C=c, gamma='auto', degree=3, epsilon=e, coef0=1)
        cv = KFold(n_splits=5, random_state=42, shuffle=True)
        for train_index, test_index in cv.split(X):
            X_train, X_test, y_train, y_test = X[train_index], X[test_index], train_y[train_index], train_y[test_index]
            best_svr.fit(X_train, y_train)
            scores.append(best_svr.score(X_test, np.array(y_test)))
            if best < np.mean(scores):
                best = np.mean(scores)
                best_param = {"C": c, "epsilon": e}
svr = SVR(kernel='poly', C=best_param['C'], gamma='auto', degree=3, epsilon=best_param['epsilon'], coef0=1)
svr.fit(X, train_y)
y_pred = svr.predict(coo_kway_test_X)

max_err = 0
avg_err = 0
for i,row in coo_kway_test_data.iterrows():
#     print(y[index])
#     g = row["Name"].split("_")
    print(row["Name"], '{0:.5g}'.format(test_y[i]/min_time_factor),"&",'{0:.5g}'.format(y_pred[i]/min_time_factor), "&", 
          '{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])+"\\%", "\\\ \hline")           
    if max_err < abs(test_y[i]-y_pred[i])*100/test_y[i]:
        max_err = abs(test_y[i]-y_pred[i])*100/test_y[i]
    avg_err += abs(test_y[i]-y_pred[i])*100/test_y[i]
    mat.append(row["Name"])
    node.append(row["Nodes"])
    prcs.append(row["nProcess"])
    rows.append(row["AvgRows"])
    nnz_r.append(row["AvgNPR"])
    a_time.append(float('{0:.5g}'.format(test_y[i]/min_time_factor)))
    p_time.append(float('{0:.5g}'.format(y_pred[i]/min_time_factor)))
    err.append(float('{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])))
    spmv_model.append("COO GK-SPMV")
    m_size.append(row["MatrixSize"])
# omp_data.groupby("Threads").count()
print("best param: ", best_param)
print("max error: ", max_err, " avg error: ", avg_err/coo_kway_test_data.shape[0])
coo_dyn_gk_spmv_pred["Name"] = mat
coo_dyn_gk_spmv_pred["Model"] = spmv_model
coo_dyn_gk_spmv_pred["Nodes"] = node
coo_dyn_gk_spmv_pred["Process"] = prcs
coo_dyn_gk_spmv_pred["Matrix Size"] = m_size
coo_dyn_gk_spmv_pred["Avg Row"] = rows
coo_dyn_gk_spmv_pred["Nonzero per Row"] = nnz_r
coo_dyn_gk_spmv_pred["Actual Time"] = a_time
coo_dyn_gk_spmv_pred["Predicted Time"] = p_time
coo_dyn_gk_spmv_pred["Error"] = err

NACA0015 4.0225 & 3.9623 & 1.495\% \\ \hline
delaunay_n20 4.2734 & 4.0288 & 5.725\% \\ \hline
NACA0015 4.3269 & 4.0312 & 6.834\% \\ \hline
delaunay_n20 4.2676 & 4.1035 & 3.845\% \\ \hline
NACA0015 4.463 & 4.7264 & 5.903\% \\ \hline
delaunay_n20 4.474 & 4.8253 & 7.854\% \\ \hline
AS365 13.111 & 13.125 & 0.1139\% \\ \hline
NLR 14.361 & 14.347 & 0.1017\% \\ \hline
AS365 14.308 & 14.451 & 0.9964\% \\ \hline
NLR 15.71 & 15.844 & 0.8555\% \\ \hline
AS365 16.718 & 17.946 & 7.35\% \\ \hline
NLR 18.709 & 19.715 & 5.38\% \\ \hline
hugetrace-00010 35.209 & 32.556 & 7.535\% \\ \hline
road_central 39.166 & 36.308 & 7.299\% \\ \hline
hugetrace-00010 32.724 & 35.407 & 8.199\% \\ \hline
nlpkkt200 124.74 & 127.4 & 2.131\% \\ \hline
road_central 36.309 & 39.235 & 8.059\% \\ \hline
hugetrace-00010 48.825 & 45.583 & 6.639\% \\ \hline
nlpkkt200 156.42 & 157.42 & 0.6382\% \\ \hline
road_central 55.172 & 50.831 & 7.867\% \\ \hline
nlpkkt200 195.83 & 193.15 & 1.368\% \\ \hline
best param:  {'C': 60000.0, 'eps

# Dynamic CSR 2D-Partitioning SpMV

In [8]:
csr_dyn_2d_pred = pd.DataFrame()
mat = [] 
node = []
prcs = []
rows = []
nnz_r = []
a_time = []
p_time = []
err = []
spmv_model = []
m_size = []

csr_2d_spmv = pd.read_csv("../../SpMV_Model/src/SkylakeResults/MPI_CSR_2D_SpMV.csv")
csr_2d_spmv = csr_2d_spmv[~csr_2d_spmv['Name'].str.contains("rmat")].reset_index(drop=True)
csr_2d_spmv = csr_2d_spmv.groupby(["Name", "Nodes", "nProcess", "DataType"]).mean().reset_index().sort_values(by="Name").reset_index()
csr_2d_spmv.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)
csr_2d_spmv = csr_2d_spmv.reset_index(drop=True)
csr_2d_spmv = csr_2d_spmv.sort_values(by=["AvgRow", "NonZeroPerRow"]).reset_index(drop=True)

density = []
min_time_factor = 1.0
if min(csr_2d_spmv["AvgTime"]) < 1.0:
    min_time_factor = 1/min(csr_2d_spmv["AvgTime"])
for i,r in csr_2d_spmv.iterrows():
    csr_2d_spmv.at[i, "AvgTime"] = min_time_factor*r["AvgTime"]
    density.append(float(r["NNZ"])/float(r["AvgRow"] * r["AvgRow"]))
csr_2d_spmv["Density"] = density

column = "AvgTime"
csr_2d_train_data = csr_2d_spmv[~csr_2d_spmv["Name"].isin(test_matrices)].reset_index(drop=True)
csr_2d_test_data = csr_2d_spmv[csr_2d_spmv["Name"].isin(test_matrices)].reset_index(drop=True)

csr_2d_train = csr_2d_train_data[["Nodes", "nProcess", "AvgRow", "NNZ", "NonZeroPerRow", "AvgNPRSD", "Density"]].to_numpy()

csr_2d_test = csr_2d_test_data[["Nodes", "nProcess", "AvgRow", "NNZ", "NonZeroPerRow", "AvgNPRSD", "Density"]].to_numpy()

csr_2d_X = np.concatenate((csr_2d_train, csr_2d_test), axis=0)
scaler = MinMaxScaler()  # Default behavior is to scale to [0,1]
csr_2d_X = scaler.fit_transform(csr_2d_X)
X = csr_2d_X[0:len(csr_2d_train)]
csr_2d_test_X = csr_2d_X[len(csr_2d_train):]
train_y = np.array(csr_2d_train_data[column])
test_y = np.array(csr_2d_test_data[column])

scores = []
best = 0
# c=20e4
# e=0.001
for c in np.linspace(2e1,6e4, num=50):
    for e in np.linspace(0.0001, 0.9, 20):
        best_svr = SVR(kernel='poly', C=c, gamma='auto', degree=3, epsilon=e, coef0=1)
        cv = KFold(n_splits=5, random_state=42, shuffle=True)
        for train_index, test_index in cv.split(X):
            X_train, X_test, y_train, y_test = X[train_index], X[test_index], train_y[train_index], train_y[test_index]
            best_svr.fit(X_train, y_train)
            scores.append(best_svr.score(X_test, np.array(y_test)))
            if best < np.mean(scores):
                best = np.mean(scores)
                best_param = {"C": c, "epsilon": e}
svr = SVR(kernel='poly', C=best_param['C'], gamma='auto', degree=3, epsilon=best_param['epsilon'], coef0=1)
svr.fit(X, train_y)
y_pred = svr.predict(csr_2d_test_X)
max_err = 0
avg_err = 0
for i,row in csr_2d_test_data.iterrows():
#     print(y[index])
#     g = row["Name"].split("_")
    print(row["Name"], "&", '{0:.5g}'.format(test_y[i]/min_time_factor),"&",'{0:.5g}'.format(y_pred[i]/min_time_factor), "&", 
          '{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])+"\\%", "\\\ \hline")           
    if max_err < abs(test_y[i]-y_pred[i])*100/test_y[i]:
        max_err = abs(test_y[i]-y_pred[i])*100/test_y[i]
    avg_err += abs(test_y[i]-y_pred[i])*100/test_y[i]
    mat.append(row["Name"])
    node.append(row["Nodes"])
    prcs.append(row["nProcess"])
    rows.append(row["AvgRow"])
    nnz_r.append(row["NonZeroPerRow"])
    a_time.append(float('{0:.5g}'.format(test_y[i]/min_time_factor)))
    p_time.append(float('{0:.5g}'.format(y_pred[i]/min_time_factor)))
    err.append(float('{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])))
    spmv_model.append("CSR 2D Partition")
    m_size.append(row["MatrixSize"])
# omp_data.groupby("Threads").count()
print("best param: ", best_param)
print("max error: ", max_err, " avg error: ", avg_err/csr_2d_test_data.shape[0])
csr_dyn_2d_pred["Name"] = mat
csr_dyn_2d_pred["Model"] = spmv_model
csr_dyn_2d_pred["Nodes"] = node
csr_dyn_2d_pred["Process"] = prcs
csr_dyn_2d_pred["Matrix Size"] = m_size
csr_dyn_2d_pred["Avg Row"] = rows
csr_dyn_2d_pred["Nonzero per Row"] = nnz_r
csr_dyn_2d_pred["Actual Time"] = a_time
csr_dyn_2d_pred["Predicted Time"] = p_time
csr_dyn_2d_pred["Error"] = err

NACA0015 & 1.3734 & 1.4011 & 2.02\% \\ \hline
delaunay_n20 & 1.3901 & 1.4138 & 1.704\% \\ \hline
NACA0015 & 1.5737 & 1.65 & 4.848\% \\ \hline
delaunay_n20 & 1.5816 & 1.6652 & 5.286\% \\ \hline
NACA0015 & 1.7328 & 1.8732 & 8.104\% \\ \hline
delaunay_n20 & 1.7529 & 1.8906 & 7.856\% \\ \hline
AS365 & 4.238 & 5.0642 & 19.49\% \\ \hline
NLR & 4.9986 & 5.5551 & 11.13\% \\ \hline
AS365 & 5.4516 & 6.0248 & 10.51\% \\ \hline
AS365 & 6.9119 & 6.8982 & 0.1992\% \\ \hline
NLR & 6.1033 & 6.6114 & 8.325\% \\ \hline
NLR & 7.6207 & 7.5713 & 0.648\% \\ \hline
hugetrace-00010 & 14.259 & 14.057 & 1.416\% \\ \hline
hugetrace-00010 & 17.407 & 16.655 & 4.32\% \\ \hline
road_central & 17.721 & 15.904 & 10.25\% \\ \hline
hugetrace-00010 & 20.348 & 19.195 & 5.665\% \\ \hline
nlpkkt200 & 46.784 & 45.241 & 3.299\% \\ \hline
road_central & 18.95 & 18.808 & 0.7466\% \\ \hline
road_central & 22.679 & 21.706 & 4.291\% \\ \hline
nlpkkt200 & 58.7 & 55.359 & 5.692\% \\ \hline
nlpkkt200 & 72.959 & 61.929 & 15.12\% \\ \h

# Dynamic COO 2D-Partitioning SpMV

In [9]:
coo_dyn_2d_pred = pd.DataFrame()
mat = [] 
node = []
prcs = []
rows = []
nnz_r = []
a_time = []
p_time = []
err = []
spmv_model = []
m_size = []
coo_2d_spmv = pd.read_csv("../../SpMV_Model/src/SkylakeResults/MPI_COO_2D_SpMV.csv")
coo_2d_spmv = coo_2d_spmv[~coo_2d_spmv['Name'].str.contains("rmat")].reset_index(drop=True)
coo_2d_spmv = coo_2d_spmv.groupby(["Name", "Nodes", "nProcess", "DataType"]).mean().reset_index().sort_values(by="Name").reset_index()
coo_2d_spmv.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)
coo_2d_spmv = coo_2d_spmv.reset_index(drop=True)
coo_2d_spmv = coo_2d_spmv.sort_values(by=["AvgRow", "NonZeroPerRow"]).reset_index(drop=True)

density = []
min_time_factor = 1.0
if min(coo_2d_spmv["AvgTime"]) < 1.0:
    min_time_factor = 1/min(coo_2d_spmv["AvgTime"])
for i,r in coo_2d_spmv.iterrows():
    coo_2d_spmv.at[i, "AvgTime"] = min_time_factor*r["AvgTime"]
    density.append(float(r["NNZ"])/float(r["AvgRow"] * r["AvgRow"]))
coo_2d_spmv["Density"] = density

column = "AvgTime"
coo_2d_train_data = coo_2d_spmv[~coo_2d_spmv["Name"].isin(test_matrices)].reset_index(drop=True)
coo_2d_test_data = coo_2d_spmv[coo_2d_spmv["Name"].isin(test_matrices)].reset_index(drop=True)

coo_2d_train = coo_2d_train_data[["Nodes", "nProcess", "AvgRow", "NNZ", "NonZeroPerRow", "AvgNPRSD", "Density"]].to_numpy()

coo_2d_test = coo_2d_test_data[["Nodes", "nProcess", "AvgRow", "NNZ", "NonZeroPerRow", "AvgNPRSD", "Density"]].to_numpy()

coo_2d_X = np.concatenate((coo_2d_train, coo_2d_test), axis=0)
scaler = MinMaxScaler()  # Default behavior is to scale to [0,1]
coo_2d_X = scaler.fit_transform(coo_2d_X)
X = coo_2d_X[0:len(coo_2d_train)]
coo_2d_test_X = coo_2d_X[len(coo_2d_train):]
train_y = np.array(coo_2d_train_data[column])
test_y = np.array(coo_2d_test_data[column])

scores = []
best = 0
# c=20e4
# e=0.001
for c in np.linspace(2e1,5e5, num=50):
    for e in np.linspace(0.0001, 0.1, 20):
        best_svr = SVR(kernel='poly', C=c, gamma='auto', degree=3, epsilon=e, coef0=1)
        cv = KFold(n_splits=5, random_state=42, shuffle=True)
        for train_index, test_index in cv.split(X):
            X_train, X_test, y_train, y_test = X[train_index], X[test_index], train_y[train_index], train_y[test_index]
            best_svr.fit(X_train, y_train)
            scores.append(best_svr.score(X_test, np.array(y_test)))
            if best < np.mean(scores):
                best = np.mean(scores)
                best_param = {"C": c, "epsilon": e}
svr = SVR(kernel='poly', C=best_param['C'], gamma='auto', degree=3, epsilon=best_param['epsilon'], coef0=1)
svr.fit(X, train_y)
y_pred = svr.predict(coo_2d_test_X)

max_err = 0
avg_err = 0
for i,row in coo_2d_test_data.iterrows():
#     print(y[index])
#     g = row["Name"].split("_")
    print(row["Name"], "&", '{0:.5g}'.format(test_y[i]/min_time_factor),"&",'{0:.5g}'.format(y_pred[i]/min_time_factor), "&", 
          '{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])+"\\%", "\\\ \hline")           
    if max_err < abs(test_y[i]-y_pred[i])*100/test_y[i]:
        max_err = abs(test_y[i]-y_pred[i])*100/test_y[i]
    avg_err += abs(test_y[i]-y_pred[i])*100/test_y[i]
    mat.append(row["Name"])
    node.append(row["Nodes"])
    prcs.append(row["nProcess"])
    rows.append(row["AvgRow"])
    nnz_r.append(row["NonZeroPerRow"])
    a_time.append(float('{0:.5g}'.format(test_y[i]/min_time_factor)))
    p_time.append(float('{0:.5g}'.format(y_pred[i]/min_time_factor)))
    err.append(float('{0:.4g}'.format(abs(test_y[i]-y_pred[i])*100/test_y[i])))
    spmv_model.append("COO 2D Partition")
    m_size.append(row["MatrixSize"])
# omp_data.groupby("Threads").count()
print("best param: ", best_param)
print("max error: ", max_err, " avg error: ", avg_err/coo_2d_test_data.shape[0])
coo_dyn_2d_pred["Name"] = mat
coo_dyn_2d_pred["Model"] = spmv_model
coo_dyn_2d_pred["Nodes"] = node
coo_dyn_2d_pred["Process"] = prcs
coo_dyn_2d_pred["Matrix Size"] = m_size
coo_dyn_2d_pred["Avg Row"] = rows
coo_dyn_2d_pred["Nonzero per Row"] = nnz_r
coo_dyn_2d_pred["Actual Time"] = a_time
coo_dyn_2d_pred["Predicted Time"] = p_time
coo_dyn_2d_pred["Error"] = err

NACA0015 & 1.4643 & 0.95166 & 35.01\% \\ \hline
delaunay_n20 & 1.4995 & 0.95993 & 35.98\% \\ \hline
NACA0015 & 1.0895 & 1.1455 & 5.135\% \\ \hline
delaunay_n20 & 1.1012 & 1.1557 & 4.948\% \\ \hline
NACA0015 & 1.5212 & 1.3594 & 10.63\% \\ \hline
delaunay_n20 & 1.4511 & 1.372 & 5.445\% \\ \hline
AS365 & 3.3986 & 3.4065 & 0.2306\% \\ \hline
NLR & 3.6584 & 3.7438 & 2.335\% \\ \hline
AS365 & 4.0799 & 4.1738 & 2.302\% \\ \hline
AS365 & 5.0671 & 5.0365 & 0.6031\% \\ \hline
NLR & 4.4726 & 4.5861 & 2.538\% \\ \hline
NLR & 5.577 & 5.5317 & 0.8124\% \\ \hline
hugetrace-00010 & 10.713 & 10.434 & 2.603\% \\ \hline
hugetrace-00010 & 12.535 & 12.339 & 1.567\% \\ \hline
road_central & 12.147 & 12.05 & 0.7994\% \\ \hline
hugetrace-00010 & 15.397 & 14.549 & 5.508\% \\ \hline
nlpkkt200 & 34.334 & 30.795 & 10.31\% \\ \hline
road_central & 13.935 & 14.084 & 1.069\% \\ \hline
road_central & 17.1 & 16.478 & 3.64\% \\ \hline
nlpkkt200 & 45.03 & 40.198 & 10.73\% \\ \hline
nlpkkt200 & 55.538 & 47.942 & 13.68\% 

# Overall Evalution

In [10]:
pred = pd.concat([csr_dyn_lk_spmv_pred, coo_dyn_lk_spmv_pred, csr_dyn_gk_spmv_pred, coo_dyn_gk_spmv_pred, csr_dyn_2d_pred])
pred = pred.reset_index(drop=True)

In [15]:
pred = pred.sort_values(by=["Name"]).reset_index(drop=True)
models = ["CSR LK-SPMV", "COO LK-SPMV", "CSR GK-SPMV", "COO GK-SPMV", "CSR 2D Partition"]
print("\multirow{2}{*}{Name} & \multirow{2}{*}{Nodes} & \multirow{2}{*}{Processes} & \multicolumn{5}{c |} {Error\%} \\\\ \cline{4-8}")
print(" &  &  & CSR L1DR & COO L1DRV & CSR G1DR & COO G1DR & CSR 2DU \\\\ \hline")
for g,g_data in pred.groupby("Name"):
    for n,n_data in g_data.groupby("Nodes"):
        for p,p_data in n_data.groupby("Process"):
            p_min = p_data["Predicted Time"].min()
            pr_best_a = p_data[p_data["Predicted Time"] == p_min]["Model"].iloc[0]
            a_min = p_data["Actual Time"].min()
            ac_best_a = p_data[p_data["Actual Time"] == a_min]["Model"].iloc[0]
#             print(pr_best_a, " ", ac_best_a)
            print(g, ' & ', n, ' & ', p, end=' & ')
            i=1
            for m in models: #,m_data in p_data.groupby("Model"):
                m_data = p_data[p_data["Model"] == m].reset_index(drop=True)
                if m==ac_best_a:
                    if pr_best_a==ac_best_a and pr_best_a == m:
                        print("\cellcolor{green!25} ", float('{0:.3g}'.format(m_data["Error"].iloc[0])), end=' & ' if i<len(p_data) else ' \\\\ \hline')
                    else:
                        print("\cellcolor{blue!25} ", float('{0:.3g}'.format(m_data["Error"].iloc[0])), end=' & ' if i<len(p_data) else ' \\\\ \hline')
                elif m==pr_best_a:
                    print("\cellcolor{red!25} ", float('{0:.3g}'.format(m_data["Error"].iloc[0])), end=' & ' if i<len(p_data) else ' \\\\ \hline')
                else:
                    print(float('{0:.3g}'.format(m_data["Error"].iloc[0])), end=' & ' if i<len(p_data) else ' \\\\ \hline')
                i+=1
            print()
    
# pred

\multirow{2}{*}{Name} & \multirow{2}{*}{Nodes} & \multirow{2}{*}{Processes} & \multicolumn{5}{c |} {Error\%} \\ \cline{4-8}
 &  &  & CSR L1DR & COO L1DRV & CSR G1DR & COO G1DR & CSR 2DU \\ \hline
AS365  &  3  &  144 & \cellcolor{green!25}  25.7 & 28.9 & 12.3 & 7.35 & 0.199 \\ \hline
AS365  &  4  &  169 & \cellcolor{green!25}  18.8 & 19.9 & 0.846 & 0.996 & 10.5 \\ \hline
AS365  &  5  &  225 & \cellcolor{green!25}  23.2 & 23.1 & 0.615 & 0.114 & 19.5 \\ \hline
NACA0015  &  3  &  144 & \cellcolor{red!25}  15.6 & \cellcolor{blue!25}  7.05 & 17.3 & 5.9 & 8.1 \\ \hline
NACA0015  &  4  &  169 & \cellcolor{red!25}  8.53 & \cellcolor{blue!25}  10.9 & 6.04 & 6.83 & 4.85 \\ \hline
NACA0015  &  5  &  225 & \cellcolor{red!25}  20.5 & \cellcolor{blue!25}  8.74 & 11.8 & 1.5 & 2.02 \\ \hline
NLR  &  3  &  144 & \cellcolor{green!25}  21.2 & 23.0 & 10.2 & 5.38 & 0.648 \\ \hline
NLR  &  4  &  169 & \cellcolor{green!25}  21.4 & 21.6 & 1.82 & 0.856 & 8.32 \\ \hline
NLR  &  5  &  225 & \cellcolor{green!25}  

In [125]:
for g,g_data in pred.groupby("Name"):
    for n,n_data in g_data.groupby("Nodes"):
        for p,p_data in n_data.groupby("Process"):
            print(g, p, end=' ')
            for m,m_data in p_data.groupby("Model"):
                print(m, m_data["Actual Time"].iloc[0], end=" ")
            print()

AS365 144 COO GK-SPMV 16.718 COO LK-SPMV 1.6173 CSR 2D Partition 0.26972 CSR GK-SPMV 9.6471 CSR LK-SPMV 1.4994 
AS365 169 COO GK-SPMV 14.308 COO LK-SPMV 1.233 CSR 2D Partition 0.21273 CSR GK-SPMV 8.4277 CSR LK-SPMV 1.1788 
AS365 225 COO GK-SPMV 13.111 COO LK-SPMV 1.0329 CSR 2D Partition 0.16538 CSR GK-SPMV 8.7472 CSR LK-SPMV 1.0072 
NACA0015 144 COO GK-SPMV 4.463 COO LK-SPMV 0.40619 CSR 2D Partition 0.067617 CSR GK-SPMV 2.3541 CSR LK-SPMV 0.43359 
NACA0015 169 COO GK-SPMV 4.3269 COO LK-SPMV 0.28619 CSR 2D Partition 0.061411 CSR GK-SPMV 2.4091 CSR LK-SPMV 0.33981 
NACA0015 225 COO GK-SPMV 4.0225 COO LK-SPMV 0.30284 CSR 2D Partition 0.053592 CSR GK-SPMV 2.7777 CSR LK-SPMV 0.34201 
NLR 144 COO GK-SPMV 18.709 COO LK-SPMV 1.6422 CSR 2D Partition 0.29737 CSR GK-SPMV 10.807 CSR LK-SPMV 1.5512 
NLR 169 COO GK-SPMV 15.71 COO LK-SPMV 1.3698 CSR 2D Partition 0.23816 CSR GK-SPMV 9.3346 CSR LK-SPMV 1.3235 
NLR 225 COO GK-SPMV 14.361 COO LK-SPMV 1.2148 CSR 2D Partition 0.19505 CSR GK-SPMV 9.5694 CSR

In [12]:
def get_optimal(_test_matrices, _csr_kway_pred, _coo_kway_pred, _csr_local_kway_pred, _coo_local_kway_pred, _csr_2d_pred):
    _mat = []
    _p = []
    _algo = []
    _ralgo = []
    _csr_g1dr = []
    _coo_g1dr = []
    _csr_l1dr = []
    _coo_l1dr = []
    _csr_2d = []
    _coo_2d = []
    _pr_csr_g1dr = []
    _pr_coo_g1dr = []
    _pr_csr_l1dr = []
    _pr_coo_l1dr = []
    _pr_csr_2d = []
    _pr_coo_2d = []
    _right = []
    _ac = []
    _pr = []
    _optimal = pd.DataFrame()
    for m in _test_matrices:
        for p in [144, 169, 225]:
            _p.append(p)
            _mat.append(m)
#             print(p, m)
            al1 = _csr_kway_pred[(_csr_kway_pred["Name"] == m) & (_csr_kway_pred["Process"] == p)].reset_index(drop=True).iloc[0]
            csr_gk = al1["Predicted Time"]
            min_t = csr_gk;
            min_a = "CSR G1DR SpMV"
            r_a = "CSR G1DR SpMV"
            r_t = al1["Actual Time"]
            _csr_g1dr.append(al1["Actual Time"])
            _pr_csr_g1dr.append(al1["Predicted Time"])
            al5 = _csr_2d_pred[(_csr_2d_pred["Name"] == m) & (_csr_2d_pred["Process"] == p)].reset_index(drop=True).iloc[0]
            csr_2d = al5["Predicted Time"]
            _csr_2d.append(al5["Actual Time"])
            _pr_csr_2d.append(al5["Predicted Time"])
            if min_t > csr_2d:
                min_t = csr_2d
                min_a = "CSR 2DU"
            if r_t > al5["Actual Time"]:
                r_t = al5["Actual Time"]
                r_a = "CSR 2DU"
            al2 = _coo_kway_pred[(_coo_kway_pred["Name"] == m) & (_coo_kway_pred["Process"] == p)].reset_index(drop=True).iloc[0]
            coo_gk = al2["Predicted Time"]
            _coo_g1dr.append(al2["Actual Time"])
            _pr_coo_g1dr.append(al2["Predicted Time"])
            if min_t > coo_gk:
                min_t = coo_gk
                min_a = "COO G1DR SpMV"
            if r_t > al2["Actual Time"]:
                r_t = al2["Actual Time"]
                r_a = "COO G1DR SpMV"
            al3 = _csr_local_kway_pred[(_csr_local_kway_pred["Name"] == m) & (_csr_local_kway_pred["Process"] == p)].reset_index(drop=True).iloc[0]
            csr_lk = al3["Predicted Time"]
            _csr_l1dr.append(al3["Actual Time"])
            _pr_csr_l1dr.append(al3["Predicted Time"])
            if min_t > csr_lk:
                min_t = csr_lk
                min_a = "CSR L1DR SpMV"
            if r_t > al3["Actual Time"]:
                r_t = al3["Actual Time"]
                r_a = "CSR L1DR SpMV"
            al4 = _coo_local_kway_pred[(_coo_local_kway_pred["Name"] == m) & (_coo_local_kway_pred["Process"] == p)].reset_index(drop=True).iloc[0]
            coo_lk = al4["Predicted Time"]
            _coo_l1dr.append(al4["Actual Time"])
            _pr_coo_l1dr.append(al4["Predicted Time"])
            if min_t > coo_lk:
                min_t = coo_lk
                min_a = "COO L1DR SpMV"
            if r_t > al4["Actual Time"]:
                r_t = al4["Actual Time"]
                r_a = "COO L1DR SpMV"
            _algo.append(min_a)
            _ralgo.append(r_a)
            _ac.append(r_t)
            _pr.append(min_t)
            if min_a == r_a:
                _right.append(1)
            else:
                _right.append(0)
    #         print(p, " algo: ", min_a, " time:", min_t, " csr lk:", csr_lk, " csr gk:", csr_gk, " csr 2d:", csr_2d, " coo lk:", coo_lk, " coo gk:", coo_gk, " coo 2d:", coo_2d)
    _optimal["Matrices"] = _mat
    _optimal["Processes"] = _p
    _optimal["Predicted Best Strategy"] = _algo
    _optimal["Actual Best Strategy"] = _ralgo
    _optimal["R/W"] = _right
    _optimal["CSR L1DR SpMV"] = _csr_l1dr
    _optimal["CSR L1DR SpMV Predicted"] = _pr_csr_l1dr
    _optimal["COO L1DR SpMV"] = _coo_l1dr
    _optimal["COO L1DR SpMV Predicted"] = _pr_coo_l1dr
    _optimal["CSR G1DR SpMV"] = _csr_g1dr
    _optimal["CSR G1DR SpMV Predicted"] = _pr_csr_g1dr
    _optimal["COO G1DR SpMV"] = _coo_g1dr
    _optimal["COO G1DR SpMV Predicted"] = _pr_coo_g1dr
    _optimal["CSR 2DU"] = _csr_2d
    _optimal["CSR 2DU Predicted"] = _pr_csr_2d
    _optimal["Predicted Time"] = _ac
    _optimal["Actual Time"] = _pr
    _optimal[_optimal["Processes"] == 225]
    return _optimal

In [13]:
optimal = get_optimal(test_matrices, csr_dyn_gk_spmv_pred, coo_dyn_gk_spmv_pred, csr_dyn_lk_spmv_pred, coo_dyn_lk_spmv_pred, csr_dyn_2d_pred)
optimal

Unnamed: 0,Matrices,Processes,Predicted Best Strategy,Actual Best Strategy,R/W,CSR L1DR SpMV,CSR L1DR SpMV Predicted,COO L1DR SpMV,COO L1DR SpMV Predicted,CSR G1DR SpMV,CSR G1DR SpMV Predicted,COO G1DR SpMV,COO G1DR SpMV Predicted,CSR 2DU,CSR 2DU Predicted,Predicted Time,Actual Time
0,delaunay_n20,144,CSR L1DR SpMV,COO L1DR SpMV,0,0.49596,0.35153,0.42821,0.36615,2.3872,2.7926,4.474,4.8253,1.7529,1.8906,0.42821,0.35153
1,delaunay_n20,169,CSR L1DR SpMV,COO L1DR SpMV,0,0.39296,0.29801,0.33376,0.30824,2.473,2.2867,4.2676,4.1035,1.5816,1.6652,0.33376,0.29801
2,delaunay_n20,225,CSR L1DR SpMV,CSR L1DR SpMV,1,0.34137,0.26005,0.36322,0.26905,2.5088,2.476,4.2734,4.0288,1.3901,1.4138,0.34137,0.26005
3,NACA0015,144,CSR L1DR SpMV,COO L1DR SpMV,0,0.43359,0.36577,0.40619,0.37756,2.3541,2.762,4.463,4.7264,1.7328,1.8732,0.40619,0.36577
4,NACA0015,169,CSR L1DR SpMV,COO L1DR SpMV,0,0.33981,0.31084,0.28619,0.31731,2.4091,2.2635,4.3269,4.0312,1.5737,1.65,0.28619,0.31084
5,NACA0015,225,CSR L1DR SpMV,COO L1DR SpMV,0,0.34201,0.27196,0.30284,0.27636,2.7777,2.4513,4.0225,3.9623,1.3734,1.4011,0.30284,0.27196
6,AS365,144,CSR L1DR SpMV,CSR L1DR SpMV,1,1.4994,1.1135,1.6173,1.1505,9.6471,10.832,16.718,17.946,6.9119,6.8982,1.4994,1.1135
7,AS365,169,CSR L1DR SpMV,CSR L1DR SpMV,1,1.1788,0.95697,1.233,0.98733,8.4277,8.3564,14.308,14.451,5.4516,6.0248,1.1788,0.95697
8,AS365,225,CSR L1DR SpMV,CSR L1DR SpMV,1,1.0072,0.77312,1.0329,0.79383,8.7472,8.6934,13.111,13.125,4.238,5.0642,1.0072,0.77312
9,road_central,144,COO L1DR SpMV,COO L1DR SpMV,1,3.1775,2.9176,2.884,2.8289,45.002,40.341,55.172,50.831,22.679,21.706,2.884,2.8289


In [14]:
print("performance of all")
# for i,r in optimal.iterrows():
#     print(r["Matrices"], " & \\backslashbox{", round(r["CSR L1DR SpMV"], 2),"}{",round(r["CSR L1DR SpMV Predicted"], 2), "} & \\backslashbox{", round(r["COO L1DR SpMV"], 2), "}{", round(r["COO L1DR SpMV Predicted"], 2), "} & \\backslashbox{", round(r["CSR G1DR SpMV"], 2), "}{", round(r["CSR G1DR SpMV Predicted"], 2), "} & \\backslashbox{", round(r["COO G1DR SpMV"], 2), "}{", round(r["COO G1DR SpMV Predicted"], 2), "} & \\backslashbox{", round(r["CSR 2DU"], 2), "}{", round(r["CSR 2DU Predicted"], 2), "} & \\backslashbox{", round(r["COO 2DU"], 2), "}{", round(r["COO 2DU Predicted"], 2), "} \\\ \hline")
for i,r in optimal.sort_values(by="Matrices").reset_index().iterrows():
    print(r["Matrices"], " & ", r["Processes"], " & ", round(r["CSR L1DR SpMV"], 2)," & ",round(r["CSR L1DR SpMV Predicted"], 2), " & ", round(r["COO L1DR SpMV"], 2), " & ", round(r["COO L1DR SpMV Predicted"], 2), " & ", round(r["CSR G1DR SpMV"], 2), " & ", round(r["CSR G1DR SpMV Predicted"], 2), " & ", round(r["COO G1DR SpMV"], 2), " & ", round(r["COO G1DR SpMV Predicted"], 2), " & ", round(r["CSR 2DU"], 2), " & ", round(r["CSR 2DU Predicted"], 2), " \\\ \hline")

print("Best Alforithm")
for i,r in optimal.iterrows():
    print(r["Matrices"], " & ", r["Predicted Best Strategy"], " & ", r["Actual Best Strategy"], " & ", r["Predicted Time"], " & ", r["Actual Time"], " & ", r["R/W"], " \\\ \hline")

performance of all
AS365  &  144  &  1.5  &  1.11  &  1.62  &  1.15  &  9.65  &  10.83  &  16.72  &  17.95  &  6.91  &  6.9  \\ \hline
AS365  &  169  &  1.18  &  0.96  &  1.23  &  0.99  &  8.43  &  8.36  &  14.31  &  14.45  &  5.45  &  6.02  \\ \hline
AS365  &  225  &  1.01  &  0.77  &  1.03  &  0.79  &  8.75  &  8.69  &  13.11  &  13.12  &  4.24  &  5.06  \\ \hline
NACA0015  &  144  &  0.43  &  0.37  &  0.41  &  0.38  &  2.35  &  2.76  &  4.46  &  4.73  &  1.73  &  1.87  \\ \hline
NACA0015  &  169  &  0.34  &  0.31  &  0.29  &  0.32  &  2.41  &  2.26  &  4.33  &  4.03  &  1.57  &  1.65  \\ \hline
NACA0015  &  225  &  0.34  &  0.27  &  0.3  &  0.28  &  2.78  &  2.45  &  4.02  &  3.96  &  1.37  &  1.4  \\ \hline
NLR  &  144  &  1.55  &  1.22  &  1.64  &  1.26  &  10.81  &  11.9  &  18.71  &  19.71  &  7.62  &  7.57  \\ \hline
NLR  &  169  &  1.32  &  1.04  &  1.37  &  1.07  &  9.33  &  9.16  &  15.71  &  15.84  &  6.1  &  6.61  \\ \hline
NLR  &  225  &  1.15  &  0.84  &  1.21  &  0.86  

In [158]:
csr_2d_spmv[csr_2d_spmv["Name"] == "AS365"]
csr_dyn_2d_pred
min_time_factor = 1.0
if min(csr_dyn_lk_spmv["AvgTime"]) < 1.0:
    min_time_factor = 1/min(csr_dyn_lk_spmv["AvgTime"])
for i,r in csr_dyn_lk_spmv.iterrows():
    csr_dyn_lk_spmv.at[i, "AvgTime"] = min_time_factor*r["AvgTime"]

Unnamed: 0,Name,Model,Nodes,Process,Matrix Size,Avg Row,Nonzero per Row,Actual Time,Predicted Time,Error
0,NACA0015,CSR 2D Partition,5,225,1039183,69279,0.399649,0.053592,0.049382,7.856
1,delaunay_n20,CSR 2D Partition,5,225,1048576,69906,0.399989,0.054246,0.04989,8.029
2,NACA0015,CSR 2D Partition,4,169,1039183,79938,0.461129,0.061411,0.040047,34.79
3,delaunay_n20,CSR 2D Partition,4,169,1048576,80660,0.461531,0.061718,0.040716,34.03
4,NACA0015,CSR 2D Partition,3,144,1039183,86599,0.49956,0.067617,0.052022,23.06
5,delaunay_n20,CSR 2D Partition,3,144,1048576,87382,0.49999,0.068403,0.052821,22.78
6,AS365,CSR 2D Partition,5,225,3799275,253285,0.398956,0.16538,0.19259,16.46
7,NLR,CSR 2D Partition,5,225,4163763,277585,0.399893,0.19505,0.21184,8.608
8,AS365,CSR 2D Partition,4,169,3799275,292252,0.460334,0.21273,0.22293,4.792
9,AS365,CSR 2D Partition,3,144,3799275,316607,0.498694,0.26972,0.26994,0.08134
