In [1]:
import yaml
import seaborn as sns
import pandas as pd
import copy

In [2]:
from utils.helper import * 
from utils.evaluators import *
from models.build_model import *

In [3]:
def remove_nano_from_input(arr, feature_names):
    """Zero out every column of ``arr`` whose feature name contains "nano".

    The array is modified in place and the same object is returned, so a
    caller that still needs the original values must pass in a copy.

    Args:
        arr: 2-D array supporting ``arr[:, k] = 0`` column assignment.
        feature_names: Sequence of column names; the name at position ``k``
            labels column ``k`` of ``arr``.

    Returns:
        ``arr`` itself, with the matching columns set to 0.
    """
    for col_idx, col_name in enumerate(feature_names):
        if "nano" not in col_name:
            continue
        arr[:, col_idx] = 0
    return arr

In [4]:
def eval_matrics(actual, predict_nano, predict_nonano, name="ANN"):
    """Print six metrics comparing predictions with and without nano features.

    Each metric function is called once on ``(actual, predict_nano)`` and once
    on ``(actual, predict_nonano)``; the two results are printed under the
    "with nano" / "without nano" labels, followed by a blank line.

    Args:
        actual: Ground-truth targets, forwarded unchanged to every metric.
        predict_nano: Predictions reported as "with nano".
        predict_nonano: Predictions reported as "without nano".
        name: Model label shown in the report header.

    Returns:
        None. The report is written to stdout.
    """
    print(f"eval_matrics for {name}")

    # (label shown in the report, metric function), in report order.
    # The metric functions are not defined in this notebook; presumably they
    # come from the star imports at the top -- confirm in utils.evaluators.
    # Note that the "MAE" row is computed by calculate_rmae.
    report_rows = (
        ("r2", calculate_r2),
        ("MAE", calculate_rmae),
        ("RMSE", calculate_rmse),
        ("RSR", calculate_RSR),
        ("MAPE", calculate_mape),
        ("NMBE", calcuate_nmbe),
    )

    for label, metric_fn in report_rows:
        with_nano = metric_fn(actual, predict_nano)
        without_nano = metric_fn(actual, predict_nonano)
        print(f"{label} with nano: ", with_nano)
        print(f"{label} without nano: ", without_nano)
        print()

In [5]:
# Nano-feature ablation for the ANN: every test fold is predicted twice,
# once on the original inputs and once with all "nano" columns zeroed.
ann_config_path = 'configs/ann.yaml'
with open(ann_config_path, "r") as config_file:
    config = yaml.safe_load(config_file)
k_fold = config["k_fold"]
save_path = config["Dataset"]["save_path"]
model = build_model(config)
# presumably restores the already-trained per-fold models -- confirm in models.build_model
model.load()

# One entry per fold, appended in fold order.
pred_all, pred_nonano_all, ytest_all = [], [], []

for i in range(k_fold):
    xtrain, ytrain, xtest, ytest = model.inputs[i]
    feature_names = model.feature_names
    fold_model = model.models[i]

    pred = fold_model.predict(xtest)

    # Deep-copy first: remove_nano_from_input writes into the array it receives.
    xtest_nonano = remove_nano_from_input(copy.deepcopy(xtest), feature_names)
    pred_nonano = fold_model.predict(xtest_nonano)

    pred_all.append(pred)
    pred_nonano_all.append(pred_nonano)
    ytest_all.append(ytest)

    # Per-fold scatter of prediction vs. ground truth for both input variants.
    fig, ax = plt.subplots(figsize=(5, 5))
    diag_end = ytest.max()
    ax.plot([0, diag_end], [0, diag_end], "k--")  # y = x reference line
    ax.scatter(ytest, pred, marker="*", color='blue', label="pred w/ nano")
    ax.scatter(ytest, pred_nonano, marker="+", color='red', label="pred w/o nano")
    ax.set_xlabel("GroundTruth")
    ax.set_ylabel("Prediction")
    ax.legend()
    fig.tight_layout()

    fig.savefig(f"{save_path}/wo_nano_scatter_{i}_fold.png")
    plt.close("all")

eval_matrics(ytest_all, pred_all, pred_nonano_all, name="ANN")

eval_matrics for ANN
r2 with nano:  0.8675867479990527
r2 without nano:  0.846570727317232

MAE with nano:  2.4982198027920677
MAE without nano:  2.5607270311023536

RMSE with nano:  7.6660292384986235
RMSE without nano:  8.251993146746823

RSR with nano:  0.11858394689689092
RSR without nano:  0.1215510012106605

MAPE with nano:  14.064637051070605
MAPE without nano:  14.020735816286445

NMBE with nano:  -0.5191521483960382
NMBE without nano:  2.7380580807303336



In [6]:
# Pool the per-fold results into flat 1-D arrays, keeping fold order.
# Each fold is ravelled and the pieces are concatenated rather than stacked
# with np.array(...).flatten(): stacking only works when every fold holds the
# same number of samples, while concatenation also handles unequal folds and
# yields the identical ordering when the folds are equal.
pred_all = np.concatenate([np.asarray(fold).ravel() for fold in pred_all])
pred_nonano_all = np.concatenate([np.asarray(fold).ravel() for fold in pred_nonano_all])
ytest_all = np.concatenate([np.asarray(fold).ravel() for fold in ytest_all])

# Scatter of prediction vs. ground truth over all folds, for both input variants.
fig, ax = plt.subplots(figsize=(5, 5))
diag_end = ytest_all.max()
ax.plot([0, diag_end], [0, diag_end], "k--")  # y = x reference line
ax.scatter(ytest_all, pred_all, marker="*", color='blue', label="pred w/ nano")
ax.scatter(ytest_all, pred_nonano_all, marker="+", color='red', label="pred w/o nano")
ax.set_xlabel("GroundTruth")
ax.set_ylabel("Prediction")
ax.legend()
fig.tight_layout()

# save_path is the value set by the most recently executed model cell.
fig.savefig(f"{save_path}/wo_nano_scatter_all.png")
plt.close("all")

In [7]:
# Nano-feature ablation for the random forest: every test fold is predicted
# twice, once on the original inputs and once with all "nano" columns zeroed.
rf_config_path = 'configs/random_forest.yaml'
with open(rf_config_path, "r") as config_file:
    config = yaml.safe_load(config_file)
k_fold = config["k_fold"]
save_path = config["Dataset"]["save_path"]
model = build_model(config)
# presumably restores the already-trained per-fold models -- confirm in models.build_model
model.load()

# One entry per fold, appended in fold order.
pred_all, pred_nonano_all, ytest_all = [], [], []

for i in range(k_fold):
    xtrain, ytrain, xtest, ytest = model.inputs[i]
    feature_names = model.feature_names
    fold_model = model.models[i]

    pred = fold_model.predict(xtest)

    # Deep-copy first: remove_nano_from_input writes into the array it receives.
    xtest_nonano = remove_nano_from_input(copy.deepcopy(xtest), feature_names)
    pred_nonano = fold_model.predict(xtest_nonano)

    pred_all.append(pred)
    pred_nonano_all.append(pred_nonano)
    ytest_all.append(ytest)

    # Per-fold scatter of prediction vs. ground truth for both input variants.
    fig, ax = plt.subplots(figsize=(5, 5))
    diag_end = ytest.max()
    ax.plot([0, diag_end], [0, diag_end], "k--")  # y = x reference line
    ax.scatter(ytest, pred, marker="*", color='blue', label="pred w/ nano")
    ax.scatter(ytest, pred_nonano, marker="+", color='red', label="pred w/o nano")
    ax.set_xlabel("GroundTruth")
    ax.set_ylabel("Prediction")
    ax.legend()
    fig.tight_layout()

    fig.savefig(f"{save_path}/wo_nano_scatter_{i}_fold.png")
    plt.close("all")

eval_matrics(ytest_all, pred_all, pred_nonano_all, name="random forest")

eval_matrics for random forest
r2 with nano:  0.9100204413524926
r2 without nano:  0.8961244222929904

MAE with nano:  2.1702815278519147
MAE without nano:  2.2783942737860268

RMSE with nano:  6.319412061762755
RMSE without nano:  6.789870191523941

RSR with nano:  0.10301757642080281
RSR without nano:  0.10814940513675463

MAPE with nano:  10.239995256644566
MAPE without nano:  11.021659685613328

NMBE with nano:  1.9806003531872378
NMBE without nano:  4.065492901406348



In [8]:
# Pool the per-fold results into flat 1-D arrays, keeping fold order.
# Each fold is ravelled and the pieces are concatenated rather than stacked
# with np.array(...).flatten(): stacking only works when every fold holds the
# same number of samples, while concatenation also handles unequal folds and
# yields the identical ordering when the folds are equal.
pred_all = np.concatenate([np.asarray(fold).ravel() for fold in pred_all])
pred_nonano_all = np.concatenate([np.asarray(fold).ravel() for fold in pred_nonano_all])
ytest_all = np.concatenate([np.asarray(fold).ravel() for fold in ytest_all])

# Scatter of prediction vs. ground truth over all folds, for both input variants.
fig, ax = plt.subplots(figsize=(5, 5))
diag_end = ytest_all.max()
ax.plot([0, diag_end], [0, diag_end], "k--")  # y = x reference line
ax.scatter(ytest_all, pred_all, marker="*", color='blue', label="pred w/ nano")
ax.scatter(ytest_all, pred_nonano_all, marker="+", color='red', label="pred w/o nano")
ax.set_xlabel("GroundTruth")
ax.set_ylabel("Prediction")
ax.legend()
fig.tight_layout()

# save_path is the value set by the most recently executed model cell.
fig.savefig(f"{save_path}/wo_nano_scatter_all.png")
plt.close("all")

In [9]:
# Nano-feature ablation for xgboost: every test fold is predicted twice,
# once on the original inputs and once with all "nano" columns zeroed.
xgb_config_path = 'configs/xgboost.yaml'
with open(xgb_config_path, "r") as config_file:
    config = yaml.safe_load(config_file)
k_fold = config["k_fold"]
save_path = config["Dataset"]["save_path"]
model = build_model(config)
# presumably restores the already-trained per-fold models -- confirm in models.build_model
model.load()

# One entry per fold, appended in fold order.
pred_all, pred_nonano_all, ytest_all = [], [], []

for i in range(k_fold):
    xtrain, ytrain, xtest, ytest = model.inputs[i]
    feature_names = model.feature_names
    fold_model = model.models[i]

    pred = fold_model.predict(xtest)

    # Deep-copy first: remove_nano_from_input writes into the array it receives.
    xtest_nonano = remove_nano_from_input(copy.deepcopy(xtest), feature_names)
    pred_nonano = fold_model.predict(xtest_nonano)

    pred_all.append(pred)
    pred_nonano_all.append(pred_nonano)
    ytest_all.append(ytest)

    # Per-fold scatter of prediction vs. ground truth for both input variants.
    fig, ax = plt.subplots(figsize=(5, 5))
    diag_end = ytest.max()
    ax.plot([0, diag_end], [0, diag_end], "k--")  # y = x reference line
    ax.scatter(ytest, pred, marker="*", color='blue', label="pred w/ nano")
    ax.scatter(ytest, pred_nonano, marker="+", color='red', label="pred w/o nano")
    ax.set_xlabel("GroundTruth")
    ax.set_ylabel("Prediction")
    ax.legend()
    fig.tight_layout()

    fig.savefig(f"{save_path}/wo_nano_scatter_{i}_fold.png")
    plt.close("all")

eval_matrics(ytest_all, pred_all, pred_nonano_all, name="xgboost")

eval_matrics for xgboost
r2 with nano:  0.937001275689759
r2 without nano:  0.9164954932918862

MAE with nano:  1.9330720425751633
MAE without nano:  2.1229068511817166

RMSE with nano:  5.287746459455391
RMSE without nano:  6.08779056026332

RSR with nano:  0.09175786381502679
RSR without nano:  0.10076882467516478

MAPE with nano:  7.853802509113127
MAPE without nano:  9.429650615501656

NMBE with nano:  1.241463985056801
NMBE without nano:  4.4953807050614465



In [10]:
# Pool the per-fold results into flat 1-D arrays, keeping fold order.
# Each fold is ravelled and the pieces are concatenated rather than stacked
# with np.array(...).flatten(): stacking only works when every fold holds the
# same number of samples, while concatenation also handles unequal folds and
# yields the identical ordering when the folds are equal.
pred_all = np.concatenate([np.asarray(fold).ravel() for fold in pred_all])
pred_nonano_all = np.concatenate([np.asarray(fold).ravel() for fold in pred_nonano_all])
ytest_all = np.concatenate([np.asarray(fold).ravel() for fold in ytest_all])

# Scatter of prediction vs. ground truth over all folds, for both input variants.
fig, ax = plt.subplots(figsize=(5, 5))
diag_end = ytest_all.max()
ax.plot([0, diag_end], [0, diag_end], "k--")  # y = x reference line
ax.scatter(ytest_all, pred_all, marker="*", color='blue', label="pred w/ nano")
ax.scatter(ytest_all, pred_nonano_all, marker="+", color='red', label="pred w/o nano")
ax.set_xlabel("GroundTruth")
ax.set_ylabel("Prediction")
ax.legend()
fig.tight_layout()

# save_path is the value set by the most recently executed model cell.
fig.savefig(f"{save_path}/wo_nano_scatter_all.png")
plt.close("all")

In [11]:
# Nano-feature ablation for ridge regression: every test fold is predicted
# twice, once on the original inputs and once with all "nano" columns zeroed.
ridge_config_path = 'configs/ridge_regression.yaml'
with open(ridge_config_path, "r") as config_file:
    config = yaml.safe_load(config_file)
k_fold = config["k_fold"]
save_path = config["Dataset"]["save_path"]
model = build_model(config)
# presumably restores the already-trained per-fold models -- confirm in models.build_model
model.load()

# One entry per fold, appended in fold order.
pred_all, pred_nonano_all, ytest_all = [], [], []

for i in range(k_fold):
    xtrain, ytrain, xtest, ytest = model.inputs[i]
    feature_names = model.feature_names
    fold_model = model.models[i]

    pred = fold_model.predict(xtest)

    # Deep-copy first: remove_nano_from_input writes into the array it receives.
    xtest_nonano = remove_nano_from_input(copy.deepcopy(xtest), feature_names)
    pred_nonano = fold_model.predict(xtest_nonano)

    pred_all.append(pred)
    pred_nonano_all.append(pred_nonano)
    ytest_all.append(ytest)

    # Per-fold scatter of prediction vs. ground truth for both input variants.
    fig, ax = plt.subplots(figsize=(5, 5))
    diag_end = ytest.max()
    ax.plot([0, diag_end], [0, diag_end], "k--")  # y = x reference line
    ax.scatter(ytest, pred, marker="*", color='blue', label="pred w/ nano")
    ax.scatter(ytest, pred_nonano, marker="+", color='red', label="pred w/o nano")
    ax.set_xlabel("GroundTruth")
    ax.set_ylabel("Prediction")
    ax.legend()
    fig.tight_layout()

    fig.savefig(f"{save_path}/wo_nano_scatter_{i}_fold.png")
    plt.close("all")

eval_matrics(ytest_all, pred_all, pred_nonano_all, name="ridge regression")

eval_matrics for ridge regression
r2 with nano:  0.7918800455851897
r2 without nano:  0.7029727729342605

MAE with nano:  2.674902711011107
MAE without nano:  2.9348489998235805

RMSE with nano:  9.610845737819657
RMSE without nano:  11.481612245607314

RSR with nano:  0.1269706215131191
RSR without nano:  0.13930959059587594

MAPE with nano:  16.12255631683393
MAPE without nano:  17.573504541210845

NMBE with nano:  0.6544656155509855
NMBE without nano:  7.258389792509725



In [12]:
# Pool the per-fold results into flat 1-D arrays, keeping fold order.
# Each fold is ravelled and the pieces are concatenated rather than stacked
# with np.array(...).flatten(): stacking only works when every fold holds the
# same number of samples, while concatenation also handles unequal folds and
# yields the identical ordering when the folds are equal.
pred_all = np.concatenate([np.asarray(fold).ravel() for fold in pred_all])
pred_nonano_all = np.concatenate([np.asarray(fold).ravel() for fold in pred_nonano_all])
ytest_all = np.concatenate([np.asarray(fold).ravel() for fold in ytest_all])

# Scatter of prediction vs. ground truth over all folds, for both input variants.
fig, ax = plt.subplots(figsize=(5, 5))
diag_end = ytest_all.max()
ax.plot([0, diag_end], [0, diag_end], "k--")  # y = x reference line
ax.scatter(ytest_all, pred_all, marker="*", color='blue', label="pred w/ nano")
ax.scatter(ytest_all, pred_nonano_all, marker="+", color='red', label="pred w/o nano")
ax.set_xlabel("GroundTruth")
ax.set_ylabel("Prediction")
ax.legend()
fig.tight_layout()

# save_path is the value set by the most recently executed model cell.
fig.savefig(f"{save_path}/wo_nano_scatter_all.png")
plt.close("all")