In [114]:
import os
import warnings
import pandas as pd
from joblib import load
from utils.scaler import *
from sklearn.metrics import *
from matplotlib import pyplot as plt
from IPython.core.interactiveshell import InteractiveShell

In [115]:
# Notebook-wide display configuration.

# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices from pandas / scikit-learn.
warnings.filterwarnings('ignore')

# Default matplotlib font family; 'GULIM' has to be installed on the machine
# (presumably chosen for Korean glyph support — confirm).
plt.rcParams['font.family'] = 'GULIM'

In [116]:
# Load the held-out feature matrix, the matching targets and the fitted target scaler.
# The first CSV column is used as the row index for both frames.
X_test = pd.read_csv('Database/test/X_self_test_norm.csv', index_col=0)
y_test_transformed = pd.read_csv('Database/test/y_self_test.csv', index_col=0)
target_scaler = load('Database/target_scaler.joblib')

# Map the stored targets back through inverse_boxcox with the fixed lambda used
# throughout this notebook, then subtract 1e-6 (presumably the offset that was
# added before the forward Box-Cox transform — TODO confirm).
# NOTE(review): the prediction cell applies target_scaler.inverse_transform before
# inverse_boxcox, whereas y_test is not passed through target_scaler here — confirm
# that y_self_test.csv is stored unscaled.
y_test = inverse_boxcox(y_test_transformed, -0.4431146771093272) - 0.000001

In [117]:
# Station identifiers STN001 ... STN020 (three-digit, zero-padded).
STN_list = [f'STN{station_no:03d}' for station_no in range(1, 21)]

# NOTE(review): this second assignment replaces the full roster above, so every
# loop over STN_list only processes STN001 — confirm the restriction is intended.
STN_list = ['STN001']


def _pickle_names(directory):
    """Return the sorted names of the '.pkl' files located directly in `directory`."""
    return sorted(entry for entry in os.listdir(directory) if entry.endswith('.pkl'))


dt_files = _pickle_names('File/DT')
lgb_files = _pickle_names('File/LGBM')
xgb_files = _pickle_names('File/XGB')
cat_files = _pickle_names('File/CAT')

best_dt_dict, best_lgb_dict, best_xgb_dict, best_cat_dict = {}, {}, {}, {}

# (directory, sorted file names, destination dict) for each model family,
# listed in the order the models are loaded for every station.
_model_sources = (
    ('File/DT', dt_files, best_dt_dict),
    ('File/LGBM', lgb_files, best_lgb_dict),
    ('File/XGB', xgb_files, best_xgb_dict),
    ('File/CAT', cat_files, best_cat_dict),
)

# Stations are paired with model files purely by position: the i-th station gets
# the i-th file name (in sorted order) of each directory. An IndexError is raised
# if a directory holds fewer '.pkl' files than there are stations.
# joblib's load() unpickles the file, so only trusted model files must be placed
# in these directories.
for i, STN in enumerate(STN_list):
    for model_dir, file_names, registry in _model_sources:
        registry[STN] = load(os.path.join(model_dir, file_names[i]))

In [118]:
def _predict_original_scale(model, features):
    """Predict with `model` and map the output back to the original target scale.

    The raw predictions are wrapped in a DataFrame, passed through
    ``target_scaler.inverse_transform``, then through ``inverse_boxcox`` with the
    fixed lambda used in this notebook, and finally shifted by -1e-6
    (presumably undoing an offset added before the forward Box-Cox — TODO confirm).

    Parameters
    ----------
    model : object exposing ``predict(features)``
    features : feature matrix accepted by ``model.predict``

    Returns
    -------
    Whatever ``inverse_boxcox`` returns for the inverse-scaled predictions,
    minus 1e-6.
    """
    scaled_pred = pd.DataFrame(model.predict(features))
    return inverse_boxcox(
        target_scaler.inverse_transform(scaled_pred),
        -0.4431146771093272) - 0.000001


pred_dt_dict = {}
pred_lgb_dict = {}
pred_xgb_dict = {}
pred_cat_dict = {}

# Each prediction dict is filled from its own model family. Previously all four
# called best_dt_dict[STN].predict, so the LGBM / XGB / CatBoost entries were
# just copies of the decision-tree predictions.
for STN in STN_list:
    pred_dt_dict[STN] = _predict_original_scale(best_dt_dict[STN], X_test)
    pred_lgb_dict[STN] = _predict_original_scale(best_lgb_dict[STN], X_test)
    pred_xgb_dict[STN] = _predict_original_scale(best_xgb_dict[STN], X_test)
    pred_cat_dict[STN] = _predict_original_scale(best_cat_dict[STN], X_test)

In [130]:
# Build one wide table holding every model's predictions for every station.
# Frames are collected first and concatenated once; previously prediction_df was
# reassigned on each iteration, so only the last station's four columns survived.
# Column labels (including their mixed upper/lower-case prefixes) are kept
# unchanged because other code may select columns by these exact names.
prediction_frames = []
for STN in STN_list:
    prediction_frames.extend([
        pd.DataFrame(pred_dt_dict[STN], columns=[f'DT_{STN}']),
        pd.DataFrame(pred_lgb_dict[STN], columns=[f'lgb_{STN}']),
        pd.DataFrame(pred_xgb_dict[STN], columns=[f'xgb_{STN}']),
        pd.DataFrame(pred_cat_dict[STN], columns=[f'cat_{STN}']),
    ])

# pd.concat raises on an empty list, so fall back to an empty frame when there
# are no stations (this matches the previous result for an empty STN_list).
prediction_df = pd.concat(prediction_frames, axis=1) if prediction_frames else pd.DataFrame()

In [155]:
# Score every model's predictions against y_test and gather the results in one
# table: one row per '<MODEL>_<station>' label, one column per metric.
# Rows are accumulated in plain lists and the DataFrame is built once, instead
# of repeatedly concatenating onto an initially empty frame inside the loop.
metric_names = ['R2', 'RMSE', 'MAE', 'MAPE']
y_true = y_test.values

metric_labels = []
metric_rows = []
for STN in STN_list:
    # Same model order as before: DT, LGB, XGB, CAT for each station.
    for model_tag, pred_dict in (('DT', pred_dt_dict),
                                 ('LGB', pred_lgb_dict),
                                 ('XGB', pred_xgb_dict),
                                 ('CAT', pred_cat_dict)):
        y_pred = pred_dict[STN]
        metric_labels.append(f'{model_tag}_{STN}')
        metric_rows.append([
            r2_score(y_true, y_pred),
            root_mean_squared_error(y_true, y_pred),
            mean_absolute_error(y_true, y_pred),
            mean_absolute_percentage_error(y_true, y_pred),
        ])

# With no stations this yields an empty frame that still carries the metric columns.
metric_df = pd.DataFrame(metric_rows, index=metric_labels, columns=metric_names)