In [7]:
import pandas as pd
import numpy as np
import joblib
import re
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import keras

In [8]:
lr = joblib.load('models/lr_best.sav')
l1 = joblib.load('models/l1_best.sav')
l2 = joblib.load('models/l2_best.sav')
pr = joblib.load('models/pr_best.sav')
svm = joblib.load('models/svm_best.sav')
gbr = joblib.load('models/gbr_best.sav')
xgb = joblib.load('models/xgb_best.sav')
mlp = keras.models.load_model('models/mlp.h5')

In [34]:
# Evaluation
from sklearn.metrics import make_scorer, mean_squared_error, r2_score

rmse = make_scorer(mean_squared_error, squared=False, greater_is_better=False)
r2 = make_scorer(r2_score)

In [21]:
# Data
X_test = pd.read_csv('data/test.csv')
y_test = X_test['TARGET_energy']
X_test.drop(columns=['TARGET_energy'], inplace=True)

X_test.head()

Unnamed: 0,date,T1,RH_1,T2,RH_2,T3,RH_3,T4,RH_4,T5,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,2016-01-25 10:20:00,19.79,44.06,19.066667,44.2,20.1,44.2,17.6,46.433333,17.89,...,16.5,49.2,9.466667,763.5,79.333333,3.333333,40.0,5.866667,39.136068,39.136068
1,2016-02-20 12:00:00,21.033333,40.79,20.1,41.163333,20.79,39.2,17.823333,44.126667,17.6,...,17.7,44.09,9.2,755.0,90.0,6.0,29.0,7.7,29.592663,29.592663
2,2016-04-01 14:30:00,22.39,38.7,22.73,36.5,22.29,37.9,21.2,37.363333,19.7,...,20.5,41.29,10.75,760.7,58.5,2.5,34.5,2.8,7.68935,7.68935
3,2016-01-22 05:20:00,18.926667,37.59,17.963333,37.5,19.166667,40.23,16.5,38.2,16.7,...,16.0,42.7,-1.133333,763.933333,86.333333,3.0,62.666667,-3.133333,42.139292,42.139292
4,2016-04-24 18:30:00,22.2,36.0,20.0,37.06,22.566667,33.9,21.7,33.88,20.7,...,20.5,34.963333,7.4,757.45,65.0,5.5,40.0,1.15,33.962548,33.962548


In [24]:
# Preprocessing
from transformers import DateTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline

# Drop redundant features
drop = ('drop', ['rv1', 'rv2'])

# Scalers
mms = MinMaxScaler()

dt = DateTransformer(['date'])

mlp_preprocess = make_column_transformer(drop, (dt, ['date']), remainder='passthrough')
pipeline = make_pipeline(mlp_preprocess, mms, mlp)

In [27]:
models = [lr, l1, l2, pr, svm, gbr, xgb, mlp]
model_names = ['lr', 'l1', 'l2', 'pr', 'svm', 'gbr', 'xgb', 'mlp']
rmse_score, r2_score = [],[]

for model in models:
    if model == mlp:
        rmse_score.append(rmse(y_true=y_test, X=mlp_preprocess.fit_transform(X_test), estimator=mlp))
        r2_score.append(r2(y_true=y_test, X=mlp_preprocess.fit_transform(X_test), estimator=mlp))
        continue

    rmse_score.append(rmse(y_true=y_test, X=X_test, estimator=model))
    r2_score.append(r2(y_true=y_test, X=X_test, estimator=model))

test_eval = pd.DataFrame(zip(model_names, rmse_score, r2_score), columns=['model', 'rmse', 'r2'])
test_eval.sort_values(by='rmse', inplace=True)
test_eval



Unnamed: 0,model,rmse,r2
6,xgb,73.496789,0.533002
5,gbr,75.751863,0.503905
3,pr,85.62428,0.366171
0,lr,96.190307,0.20009
1,l1,96.196956,0.19998
2,l2,96.206243,0.199825
4,svm,105.498926,0.03778
7,mlp,4680.067487,-1892.575346
