In [266]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
import warnings
from sklearn.model_selection import RandomizedSearchCV

# FR

In [267]:
fr_train = pd.read_csv("fr_train_DAY.csv", index_col=0)
fr_train

Unnamed: 0,ID,DAY_ID,FR_CONSUMPTION,FR_DE_EXCHANGE,FR_NET_EXPORT,FR_NET_IMPORT,FR_GAS,FR_COAL,FR_HYDRO,FR_NUCLEAR,FR_SOLAR,FR_WINDPOW,FR_RESIDUAL_LOAD,FR_RAIN,FR_WIND,FR_TEMP,GAS_RET,COAL_RET,CARBON_RET,TARGET
1,1179,1,1.222131,-0.331356,0.778627,-0.778627,1.991028,-0.786509,0.709616,1.381575,0.485975,-0.172140,1.214288,-0.497520,-1.465608,0.231602,1.480313,0.931562,0.822047,-0.063369
2,1327,2,-0.667390,1.102015,0.256736,-0.256736,0.458302,-0.766904,-0.930172,-0.379230,1.032412,-0.844350,-0.540642,-0.372156,-0.926064,0.641235,1.802550,1.140920,0.900434,2.575976
3,2016,3,-0.834564,1.051716,-0.612133,0.612133,0.069297,-0.718729,-0.383690,-1.579208,2.986527,-0.718643,-0.856321,-1.118297,0.488650,-0.951057,0.440121,-0.064550,-0.032756,0.068905
4,2047,5,-0.470371,-0.144615,-1.811403,1.811403,0.528273,-0.766063,-0.398178,-1.866010,3.425813,-0.640389,-0.552878,-0.790071,0.021868,1.459745,-0.117977,0.550433,0.781870,1.031308
5,1995,7,-0.625625,-0.002239,-0.745182,0.745182,0.727314,-0.778036,-0.739291,-1.934168,2.276123,-0.079343,-0.806379,-0.663419,1.367421,0.954384,-0.379980,0.518459,-0.034642,-0.118915
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
844,1120,1205,-0.100235,-0.381612,0.106216,-0.106216,0.137448,0.578370,-0.191697,0.132869,0.127692,-0.389945,-0.019808,-0.435577,-0.667096,-0.481947,0.047390,0.562084,-0.954402,-0.028575
845,1721,1207,0.516789,-0.651648,-0.820640,0.820640,0.295393,-0.783923,-0.138441,-0.523101,1.539418,0.839589,0.236243,1.112782,-0.018442,-0.960526,0.099209,1.750872,0.646905,-0.729755
846,2039,1208,-0.709011,-0.427976,-1.678101,1.678101,0.179728,-0.758579,-0.380974,-1.903612,1.061313,-0.468104,-0.666261,2.661142,0.630211,-1.439105,0.291714,0.568479,0.395742,0.136028
848,1987,1212,-0.520506,0.270515,-0.789824,0.789824,0.625656,-0.776785,-0.906285,-1.434474,1.589641,-0.266687,-0.589767,-0.237434,1.123953,-0.308232,0.017778,0.072168,-0.160792,-0.425474


In [268]:
fr_train['NEW_ENERGY'] = fr_train['FR_HYDRO'] + fr_train['FR_NUCLEAR'] + fr_train['FR_WINDPOW'] + fr_train['FR_SOLAR']
fr_train['OLD_ENERGY'] = fr_train['FR_GAS'] + fr_train['FR_COAL']

In [269]:
fr_train['WEEKDAY'] = fr_train['DAY_ID'] % 7 + 1 

In [270]:
fr_X = fr_train.drop(['ID', 'TARGET', 'FR_NET_EXPORT'], axis=1)
fr_y = fr_train['TARGET']
fr_X.columns

Index(['DAY_ID', 'FR_CONSUMPTION', 'FR_DE_EXCHANGE', 'FR_NET_IMPORT', 'FR_GAS',
       'FR_COAL', 'FR_HYDRO', 'FR_NUCLEAR', 'FR_SOLAR', 'FR_WINDPOW',
       'FR_RESIDUAL_LOAD', 'FR_RAIN', 'FR_WIND', 'FR_TEMP', 'GAS_RET',
       'COAL_RET', 'CARBON_RET', 'NEW_ENERGY', 'OLD_ENERGY', 'WEEKDAY'],
      dtype='object')

In [271]:
from sklearn.model_selection import train_test_split

fr_X_train, fr_X_valid, fr_y_train, fr_y_valid = train_test_split(fr_X, fr_y, test_size=0.2, random_state=42)

print("train dataset:", fr_X_train.shape, fr_y_train.shape)
print("validation dataset:", fr_X_valid.shape, fr_y_valid.shape)

train dataset: (581, 20) (581,)
validation dataset: (146, 20) (146,)


# linear regression

In [272]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from scipy.stats import spearmanr

lr_model = LinearRegression()
print(lr_model)

lr_model.fit(fr_X_train, fr_y_train)

fr_y_pred_lr = lr_model.predict(fr_X_valid)

spearman_corr, _ = spearmanr(fr_y_valid, fr_y_pred_lr)

print("sc:", spearman_corr)

mse = mean_squared_error(fr_y_valid, fr_y_pred_lr)
print("mse:", mse)

LinearRegression()
sc: 0.13320556930353264
mse: 1.624969553390686


# random forest

In [273]:
from sklearn.ensemble import RandomForestRegressor
from scipy.stats import spearmanr

rf_model = RandomForestRegressor(random_state=42)
print(rf_model)

rf_model.fit(fr_X_train, fr_y_train)

fr_y_pred_rf = rf_model.predict(fr_X_valid)

spearman_corr_rf, _ = spearmanr(fr_y_valid, fr_y_pred_rf)
print("sc:", spearman_corr_rf)

mse = mean_squared_error(fr_y_valid, fr_y_pred_rf)
print("mse:", mse)

RandomForestRegressor(random_state=42)
sc: 0.16328859985605215
mse: 1.658469432743062


# gradientboost

In [274]:
from sklearn.ensemble import GradientBoostingRegressor
from scipy.stats import spearmanr

gb_model = GradientBoostingRegressor(random_state=42)  
print(gb_model)

gb_model.fit(fr_X_train, fr_y_train)

fr_y_pred_gb = gb_model.predict(fr_X_valid)

spearman_corr_gb, _ = spearmanr(fr_y_valid, fr_y_pred_gb)
print("sc:", spearman_corr_gb)

mse_gb = mean_squared_error(fr_y_valid, fr_y_pred_gb)
print("mse:", mse_gb)

GradientBoostingRegressor(random_state=42)
sc: 0.05355870702547197
mse: 1.8402023888278507


# adaboost

In [275]:
from sklearn.ensemble import AdaBoostRegressor
from scipy.stats import spearmanr

adaboost_model = AdaBoostRegressor(random_state=42)
print(adaboost_model)

adaboost_model.fit(fr_X_train, fr_y_train)

fr_y_pred_adaboost = adaboost_model.predict(fr_X_valid)

spearman_corr_adaboost, _ = spearmanr(fr_y_valid, fr_y_pred_adaboost)
print("sc:", spearman_corr_adaboost)

mse_adaboost = mean_squared_error(fr_y_valid, fr_y_pred_adaboost)
print("mse:", mse_adaboost)

AdaBoostRegressor(random_state=42)
sc: 0.1412209027815726
mse: 1.6678718059489261


# xgboost

In [276]:
from xgboost import XGBRegressor
from scipy.stats import spearmanr

xgb_model = XGBRegressor(objective='reg:squarederror', random_state=42) 
print(xgb_model)

xgb_model.fit(fr_X_train, fr_y_train)

fr_y_pred_xgb = xgb_model.predict(fr_X_valid)

spearman_corr_xgb, _ = spearmanr(fr_y_valid, fr_y_pred_xgb)
print("sc:", spearman_corr_xgb)

mse_xgb = mean_squared_error(fr_y_valid, fr_y_pred_xgb)
print("mse:", mse_xgb)

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=None, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=None, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=None, n_jobs=None,
             num_parallel_tree=None, random_state=42, ...)
sc: 0.0795408207039795
mse: 1.9264362016192866


# knn

In [277]:
from sklearn.neighbors import KNeighborsRegressor
from scipy.stats import spearmanr

knn_model = KNeighborsRegressor(n_neighbors=5)  
print(knn_model)

knn_model.fit(fr_X_train, fr_y_train)

fr_y_pred_knn = knn_model.predict(fr_X_valid)

spearman_corr_knn, _ = spearmanr(fr_y_valid, fr_y_pred_knn)
print("sc:", spearman_corr_knn)

mse_knn = mean_squared_error(fr_y_valid, fr_y_pred_knn)
print("mse:", mse_knn)

KNeighborsRegressor()
sc: -0.11886750800033584
mse: 1.8901075438577983


# svr

In [278]:
from sklearn.svm import SVR
from scipy.stats import spearmanr

svr_model = SVR(kernel='linear', C=1.0, epsilon=0.2)  
print(svr_model)

svr_model.fit(fr_X_train, fr_y_train)

fr_y_pred_svr = svr_model.predict(fr_X_valid)

spearman_corr_svr, _ = spearmanr(fr_y_valid, fr_y_pred_svr)
print("sc:", spearman_corr_svr)

mse_svr = mean_squared_error(fr_y_valid, fr_y_pred_svr)
print("mse:", mse_svr)

SVR(epsilon=0.2, kernel='linear')
sc: 0.08380176395342306
mse: 1.630190100619118
