In [317]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import scipy
import sktime
from datetime import datetime
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.model_selection import SlidingWindowSplitter
from sktime.utils.plotting import plot_series
import sklearn
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error, root_mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [356]:
def get_windows(y, cv, shift):
    """Collect every ``shift``-th train/test window produced by a splitter.

    Parameters
    ----------
    y : pandas.Series or numpy.ndarray
        Sequence to cut into windows.
    cv : object
        Splitter exposing ``split(y)`` that yields ``(train, test)`` pairs.
        The pairs are expected to be *positional* indices into ``y`` (this is
        what sktime splitters such as ``SlidingWindowSplitter`` produce).
    shift : int
        Only splits whose running number ``i`` satisfies ``i % shift == 0``
        are kept; ``shift=1`` keeps every split.

    Returns
    -------
    tuple of (list, list)
        ``(train_windows, test_windows)``. Element ``k`` of each list is ``y``
        restricted to the train / test positions of the ``k``-th kept split.
    """
    # Split indices are positions, not labels. Plain ``y[idx]`` on a pandas
    # Series with an integer index is a *label* lookup, which is only correct
    # when the index happens to be 0..n-1. Use ``.iloc`` whenever it exists so
    # the function also works on series that were not re-indexed first.
    take = y.iloc if hasattr(y, "iloc") else y

    train_windows = []
    test_windows = []
    for i, (train, test) in enumerate(cv.split(y)):
        if i % shift == 0:
            train_windows.append(take[train])
            test_windows.append(take[test])
    return train_windows, test_windows

In [357]:
# Keep only the trailing int(24*4*365*1.5) rows of the CSV and renumber them
# from 0; reset_index() without drop=True keeps the old row numbers as an
# "index" column.
# NOTE(review): 24*4 per day looks like 15-minute sampling, i.e. ~1.5 years of
# data — confirm against the CSV.
data = pd.read_csv("Processed_data.csv").iloc[-int(24 * 4 * 365 * 1.5):].reset_index()

In [371]:
# Window geometry: `window_length` past observations are used to predict the
# following `horizon` steps.
window_length, horizon = 12, 4

# Relative forecasting horizon covering steps 1..horizon ahead.
fh = ForecastingHorizon([*range(1, horizon + 1)])
cv = SlidingWindowSplitter(fh=fh, window_length=window_length)

# Number of (train, test) splits the splitter produces on the load series.
n_splits = cv.get_n_splits(data["Total Load Interpolated"])

In [372]:
# x holds the input windows, y the matching forecast targets. Passing
# window_length as `shift` keeps only every window_length-th split.
x, y = get_windows(
    y=data["Total Load Interpolated"],
    cv=cv,
    shift=window_length,
)

In [373]:
# Chronological 80/20 split (shuffle=False keeps the original order, so the
# test set is the last 20% of the windows); each part is then stacked into a
# NumPy array.
X_train, X_test, Y_train, Y_test = map(
    np.array,
    train_test_split(x, y, test_size=0.2, shuffle=False),
)

In [384]:
def err(Y_test, y_hat, horizon):
    """Tabulate per-step forecast errors.

    Parameters
    ----------
    Y_test, y_hat : 2-D array-like
        True and predicted values; column ``i`` holds forecast step ``i``
        (both are indexed as ``[:, i]``).
    horizon : int
        Number of leading columns (forecast steps) to score.

    Returns
    -------
    pandas.DataFrame
        One row per metric (``RMSE``, ``MAPE``, ``MAE``, ``MSE``) and one
        column per forecast step ``0..horizon-1``.
    """
    # Row order of the returned table follows the insertion order here.
    metrics = {
        "RMSE": root_mean_squared_error,
        "MAPE": mean_absolute_percentage_error,
        "MAE": mean_absolute_error,
        "MSE": mean_squared_error,
    }
    per_step = pd.DataFrame(
        {
            label: [score(Y_test[:, step], y_hat[:, step]) for step in range(horizon)]
            for label, score in metrics.items()
        }
    )
    # Transpose so metrics are rows and forecast steps are columns.
    return per_step.T

In [385]:
# Random forest baseline: map each input window to the multi-step target.
# random_state is pinned so the error table is reproducible under
# Restart & Run All; without it every run grows a different forest and the
# reported metrics drift from run to run.
tree = RandomForestRegressor(n_jobs=-1, random_state=0)
tree = tree.fit(X_train, Y_train)
y_hat = tree.predict(X_test)
err(Y_test, y_hat, horizon)

Unnamed: 0,0,1,2,3
RMSE,77.382282,130.688577,186.996467,243.900698
MAPE,0.007278,0.012388,0.01734,0.02245
MAE,61.593139,104.692505,146.662238,190.722694
MSE,5988.017569,17079.504158,34967.678609,59487.550311


<b>Fit a random forest (RF) on the STL residuals to check whether they still contain any signal.</b>

In [386]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline on the same windows. `tree` is reused as the
# model variable name even though the estimator here is linear.
tree = LinearRegression(n_jobs=-1).fit(X_train, Y_train)
y_hat = tree.predict(X_test)
err(Y_test, y_hat, horizon)

Unnamed: 0,0,1,2,3
RMSE,72.674597,133.859624,196.525638,257.023989
MAPE,0.006835,0.012899,0.018677,0.024076
MAE,58.102365,108.765718,157.054191,202.924014
MSE,5281.59712,17918.398965,38622.326557,66061.330963


In [387]:
from sklearn.linear_model import Ridge

# L2-regularised linear model with a large penalty (alpha=10000). `tree` is
# reused as the model variable name even though the estimator here is linear.
tree = Ridge(alpha=10000).fit(X_train, Y_train)
y_hat = tree.predict(X_test)
err(Y_test, y_hat, horizon)

Unnamed: 0,0,1,2,3
RMSE,72.681432,133.864478,196.530623,257.027198
MAPE,0.006836,0.0129,0.018677,0.024076
MAE,58.107886,108.769783,157.057471,202.928085
MSE,5282.590616,17919.698558,38624.285805,66062.980347


In [388]:
from sklearn.linear_model import MultiTaskLasso

# Multi-output Lasso fitted jointly over all forecast steps; max_iter is
# raised to 10000 iterations. `tree` is reused as the model variable name even
# though the estimator here is linear.
tree = MultiTaskLasso(max_iter=10000).fit(X_train, Y_train)
y_hat = tree.predict(X_test)
err(Y_test, y_hat, horizon)

Unnamed: 0,0,1,2,3
RMSE,72.674714,133.858477,196.524242,257.022489
MAPE,0.006835,0.012899,0.018677,0.024076
MAE,58.102777,108.765106,157.052777,202.923586
MSE,5281.61407,17918.091795,38621.777727,66060.560035


Take the vector of errors and compute its mean and autocorrelation (-> 0 and higher possible).