In [None]:



# # Forecasting with classical algorithms
# - SVR
# - RF
# - Lasso
#
# In-depth evaluation and testing of the models, supplemented by graphs, plots and tables of MAPE for different numbers of time steps ahead.
#
# The problem framing:
# forecast periods p = {6, 12, 18, 24, 30, 36} hours
#
# - predict the period as either:
#     - a single step of p
#     - multiple steps: the consecutive step values up to p
#

In [None]:




# Load IPython's autoreload extension and switch it to mode 2, so that edits
# to imported modules are picked up without restarting the kernel.
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

In [None]:




import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:




from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error

In [None]:




from utils import *

In [None]:




# Default (width, height) in inches for every figure drawn below.
plt.rc('figure', figsize=(20, 10))

In [None]:







# ### Load preprocessed data into dataframe

In [None]:




# Read the preprocessed series, using the parsed 'Date' column as the index.
filename = 'processed_series.csv'
df = pd.read_csv(filename, index_col='Date', parse_dates=True, low_memory=False)

# Give the data column a fixed name for the rest of the notebook.
df.columns = ['Value']

print(df.shape)
df.head()

In [None]:




# Summary statistics of the loaded series.
df.describe()


# ### Resample to 6 hours
# Resampling to 6-hour periods simplifies the framing to forecasting 1, 2, 3, ..., 6 steps ahead.

In [None]:




# Average the series over 6-hour bins, keeping a one-column DataFrame.
df_six_hr = df[['Value']].resample('6H').mean()

In [None]:




# Sanity-check the size and the first rows of the 6-hourly frame.
print('DF shape:', df_six_hr.shape)
df_six_hr.head()

In [None]:




# Summary statistics after resampling to 6-hour means.
df_six_hr.describe()


# ### Transformations
#
# Scale using StandardScaler

In [None]:




# Standardise the 6-hourly values; the fitted scaler is kept because later
# cells pass it to the evaluation and plotting helpers.
# NOTE(review): the scaler is fit on the whole series, including the period
# later used for testing -- consider fitting on the training period only.
scaler = StandardScaler().fit(df_six_hr)
scaled = scaler.transform(df_six_hr)

In [None]:




# Wrap the scaled array in a frame with the same index and columns as the
# unscaled 6-hourly data.
df_scaled = pd.DataFrame(scaled, index=df_six_hr.index, columns=df_six_hr.columns)
df_scaled.head()

In [None]:




# Summary statistics of the standardised series.
df_scaled.describe()

In [None]:







# ### Supervised learning framing
#
# Extract supervised learning data

In [None]:




# Supervised framing on 6-hour rows: 8 lagged inputs cover the previous
# 48 hours, and a single output is requested.
n_inputs = 8
n_outputs = 1
forecast_column = 'Value'

# frame_supervised_data comes from utils; per the unpacking here it returns
# the framed table together with the names of its label columns.
df_data, label_columns = frame_supervised_data(
    df_scaled,
    n_inputs,
    n_outputs,
    forecast_columns=[forecast_column],
)
print(df_data.columns)
df_data.head()

In [None]:




# Labels are the columns named in label_columns ...
y = df_data.loc[:, label_columns]

# ... and the features are every remaining column.
X = df_data.drop(columns=label_columns)

X.shape, y.shape


# #### Split into train and test sets
#
# Using the first 40 years for training, and remaining 5 years for testing.

In [None]:




split_year = '2014'

# Train on every row strictly before the split year and test on the split
# year onwards. Slicing both sides with the same year string
# (X[:'2014'] / X['2014':]) is inclusive at both ends under pandas partial
# string indexing, so all 2014 rows would appear in both sets and leak test
# data into training. X and y come from the same frame, so one mask serves
# both.
in_train = X.index < split_year

X_train = X[in_train].values
X_test = X[split_year:].values

# Labels are flattened to 1-D by taking the first (only) label column.
y_train = y[in_train].values[:, 0]
y_test = y[split_year:].values[:, 0]

print('X_train shape', X_train.shape)
print('y_train shape', y_train.shape)
print('X_test shape', X_test.shape)
print('y_test shape', y_test.shape)

In [None]:




# Bundle the four arrays in the order the evaluate_model calls below receive them.
dataset = X_train, y_train, X_test, y_test

# Number of steps handed to evaluate_model (6 rows of 6 hours = 36 hours).
n_steps = 6


# ### Lasso

In [None]:




    lasso = Lasso(alpha = 0.00005, max_iter = 3000)

In [None]:




# evaluate_model is imported from utils; per the unpacking here it returns a
# metrics object and the model's predictions. The metrics are displayed below.
lasso_metrics, lasso_pred = evaluate_model(lasso, dataset, scaler, n_steps)
lasso_metrics

In [None]:


# Plot the LASSO evaluation metrics.
lasso_metrics.plot()

In [None]:




# Compare LASSO predictions with the test labels (visualize_pred is a utils helper).
visualize_pred(y_test, lasso_pred, 'LASSO Model', y, split_year, scaler)

In [None]:


# Extended LASSO prediction plot (visualize_pred_ext is a utils helper).
visualize_pred_ext(y_test, lasso_pred, 'LASSO Model', y, split_year, scaler)

In [None]:







# ### SVR

In [None]:




# Support vector regressor; kernel and remaining settings are left at the
# scikit-learn defaults.
svr = SVR(C=8, epsilon=0.02)

In [None]:




# evaluate_model is imported from utils; per the unpacking here it returns a
# metrics object and the model's predictions. The metrics are displayed below.
svr_metrics, svr_pred = evaluate_model(svr, dataset, scaler, n_steps)
svr_metrics

In [None]:


# Plot the SVR evaluation metrics.
svr_metrics.plot()

In [None]:




# Compare SVR predictions with the test labels (visualize_pred is a utils helper).
visualize_pred(y_test, svr_pred, 'SVR Model', y, split_year, scaler)

In [None]:


# Extended SVR prediction plot (visualize_pred_ext is a utils helper).
visualize_pred_ext(y_test, svr_pred, 'SVR Model', y, split_year, scaler)


#
#
#
#
#
# ### Random Forest Regressor

In [None]:




# Random forest with 100 trees. The seed is fixed so that the fitted forest,
# and therefore the reported metrics and plots, are identical on every
# Restart & Run All; without it each run gives slightly different numbers.
rf = RandomForestRegressor(n_estimators=100, random_state=42)

In [None]:




# evaluate_model is imported from utils; per the unpacking here it returns a
# metrics object and the model's predictions. The metrics are displayed below.
rf_metrics, rf_pred = evaluate_model(rf, dataset, scaler, n_steps)
rf_metrics

In [None]:


# Plot the random forest evaluation metrics.
rf_metrics.plot()

In [None]:




# Compare random forest predictions with the test labels (visualize_pred is a utils helper).
visualize_pred(y_test, rf_pred, 'RF Model', y, split_year, scaler)

In [None]:


# Extended random forest prediction plot (visualize_pred_ext is a utils helper).
visualize_pred_ext(y_test, rf_pred, 'RF Model', y, split_year, scaler)

In [None]:







# ### Results summary
# MAPE values

In [None]:




# Model labels and their metrics objects, kept in the same order.
names = ['LASSO', 'SVR', 'RF']
results = [lasso_metrics, svr_metrics, rf_metrics]

# Pull the MAPE values out of each model's metrics.
mape_results = [model_metrics.MAPE for model_metrics in results]

In [None]:


# Place the per-model MAPE values side by side, one column per model.
summary = pd.concat(mape_results, axis='columns')
summary.columns = names

In [None]:




# Show the summary with one row per model.
summary.transpose()

In [None]:




# Final comparison figure: one line per model. A title and a y-axis label are
# set so the plot can be read on its own; the trailing semicolon suppresses
# the text repr in notebook output.
summary.plot(title='MAPE comparison: LASSO vs SVR vs RF').set_ylabel('MAPE');