In [None]:
import pandas as pd
import numpy as np

import modelUtils as mu

import warnings
# NOTE(review): with no category given this silences every warning for the rest
# of the notebook, including pandas' chained-assignment and deprecation warnings.
# Consider narrowing the filter to the specific warnings that are known noise.
warnings.filterwarnings('ignore')

# Read the preprocessed load data; the first CSV column becomes the index and
# is parsed as dates (later cells rely on `df.index.month` / `df.index.day`).
df = pd.read_csv(
    'load_prepro.csv',
    index_col=0,
    parse_dates=True,
)

# Summary statistics of every numeric column as a quick sanity check.
df.describe()

In [None]:
# Smooth the raw load with an exponentially weighted moving average (span=4).
df['load_ewma'] = df['KWH'].ewm(span=4).mean()

# Visual check: raw vs. smoothed load over the first seven days of June.
first_week_of_june = (df.index.month == 6) & (df.index.day <= 7)
df.loc[first_week_of_june, ['KWH', 'load_ewma']].plot()

# From here on only the smoothed series is used, so the raw column is removed.
# NOTE: this makes the cell non-idempotent -- re-running it fails because 'KWH' is gone.
df.drop(columns=['KWH'], inplace=True)

In [None]:
# Number of lagged-load feature columns (LoadTm1 .. LoadTm{lag}) to create.
lag = 12

# Positional snapshot of the smoothed load, taken once instead of once per row.
load_ewma_values = df['load_ewma'].to_numpy(dtype=float)

for l in range(1, lag + 1):
    # LoadTm{l}[i] is the smoothed load l rows before row i.
    #
    # The previous implementation filled each cell with `df[col][i] = ...`,
    # which is chained assignment (it does not write through under pandas
    # Copy-on-Write) and relied on integer keys being treated as positions on
    # a datetime-indexed Series. A single whole-column assignment avoids both
    # and removes the O(len(df) * lag) Python loop.
    #
    # np.roll keeps the values the old loop produced: for the first l rows the
    # index i - l is negative, so they receive values wrapped around from the
    # END of the series rather than NaN.
    # NOTE(review): that wrap-around feeds the last rows of the data set into
    # the first rows' features -- confirm this is acceptable, or switch to
    # `.shift(l)` and drop the resulting NaN rows.
    df[f'LoadTm{l}'] = np.roll(load_ewma_values, l)

In [None]:
# Case-03 scenario data: only the datetime, pv and load columns are read, with
# the first of the selected columns used as a parsed date index.
case3 = pd.read_csv(
    '../data/case03/case03_01.csv',
    index_col=0,
    usecols=['datetime','pv', 'load'],
    parse_dates=True,
)

# Same calendar window as the hold-out set built below: December, days 1-7.
december_first_week = (case3.index.month == 12) & (case3.index.day <= 7)
case3_hb = case3[december_first_week]

In [None]:
lag = 12

# Everything outside December is used for training; the first seven days of
# December form the hold-out set. December days 8 onward land in neither set.
mask1 = df.index.month == 12
train_df = df.loc[~mask1]
hb_df = df.loc[mask1 & (df.index.day <= 7)]

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, KFold

from sklearn.neural_network import MLPRegressor

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA

In [None]:
# Largest smoothed load over all rows of `df` (training and hold-out alike);
# used as the normalising constant in `nrmse`.
maxLoad = max(df['load_ewma'])


def nrmse(y_true, y_pred):
    """Return the RMSE between `y_true` and `y_pred`, divided by `maxLoad`.

    `maxLoad` is read from module scope at call time.
    """
    rmse = mean_squared_error(y_true, y_pred) ** 0.5
    return rmse / maxLoad

In [None]:
# Training target is the smoothed load; every other column is a feature.
y = train_df['load_ewma']
X = train_df.drop(columns=['load_ewma'])

print(f'# features = {X.shape[1]}')

In [None]:
# Columns that are standardised: the three weather inputs plus every
# lagged-load column. All remaining columns pass through unscaled.
to_scale = ['temp', 'irradiance', 'wind']
to_scale.extend(f'LoadTm{step}' for step in range(1, lag + 1))

preprocessor = ColumnTransformer(
    transformers=[
        ('scaler', StandardScaler(), to_scale),
    ],
    remainder='passthrough',
)

# Search pipeline: scale -> PCA (n_components=0.95) -> MLP.
# The 'regressor' step is a placeholder that the grid below replaces.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('pca', PCA(n_components=.95)),
        ('regressor', MLPRegressor()),
    ]
)

# A single grid; only the initial learning rate is actually varied.
search_space = [
    {
        'regressor': [MLPRegressor()],
        'regressor__hidden_layer_sizes': [[21, 21, 21, 21]],
        'regressor__activation': ['logistic'],
        'regressor__solver': ['adam'],
        'regressor__batch_size': [256],
        'regressor__max_iter': [400],
        'regressor__learning_rate_init': [1e-2, 1e-3, 1e-4, 1e-5],
    }
]

# 10-fold cross-validation without shuffling, using all available cores.
cv = KFold(n_splits=10)
search = GridSearchCV(
    estimator=pipeline,
    param_grid=search_space,
    cv=cv,
    n_jobs=-1,
)

In [None]:
#results = search.fit(X, y)
#results.best_params_

In [None]:
# Final forecaster. This rebinds `pipeline` (previously the search pipeline)
# and uses step names 'pre' / 'pca' / 'reg' with three hidden layers of 42
# units, i.e. a different architecture from the 4 x 21 grid defined above.
# NOTE(review): no random_state is set on the MLP, so weights -- and every
# score computed below -- change from run to run.
forecaster_mlp = MLPRegressor(
    hidden_layer_sizes=[42, 42, 42],
    activation='logistic',
    solver='adam',
    batch_size=256,
    learning_rate_init=0.001,
    max_iter=400,
)

pipeline = Pipeline(
    steps=[
        ('pre', preprocessor),
        ('pca', PCA(n_components=.95)),
        ('reg', forecaster_mlp),
    ]
)

In [None]:
# Hold-out target and features, split the same way as the training data.
y_hb = hb_df['load_ewma']
X_hb = hb_df.drop(columns=['load_ewma'])

# Fit on the training set, then predict the hold-out week using the lag
# features as stored in X_hb (no feedback of predictions).
y_pred = pipeline.fit(X, y).predict(X_hb)

nrmse(y_hb, y_pred)

### Recurrent predict

In [None]:
def recurrent_predict(model, X, lag=lag):
    """Forecast row by row, feeding earlier predictions back in as lag features.

    For row ``i`` the columns ``LoadTm1 .. LoadTm{min(i, lag)}`` are overwritten
    with the predictions already made for rows ``i-1, i-2, ...`` before the
    model is asked to predict row ``i``. Lags that reach back before the first
    row keep the values already stored in ``X``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``; it is called with a one-row
        DataFrame that has the same columns as ``X``.
    X : pandas.DataFrame
        Feature rows in chronological order, containing ``LoadTm{k}`` columns.
        **Modified in place** -- pass a copy to keep the original lag values.
    lag : int, optional
        Number of lag columns to overwrite. Defaults to the value of the
        module-level ``lag`` at the time this function is defined.

    Returns
    -------
    numpy.ndarray
        One prediction per row of ``X``.

    Raises
    ------
    KeyError
        If a ``LoadTm{k}`` column that needs to be overwritten is missing.
    """
    window_size = len(X)
    pred = np.zeros(window_size)

    # Row i can use at most i earlier predictions, so only lags
    # 1..min(lag, window_size - 1) are ever written; resolve just those columns.
    n_feedback = max(min(lag, window_size - 1), 0)
    lag_positions = [
        X.columns.get_loc(f'LoadTm{k}') for k in range(1, n_feedback + 1)
    ]

    for i in range(window_size):
        # Write predictions back with one positional setter. The former
        # `X.loc[:, col][i] = ...` was chained assignment: depending on the
        # pandas version it updated a temporary copy, leaving X untouched and
        # silently turning this into an ordinary one-step-ahead prediction.
        for k, col_pos in enumerate(lag_positions[:i], start=1):
            X.iloc[i, col_pos] = pred[i - k]

        # `iloc[[i]]` yields a one-row frame that keeps index, column names and
        # per-column dtypes (rebuilding the row from `.values` can lose dtypes).
        row_pred = np.asarray(model.predict(X.iloc[[i]])).ravel()
        # Take the scalar explicitly; assigning a 1-element array to a scalar
        # slot is deprecated in recent NumPy.
        pred[i] = row_pred[0]

    return pred

In [None]:
# recurrent_predict writes into the frame it is given, so hand it a copy and
# leave X_hb with its original lag values.
new_hb = X_hb.copy()
pred = recurrent_predict(model=pipeline, X=new_hb)

In [None]:
# Normalised RMSE of the recurrent forecast against the hold-out target.
nrmse(y_true=y_hb, y_pred=pred)

In [None]:
# Hold-out target next to the recurrent forecast, sharing the hold-out index.
plot = pd.DataFrame(index=X_hb.index).assign(y_true=y_hb, y_pred=pred)
plot.plot();

In [None]:
# Sensitivity of the forecast error to PV generation: for k = 0 .. kPV-1,
# subtract k times the PV series from the hold-out load and score the
# (unchanged) recurrent forecast against that net load.
kPV = 20
res = np.zeros(kPV)

# Reset the working frame BEFORE forecasting; the previous order forecast on
# the frame left over from the earlier cell and only then replaced it.
new_hb = X_hb.copy()
pred = recurrent_predict(pipeline, new_hb)

# PV samples paired with the hold-out rows by position: the first len(y_hb)
# rows of case3, which is what the former `case3.pv[i]` lookup selected.
# `.iloc` makes the positional intent explicit instead of relying on integer
# keys falling back to positions on a datetime-indexed Series.
# NOTE(review): `case3_hb` (December, days 1-7) is defined earlier but never
# used; it is probably the series intended here -- confirm and switch if so.
pv_window = case3['pv'].iloc[:len(y_hb)].to_numpy()
hold_out_load = y_hb.to_numpy()

for k in range(kPV):
    # A fresh array per k: nothing is mutated through `.values`, and the
    # training target `y` used by `pipeline.fit(X, y)` is no longer overwritten.
    net_load = hold_out_load - k * pv_window
    res[k] = nrmse(net_load, pred)

pd.DataFrame(res, index=[f'{k}PV' for k in range(0,kPV)], columns=['nrmse']).plot(kind='bar');

In [None]:
# Persist the fitted pipeline through the project helper in modelUtils.
# NOTE(review): the storage format and how `path` / `name` are combined are
# defined in modelUtils.save_model, which is not visible here -- confirm there.
mu.save_model(model=pipeline, path='forecaster', name='forecaster')