In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

import math
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeRegressor

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from math import sqrt

from ngboost import NGBRegressor
from ngboost.scores import LogScore, CRPScore
from ngboost.distns import Exponential, Normal, LogNormal

import time

import properscoring as p

## Read and preprocess the dataset

In [None]:
# Load the raw measurements; the relative path is resolved against the
# notebook's working directory.
df = pd.read_csv('power_weather_data.csv')

# The CSV file MUST contain 'date' and 'Power' columns.
# Optional: additional weather columns.

In [None]:
# Parse the 'date' strings (month/day/year hour:minute) into pandas datetimes
# so that the later cells can extract calendar fields and filter by date.
df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y %H:%M')

In [None]:
# Calendar features derived from the timestamp. The vectorized `.dt` accessor
# yields the same values as a per-row `apply(lambda x: x.hour)` without
# calling a Python function for every row.
df['hour'] = df['date'].dt.hour
df['month'] = df['date'].dt.month

In [None]:
P = df['Power']

# Lagged copies of the target: shifting by k rows exposes, on each row, the
# 'Power' value observed k rows earlier. `keys` names the resulting columns.
PowerData = pd.concat(
    [P.shift(3), P.shift(2), P.shift(1)],
    axis=1,
    keys=['t-45', 't-30', 't-15'],
)

# Append the lag columns, then replace every NaN in the whole frame with 0
# (this includes the first rows of the lag columns, which have no history).
df = pd.concat([df, PowerData.reindex(df.index)], axis=1).fillna(0)

## Time horizons

In [None]:
# Each entry is a [start_date, end_date] pair ('YYYY-MM-DD'); one dataset
# (and later one model) is built per entry.
weeks = [['2018-03-01', '2019-03-15']]

# Number of days at the END of each dataset that are held out from training.
val_days = 14

# Rows per day. NOTE(review): presumably 4 samples per hour x 24 hours,
# i.e. 15-minute data -- confirm against the CSV's sampling rate.
n_points_day = 4 * 24

## Set the dataframes

In [None]:
# One dataframe per [start, end] window listed in `weeks`.
dfs = []

for w in weeks:

    # `pd.to_datetime` is used for parsing because `datetime` is not imported
    # in this notebook. The resulting Timestamps compare directly against the
    # datetime64 'date' column.
    w_start = pd.to_datetime(w[0] + " 00:00", format='%Y-%m-%d %H:%M')
    w_end = pd.to_datetime(w[1] + " 23:59", format='%Y-%m-%d %H:%M')

    # Both bounds are strict: a sample stamped exactly 00:00 on the start day
    # is excluded, and everything before 23:59 on the end day is included.
    dfs.append(df[(df['date'] > w_start) & (df['date'] < w_end)])

n_sets = len(dfs)

## Train Test Split

In [None]:
# Per-dataset containers; index i always refers to dfs[i].
X_train_ = []
X_test_ = []
y_train_ = []
y_test_ = []

x_scaler = []
y_scaler = []

t_train = []
t_test = []

# Columns that must never reach the model as inputs: the target and the
# timestamp. The timestamp column of this notebook is 'date'; 'Time' is also
# listed so a CSV carrying such a column still works (`errors='ignore'` skips
# names that are absent).
non_feature_cols = ['Power', 'date', 'Time']

# Number of rows held out at the end of every dataset.
n_test = int(n_points_day * val_days)

for i in range(len(dfs)):

    # Chronological split: the last `n_test` rows form the test set.
    train = dfs[i].iloc[:-n_test]
    test = dfs[i].iloc[-n_test:]

    X_tr = train.drop(columns=non_feature_cols, errors='ignore').values
    X_t = test.drop(columns=non_feature_cols, errors='ignore').values

    y_tr = train['Power'].values
    y_t = test['Power'].values

    # Scalers are fitted on the training part only, so no information from
    # the test period leaks into the scaling.
    x_sc = MinMaxScaler()
    y_sc = MinMaxScaler()
    x_sc.fit(X_tr)
    y_sc.fit(y_tr.reshape(-1, 1))
    x_scaler.append(x_sc)
    y_scaler.append(y_sc)

    X_train_.append(x_sc.transform(X_tr))
    X_test_.append(x_sc.transform(X_t))
    y_train_.append(y_sc.transform(y_tr.reshape(-1, 1)))
    y_test_.append(y_sc.transform(y_t.reshape(-1, 1)))

    # Timestamps matching the rows of the train / test arrays.
    t_train.append(train['date'].values)
    t_test.append(test['date'].values)

In [None]:
# Working aliases used by the training and evaluation cells. These bind the
# SAME list objects as the underscore-suffixed names (no copy is made).
X_train = X_train_
X_test = X_test_
y_train = y_train_
y_test = y_test_

## NGBoost

In [None]:
# Base learner handed to NGBRegressor (as `Base=`) in the training cell:
# a shallow regression tree.
# NOTE(review): random_state=None means no fixed seed is set here, so
# repeated runs are not guaranteed to give identical models -- confirm
# whether reproducibility is required.
tree_learner = DecisionTreeRegressor(
    criterion="friedman_mse",
    min_samples_split=2,
    min_samples_leaf=1,
    min_weight_fraction_leaf=0.0,
    max_depth=3,
    splitter="best",
    random_state=None,
)

In [None]:
# Fit one NGBoost model per dataset and report the mean wall-clock time.
ngbs = []

start = time.time()

for i in range(len(dfs)):

    X_train_i = X_train[i]
    y_train_i = y_train[i]

    # The scaled target is stored as an (n, 1) column; ravel() hands the
    # regressor a flat 1-D target.
    ngb = NGBRegressor(Base=tree_learner, n_estimators=1000)
    ngb = ngb.fit(X_train_i, y_train_i.ravel())

    ngbs.append(ngb)

end = time.time()
# Average fitting time per dataset, in seconds.
print((end - start)/(len(dfs)))

## Evaluation

In [None]:
def PICP_func(y, lower, upper):
    """Prediction Interval Coverage Probability.

    Fraction of observations that fall inside their prediction interval,
    bounds included.

    Parameters
    ----------
    y : array-like
        Observed values. Flattened before use, so an (n, 1) column is fine.
    lower, upper : array-like
        Lower / upper interval bounds, one value per observation.

    Returns
    -------
    float
        Share of observations with ``lower <= y <= upper`` (between 0 and 1).

    Raises
    ------
    ValueError
        If ``y`` is empty, or if ``lower`` / ``upper`` do not hold exactly one
        bound per observation.
    """
    y = np.asarray(y, dtype=float).ravel()
    lower = np.asarray(lower, dtype=float).ravel()
    upper = np.asarray(upper, dtype=float).ravel()

    if y.size == 0:
        raise ValueError("y must contain at least one observation")
    # A length mismatch means the bounds are not aligned with the
    # observations; fail loudly instead of comparing misaligned values.
    if lower.size != y.size or upper.size != y.size:
        raise ValueError(
            "lower and upper must have one value per observation "
            f"(got {lower.size} and {upper.size} for {y.size} observations)"
        )

    inside = (lower <= y) & (y <= upper)
    return float(np.count_nonzero(inside)) / y.size

def PINAW_func(y, lower, upper):
    """Prediction Interval Normalized Average Width.

    Mean width of the intervals (``upper - lower``) divided by the range of
    the observations (``max(y) - min(y)``).

    ``lower`` and ``upper`` must support element-wise subtraction
    (e.g. NumPy arrays).
    """
    mean_width = np.mean(upper - lower)
    observed_range = np.max(y) - np.min(y)
    return mean_width / observed_range

In [None]:
# Per-dataset results; index i always refers to dfs[i].
y = []
y_hat = []
upper_hat = []
lower_hat = []

RMSE_all = []
CRPS_all = []

Z_95 = 1.96  # half-width of a two-sided 95 % Gaussian interval, in standard deviations
N_LAGS = 3   # lag features 't-45', 't-30', 't-15'; assumed to be the LAST three feature columns


def _predict_loc(model, row):
    """Predicted mean (in scaled-target units) for a single feature row."""
    return float(np.ravel(model.pred_dist(row.reshape(1, -1)).params['loc'])[0])


def _as_lag_feature(y_scaled, x_sc, y_sc, col):
    """Re-express a scaled-target prediction in the scaling of feature column `col`.

    The target and the lag columns are scaled by different MinMaxScaler
    instances, so a prediction is first mapped back to physical units and
    then forward through the feature scaler (X_scaled = X * scale_ + min_).
    """
    power = (y_scaled - y_sc.min_[0]) / y_sc.scale_[0]
    return power * x_sc.scale_[col] + x_sc.min_[col]


for i in range(len(dfs)):

    ngb = ngbs[i]
    x_sc = x_scaler[i]
    y_sc = y_scaler[i]

    # --- Multi-step-ahead features -------------------------------------
    # Work on a copy of THIS dataset's test features so that X_test stays
    # untouched and the cell can be re-run safely.
    X_eval = X_test[i].copy()
    n_rows, n_features = X_eval.shape
    lag_cols = list(range(n_features - N_LAGS, n_features))

    # The first N_LAGS rows keep their observed lags; from then on every lag
    # feature is replaced by the model's own earlier prediction.
    # `history` holds the last N_LAGS predictions, oldest first, matching
    # the column order t-45, t-30, t-15.
    history = [_predict_loc(ngb, X_eval[j]) for j in range(min(N_LAGS, n_rows))]
    for j in range(N_LAGS, n_rows):
        for col, y_prev in zip(lag_cols, history):
            X_eval[j, col] = _as_lag_feature(y_prev, x_sc, y_sc, col)
        history = history[1:] + [_predict_loc(ngb, X_eval[j])]
    # --- end of multi-step ahead ---------------------------------------

    # Predictive distribution on the held-out test period.
    y_dists = ngb.pred_dist(X_eval)

    mean = np.asarray(y_dists.params['loc'])
    std = np.asarray(y_dists.params['scale'])

    # Back to physical units. The mean goes through the full inverse
    # transform; a standard deviation is a spread, so it is only rescaled
    # (the MinMax offset must not be added to it).
    mean = y_sc.inverse_transform(mean.reshape(-1, 1)).flatten()
    std = std.flatten() / y_sc.scale_[0]

    real_y_test = y_sc.inverse_transform(y_test[i]).flatten()

    lower = mean - Z_95 * std
    upper = mean + Z_95 * std

    y_hat.append(mean)
    y.append(real_y_test)
    lower_hat.append(lower)
    upper_hat.append(upper)

    # Deterministic metrics
    MAE = mean_absolute_error(real_y_test, mean)
    # sqrt(MSE) instead of `squared=False`, which newer scikit-learn
    # releases no longer accept.
    RMSE = sqrt(mean_squared_error(real_y_test, mean))
    MBE = np.mean(mean - real_y_test)
    print(f'MAE: {MAE:.3f}')
    print(f'RMSE: {RMSE:.3f}')
    print(f'MBE: {MBE:.3f}')

    # Probabilistic metrics (computed over the full bound arrays)
    PICP = PICP_func(real_y_test, lower, upper)
    PINAW = PINAW_func(real_y_test, lower, upper)
    C = p.crps_gaussian(real_y_test, mu=mean, sig=std)
    CRPS = C.mean()
    print(f'PICP: {PICP:.3f}')
    print(f'PINAW: {PINAW:.3f}')
    print(f'CRPS: {CRPS:.3f}')
    print('\n')

    RMSE_all.append(RMSE)
    CRPS_all.append(CRPS)

## Plotting

In [None]:
fig, ax = plt.subplots(figsize=(20, 6))

i = 0  # index of the dataset (entry of `dfs`) to plot

x = list(range(len(y[i])))

# Observations as a black line, predictive mean as a blue line,
# and the band between the lower and upper bounds shaded in blue.
ax.plot(x, y[i], 'k', label='Observations')
ax.plot(x, y_hat[i], 'b-', label='Predictive mean')
ax.fill_between(x, lower_hat[i], upper_hat[i], alpha=0.5, fc='b', ec='None',
                label='Prediction interval')

ax.legend()
ax.set_xlabel('Time')
ax.set_ylabel('Power')
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(20, 6))

i = 0  # index of the dataset (entry of `dfs`) to plot

k = 0  # index of a specific day within the evaluated period
j = list(range((n_points_day*k), n_points_day*(k+1)))  # row indices of that day

x = list(range(len(y[i][j])))

# Observations as a black line, predictive mean as a blue line,
# and the band between the lower and upper bounds shaded in blue.
ax.plot(x, y[i][j], 'k', label='Observations')
ax.plot(x, y_hat[i][j], 'b-', label='Predictive mean')
ax.fill_between(x, lower_hat[i][j], upper_hat[i][j], alpha=0.5, fc='b', ec='None',
                label='Prediction interval')

ax.legend()
ax.set_xlabel('Time')
ax.set_ylabel('Power')
plt.show()