In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import time
from datetime import datetime

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

import scipy.stats as stats

import properscoring as prscore

import statsmodels.api as sm
import statsmodels.formula.api as smf

## Read and preprocess the dataset

In [None]:
# The CSV MUST contain a 'date' column and a 'Power' column.
# Optional: weather data in additional columns.
df = pd.read_csv('power_weather_data.csv')

# Fail fast with a readable message rather than an opaque KeyError in a later cell.
missing_cols = [col for col in ('date', 'Power') if col not in df.columns]
if missing_cols:
    raise KeyError(
        f"power_weather_data.csv is missing required column(s): {missing_cols}"
    )

In [None]:
# Turn the textual timestamps into datetime values. The layout is given
# explicitly (month/day/year hour:minute) so pandas does not have to infer it.
date_format = '%m/%d/%Y %H:%M'
df['date'] = pd.to_datetime(df['date'], format=date_format)

In [None]:
# Calendar features taken from the timestamp column.
# The vectorised `.dt` accessor yields the same values as a per-row lambda
# without calling back into Python for every row. It requires 'date' to
# already have a datetime dtype.
df['hour'] = df['date'].dt.hour
df['month'] = df['date'].dt.month

In [None]:
# Autoregressive inputs: the three previous Power readings of every row.
# The column labels suggest a 15-minute sampling interval — TODO confirm.
P = df['Power']

lag_steps = {'t-45': 3, 't-30': 2, 't-15': 1}
PowerData = pd.DataFrame(
    {label: P.shift(steps) for label, steps in lag_steps.items()}
)

# The lag columns are appended on the right, so they end up as the last
# three columns of the frame, in the order t-45, t-30, t-15.
df = pd.concat([df, PowerData.reindex(df.index)], axis=1)

# Every NaN in the frame becomes 0. This covers the undefined lags of the
# first rows, but also any other missing value the CSV may contain.
df = df.fillna(0)

## Time horizons

In [None]:
# One [start_date, end_date] pair ('YYYY-MM-DD') per dataset to build.
weeks = [
    ['2018-03-01', '2019-03-15'],
]

# Number of days held out at the end of each date range.
val_days = 14

# Rows per day: 4 readings per hour over 24 hours.
n_points_day = 24 * 4

## Set the dataframes

In [None]:
# One DataFrame per configured date range.
dfs = []

for w in weeks:

    # Span the whole calendar days: from 00:00 of the start date to 23:59 of
    # the end date.
    w_start = datetime.strptime(w[0] + " 00:00", '%Y-%m-%d %H:%M')
    w_end = datetime.strptime(w[1] + " 23:59", '%Y-%m-%d %H:%M')

    # Both bounds are inclusive so the 00:00 reading of the first day is kept.
    in_range = (df['date'] >= w_start) & (df['date'] <= w_end)
    dfs.append(df[in_range])

n_sets = len(dfs)

## Train Test Split

In [None]:
# Per-dataset containers; index i corresponds to dfs[i].
X_train_, X_test_ = [], []
y_train_, y_test_ = [], []

# Fitted scalers are kept so predictions can be mapped back to real units.
x_scaler, y_scaler = [], []

# Time stamps of the train / test rows.
t_train, t_test = [], []

# Number of trailing rows reserved for testing.
n_holdout = int(n_points_day * val_days)

for i in range(n_sets):

    # Chronological split: everything but the last n_holdout rows is training data.
    train = dfs[i].iloc[:-n_holdout]
    test = dfs[i].iloc[-n_holdout:]

    # Features are every column except the target and the time stamp.
    non_feature_cols = ['Power', 'date']
    X_tr = train.drop(non_feature_cols, axis=1).values
    X_t = test.drop(non_feature_cols, axis=1).values

    y_tr = train['Power'].values
    y_t = test['Power'].values

    # Scalers are fitted on the training part only and then applied to both
    # parts. (StandardScaler would be a drop-in alternative, but it is not
    # imported in this notebook.)
    x_sc = MinMaxScaler()
    y_sc = MinMaxScaler()

    X_tr_scaled = x_sc.fit_transform(X_tr)
    # The target is reshaped to a single column because scalers need 2-D input.
    y_tr_scaled = y_sc.fit_transform(y_tr.reshape(-1, 1))

    x_scaler.append(x_sc)
    y_scaler.append(y_sc)

    X_train_.append(X_tr_scaled)
    X_test_.append(x_sc.transform(X_t))
    y_train_.append(y_tr_scaled)
    y_test_.append(y_sc.transform(y_t.reshape(-1, 1)))

    t_train.append(train['date'].values)
    t_test.append(test['date'].values)

In [None]:
# Expose the scaled splits under their final names. These are aliases of the
# same list objects, not copies.
X_train, X_test = X_train_, X_test_
y_train, y_test = y_train_, y_test_

## Quantile Regression

In [None]:
# Fitted quantile-regression results, one entry per dataset:
# the median plus the 2.5 % and 97.5 % quantiles (bounds of a 95 % band).
models_50, models_2_5, models_97_5 = [], [], []

start = time.time()

for i in range(n_sets):
    # The scaled features are passed as given; no constant column is added here.
    endog, exog = y_train[i], X_train[i]

    model_50 = sm.QuantReg(endog, exog).fit(q=0.5)
    model_2_5 = sm.QuantReg(endog, exog).fit(q=0.025)
    model_97_5 = sm.QuantReg(endog, exog).fit(q=0.975)

    models_50.append(model_50)
    models_2_5.append(model_2_5)
    models_97_5.append(model_97_5)

end = time.time()

# Average wall-clock fitting time per dataset, in seconds.
seconds_per_set = (end - start) / n_sets
print(seconds_per_set)

## Evaluation

In [None]:
def PICP_func(y, lower, upper):
    """Prediction Interval Coverage Probability.

    Parameters
    ----------
    y : sequence of observed values.
    lower, upper : sequences indexable by position, giving the interval
        bounds that belong to each entry of ``y``.

    Returns
    -------
    float
        Share of observations with ``lower[i] <= y[i] <= upper[i]``
        (both bounds count as covered).

    Raises
    ------
    ZeroDivisionError
        If ``y`` is empty.
    """
    covered = sum(
        1 for idx, observed in enumerate(y)
        if lower[idx] <= observed <= upper[idx]
    )
    return covered / len(y)

def PINAW_func(y, lower, upper):
    """Prediction Interval Normalized Average Width.

    Parameters
    ----------
    y : array-like of observed values.
    lower, upper : arrays of interval bounds; they must support
        element-wise subtraction (``upper - lower``).

    Returns
    -------
    The mean interval width divided by the range of ``y`` (max minus min).
    """
    mean_width = np.mean(upper - lower)
    observed_range = np.max(y) - np.min(y)
    return mean_width / observed_range

In [None]:
# Per-dataset results in real (unscaled) units.
y = []
y_hat = []
upper_hat = []
lower_hat = []

# Per-dataset metric values, filled in the loop below.
MAE_all = []
RMSE_all = []
MBE_all = []
CRPS_all = []

for i in range(n_sets):

    model_50 = models_50[i]
    model_2_5 = models_2_5[i]
    model_97_5 = models_97_5[i]

    # Work on a copy: the recursive forecast below overwrites the lag columns,
    # and editing X_test[i] in place would change the inputs on every re-run.
    X_test_i = X_test[i].copy()
    y_test_i = y_test[i]

    # For multi-step ahead prediction.
    # The last three feature columns are treated as the lags t-45, t-30, t-15.
    # The first three test rows keep their observed lags; from row 3 on, each
    # row's lags are replaced by the median predictions of the three rows before it.
    # NOTE(review): predictions are in y_scaler units while the lag columns were
    # scaled by x_scaler; this assumes both mappings coincide — confirm.
    y_first = model_50.predict(X_test_i[:3])

    y_3 = y_first[3-3]
    y_2 = y_first[3-2]
    y_1 = y_first[3-1]
    for j in range(3, X_test_i.shape[0]):
        X_test_i[j, -3] = y_3
        X_test_i[j, -2] = y_2
        X_test_i[j, -1] = y_1
        # Predict on a one-row 2-D slice and take the scalar, so a plain number
        # (not a length-1 array) is written into the lag cells next iteration.
        y_pred_j = model_50.predict(X_test_i[j:j+1])[0]
        y_3, y_2, y_1 = y_2, y_1, y_pred_j
    # end of multi-step ahead

    # `mean` holds the median (q=0.5) forecast; lower/upper are the 2.5 % and
    # 97.5 % quantile forecasts on the same recursively built inputs.
    mean = model_50.predict(X_test_i)
    lower = model_2_5.predict(X_test_i)
    upper = model_97_5.predict(X_test_i)

    # Back to real units. The target scaler was fitted on a single column,
    # so it is given (n, 1) input here as well.
    mean = y_scaler[i].inverse_transform(mean.reshape(-1, 1)).flatten()
    lower = y_scaler[i].inverse_transform(lower.reshape(-1, 1)).flatten()
    upper = y_scaler[i].inverse_transform(upper.reshape(-1, 1)).flatten()

    real_y_test = y_scaler[i].inverse_transform(y_test_i)
    real_y_test = real_y_test.flatten()

    y_hat.append(mean)
    y.append(real_y_test)
    lower_hat.append(lower)
    upper_hat.append(upper)

    # Deterministic metrics
    MAE = mean_absolute_error(real_y_test, mean)
    # The square root is taken explicitly because the `squared=False` keyword
    # is not available in every scikit-learn release.
    RMSE = np.sqrt(mean_squared_error(real_y_test, mean))
    MBE = np.mean(mean - real_y_test)
    print(f'MAE: {MAE:.3f}')
    print(f'RMSE: {RMSE:.3f}')
    print(f'MBE: {MBE:.3f}')

    # Probabilistic metrics
    PICP = PICP_func(real_y_test, lower, upper)
    PINAW = PINAW_func(real_y_test, lower, upper)
    # Gaussian CRPS with sigma taken as a quarter of the band width, i.e. the
    # 95 % band is read as roughly +/- 2 sigma around the median.
    C = prscore.crps_gaussian(real_y_test, mu=mean, sig=((upper-lower)/4))
    CRPS = C.mean()
    print(f'PICP: {PICP:.3f}')
    print(f'PINAW: {PINAW:.3f}')
    print(f'CRPS: {CRPS:.3f}')
    print('\n')

    # Keep the per-dataset metrics available after the loop.
    MAE_all.append(MAE)
    RMSE_all.append(RMSE)
    MBE_all.append(MBE)
    CRPS_all.append(CRPS)