In [None]:
import math
import time
from datetime import datetime
from math import sqrt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import properscoring as prscore
import torch
import torch.nn.functional as F
import torch.optim as optim
from pandas import concat
from sklearn import preprocessing
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from torch import nn, optim
from torch.autograd import Variable
from torch.optim.optimizer import Optimizer

## Read and preprocess the dataset

In [None]:
# Load the raw time series.
# The CSV file MUST contain a 'date' field and a 'Power' field;
# weather data columns are optional.
df = pd.read_csv(filepath_or_buffer='power_weather_data.csv')

In [None]:
# Parse the 'date' strings (month/day/year hour:minute) into pandas datetimes.
df = df.assign(date=pd.to_datetime(df['date'], format='%m/%d/%Y %H:%M'))

In [None]:
# Calendar features, taken with the vectorised `.dt` accessor rather than a
# per-row Python lambda (requires 'date' to already be a datetime column).
df['hour'] = df['date'].dt.hour
df['month'] = df['date'].dt.month

In [None]:
# Lagged copies of the target used as autoregressive inputs: the column
# labelled 't-45' is the series shifted by 3 rows, 't-30' by 2 and 't-15' by 1.
P = df['Power']

lag_rows = {'t-45': 3, 't-30': 2, 't-15': 1}
PowerData = pd.DataFrame({label: P.shift(rows) for label, rows in lag_rows.items()})

# Every missing value in the frame (including the first rows, which have no
# history to shift in) is replaced by 0.
df = pd.concat([df, PowerData], axis=1).fillna(0)

## Time horizons

In [None]:
# Each entry is a [first_day, last_day] pair ('YYYY-MM-DD'); one dataset is
# built per entry. Despite the name, an entry may span any period.
weeks = [
    ['2018-03-01', '2019-03-15'],
]

# Number of days at the end of every dataset that are held out for testing.
val_days = 14

# Samples per day: 4 per hour over 24 hours.
n_points_day = 24 * 4

## Set the dataframes

In [None]:
# One DataFrame per [first_day, last_day] entry of `weeks`.
dfs = []

for w in weeks:

    # Bounds are parsed with pandas so they are Timestamps that compare
    # directly against the datetime 'date' column.
    w_start = pd.to_datetime(w[0] + " 00:00", format='%Y-%m-%d %H:%M')
    w_end = pd.to_datetime(w[1] + " 23:59", format='%Y-%m-%d %H:%M')

    # Both comparisons are strict: a row stamped exactly at 00:00 of the
    # first day is left out of the window.
    dfs.append(df[(df['date'] > w_start) & (df['date'] < w_end)])

n_sets = len(dfs)

## Train Test Split

In [None]:
# Scaled feature / target arrays, one entry per dataset in `dfs`.
X_train_ = []
X_test_ = []
y_train_ = []
y_test_ = []

# Fitted scalers, kept so that predictions can be mapped back to original units.
x_scaler = []
y_scaler = []

# Time stamps of the train / test rows.
t_train = []
t_test = []

# Columns that are never model inputs: the target and the raw time stamps.
# 'date' holds datetimes after parsing and cannot be fed to MinMaxScaler;
# 'Time' is dropped as before when the CSV provides it.
NON_FEATURE_COLS = ['Power', 'date', 'Time']

# The last `val_days` days of every dataset form the hold-out set.
n_holdout = int(n_points_day * val_days)

for i in range(len(dfs)):

    train = dfs[i].iloc[:-n_holdout]
    test = dfs[i].iloc[-n_holdout:]

    # errors='ignore' lets the cell run whether or not a 'Time' column exists.
    X_tr = train.drop(columns=NON_FEATURE_COLS, errors='ignore').values
    X_t = test.drop(columns=NON_FEATURE_COLS, errors='ignore').values

    y_tr = train['Power'].values
    y_t = test['Power'].values

    # Scalers are fitted on the training part only and then applied to both parts.
    x_sc = MinMaxScaler()
    y_sc = MinMaxScaler()
    x_sc.fit(X_tr)
    y_sc.fit(y_tr.reshape(-1, 1))
    x_scaler.append(x_sc)
    y_scaler.append(y_sc)

    X_train_.append(x_sc.transform(X_tr))
    X_test_.append(x_sc.transform(X_t))
    # NOTE(review): the 0.001 offset presumably keeps scaled targets strictly
    # positive; it is not removed before inverse_transform later on - confirm.
    y_train_.append(y_sc.transform(y_tr.reshape(-1, 1)) + 0.001)
    y_test_.append(y_sc.transform(y_t.reshape(-1, 1)) + 0.001)

    # Use the 'Time' column when present (original behaviour), otherwise 'date'.
    time_col = 'Time' if 'Time' in dfs[i].columns else 'date'
    t_train.append(train[time_col].values)
    t_test.append(test[time_col].values)

In [None]:
# Convert the scaled NumPy arrays into float32 tensors, one per dataset.
X_train = [torch.from_numpy(X_train_[k]).float() for k in range(len(dfs))]
X_test = [torch.from_numpy(X_test_[k]).float() for k in range(len(dfs))]

# Targets are stored as column vectors; squeeze drops the size-1 dimensions.
y_train = [torch.squeeze(torch.from_numpy(y_train_[k]).float()) for k in range(len(dfs))]
y_test = [torch.squeeze(torch.from_numpy(y_test_[k]).float()) for k in range(len(dfs))]

## LUBE (Lower Upper Bound Estimation)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

n_neurons = 50  # width of the hidden layer of `Net`
eta = 30        # exponent scale of the coverage penalty in the CWC loss


class Net(nn.Module):
    """Single-hidden-layer network that outputs two values per sample.

    Both outputs pass through a sigmoid, so they lie in (0, 1). The rest of
    the notebook reads column 0 as the upper bound and column 1 as the lower
    bound of the prediction interval.
    """

    def __init__(self, n_features):
        """Create the layers for inputs with `n_features` columns."""
        super().__init__()
        self.fc1 = nn.Linear(n_features, n_neurons)
        self.fc2 = nn.Linear(n_neurons, 2)

    def forward(self, x):
        """Map a (batch, n_features) tensor to a (batch, 2) tensor of bounds."""
        hidden = F.relu(self.fc1(x))
        return torch.sigmoid(self.fc2(hidden))


In [None]:
def CWC(y_pred, y_true, eta_penalty=None, target_coverage=0.95):
    """Coverage-width criterion used as the training loss.

    The value is ``(MPIW / R) * (1 + exp(-eta * (PICP - target_coverage)))``
    where

    * PICP is the fraction of targets strictly inside ``(lower, upper)``,
    * MPIW is the square root of the mean absolute interval width
      (same formula as the original implementation),
    * R is the range ``max(y_true) - min(y_true)``.

    Args:
        y_pred: tensor of shape (n, 2); column 0 is the upper bound and
            column 1 the lower bound.
        y_true: tensor with n target values (any shape that flattens to n).
        eta_penalty: exponent scale of the coverage penalty. Defaults to the
            notebook-level ``eta``.
        target_coverage: nominal coverage the penalty is centred on.

    Returns:
        A 0-dim tensor on the device of ``y_pred``.

    Raises:
        ZeroDivisionError: if there are no samples.
    """
    if eta_penalty is None:
        eta_penalty = eta  # notebook-level default

    # The bounds are detached, as in the original: no gradient reaches the
    # network, which is fine because simulated annealing is gradient-free.
    bounds = y_pred.detach().float()
    upper = bounds[:, 0]
    lower = bounds[:, 1]

    # A leaf copy of the targets that requires grad keeps the returned loss
    # attached to a graph, so existing callers invoking `.backward()` on it
    # keep working. Computation stays on the device of `y_pred`.
    targets = y_true.detach().to(bounds.device).float().reshape(-1).requires_grad_(True)

    # PICP: PI coverage probability (strict inequalities on both sides).
    covered = (lower < targets) & (targets < upper)
    picp = covered.sum().item() / covered.numel()

    # MPIW term and target range used for normalisation.
    mpiw = torch.sqrt(torch.mean(torch.abs(upper - lower)))
    target_range = torch.max(targets) - torch.min(targets)

    penalty = 1 + math.exp(-eta_penalty * (picp - target_coverage))
    return (mpiw / target_range) * penalty



In [None]:
class UniformSampler(object):
    """Draws tensors of uniformly distributed random values.

    Args:
        minval: lower end of the sampling range.
        maxval: upper end of the sampling range (inclusive for 'int'/'long').
        dtype: one of 'float', 'int' or 'long'; any other key raises KeyError.
        cuda: when True the CUDA tensor types are used instead of the CPU ones.
    """

    def __init__(self, minval, maxval, dtype='float', cuda=False):
        self.minval = minval
        self.maxval = maxval
        self.cuda = cuda
        self.dtype_str = dtype

        # Pick the tensor-type namespace once, then look the type up by name.
        tensor_types = torch.cuda if cuda else torch
        type_by_name = {
            'float': tensor_types.FloatTensor,
            'int': tensor_types.IntTensor,
            'long': tensor_types.LongTensor,
        }
        self.dtype = type_by_name[dtype]

    def sample(self, size):
        """Return a new tensor of shape `size` filled with uniform samples."""
        kind = self.dtype_str
        if kind == 'float':
            return self.dtype(*size).uniform_(self.minval, self.maxval)
        if kind in ('int', 'long'):
            # random_ excludes its upper argument, hence the + 1.
            return self.dtype(*size).random_(self.minval, self.maxval + 1)
        raise Exception("unknown dtype")


class GaussianSampler(object):
    """Draws tensors of normally distributed random values.

    Args:
        mu: mean of the normal distribution.
        sigma: standard deviation of the normal distribution.
        dtype: one of 'float', 'int' or 'long'; any other key raises KeyError.
        cuda: when True the CUDA tensor types are used instead of the CPU ones.
    """

    def __init__(self, mu, sigma, dtype='float', cuda=False):
        self.sigma = sigma
        self.mu = mu
        self.cuda = cuda
        self.dtype_str = dtype

        # Pick the tensor-type namespace once, then look the type up by name.
        tensor_types = torch.cuda if cuda else torch
        type_by_name = {
            'float': tensor_types.FloatTensor,
            'int': tensor_types.IntTensor,
            'long': tensor_types.LongTensor,
        }
        self.dtype = type_by_name[dtype]

    def sample(self, size):
        """Return a new tensor of shape `size` filled with normal samples.

        Samples are always drawn as floats; for 'int' and 'long' the float
        block is cast to the requested tensor type afterwards.
        """
        float_type = torch.cuda.FloatTensor if self.cuda else torch.FloatTensor
        block = float_type(*size).normal_(self.mu, self.sigma)

        if self.dtype_str in ('int', 'long'):
            return block.type(self.dtype)
        return block


class SimulatedAnnealing(Optimizer):
    """Gradient-free optimizer that perturbs parameters and applies a Metropolis test.

    On every step each parameter tensor of a group is rescaled to unit norm
    and shifted by noise drawn from the group's sampler. The perturbed
    parameters are kept when the loss decreases, or otherwise with
    probability ``exp((old - new) / tau)``; if rejected, the previous
    parameters are restored. The temperature ``tau`` only enters this
    acceptance test.

    Args:
        params: iterable of parameters or of parameter-group dicts.
        sampler: object with a ``sample(size)`` method returning a tensor.
        tau0: initial temperature.
        anneal_rate: decay rate of the smooth (non-hard) schedule.
        min_temp: lower limit for the temperature.
        anneal_every: the temperature is updated whenever the iteration
            counter is a positive multiple of this value.
        hard: if True use ``tau <- hard_rate * tau``, otherwise
            ``tau <- tau0 * exp(-anneal_rate * iteration)``.
        hard_rate: multiplicative factor of the hard schedule.
    """

    def __init__(self, params, sampler, tau0=5.0, anneal_rate=0.0003,
                 min_temp=1e-5, anneal_every=10, hard=True, hard_rate=0.95):
        defaults = dict(sampler=sampler, tau0=tau0, tau=tau0, anneal_rate=anneal_rate,
                        min_temp=min_temp, anneal_every=anneal_every,
                        hard=hard, hard_rate=hard_rate, iteration=0)
        super(SimulatedAnnealing, self).__init__(params, defaults)

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable): A closure that reevaluates the model
                and returns the loss. Required.

        Returns:
            The loss of the parameters that are in place after the step.

        Raises:
            Exception: if no closure is given.
        """
        if closure is None:
            raise Exception("loss closure is required to do SA")

        # Loss of the currently accepted parameters; updated after every group
        # so each group is judged against the state left by the previous one.
        current_loss = closure()

        for group in self.param_groups:
            # the sampler provides the random perturbations
            sampler = group['sampler']

            # detached copies to restore from if the move is rejected
            backups = [p.detach().clone() for p in group['params']]

            for p in group['params']:
                # anneal tau if it matches the requirements; note that the
                # iteration counter advances once per parameter tensor
                if group['iteration'] > 0 and group['iteration'] % group['anneal_every'] == 0:
                    if not group['hard']:
                        # smoother annealing: consider using this over hard annealing
                        rate = -group['anneal_rate'] * group['iteration']
                        group['tau'] = np.maximum(group['tau0'] * np.exp(rate),
                                                  group['min_temp'])
                    else:
                        # hard annealing
                        group['tau'] = np.maximum(group['hard_rate'] * group['tau'],
                                                  group['min_temp'])

                # The sampler may create its noise on another device or with
                # another dtype than the parameter; align it before adding.
                random_perturbation = sampler.sample(p.data.size()).to(
                    device=p.data.device, dtype=p.data.dtype)

                # Rescale to unit norm; an all-zero tensor is left untouched
                # because dividing by its norm would fill it with NaN.
                norm = torch.norm(p.data)
                if norm > 0:
                    p.data = p.data / norm
                p.data.add_(random_perturbation)
                group['iteration'] += 1

            # re-evaluate the loss with the perturbed params; if the move is
            # rejected, swap the saved params back in
            loss_perturbed = closure()
            current_loss, is_swapped_back = self.anneal(current_loss, loss_perturbed,
                                                        group['tau'])
            if is_swapped_back:
                for p, backup in zip(group['params'], backups):
                    p.data = backup

        # Returned only after every parameter group has been processed.
        return current_loss

    def anneal(self, loss, loss_perturbed, tau):
        """Apply the Metropolis criterion.

        Returns:
            ``(loss, is_swapped_back)``: the loss to keep, and True when the
            perturbed parameters were rejected and must be reverted.
        """
        if loss_perturbed.data < loss.data:
            # strictly better: always accept
            return loss_perturbed, False

        # not better: accept with probability exp((old - new) / tau)
        ap = torch.exp((loss - loss_perturbed) / tau)
        random = np.random.rand()
        print("old = ", loss.data, "| new = ", loss_perturbed.data,
              " | ap = ", ap.data, " | tau = ", tau, " | r = ", random)

        if ap.data > random:
            return loss_perturbed, False

        return loss, True
        

In [None]:
def closure():
    """Return the CWC loss of the current `net` on the current training set.

    Reads the notebook globals `net`, `X_train_i` and `y_train_i`, which the
    training loop below rebinds for every dataset.
    """
    # Simulated annealing never uses gradients, so no autograd graph is needed.
    with torch.no_grad():
        return CWC(net(X_train_i), y_train_i)


N_EPOCHS = 1000  # annealing steps per dataset

# Resolved once; it is the same for every dataset.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

t_loss = []  # loss history of every dataset
nets = []    # trained network of every dataset

start = time.time()

for i in range(len(dfs)):

    X_train_i = X_train[i].to(device)
    y_train_i = y_train[i].to(device)
    X_test_i = X_test[i].to(device)
    y_test_i = y_test[i].to(device)

    net = Net(X_train[i].shape[1]).to(device)

    # The sampler has to create its noise on the device that holds the
    # parameters it perturbs.
    # Alternative: sampler = UniformSampler(minval=-0.5, maxval=0.5)
    sampler = GaussianSampler(mu=0, sigma=1, cuda=(device.type == 'cuda'))
    optimizer = SimulatedAnnealing(net.parameters(), sampler=sampler)

    ite = np.arange(N_EPOCHS, dtype=float)
    loss_history = []

    # The optimizer returns the loss of the parameters it ends the step with,
    # which is the loss "before" the next step. Reusing it logs the same curve
    # as re-evaluating the network each epoch, with one fewer forward pass per
    # epoch. No zero_grad()/backward() calls: the optimizer reads no gradients.
    current_loss = closure()
    for epoch in range(N_EPOCHS):
        loss_history.append(current_loss.item())  # .item() also works for CUDA tensors
        current_loss = optimizer.step(closure)

    loss_all = np.array(loss_history)
    t_loss.append(loss_all)
    nets.append(net)


end = time.time()
# average training time per dataset, in seconds
print((end - start)/len(dfs))

## Evaluation

In [None]:
def RMSE(y_pred, y_test):
    """Root-mean-square error between two equally shaped arrays."""
    return sqrt(((y_pred - y_test) ** 2).mean())


def PICP_func(y_true, lower, upper):
    """Prediction-interval coverage probability: share of targets within [lower, upper]."""
    return float(np.mean((y_true >= lower) & (y_true <= upper)))


def PINAW_func(y_true, lower, upper):
    """Prediction-interval normalised average width: mean width divided by the target range."""
    return float(np.mean(upper - lower) / (np.max(y_true) - np.min(y_true)))


def _predict_bounds(net, X):
    """Run `net` on `X` without autograd and return the output as a NumPy array.

    The input is moved to the device that holds the network parameters, so
    CPU tensors can be evaluated with a network that lives on the GPU.
    """
    net_device = next(net.parameters()).device
    with torch.no_grad():
        return net(X.to(net_device)).cpu().numpy()


def _interval_midpoint(net, x_row):
    """Midpoint of the two outputs predicted for one feature row."""
    out = _predict_bounds(net, x_row.unsqueeze(0))
    return (out[0, 0] + out[0, 1]) / 2


def _to_original_units(scaler, values):
    """Undo the target scaling and return a flat array."""
    return scaler.inverse_transform(values.reshape(-1, 1)).flatten()


RMSE_all = []
CRPS_all = []

for i in range(len(dfs)):

    net = nets[i]

    y_pred_train = _predict_bounds(net, X_train[i])
    y_train_i = y_train[i].detach().cpu().numpy()

    # For multi-step ahead prediction: from the fourth test row on, the last
    # three feature columns are overwritten in place with the interval
    # midpoints predicted for the three preceding rows.
    y_45 = _interval_midpoint(net, X_test[i][0])
    y_30 = _interval_midpoint(net, X_test[i][1])
    y_15 = _interval_midpoint(net, X_test[i][2])
    for j in range(3, X_test[i].shape[0]):
        X_test[i][j, -3] = float(y_45)
        X_test[i][j, -2] = float(y_30)
        X_test[i][j, -1] = float(y_15)
        y_pred_j = _interval_midpoint(net, X_test[i][j])
        y_45, y_30, y_15 = y_30, y_15, y_pred_j
    # end of multi-step ahead

    y_pred_test = _predict_bounds(net, X_test[i])
    y_test_i = y_test[i].detach().cpu().numpy()

    # Column 0 holds the upper bound, column 1 the lower bound.
    real_y_train = _to_original_units(y_scaler[i], y_train_i)
    real_y_test = _to_original_units(y_scaler[i], y_test_i)

    upper_train = _to_original_units(y_scaler[i], y_pred_train.T[0])
    lower_train = _to_original_units(y_scaler[i], y_pred_train.T[1])

    upper = _to_original_units(y_scaler[i], y_pred_test.T[0])
    lower = _to_original_units(y_scaler[i], y_pred_test.T[1])

    # Lower bounds below the threshold are set to exactly zero.
    lower[lower < 10e-6] = 0

    # Point forecast = interval midpoint; the half-width is read as 1.96 sigma.
    mean = (upper+lower)/2
    std = (mean - lower)/1.96

    # Deterministic metrics
    MAE = mean_absolute_error(real_y_test, mean)
    # Uses the RMSE helper above instead of `mean_squared_error(squared=False)`,
    # whose `squared` argument is no longer accepted by recent scikit-learn.
    rmse_i = RMSE(mean, real_y_test)
    MBE = np.mean(mean - real_y_test)
    print(f'MAE: {MAE:.3f}')
    print(f'RMSE: {rmse_i:.3f}')
    print(f'MBE: {MBE:.3f}')

    # Probabilistic metrics
    PICP = PICP_func(real_y_test, lower, upper)
    PINAW = PINAW_func(real_y_test, lower, upper)
    C = prscore.crps_gaussian(real_y_test, mu=mean, sig=std)
    CRPS = C.mean()
    print(f'PICP: {PICP:.3f}')
    print(f'PINAW: {PINAW:.3f}')
    print(f'CRPS: {CRPS:.3f}')
    print('\n')

    # Keep the per-dataset scores in the lists declared above.
    RMSE_all.append(rmse_i)
    CRPS_all.append(CRPS)