## 1. Imports modules

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

## 2. Load the data

In [2]:
df = pd.read_excel('./data/data.xlsx', skiprows=0)
del df['Unnamed: 0']

In [3]:
df.columns

Index(['date', 'value', 'electricity demand',
       'generation of energy from wind sources', 'is_weekend',
       'code of the day', 'value lag24', 'value lag48', 'value lag72',
       'value lag96', 'value lag120', 'value lag144', 'value lag168',
       'value lag336', 'generation of energy from wind sources lag24',
       'generation of energy from wind sources lag48',
       'generation of energy from wind sources lag72',
       'generation of energy from wind sources lag96',
       'generation of energy from wind sources lag120',
       'generation of energy from wind sources lag144',
       'generation of energy from wind sources lag168',
       'generation of energy from wind sources lag336',
       'electricity demand lag24', 'electricity demand lag48',
       'electricity demand lag72', 'electricity demand lag96',
       'electricity demand lag120', 'electricity demand lag144',
       'electricity demand lag168', 'electricity demand lag336'],
      dtype='object')

In [4]:
df = df.set_index("date", drop=True)

In [5]:
df

Unnamed: 0_level_0,value,electricity demand,generation of energy from wind sources,is_weekend,code of the day,value lag24,value lag48,value lag72,value lag96,value lag120,...,generation of energy from wind sources lag168,generation of energy from wind sources lag336,electricity demand lag24,electricity demand lag48,electricity demand lag72,electricity demand lag96,electricity demand lag120,electricity demand lag144,electricity demand lag168,electricity demand lag336
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-01 01:00:00,117.00,14586,3938,1,0,98.00,121.03,121.22,125.60,113.52,...,5042,130,15048,15984,16093,16198,14063,13451,13482,16716
2018-01-01 02:00:00,113.59,14453,3876,1,0,82.00,121.03,120.00,120.42,113.00,...,5091,128,14295,15325,15383,15378,13525,12903,12995,16189
2018-01-01 03:00:00,97.00,13692,3897,1,0,76.14,121.03,119.60,116.30,105.00,...,4999,119,14110,14971,15229,15136,13204,12755,12587,16108
2018-01-01 04:00:00,89.00,13329,4091,1,0,74.70,121.03,119.60,116.30,111.39,...,4884,116,13961,14920,15160,15055,13243,12638,12276,16250
2018-01-01 05:00:00,75.00,13168,4197,1,0,73.78,121.03,121.22,119.20,117.54,...,4673,110,13910,15013,15372,15158,13319,12787,12228,16819
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-27 19:00:00,198.81,18254,5645,1,1,204.91,238.56,180.00,330.20,295.39,...,2653,197,17184,16568,16697,24065,24401,24650,20594,20903
2020-12-27 20:00:00,195.00,18242,5669,1,1,212.91,226.30,155.10,314.38,270.51,...,2611,235,17339,16608,16535,23724,24123,24361,20472,20810
2020-12-27 21:00:00,186.03,18021,5518,1,1,212.91,220.44,130.00,248.57,249.08,...,2435,250,17077,16381,16286,23029,23324,23697,20087,20090
2020-12-27 22:00:00,174.75,17370,5625,1,1,207.89,214.67,122.77,229.32,233.19,...,2266,252,16479,16050,16124,21560,21806,22177,19237,19173


## 3. Standardize the data 

In [11]:
target = "value"
target_mean = df[target].mean()
target_stdev = df[target].std()

for c in df.columns:
    mean = df[c].mean()
    stdev = df[c].std()

    df[c] = (df[c] - mean) / stdev
    

## 4. Implementation of MLP model

In [15]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split


y = df[[target]]
X = df.drop(columns=[target])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.0805, shuffle=False)

NameError: name 'target' is not defined

In [14]:
model = MLPRegressor(random_state=1, activation='identity',  learning_rate='constant', learning_rate_init=0.0003,
                     early_stopping=True,  validation_fraction=0.0917, verbose=False, power_t=0.5, batch_size=32, alpha=0.0005, shuffle=False,
                      max_iter=60, solver='adam',  hidden_layer_sizes=(128,2)).fit(X_train, y_train)


In [26]:
prediction = model.predict(X_test)
result = pd.DataFrame(y_test)
result["model forecast"] = prediction
result = result.sort_index()


for c in result.columns:
    result[c] = result[c] * target_stdev + target_mean


In [27]:
df_result

Unnamed: 0,date,value,Model forecast
0,2020-10-01 02:00:00,203.01,210.618177
1,2020-10-01 03:00:00,203.00,210.526457
2,2020-10-01 04:00:00,203.00,213.468515
3,2020-10-01 05:00:00,202.33,215.185313
4,2020-10-01 06:00:00,205.73,222.789610
...,...,...,...
2105,2020-12-27 19:00:00,198.81,170.737333
2106,2020-12-27 20:00:00,195.00,167.450032
2107,2020-12-27 21:00:00,186.03,160.609820
2108,2020-12-27 22:00:00,174.75,150.790470


In [28]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error

def calculate_metrics(df):
    return {'mae' : mean_absolute_error(df['value'], df['model forecast']),
            'rmse' : mean_squared_error(df['value'], df['model forecast']) ** 0.5,
            'r2' : r2_score(df['value'], df['model forecast']),
             'mape' : mean_absolute_percentage_error(df['value'], df['model forecast'])*100}


result_metrics = calculate_metrics(df_result)

In [29]:
result_metrics

{'mae': 16.93343772421987,
 'rmse': 21.61744232645578,
 'r2': 0.8624654892991134,
 'mape': 7.48362889170653}

## 5. Implementation of Prophet model

In [6]:
from prophet import Prophet

model = Prophet(weekly_seasonality=False, daily_seasonality=False, seasonality_mode='additive')

In [7]:
ex_features = ['electricity demand',
               'generation of energy from wind sources', 
               'is_weekend',
               'code of the day', 
               'value lag24', 
               'value lag48', 
               'value lag72',
               'value lag96',
               'value lag120', 
               'value lag144', 
               'value lag168',
               'value lag336', 
               'electricity demand lag24', 
               'electricity demand lag48',
               'electricity demand lag72', 
               'electricity demand lag96',
               'electricity demand lag120', 
               'electricity demand lag144',
               'electricity demand lag168', 
               'electricity demand lag336',
               'generation of energy from wind sources lag24',
               'generation of energy from wind sources lag48',
               'generation of energy from wind sources lag72',
               'generation of energy from wind sources lag96',
               'generation of energy from wind sources lag120',
               'generation of energy from wind sources lag144',
               'generation of energy from wind sources lag168',
               'generation of energy from wind sources lag336']


In [8]:
df = df.reset_index()

In [9]:
df.columns

Index(['date', 'value', 'electricity demand',
       'generation of energy from wind sources', 'is_weekend',
       'code of the day', 'value lag24', 'value lag48', 'value lag72',
       'value lag96', 'value lag120', 'value lag144', 'value lag168',
       'value lag336', 'generation of energy from wind sources lag24',
       'generation of energy from wind sources lag48',
       'generation of energy from wind sources lag72',
       'generation of energy from wind sources lag96',
       'generation of energy from wind sources lag120',
       'generation of energy from wind sources lag144',
       'generation of energy from wind sources lag168',
       'generation of energy from wind sources lag336',
       'electricity demand lag24', 'electricity demand lag48',
       'electricity demand lag72', 'electricity demand lag96',
       'electricity demand lag120', 'electricity demand lag144',
       'electricity demand lag168', 'electricity demand lag336'],
      dtype='object')

In [10]:
train_end = "2020-07-01 01:00:00"
test_start = "2020-10-01 01:00:00"

df_train = df.loc[:train_end]
df_test = df.loc[test_start:]

In [11]:
df_train

Unnamed: 0,date,value,electricity demand,generation of energy from wind sources,is_weekend,code of the day,value lag24,value lag48,value lag72,value lag96,...,generation of energy from wind sources lag168,generation of energy from wind sources lag336,electricity demand lag24,electricity demand lag48,electricity demand lag72,electricity demand lag96,electricity demand lag120,electricity demand lag144,electricity demand lag168,electricity demand lag336
0,2018-01-01 01:00:00,117.00,14586,3938,1,0,98.00,121.03,121.22,125.60,...,5042,130,15048,15984,16093,16198,14063,13451,13482,16716
1,2018-01-01 02:00:00,113.59,14453,3876,1,0,82.00,121.03,120.00,120.42,...,5091,128,14295,15325,15383,15378,13525,12903,12995,16189
2,2018-01-01 03:00:00,97.00,13692,3897,1,0,76.14,121.03,119.60,116.30,...,4999,119,14110,14971,15229,15136,13204,12755,12587,16108
3,2018-01-01 04:00:00,89.00,13329,4091,1,0,74.70,121.03,119.60,116.30,...,4884,116,13961,14920,15160,15055,13243,12638,12276,16250
4,2018-01-01 05:00:00,75.00,13168,4197,1,0,73.78,121.03,121.22,119.20,...,4673,110,13910,15013,15372,15158,13319,12787,12228,16819
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20195,2020-04-21 12:00:00,181.60,19980,1060,0,1,172.00,144.00,172.00,166.00,...,3335,1455,20020,15639,17621,19615,20008,20171,19316,18863
20196,2020-04-21 13:00:00,180.00,19877,1164,0,1,172.00,145.13,164.00,163.00,...,3262,1537,19907,15551,17439,19426,19778,20112,19341,18855
20197,2020-04-21 14:00:00,180.00,19469,1220,0,1,165.00,150.00,163.08,153.00,...,3459,1646,19559,15567,17013,19049,19159,19915,19129,18483
20198,2020-04-21 15:00:00,165.00,18880,1268,0,1,140.99,145.27,157.37,138.00,...,3663,1555,19040,15357,16360,18697,18852,19212,18568,18063


In [12]:
for feature in ex_features:
    model.add_regressor(feature)
    
model.fit(df_train[["date", "value"] + ex_features].rename(columns={"date": "ds", "value": "y"}))

forecast = model.predict(df_test[["date", "value"] + ex_features].rename(columns={"date": "ds"}))

forecast_ = forecast.loc[:, forecast.columns.intersection(['ds','yhat'])]
forecast_ = forecast_.rename(columns={"ds": "date"})
df_test_ =  df_test.loc[:, df_test.columns.intersection(['date','value'])]
new_df = pd.merge(df_test_, forecast_, how='left', on=['date'])




In [14]:
new_df

Unnamed: 0_level_0,value,yhat
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-04-21 17:00:00,160.00,158.111155
2020-04-21 18:00:00,162.28,159.347274
2020-04-21 19:00:00,140.67,165.231429
2020-04-21 20:00:00,166.61,182.578530
2020-04-21 21:00:00,174.95,192.659549
...,...,...
2020-12-27 19:00:00,198.81,136.465919
2020-12-27 20:00:00,195.00,134.094079
2020-12-27 21:00:00,186.03,127.796291
2020-12-27 22:00:00,174.75,117.228991


In [13]:
new_df = new_df.set_index("date")

for c in new_df.columns:
    new_df[c] = new_df[c] * target_stdev + target_mean
    
df_result = new_df

df_result = df_result.reset_index(drop=False)
df_result = df_result.rename(columns={"yhat": "model forecast"})

NameError: name 'target_stdev' is not defined

In [None]:

def calculate_metrics(df):
    return {'mae' : mean_absolute_error(df['value'], df['model forecast']),
            'rmse' : mean_squared_error(df['value'], df['model forecast']) ** 0.5,
            'r2' : r2_score(df['value'], df['model forecast']),
             'mape' : mean_absolute_percentage_error(df['value'], df['model forecast'])*100}


result_metrics = calculate_metrics(df_result)