## Univariate model for deployment

This notebook develops the datasets, persistence model functions, and univariate LSTM model used in V1 of the energy-dashboard.

In [37]:
import pandas as pd
import tensorflow as tf
from entsoe import EntsoePandasClient
import os
pd.set_option('min_rows', 100)

In [3]:
#download training dataset
train_path = '~/github-repos/energy-dashboard/data/training'

train_start = '20150101'
train_end = '20181231'

# The ENTSO-E token is read from the ENTSOE_API_KEY environment variable so that
# it is never stored in (or committed with) the notebook.
client = EntsoePandasClient(api_key=os.environ['ENTSOE_API_KEY'])

# Load for Spain ("ES") between the start and end timestamps, both given in UTC.
train = client.query_load("ES",
                         start=pd.Timestamp(f"{train_start}T0000", tz='UTC'),
                         end=pd.Timestamp(f"{train_end}T2300", tz='UTC'))

train.to_csv(os.path.join(train_path, 'training-2015-2018.csv'))


In [10]:
# Quick look at the download: row count, first five rows, last five rows,
# then summary statistics as the cell's displayed value.
print(len(train), train.head(), train.tail(), sep='\n')
train.describe()

35032
2015-01-01 01:00:00+01:00    24382.0
2015-01-01 02:00:00+01:00    22734.0
2015-01-01 03:00:00+01:00    21286.0
2015-01-01 04:00:00+01:00    20264.0
2015-01-01 05:00:00+01:00    19905.0
dtype: float64
2018-12-31 19:00:00+01:00    30653.0
2018-12-31 20:00:00+01:00    29735.0
2018-12-31 21:00:00+01:00    28071.0
2018-12-31 22:00:00+01:00    25801.0
2018-12-31 23:00:00+01:00    24455.0
dtype: float64


count    35032.000000
mean     28696.974252
std       4575.226062
min      18041.000000
25%      24807.000000
50%      28901.500000
75%      32192.000000
max      41015.000000
dtype: float64

In [11]:
# Count the missing values in the training series.
train.isnull().sum()

0

In [12]:
#download the testing dataset
test_path = '~/github-repos/energy-dashboard/data/test'

# Hold-out period: it must not overlap the 2015-2018 training range, and it
# matches the 'test-2019.csv' file name written below.
test_start = '20190101'
test_end = '20191231'

# The ENTSO-E token is read from the ENTSOE_API_KEY environment variable so that
# it is never stored in (or committed with) the notebook.
client = EntsoePandasClient(api_key=os.environ['ENTSOE_API_KEY'])

# Query with the test_* bounds (not the train_* ones) so the test set is a
# different period from the training set.
test = client.query_load("ES",
                         start=pd.Timestamp(f"{test_start}T0000", tz='UTC'),
                         end=pd.Timestamp(f"{test_end}T2300", tz='UTC'))

test.to_csv(os.path.join(test_path, 'test-2019.csv'))

### Persistence: 3-day moving average

In [38]:
def persistance_day_ma(series, num_days):
    """Return the last 24 values of a trailing moving average.

    The averaging window spans ``24 * num_days`` observations. A window needs
    at least 24 observations to produce a value; shorter windows yield NaN.

    Parameters
    ----------
    series : pandas.Series
        Observations to smooth (the notebook passes the hourly load series).
    num_days : int
        Window length, expressed in blocks of 24 observations.

    Returns
    -------
    pandas.Series
        The final 24 entries of the rolling mean, with their original index.
    """
    observations_per_window = 24 * num_days
    roller = series.rolling(
        window=observations_per_window,
        min_periods=24,
        closed='right',
    )
    smoothed = roller.mean()

    # Only the most recent 24 smoothed points are returned.
    return smoothed[-24:]
    
# Baseline: the final 24 values of the 3-day (72-observation) rolling mean of the training series.
persist_3_day = persistance_day_ma(train, 3)

In [39]:
# Display the 24 moving-average baseline values.
persist_3_day

2018-12-31 00:00:00+01:00    26834.541667
2018-12-31 01:00:00+01:00    26821.652778
2018-12-31 02:00:00+01:00    26808.763889
2018-12-31 03:00:00+01:00    26794.263889
2018-12-31 04:00:00+01:00    26778.625000
2018-12-31 05:00:00+01:00    26760.902778
2018-12-31 06:00:00+01:00    26734.958333
2018-12-31 07:00:00+01:00    26693.930556
2018-12-31 08:00:00+01:00    26644.805556
2018-12-31 09:00:00+01:00    26596.458333
2018-12-31 10:00:00+01:00    26552.333333
2018-12-31 11:00:00+01:00    26504.458333
2018-12-31 12:00:00+01:00    26453.388889
2018-12-31 13:00:00+01:00    26400.375000
2018-12-31 14:00:00+01:00    26354.263889
2018-12-31 15:00:00+01:00    26308.819444
2018-12-31 16:00:00+01:00    26260.402778
2018-12-31 17:00:00+01:00    26209.152778
2018-12-31 18:00:00+01:00    26172.263889
2018-12-31 19:00:00+01:00    26143.444444
2018-12-31 20:00:00+01:00    26103.069444
2018-12-31 21:00:00+01:00    26046.347222
2018-12-31 22:00:00+01:00    25982.083333
2018-12-31 23:00:00+01:00    25932

### Persistence: 3-day mean, hour by hour

In [41]:
# Keep the last 72 observations of the training series (three days at 24 rows per day).
series = train[-72:]

In [85]:
# Lay the 72 values out as a 3 x 24 table: one row per day-of-month,
# one column per hour-of-day (hours taken from the first 24 timestamps).
hours = series.index.hour[:24]
days = series.index.day.unique()
df = pd.DataFrame(
    data=series.values.reshape((3, 24)),
    index=days,
    columns=hours,
)

In [86]:
# Column-wise mean: the average value for each hour across the three day rows.
df.mean()

0     24981.000000
1     22844.666667
2     21424.666667
3     20651.333333
4     20397.666667
5     20548.666667
6     21433.666667
7     22945.333333
8     24520.666667
9     26813.666667
10    28710.000000
11    29041.333333
12    28527.666667
13    28450.000000
14    28021.000000
15    26942.000000
16    26268.666667
17    26535.666667
18    29066.333333
19    30048.333333
20    30198.666667
21    29672.666667
22    28099.333333
23    26240.333333
dtype: float64

In [84]:



# Drop the timezone from the index, re-localize it as CET with ambiguous or
# nonexistent wall-clock times set to NaT, and count the rows dated 2015-10-26.
# NOTE(review): this rebinds `df`, which another cell also assigns - confirm the intended run order.
df = train.tz_convert(None)
len(df.tz_localize('CET', ambiguous='NaT', nonexistent='NaT')['2015-10-26'])

24

In [60]:
# Count the rows dated 2018-03-25. The recorded output is 23, so a calendar day
# does not always hold 24 rows - relevant to any reshape into (days, 24).
len(train['2018-03-25'])

23

In [96]:
def persistance_MA_hourly(series, days):
    """Average a multi-day series position by position within the day.

    The values are laid out as ``days`` rows of 24 columns and each column is
    averaged, giving one mean per hour slot.

    Parameters
    ----------
    series : pandas.Series
        Exactly ``days * 24`` observations, oldest first.
    days : int
        Number of 24-observation days contained in ``series``.

    Returns
    -------
    pandas.Series
        24 means, indexed by the last 24 index labels of ``series``.

    Raises
    ------
    ValueError
        If ``series`` does not contain exactly ``days * 24`` observations
        (for example when the slice includes a 23- or 25-hour DST day).
    """
    expected = days * 24
    if len(series) != expected:
        raise ValueError(
            f"series must hold exactly {expected} values "
            f"({days} days x 24), got {len(series)}"
        )

    # One row per day, one column per position within the day.
    by_day = pd.DataFrame(series.values.reshape((days, 24)))
    mean = by_day.mean()
    # Label the result with the timestamps of the final 24 observations.
    mean.index = series.index[-24:]

    return mean
    
    

In [97]:
# Hour-by-hour mean over the last 72 observations (3 days) of the training series.
persistance_MA_hourly(train[-72:], 3)

<class 'pandas.core.frame.DataFrame'>


2018-12-31 00:00:00+01:00    24981.000000
2018-12-31 01:00:00+01:00    22844.666667
2018-12-31 02:00:00+01:00    21424.666667
2018-12-31 03:00:00+01:00    20651.333333
2018-12-31 04:00:00+01:00    20397.666667
2018-12-31 05:00:00+01:00    20548.666667
2018-12-31 06:00:00+01:00    21433.666667
2018-12-31 07:00:00+01:00    22945.333333
2018-12-31 08:00:00+01:00    24520.666667
2018-12-31 09:00:00+01:00    26813.666667
2018-12-31 10:00:00+01:00    28710.000000
2018-12-31 11:00:00+01:00    29041.333333
2018-12-31 12:00:00+01:00    28527.666667
2018-12-31 13:00:00+01:00    28450.000000
2018-12-31 14:00:00+01:00    28021.000000
2018-12-31 15:00:00+01:00    26942.000000
2018-12-31 16:00:00+01:00    26268.666667
2018-12-31 17:00:00+01:00    26535.666667
2018-12-31 18:00:00+01:00    29066.333333
2018-12-31 19:00:00+01:00    30048.333333
2018-12-31 20:00:00+01:00    30198.666667
2018-12-31 21:00:00+01:00    29672.666667
2018-12-31 22:00:00+01:00    28099.333333
2018-12-31 23:00:00+01:00    26240

### Train a simple univariate LSTM