# Predicting Energy Consumption

In the dataset, there are energy levels recorded for 15 different power plants for each hour in the day; i.e., there are 24 variables, in addition to ID columns, such as date and plant ID.

In the temperature dataset, there are temperatures recorded for 9 different stations for each hour in the day, i.e., there are 24 variables. 

This predicition model will simply attempt to predict loads based on loads from the other 23 hours, without considering temperatures as variables


In [109]:
# imports

import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense, Dropout
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns

from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic

In [65]:
df = pd.read_csv(r"C:\Users\Matthew\PycharmProjects\ISDS-7075-Project\final_project\datasets\transposed.csv")

  df = pd.read_csv(r"C:\Users\Matthew\PycharmProjects\ISDS-7075-Project\final_project\datasets\transposed.csv")


In [66]:
df = df.set_index('date')
df.head()

Unnamed: 0_level_0,S1,S2,S3,S4,S5,S6,S7,S8,S9,load,zone_id
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1/1/2015 1:00,45.0,55.0,47.0,53.0,54.0,51.0,53.0,54.0,52.0,66025,1
1/1/2015 2:00,41.0,55.0,45.0,51.0,52.0,51.0,52.0,53.0,53.0,64655,1
1/1/2015 3:00,40.0,54.0,44.0,49.0,50.0,49.0,51.0,50.0,51.0,63898,1
1/1/2015 4:00,39.0,50.0,39.0,45.0,46.0,48.0,47.0,49.0,43.0,64078,1
1/1/2015 5:00,43.0,48.0,39.0,43.0,42.0,49.0,47.0,44.0,39.0,65734,1


In [69]:
# Fixing data types to float

temp_cols = ['S1','S2','S3','S4','S5','S6','S7','S8','S9']

def floater(index):
    for i in index:
        df[index] = pd.to_numeric(df[index], errors='coerce')
        return df[index]
    
S1 = floater('S1')
S2 = floater('S2')
S3 = floater('S3')
S4 = floater('S4')
S5 = floater('S5')
S6 = floater('S6')
S7 = floater('S7')
S8 = floater('S8')
S9 = floater('S9')

df['S1'] = S1
df['S2'] = S2
df['S3'] = S3
df['S4'] = S4
df['S5'] = S5
df['S6'] = S6
df['S7'] = S7
df['S8'] = S8
df['S9'] = S9



In [94]:
# dfLoadHistory = dfRAWLoadHistory[(dfRAWLoadHistory.obsn != "total")&(~dfRAWLoadHistory['h1'].isnull())]

def trainer(index_to_stop):
    df_for_training = df[~df['load'].isnull()]
    df_for_training = df.iloc[0:index_to_stop,0:10].astype(float)
    
    return df_for_training

In [119]:
# calling trainer function for first training set

df_for_training = trainer(154800)
df_for_training.info()

<class 'pandas.core.frame.DataFrame'>
Index: 154800 entries, 1/1/2015 1:00 to 3/5/2016 0:00
Data columns (total 10 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   S1      154440 non-null  float64
 1   S2      154440 non-null  float64
 2   S3      154440 non-null  float64
 3   S4      154440 non-null  float64
 4   S5      154440 non-null  float64
 5   S6      154440 non-null  float64
 6   S7      154440 non-null  float64
 7   S8      154440 non-null  float64
 8   S9      154440 non-null  float64
 9   load    154800 non-null  float64
dtypes: float64(10)
memory usage: 13.0+ MB


In [120]:
# LTSM uses sigmoid and tanh that are sensitive to magnitude, so values need to be normalized
scaler = StandardScaler()
scaler = scaler.fit(df_for_training)
df_for_training_scaled = scaler.transform(df_for_training)


In [136]:
trainX = []
trainY = []


n_future = 7
n_past = 21



for i in range(n_past, len(df_for_training_scaled) - n_future + 1):
    trainX.append(df_for_training_scaled[i - n_past:i, 0:df_for_training.shape[1]])
    trainY.append(df_for_training_scaled[i + n_future - 1:i + n_future, 0])

trainX, trainY = np.array(trainX), np.array(trainY)

In [144]:
# define Autoencoder model

model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
model.add(LSTM(32, activation='relu', return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(trainY.shape[1]))

model.compile(optimizer='adam', loss='mse')
model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_24 (LSTM)              (None, 21, 64)            19200     
                                                                 
 lstm_25 (LSTM)              (None, 32)                12416     
                                                                 
 dropout_9 (Dropout)         (None, 32)                0         
                                                                 
 dense_13 (Dense)            (None, 1)                 33        
                                                                 
Total params: 31,649
Trainable params: 31,649
Non-trainable params: 0
_________________________________________________________________


In [145]:
# fit model
history = model.fit(x=trainX, y=trainY, epochs=1, batch_size=64, validation_split=0.2, verbose=1)



In [146]:
# Forecasting
# start with the last day in training set and predict future
n_future = 105
# forecast_period_dates = pd.date_range(list(train_dates)[-1], periods=n_future, freq='1d').tolist()

forecast = model.predict(trainX[-n_future:])



In [147]:
# Perform inverse transformation to rescale back to original range

forecast_copies = np.repeat(forecast, df_for_training.shape[1], axis=-1)
h1_future = scaler.inverse_transform(forecast_copies)[:,0]


print(h1_future)   

[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]
