In [1]:
# importing libraries for data preprocessing
import pandas as pd 
import numpy as np
from sklearn.preprocessing import MinMaxScaler

#importing libraries for neural network model
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM

# import libraries for plotting
import cufflinks as cf
import plotly.offline
from  plotly.offline import plot_mpl
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

# import libraries for error metrics
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# data preprocessing
# reading dataset
data = pd.read_csv("time_series_30min_singleindex.csv")

data['utc_timestamp'] = pd.to_datetime(data['utc_timestamp'])
data['utc_timestamp'] = data['utc_timestamp'].dt.date

In [3]:
# making column datetime
data['utc_timestamp'] = pd.to_datetime(data['utc_timestamp'])

data = data.drop_duplicates(subset='utc_timestamp', keep='first')

# setting inddex
data.set_index('utc_timestamp', inplace=True)
data.head()

Unnamed: 0_level_0,cet_cest_timestamp,CY_load_actual_entsoe_transparency,CY_load_forecast_entsoe_transparency,CY_wind_onshore_generation_actual,FR_load_actual_tso,FR_load_forecast_tso,FR_solar_generation_actual,FR_wind_onshore_generation_actual,GB_EAW_load_actual_tso,GB_GBN_load_actual_entsoe_transparency,...,GB_UKM_wind_onshore_capacity,GB_UKM_wind_onshore_generation_actual,GB_UKM_wind_onshore_profile,IE_load_actual_entsoe_transparency,IE_load_forecast_entsoe_transparency,IE_wind_onshore_generation_actual,IE_sem_load_actual_entsoe_transparency,IE_sem_load_forecast_entsoe_transparency,IE_sem_price_day_ahead,IE_sem_wind_onshore_generation_actual
utc_timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-03-31,2005-04-01 01:30:00+0200,,,,,,,,28871.0,,...,1371.0,,,,,,,,,
2005-04-01,2005-04-01 02:00:00+0200,,,,,,,,30340.0,,...,1371.0,,,,,,,,,
2005-04-02,2005-04-02 02:00:00+0200,,,,,,,,28910.0,,...,1371.0,,,,,,,,,
2005-04-03,2005-04-03 02:00:00+0200,,,,,,,,27491.0,,...,1371.0,,,,,,,,,
2005-04-04,2005-04-04 02:00:00+0200,,,,,,,,27054.0,,...,1371.0,,,,,,,,,


In [4]:
# remvoing unwanted data
data = data['GB_EAW_load_actual_tso']
data = data[: int((len(data)*75)/100) ]
data = data[data.index.day == 1]
data = data.astype('float64')
data.head()

utc_timestamp
2005-04-01    30340.0
2005-05-01    25607.0
2005-06-01    24709.0
2005-07-01    24497.0
2005-08-01    22478.0
Name: GB_EAW_load_actual_tso, dtype: float64

In [5]:
# chainging column name
data.columns = ['Energy Production']
data.head()

utc_timestamp
2005-04-01    30340.0
2005-05-01    25607.0
2005-06-01    24709.0
2005-07-01    24497.0
2005-08-01    22478.0
Name: GB_EAW_load_actual_tso, dtype: float64

In [6]:
print('Min', np.min(data))
print('Max', np.max(data))

Min 19188.0
Max 36727.0


In [7]:
# normalizing the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(np.array(data).reshape(-1, 1))

In [8]:
#plotting the data 
data.iplot(title="power", xTitle='TIme Stamp',yTitle='Demand')

In [9]:
print('Min', np.min(scaled))
print('Max', np.max(scaled))

Min 0.0
Max 1.0000000000000002


In [10]:
len(scaled)
# !pip3 install pyramid-arima


127

In [11]:
#create train/test datasets

train_size = int(len(scaled)*.7)
test_size = int(len(scaled - train_size))
train, test = scaled[:train_size, :], scaled[train_size:, :]

print(len(train), len(test))

88 39


In [12]:
#changing data into required window format  
def create_dataset(dataset, look_back = 1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[(i+look_back), 0])
        
    return np.array(dataX), np.array(dataY)

In [13]:
look_back = 12
Xtrain, Ytrain = create_dataset(train, look_back)
Xtest, Ytest = create_dataset(test, look_back)

In [14]:
Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], Xtrain.shape[1], 1))
Xtest = np.reshape(Xtest, (Xtest.shape[0], Xtest.shape[1], 1))
print(Xtrain.shape)
print(Xtest.shape)

(75, 12, 1)
(26, 12, 1)


In [15]:
# Neural network model
batch_size = 1

# model = Sequential()
# layers = [1, 75, 100, prediction_steps]
# model.add(LSTM(layers[1], input_shape=(None, layers[0]), return_sequences=True))  # add first layer
# model.add(Dropout(0.2))  # add dropout for first layer
# model.add(LSTM(layers[2], return_sequences=False))  # add second layer
# model.add(Dropout(0.2))  # add dropout for second layer
# model.add(Dense(layers[3]))  # add output layer
# model.add(Activation('linear'))  # output layer with linear activation
    
    
# model = Sequential()
# model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=1))
# model.add(Dense(1))
# model.compile(loss = 'mean_squared_error', optimizer='adam')
# model.fit(Xtrain, Ytrain, epochs=100, batch_size=batch_size, verbose = 2, shuffle=True)

model = Sequential()
model.add(LSTM(75, input_shape=(None, 1), return_sequences=True))  # add first layer
model.add(Dropout(0.2))  # add dropout for first layer
model.add(LSTM(100, return_sequences=False))  # add second layer
model.add(Dropout(0.2))  # add dropout for second layer
model.add(Dense(1))
model.compile(loss = 'mean_squared_error', optimizer='adam')
model.fit(Xtrain, Ytrain, epochs=50, batch_size=batch_size, verbose = 2, shuffle=True)





Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Epoch 1/50
 - 2s - loss: 0.0825
Epoch 2/50
 - 1s - loss: 0.0660
Epoch 3/50
 - 1s - loss: 0.0628
Epoch 4/50
 - 1s - loss: 0.0519
Epoch 5/50
 - 1s - loss: 0.0247
Epoch 6/50
 - 1s - loss: 0.0202
Epoch 7/50
 - 1s - loss: 0.0145
Epoch 8/50
 - 1s - loss: 0.0129
Epoch 9/50
 - 1s - loss: 0.0147
Epoch 10/50
 - 1s - loss: 0.0149
Epoch 11/50
 - 1s - loss: 0.0138
Epoch 12/50
 - 1s - loss: 0.0149
Epoch 13/50
 - 1s - loss: 0.0119
Epoch 14/50
 - 1s - loss: 0.0105
Epoch 15/50
 - 1s - loss: 0.0167
Epoch 16/50
 - 1s - loss: 0.0115
Epoch 17/50
 - 1s - loss: 0.0143
Epoch 18/50
 - 1s - loss: 0.0125
Epoch 19/50
 - 1s - loss: 0.0113
Epoch 20/50
 - 2s - loss: 0.0228
Epoch 21/50
 - 1s - loss: 0.0098
Epoch 22/50
 - 1s - loss: 0.0149
Epoch 23/50
 - 1s - loss: 0.0124
Epoch 24/50
 - 

<keras.callbacks.History at 0x7f7abbf6e4a8>

In [16]:
# make predictions 

trainPredict = model.predict(Xtrain, batch_size = batch_size)
testPredict = model.predict(Xtest, batch_size = batch_size)

trainPredict = scaler.inverse_transform(trainPredict)
testPredict = scaler.inverse_transform(testPredict)

ytrain = scaler.inverse_transform([Ytrain])
ytest = scaler.inverse_transform([Ytest])
    
trainScore = sqrt(mean_squared_error(ytrain[0], trainPredict[:,0]))    
print("train Score", trainScore)

testScore = sqrt(mean_squared_error(ytest[0], testPredict[:,0]))    
print("train Score", testScore)
print(ytest[0])

train Score 1766.888354634863
train Score 1035.0576673652333
[21355. 19726. 21498. 25122. 28212. 27048. 28160. 28753. 24201. 22884.
 20563. 21058. 20985. 19667. 20830. 22750. 25078. 26351. 27327. 24759.
 23730. 22733. 19188. 21325. 19318. 19440.]


In [17]:
trainPredictPlot = np.empty_like(scaled)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

testPredictPlot = np.empty_like(scaled)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(scaled)-1, :] = testPredict




In [18]:
#plot the two values
testPredictPlot = pd.DataFrame(testPredict,index = data[len(trainPredict)+(look_back*2)+1:len(scaled)-1:].index,columns=['Prediction'])
pd.concat([data[len(trainPredict)+(look_back*2)+1:len(scaled)-1],testPredictPlot],axis=1).iplot(title="Prediction",xTitle='TIme Stamp',yTitle='Demand')

In [19]:
# plotting whole dataset with predictions
pd.concat([data,testPredictPlot],axis=1).iplot()


In [20]:
test_samp = Xtest[0:1]
Fulltest = np.array([])

x = model.predict(test_samp)
x[0]

array([0.09696995], dtype=float32)

In [21]:
#  printing r2 score of our model 
rms = sqrt(mean_squared_error(testPredict, ytest[0]))
print(rms)

1035.0576673652333


In [22]:
#  printing mean square error of our model 
r2 = r2_score(testPredict,ytest[0])
r2

0.8647406838087252

In [23]:
# while len(Fulltest) < len(testPredict):
#     x = model.predict(test_samp)
#     np.concatenate(Fulltest.reshape(1, -1), np.array(x[0][0]).reshape(1, -1));
#     test_samp[0] = test_samp[0][1:]
#     test_samp[0] = test_samp[0].concatenate(np.array(x[0][0]))
