In [1]:
# importing libraries for data preprocessing
import pandas as pd 
import numpy as np
from sklearn.preprocessing import MinMaxScaler

#importing libraries for neural network model
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM

# import libraries for plotting
import cufflinks as cf
import plotly.offline
from  plotly.offline import plot_mpl
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

# import libraries for error metrics
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# data preprocessing
# reading dataset
data = pd.read_csv("SMARD_Realisierter_Stromverbrauch_201811170000_202011122345_1.csv")
data = data.drop_duplicates(subset='Date', keep='first')
data.set_index('Date', inplace=True)
data.head()

Unnamed: 0_level_0,Time,Demand[MWh]
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
17.11.2018,00:00,118
18.11.2018,00:00,995
19.11.2018,00:00,985
20.11.2018,00:00,11425
21.11.2018,00:00,1265


In [3]:
# making column datetime
data.index = pd.to_datetime(data.index)

# removing unwanted data and sorting
data = data['Demand[MWh]']
data.columns = ['Energy Production']
data = data.sort_index()

print(len(data))
data = data['2019-01-01 00:00':'2019-06-01 00:00,']
data.tail()

365


Date
2019-05-28    12925
2019-05-29    12625
2019-05-30     1245
2019-05-31      110
2019-06-01    15525
Name: Demand[MWh], dtype: int64

In [4]:
# chainging column name
data.columns = ['Energy Production']
data.head()

Date
2019-01-01    13225
2019-01-02        0
2019-01-03    10725
2019-01-04     1005
2019-01-05    12125
Name: Demand[MWh], dtype: int64

In [5]:
print('Min', np.min(data))
print('Max', np.max(data))

Min 0
Max 17375


In [6]:
# normalizing the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(np.array(data).reshape(-1, 1))


Data with input dtype int64 was converted to float64 by MinMaxScaler.



In [24]:
#plotting the data 
data.iplot(title="power", xTitle='TIme Stamp',yTitle='Demand')

In [8]:
print('Min', np.min(scaled))
print('Max', np.max(scaled))

Min 0.0
Max 1.0


In [9]:
len(scaled)
# !pip3 install pyramid-arima


147

In [10]:
#create train/test datasets

train_size = int(len(scaled)*.7)
test_size = int(len(scaled - train_size))
train, test = scaled[:train_size, :], scaled[train_size:, :]

print(len(train), len(test))

102 45


In [11]:
#changing data into required window format  
def create_dataset(dataset, look_back = 1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[(i+look_back), 0])
        
    return np.array(dataX), np.array(dataY)

In [12]:
look_back = 12
Xtrain, Ytrain = create_dataset(train, look_back)
Xtest, Ytest = create_dataset(test, look_back)

In [13]:
Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], Xtrain.shape[1], 1))
Xtest = np.reshape(Xtest, (Xtest.shape[0], Xtest.shape[1], 1))
print(Xtrain.shape)
print(Xtest.shape)

(89, 12, 1)
(32, 12, 1)


In [14]:
# Neural network model
batch_size = 1

# model = Sequential()
# layers = [1, 75, 100, prediction_steps]
# model.add(LSTM(layers[1], input_shape=(None, layers[0]), return_sequences=True))  # add first layer
# model.add(Dropout(0.2))  # add dropout for first layer
# model.add(LSTM(layers[2], return_sequences=False))  # add second layer
# model.add(Dropout(0.2))  # add dropout for second layer
# model.add(Dense(layers[3]))  # add output layer
# model.add(Activation('linear'))  # output layer with linear activation
    
    
# model = Sequential()
# model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=1))
# model.add(Dense(1))
# model.compile(loss = 'mean_squared_error', optimizer='adam')
# model.fit(Xtrain, Ytrain, epochs=100, batch_size=batch_size, verbose = 2, shuffle=True)

model = Sequential()
model.add(LSTM(75, input_shape=(None, 1), return_sequences=True))  # add first layer
model.add(Dropout(0.2))  # add dropout for first layer
model.add(LSTM(100, return_sequences=False))  # add second layer
model.add(Dropout(0.2))  # add dropout for second layer
model.add(Dense(1))
model.compile(loss = 'mean_squared_error', optimizer='adam')
model.fit(Xtrain, Ytrain, epochs=50, batch_size=batch_size, verbose = 2, shuffle=True)





Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Epoch 1/50
 - 2s - loss: 0.1619
Epoch 2/50
 - 1s - loss: 0.1493
Epoch 3/50
 - 1s - loss: 0.1419
Epoch 4/50
 - 1s - loss: 0.1492
Epoch 5/50
 - 1s - loss: 0.1505
Epoch 6/50
 - 1s - loss: 0.1469
Epoch 7/50
 - 1s - loss: 0.1482
Epoch 8/50
 - 1s - loss: 0.1451
Epoch 9/50
 - 1s - loss: 0.1460
Epoch 10/50
 - 1s - loss: 0.1461
Epoch 11/50
 - 1s - loss: 0.1464
Epoch 12/50
 - 1s - loss: 0.1475
Epoch 13/50
 - 1s - loss: 0.1467
Epoch 14/50
 - 1s - loss: 0.1413
Epoch 15/50
 - 1s - loss: 0.1463
Epoch 16/50
 - 1s - loss: 0.1419
Epoch 17/50
 - 1s - loss: 0.1420
Epoch 18/50
 - 1s - loss: 0.1412
Epoch 19/50
 - 1s - loss: 0.1440
Epoch 20/50
 - 1s - loss: 0.1429
Epoch 21/50
 - 1s - loss: 0.1469
Epoch 22/50
 - 1s - loss: 0.1442
Epoch 23/50
 - 1s - loss: 0.1432
Epoch 24/50
 - 

<keras.callbacks.History at 0x7f24f040af98>

In [26]:
# make predictions 

trainPredict = model.predict(Xtrain, batch_size = batch_size)
testPredict = model.predict(Xtest, batch_size = batch_size)

trainPredict = scaler.inverse_transform(trainPredict)
testPredict = scaler.inverse_transform(testPredict)

ytrain = scaler.inverse_transform([Ytrain])
ytest = scaler.inverse_transform([Ytest])
    
trainScore = sqrt(mean_squared_error(ytrain[0], trainPredict[:,0]))    
print("train Score", trainScore)

testScore = sqrt(mean_squared_error(ytest[0], testPredict[:,0]))    
print("train Score", testScore)
print(ytest[0])

train Score 6374.07568266362
train Score 5184.047509006994
[ 8175.  8825.     0.     0.  9125.   134.  8125. 12525.   128.    99.
   103.   111.    98.    96.   575.     0.  7725.   965.  1285.   125.
 11225.   116.  1175.  1325. 13475.  1315. 11575. 10875. 12925. 12625.
  1245.   110.]


In [27]:
trainPredictPlot = np.empty_like(scaled)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

testPredictPlot = np.empty_like(scaled)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(scaled)-1, :] = testPredict




In [28]:
#plot the two values
testPredictPlot = pd.DataFrame(testPredict,index = data[len(trainPredict)+(look_back*2)+1:len(scaled)-1:].index,columns=['Prediction'])
pd.concat([data[len(trainPredict)+(look_back*2)+1:len(scaled)-1],testPredictPlot],axis=1).iplot(title="Prediction",xTitle='TIme Stamp',yTitle='Demand')

In [18]:
# plotting whole dataset with predictions
pd.concat([data,testPredictPlot],axis=1).iplot()


In [19]:
test_samp = Xtest[0:1]
Fulltest = np.array([])

x = model.predict(test_samp)
x[0]

array([0.2853029], dtype=float32)

In [20]:
#  printing r2 score of our model 
rms = sqrt(mean_squared_error(testPredict, ytest[0]))
print(rms)

5184.047509006994


In [21]:
#  printing mean square error of our model 
r2 = r2_score(testPredict,ytest[0])
r2

-56.487448967425436

In [22]:
# while len(Fulltest) < len(testPredict):
#     x = model.predict(test_samp)
#     np.concatenate(Fulltest.reshape(1, -1), np.array(x[0][0]).reshape(1, -1));
#     test_samp[0] = test_samp[0][1:]
#     test_samp[0] = test_samp[0].concatenate(np.array(x[0][0]))
