In [1]:
# importing libraries for data preprocessing
import pandas as pd 
import numpy as np
from sklearn.preprocessing import MinMaxScaler

#importing libraries for neural network model
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM

# import libraries for plotting
import cufflinks as cf
import plotly.offline
from  plotly.offline import plot_mpl
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

# import libraries for error metrics
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# data preprocessing
# reading dataset
data = pd.read_csv("upsldc.csv",index_col=0)
data = data[674:1050]
data.head()


Unnamed: 0_level_0,Schedule(MW),Drawl(MW),OD/UD(MW),Demand(MW),Total SSGS(MW),UP Thermal Generation (MW),IPP Thermal Generation (MW),UP Hydro Generation (MW),Co-gen/CPP Generation (MW),RE/Solar Generation (MW),Frequency (Hz),Deviation Rate (Paise/Unit)
Time Stamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-04-24 10:17:04.681890,5798,5593,-205,14232,8639,3852,4001,416,600,31,49.92,551.03
2019-04-24 10:17:26.585791,5798,5593,-205,14232,8639,3852,4001,416,600,31,49.92,551.03
2019-04-24 10:17:38.545493,5798,5593,-205,14232,8639,3852,4001,416,600,31,49.92,551.03
2019-04-24 10:18:56.758521,5798,5593,-205,14232,8639,3852,4001,416,600,31,49.92,551.03
2019-04-24 10:19:19.920844,5798,5593,-205,14232,8639,3852,4001,416,600,31,49.92,551.03


In [3]:
# making column datetime
data.index = pd.to_datetime(data.index)
data.tail()

Unnamed: 0_level_0,Schedule(MW),Drawl(MW),OD/UD(MW),Demand(MW),Total SSGS(MW),UP Thermal Generation (MW),IPP Thermal Generation (MW),UP Hydro Generation (MW),Co-gen/CPP Generation (MW),RE/Solar Generation (MW),Frequency (Hz),Deviation Rate (Paise/Unit)
Time Stamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-04-25 20:23:54.536126,8843,8747,-96,18818,10072,3861,5336,504,600,31,50.05,0.0
2019-04-25 20:26:20.786947,8843,8701,-142,18812,10110,3879,5361,504,600,31,50.0,335.92
2019-04-25 20:27:19.724482,8843,8701,-142,18812,10110,3879,5361,504,600,31,50.0,335.92
2019-04-25 20:28:42.000203,8843,8701,-142,18812,10110,3879,5361,504,600,31,50.0,335.92
2019-04-25 20:31:14.371810,8843,8701,-142,18811,10110,3866,5378,499,600,31,50.01,243.98


In [4]:
# removing unwanted data
data = data['Demand(MW)']

# changing column type
data = data.astype('float64')

data.head()

Time Stamp
2019-04-24 10:17:04.681890    14232.0
2019-04-24 10:17:26.585791    14232.0
2019-04-24 10:17:38.545493    14232.0
2019-04-24 10:18:56.758521    14232.0
2019-04-24 10:19:19.920844    14232.0
Name: Demand(MW), dtype: float64

In [5]:
print('Min', np.min(data))
print('Max', np.max(data))

Min 14232.0
Max 18822.0


In [6]:
# normalizing the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(np.array(data).reshape(-1, 1))

In [7]:
#plotting the data 
data.iplot(title="power", xTitle='TIme Stamp',yTitle='Demand')

In [8]:
print('Min', np.min(scaled))
print('Max', np.max(scaled))

Min 0.0
Max 0.9999999999999996


In [9]:
len(scaled)
# !pip3 install pyramid-arima


376

In [10]:
#create train/test datasets

train_size = int(len(scaled)*.7)
test_size = int(len(scaled - train_size))
train, test = scaled[:train_size, :], scaled[train_size:, :]

print(len(train), len(test))

263 113


In [11]:
#changing data into required window format  
def create_dataset(dataset, look_back = 1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[(i+look_back), 0])
        
    return np.array(dataX), np.array(dataY)

In [12]:
look_back = 12
Xtrain, Ytrain = create_dataset(train, look_back)
Xtest, Ytest = create_dataset(test, look_back)

In [13]:
Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], Xtrain.shape[1], 1))
Xtest = np.reshape(Xtest, (Xtest.shape[0], Xtest.shape[1], 1))
print(Xtrain.shape)
print(Xtest.shape)

(250, 12, 1)
(100, 12, 1)


In [14]:
# Neural network model
batch_size = 1

# model = Sequential()
# layers = [1, 75, 100, prediction_steps]
# model.add(LSTM(layers[1], input_shape=(None, layers[0]), return_sequences=True))  # add first layer
# model.add(Dropout(0.2))  # add dropout for first layer
# model.add(LSTM(layers[2], return_sequences=False))  # add second layer
# model.add(Dropout(0.2))  # add dropout for second layer
# model.add(Dense(layers[3]))  # add output layer
# model.add(Activation('linear'))  # output layer with linear activation
    
    
# model = Sequential()
# model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=1))
# model.add(Dense(1))
# model.compile(loss = 'mean_squared_error', optimizer='adam')
# model.fit(Xtrain, Ytrain, epochs=100, batch_size=batch_size, verbose = 2, shuffle=True)

model = Sequential()
model.add(LSTM(75, input_shape=(None, 1), return_sequences=True))  # add first layer
model.add(Dropout(0.2))  # add dropout for first layer
model.add(LSTM(100, return_sequences=False))  # add second layer
model.add(Dropout(0.2))  # add dropout for second layer
model.add(Dense(1))
model.compile(loss = 'mean_squared_error', optimizer='adam')
model.fit(Xtrain, Ytrain, epochs=50, batch_size=batch_size, verbose = 2, shuffle=True)





Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Epoch 1/50
 - 4s - loss: 0.0247
Epoch 2/50
 - 3s - loss: 0.0155
Epoch 3/50
 - 3s - loss: 0.0106
Epoch 4/50
 - 3s - loss: 0.0099
Epoch 5/50
 - 3s - loss: 0.0096
Epoch 6/50
 - 3s - loss: 0.0080
Epoch 7/50
 - 3s - loss: 0.0086
Epoch 8/50
 - 3s - loss: 0.0084
Epoch 9/50
 - 3s - loss: 0.0073
Epoch 10/50
 - 3s - loss: 0.0072
Epoch 11/50
 - 3s - loss: 0.0068
Epoch 12/50
 - 4s - loss: 0.0061
Epoch 13/50
 - 4s - loss: 0.0062
Epoch 14/50
 - 4s - loss: 0.0065
Epoch 15/50
 - 4s - loss: 0.0065
Epoch 16/50
 - 3s - loss: 0.0057
Epoch 17/50
 - 3s - loss: 0.0064
Epoch 18/50
 - 3s - loss: 0.0056
Epoch 19/50
 - 3s - loss: 0.0056
Epoch 20/50
 - 3s - loss: 0.0050
Epoch 21/50
 - 3s - loss: 0.0052
Epoch 22/50
 - 3s - loss: 0.0056
Epoch 23/50
 - 3s - loss: 0.0057
Epoch 24/50
 - 

<keras.callbacks.History at 0x7f84ccde77f0>

In [15]:
# make predictions 

trainPredict = model.predict(Xtrain, batch_size = batch_size)
testPredict = model.predict(Xtest, batch_size = batch_size)

trainPredict = scaler.inverse_transform(trainPredict)
testPredict = scaler.inverse_transform(testPredict)

ytrain = scaler.inverse_transform([Ytrain])
ytest = scaler.inverse_transform([Ytest])
    
trainScore = sqrt(mean_squared_error(ytrain[0], trainPredict[:,0]))    
print("train Score", trainScore)

testScore = sqrt(mean_squared_error(ytest[0], testPredict[:,0]))    
print("train Score", testScore)
print(ytest[0])

train Score 273.03068471596157
train Score 334.54048322233933
[15615. 15615. 15615. 15615. 15615. 15615. 15615. 15615. 15615. 15615.
 15463. 15463. 15463. 15463. 15463. 15463. 15463. 15463. 15463. 15463.
 15480. 15480. 15480. 15480. 15480. 15480. 15480. 15480. 15480. 15480.
 15384. 15384. 15384. 15384. 15384. 15384. 15384. 15384. 15384. 15384.
 15400. 15400. 15400. 15400. 15400. 15400. 15400. 15400. 15400. 15400.
 15505. 15505. 15505. 15505. 15505. 18658. 18658. 18658. 18658. 18672.
 18672. 18672. 18672. 18672. 18672. 18672. 18672. 18672. 18781. 18781.
 18781. 18781. 18781. 18781. 18781. 18781. 18822. 18822. 18822. 18822.
 18822. 18822. 18822. 18819. 18819. 18819. 18819. 18819. 18819. 18820.
 18820. 18820. 18820. 18818. 18818. 18818. 18818. 18812. 18812. 18812.]


In [16]:
trainPredictPlot = np.empty_like(scaled)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

testPredictPlot = np.empty_like(scaled)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(scaled)-1, :] = testPredict




In [17]:
#plot the two values
testPredictPlot = pd.DataFrame(testPredict,index = data[len(trainPredict)+(look_back*2)+1:len(scaled)-1:].index,columns=['Prediction'])
pd.concat([data[len(trainPredict)+(look_back*2)+1:len(scaled)-1],testPredictPlot],axis=1).iplot(title="Prediction",xTitle='TIme Stamp',yTitle='Demand')

In [18]:
# plotting whole dataset with predictions
pd.concat([data,testPredictPlot],axis=1).iplot()


In [19]:
test_samp = Xtest[0:1]
Fulltest = np.array([])

x = model.predict(test_samp)
x[0]

array([0.31633314], dtype=float32)

In [20]:
#  printing r2 score of our model 
rms = sqrt(mean_squared_error(testPredict, ytest[0]))
print(rms)

334.54048322233933


In [21]:
#  printing mean square error of our model 
r2 = r2_score(testPredict,ytest[0])
r2

0.9544725128345691

In [22]:
# while len(Fulltest) < len(testPredict):
#     x = model.predict(test_samp)
#     np.concatenate(Fulltest.reshape(1, -1), np.array(x[0][0]).reshape(1, -1));
#     test_samp[0] = test_samp[0][1:]
#     test_samp[0] = test_samp[0].concatenate(np.array(x[0][0]))
