In [6]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource,Panel, Tabs
from bokeh.palettes import Spectral3
from bokeh.layouts import column, row, gridplot
from math import sqrt
from sklearn.metrics import mean_squared_error,mean_absolute_error

# Bokeh output target: figures passed to show() in later cells are written to
# this HTML file.
output_file('Test_Prediction.html')



In [8]:
# Load the raw measurements, put them on a regular 15-minute grid and fill
# gaps with the last known value.
data = pd.read_csv('../Data/DrahiX_Data.csv', index_col=0, parse_dates=True)
data = data.resample('15Min').mean()
# DataFrame.ffill() is the non-deprecated spelling of fillna(method='ffill').
data = data.ffill()
# Keep only the last quarter of the series for this evaluation notebook.
data = data.iloc[int(len(data)*0.75):]


# Target signal: sum of the four sub-meters T1..T4.
Consumption = data['T1']+data['T2']+data['T3']+data['T4']

# Raw meter columns that must not be visible to the models as inputs.
_METER_COLUMNS = ['TGBT','T1','T2','T3','T4','T5','T6','T7']


def _lagged_frame(source, target, steps):
    """Build the evaluation frame for a model that looks ``steps`` rows ahead.

    The rows of ``source`` are moved ``steps`` quarter-hours into the future,
    so that each row pairs the measurements taken ``steps`` rows earlier with
    the calendar features and the ``target`` value of the shifted timestamp.

    Parameters
    ----------
    source : pandas.DataFrame
        Measurements on a regular 15-minute DatetimeIndex.
    target : pandas.Series
        Series to predict; aligned on the shifted index when assigned.
    steps : int
        Number of 15-minute rows to shift by.

    Returns
    -------
    pandas.DataFrame
        Shifted copy of ``source`` with calendar features and a
        ``Consumption`` column, without the raw meter columns.
    """
    # Work on an explicit copy so neither `source` nor the other lagged
    # frames can be affected by the index/column assignments below.
    frame = source.copy()
    frame.index = frame.index + pd.Timedelta(minutes=15*steps)

    #Adding new features so that we can use the date and time in the model
    frame['day of the week'] = frame.index.dayofweek
    frame['day of the year'] = frame.index.dayofyear
    frame['hour of the day'] = frame.index.hour
    frame['minute of the hour'] = frame.index.minute
    frame["Consumption"] = target

    # The last `steps` shifted timestamps lie past the end of `target`, so
    # their Consumption is NaN after alignment; drop those rows.
    frame = frame.iloc[:-steps]
    return frame.drop(columns = _METER_COLUMNS)


df12 = _lagged_frame(data, Consumption, 12)
df24 = _lagged_frame(data, Consumption, 24)
df96 = _lagged_frame(data, Consumption, 96)


# Test Models

In [49]:
def predict(data, size):
    """Run the saved ``Model<size>`` network over ``data`` window by window.

    ``data`` is cut into consecutive, non-overlapping windows of ``size``
    rows. Each window is scaled to [-1, 1], fed to the restored graph, and
    the network output is mapped back to consumption units.

    Parameters
    ----------
    data : pandas.DataFrame
        Chronologically ordered frame containing the feature columns listed
        in ``features`` below and a ``Consumption`` column.
    size : int
        Rows per window; also selects the checkpoint directory
        ``./Model<size>/``.

    Returns
    -------
    pandas.DataFrame
        Single ``Consumption`` column holding the rescaled predictions,
        indexed by the timestamps of the windows that were evaluated.
    """
    size = int(size)
    tf.compat.v1.reset_default_graph()

    #Organizing the data and keeping only the columns that will be used in the model
    features = ['day of the week','day of the year','hour of the day', 'AirTemp','rh']
    labels   = ['Consumption']
    data = data[features + labels]
    num_labels = len(labels)

    def _to_consumption(y_scaled, window):
        """Map network output back to consumption units for ``window``."""
        # NOTE(review): the inverse scaling is fitted on the *measured*
        # labels of the very window being predicted, so ground truth leaks
        # into the reported forecast. Kept as in the original evaluation;
        # confirm this matches how the models were trained.
        label_scaler = MinMaxScaler(feature_range=(-1, 1))
        label_scaler.fit(window[labels])
        values = label_scaler.inverse_transform(y_scaled.reshape(-1, num_labels))
        return pd.DataFrame(values, index=window.index, columns=labels)

    pieces = []
    # The context manager releases the session's resources once all windows
    # are processed (the session used to be left open).
    with tf.compat.v1.Session() as sess:
        #First let's load meta graph and restore weights
        saver = tf.compat.v1.train.import_meta_graph('Model'+str(size)+'/my_test_model-1000.meta')
        saver.restore(sess, tf.compat.v1.train.latest_checkpoint('./Model'+str(size)+'/'))
        graph = tf.compat.v1.get_default_graph()
        x = graph.get_tensor_by_name("x:0")
        outputs = graph.get_tensor_by_name("op_to_restore:0")

        # First window. NOTE(review): its inputs are scaled with a scaler
        # fitted on the whole frame, while every later window is scaled on
        # its own rows only. Behaviour kept unchanged; confirm which of the
        # two is intended.
        frame_scaler = MinMaxScaler(feature_range=(-1, 1))
        scaled_all = pd.DataFrame(frame_scaler.fit_transform(data.values), columns=data.columns, index=data.index)
        first_inputs = np.asarray(scaled_all[features])[:size]
        y_pred = sess.run(outputs, feed_dict={x: [first_inputs]})
        pieces.append(_to_consumption(y_pred, data.iloc[:size]))

        # Remaining windows. NOTE(review): the "- 2" leaves the last complete
        # window unpredicted; kept so the reported metrics do not change.
        for i in range(int(len(data)/size) - 2):
            window = data.iloc[size + size*i:2*size + size*i]

            window_scaler = MinMaxScaler(feature_range=(-1, 1))
            scaled = pd.DataFrame(window_scaler.fit_transform(window.values), columns=window.columns, index=window.index)

            y_pred = sess.run(outputs, feed_dict={x: [np.asarray(scaled[features])]})
            pieces.append(_to_consumption(y_pred, window))

    # Concatenate once instead of growing the frame inside the loop.
    return pd.concat(pieces)

In [50]:
# Evaluate each saved model on the frame that was shifted by the same number
# of 15-minute steps (12, then 24, then 96).
result12, result24, result96 = [
    predict(frame, steps)
    for frame, steps in ((df12, 12), (df24, 24), (df96, 96))
]

INFO:tensorflow:Restoring parameters from ./Model12/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model24/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model96/my_test_model-1000


In [54]:
def _error_metrics(predicted, actual_frame):
    """Return (MSE, RMSE, MAE) of a forecast against the measured values.

    The measured ``Consumption`` of ``actual_frame`` is truncated by position
    to the number of predicted rows before the comparison.
    """
    forecast = predicted["Consumption"]
    measured = actual_frame["Consumption"].iloc[:len(forecast)]
    squared = mean_squared_error(forecast, measured)
    absolute = mean_absolute_error(forecast, measured)
    return squared, sqrt(squared), absolute


mse12, rmse12, mae12 = _error_metrics(result12, df12)
mse24, rmse24, mae24 = _error_metrics(result24, df24)
mse96, rmse96, mae96 = _error_metrics(result96, df96)


# One three-line report per model; reports after the first start with a blank line.
for _mse_header, _mse_value, _mae_value, _rmse_value in (
    ('MSE : %.3f', mse12, mae12, rmse12),
    ('\nMSE : %.3f', mse24, mae24, rmse24),
    ('\nMSE : %.3f', mse96, mae96, rmse96),
):
    print(_mse_header % _mse_value)
    print('MAE : %.3f' % _mae_value)
    print('RMSE : %.3f' % _rmse_value)


MSE : 6.952
MAE : 1.717
RMSE : 2.637

MSE : 9.751
MAE : 2.111
RMSE : 3.123

MSE : 10.766
MAE : 2.420
RMSE : 3.281


In [56]:

def _comparison_figure(label, measured, predicted, mse_value, mae_value, rmse_value):
    """Build one datetime figure overlaying measured and predicted consumption.

    ``label`` is the leading part of the title; the three error values are
    appended to it with three decimals each.
    """
    fig = figure(x_axis_type="datetime",plot_width=1200)
    fig.title.text = (
        label
        + '\t\t\tMSE : %.3f' % mse_value
        + '\t\t\tMAE : %.3f' % mae_value
        + '\t\t\tRMSE : %.3f' % rmse_value
    )
    # Both frames are handed to Bokeh directly; the x column is addressed by
    # the name 'Date and time (UTC)'.
    fig.line(x='Date and time (UTC)', y='Consumption', line_width=2, source=measured, legend='Real Consumption')
    fig.line(x='Date and time (UTC)', y='Consumption', line_width=2, source=predicted, color=Spectral3[2], legend='Prediction', alpha = 0.8)
    return fig


p1 = _comparison_figure('Model 3h', df12, result12, mse12, mae12, rmse12)
p2 = _comparison_figure('Model 6h', df24, result24, mse24, mae24, rmse24)
p3 = _comparison_figure('Model 24h', df96, result96, mse96, mae96, rmse96)

# Stack the three comparisons vertically in a single output.
show(column(p1,p2,p3))



# Test Time intervals

In [9]:
def _windowed_forecast(model_name, frame, features, labels, first_row, keep, n_windows, step):
    """Run one saved model over ``n_windows`` sliding windows of ``frame``.

    Window ``i`` covers rows ``first_row + i*step`` up to ``96 + i*step``
    (exclusive). Each window is scaled to [-1, 1] on its own rows, fed to the
    restored graph, rescaled, and reduced to the row slice ``keep``.

    Returns a list with one single-column DataFrame per window.
    """
    tf.compat.v1.reset_default_graph()
    pieces = []
    # Context manager so the session is closed before the next model is loaded.
    with tf.compat.v1.Session() as sess:
        saver = tf.compat.v1.train.import_meta_graph(model_name + '/my_test_model-1000.meta')
        saver.restore(sess, tf.compat.v1.train.latest_checkpoint('./' + model_name + '/'))
        graph = tf.compat.v1.get_default_graph()
        x = graph.get_tensor_by_name("x:0")
        outputs = graph.get_tensor_by_name("op_to_restore:0")

        for i in range(n_windows):
            window = frame.iloc[first_row + i*step:96 + i*step]

            scaler = MinMaxScaler(feature_range=(-1, 1))
            scaled = pd.DataFrame(scaler.fit_transform(window.values), columns=window.columns, index=window.index)

            y_pred = sess.run(outputs, feed_dict={x: [np.asarray(scaled[features])]})

            # NOTE(review): the inverse scaling is fitted on the measured
            # labels of the window being predicted (ground truth leaks into
            # the forecast). Kept as in the original evaluation.
            scaler.fit(window[labels])
            rescaled = scaler.inverse_transform(y_pred.reshape(-1, len(labels)))

            pieces.append(pd.DataFrame(rescaled, index=window.index, columns=labels).iloc[keep])
    return pieces


def predict_time(df12,df24,df96, time):
    """Stitch forecasts of the 12-, 24- and 96-step models into one series.

    A new forecast is issued every ``time`` rows. Within each forecast the
    first 12 rows come from ``Model12``, rows 12-23 from ``Model24`` and rows
    24-95 from ``Model96``; only the first ``time`` rows are kept.

    Parameters
    ----------
    df12, df24, df96 : pandas.DataFrame
        Frames shifted by 12, 24 and 96 rows respectively, each containing
        the feature columns listed below and ``Consumption``.
    time : int
        Forecast horizon, in 15-minute rows, and also the stride between
        consecutive forecasts.

    Returns
    -------
    pandas.DataFrame
        ``Consumption`` predictions sorted by timestamp.
    """
    #Organizing the data and keeping only the columns that will be used in the model
    features = ['day of the week','day of the year','hour of the day', 'AirTemp','rh']
    labels   = ['Consumption']
    inputs   = features + labels

    df12 = df12[inputs]
    df24 = df24[inputs]
    df96 = df96[inputs]

    # Number of forecasts. This used to be computed as len(data) - 96 from the
    # notebook-level `data`; df96 has exactly that many rows in this notebook,
    # and using it keeps the function independent of global state.
    n_windows = int(len(df96)/time) - 3

    # Row offsets 84 / 72 / 0 make all three windows end on the same row (96),
    # i.e. they line the three shifted frames up on the same timestamps.
    DF = _windowed_forecast('Model12', df12, features, labels, 84, slice(None, min(12, time)), n_windows, time)

    if time > 12 :
        DF += _windowed_forecast('Model24', df24, features, labels, 72, slice(12, min(24, time)), n_windows, time)

    if time > 24 :
        DF += _windowed_forecast('Model96', df96, features, labels, 0, slice(24, min(96, time)), n_windows, time)

    DF = pd.concat(DF)
    DF = DF.sort_index()

    return DF

In [10]:

# One stitched forecast per horizon: 4, 8, ..., 96 rows of 15 minutes each.
result = [predict_time(df12, df24, df96, steps) for steps in range(4, 97, 4)]

INFO:tensorflow:Restoring parameters from ./Model12/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model12/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model12/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model12/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model24/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model12/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model24/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model12/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model24/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model12/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model24/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model96/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model12/my_test_model-1000
INFO:tensorflow:Restoring parameters from ./Model24/my_test_model-1000
INFO:t

                           Consumption   TGBT
Date and time (UTC)                          
2020-03-04 12:00:00+00:00    24.907337  25.44
2020-03-04 12:15:00+00:00    24.533831  21.03
2020-03-04 12:30:00+00:00    24.071806  24.13
2020-03-04 12:45:00+00:00    23.955013  24.33
2020-03-04 13:00:00+00:00    23.746357  23.66
...                                ...    ...
2021-05-19 16:45:00+00:00     9.839521   7.75
2021-05-19 17:00:00+00:00     9.825625   8.00
2021-05-19 17:15:00+00:00     9.783289   8.71
2021-05-19 17:30:00+00:00     9.653268   7.77
2021-05-19 17:45:00+00:00     9.668092   6.03

[42360 rows x 2 columns]


In [22]:
tab = []
mse = []
rmse = []
mae = []

# Evaluate every forecast in `result` (the loop used to stop one short of the
# 24 horizons, leaving the 24 h forecast without metrics).
for i, forecast in enumerate(result):

    # result[i] was built with a horizon of 4 + i*4 quarter-hours, i.e. i + 1 hours.
    hours = i + 1

    # Measured consumption, aligned on the timestamp index. The column keeps
    # the name "TGBT" because the plot below addresses it by that name.
    forecast["TGBT"] = Consumption

    mse += [mean_squared_error(forecast["Consumption"], forecast["TGBT"])]
    rmse += [sqrt(mse[i])]
    mae += [mean_absolute_error(forecast["Consumption"], forecast["TGBT"])]

    p1 = figure(x_axis_type="datetime",plot_width=1500)
    # The title reports the actual horizon (it used to read 'Model 3h' on every tab).
    p1.title.text ='Model %dh' % hours +'\t\t\tMSE : %.3f' % mse[i] + '\t\t\tMAE : %.3f' % mae[i] + '\t\t\tRMSE : %.3f' % rmse[i]
    p1.line(x='Date and time (UTC)', y='TGBT', line_width=2, source=forecast, legend='Real Consumption')
    p1.line(x='Date and time (UTC)', y='Consumption', line_width=2, source=forecast, color=Spectral3[2],legend='Prediction', alpha = 0.8)
    tab += [Panel(child=p1, title=str(hours))]
    



In [23]:
# x positions 1..N, one per evaluated horizon. Derived from the metric list so
# x and y always have the same length (the axis used to be hard-coded to 24
# points regardless of how many metrics had been computed).
horizons = list(range(1, len(mse) + 1))

p1 = figure(plot_width=500, plot_height= 300)
p1.title.text ='MSE'
p1.line(horizons, mse, line_width=2)
tab1 = [Panel(child=p1, title="MSE")]

p2 = figure(plot_width=500, plot_height= 300)
p2.title.text ='RMSE'
p2.line(horizons, rmse, line_width=2)
tab2 = [Panel(child=p2, title="RMSE")]

p3 = figure(plot_width=500, plot_height= 300)
p3.title.text ='MAE'
p3.line(horizons, mae, line_width=2)
tab3 = [Panel(child=p3, title="MAE")]

# Per-horizon comparison tabs on top, the three error curves side by side below.
show(column(Tabs(tabs=tab),row(p1,p2,p3)))


