In [46]:
import os, math

import matplotlib as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Masking
from tensorflow.keras.layers import RepeatVector
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.models import Sequential

import preprocess

In [283]:
# Scratch check: place yhat[-1:] after X[-1:] along axis 1, then drop the
# first axis-1 entry (the same slide-by-one step used inside predict_ahead).
np.concatenate((X[-1:], yhat[-1:]), axis=1)[:,1:]

array([[[ 3.42806266e-01,  0.00000000e+00,  1.88052969e-01,
          2.01579934e-01,  1.23954572e-01,  1.64325819e-01,
          9.74287052e-02,  0.00000000e+00,  1.55124093e-01,
          1.61463376e-01,  1.62402225e-01,  1.06829753e-01],
        [ 4.73946228e-01,  0.00000000e+00,  1.96221546e-01,
          2.06670506e-01,  1.16606722e-01,  1.77341314e-01,
          9.58774938e-02,  0.00000000e+00,  1.75192875e-01,
          1.72326599e-01,  1.69578104e-01,  1.06465051e-01],
        [ 4.84600538e-01,  0.00000000e+00,  1.25786836e-01,
          2.14065119e-01,  1.18889261e-01,  1.76667223e-01,
          9.29644630e-02,  0.00000000e+00,  1.53715355e-01,
          2.01260053e-01,  1.77716900e-01,  1.10877617e-01],
        [ 4.65936756e-01,  0.00000000e+00,  1.34263661e-01,
          2.15572136e-01,  1.35856806e-01,  1.68116426e-01,
          9.38482928e-02,  0.00000000e+00,  1.59102516e-01,
          2.00450223e-01,  1.90728548e-01,  1.20359871e-01],
        [ 4.76100266e-01,  4.0438696

In [273]:
# Scratch check: show the last entry of yhat with its leading axis kept.
yhat[-1:]

array([[[ 4.7610027e-01,  4.0438697e-03,  1.8031746e-01,  1.9410238e-01,
          1.4199179e-01,  1.7130542e-01,  7.9064801e-02, -2.8566271e-04,
          1.7097814e-01,  1.7647074e-01,  1.7368537e-01,  1.2511222e-01]]],
      dtype=float32)

In [262]:
# Scratch check: show the last input window produced by generate_sequence.
X[-1]

array([[0.17831412, 0.        , 0.35450699, 0.22637166, 0.11449053,
        0.15388203, 0.1000351 , 0.        , 0.13888231, 0.14485467,
        0.12775868, 0.09471244],
       [0.34280627, 0.        , 0.18805297, 0.20157993, 0.12395457,
        0.16432582, 0.09742871, 0.        , 0.15512409, 0.16146338,
        0.16240223, 0.10682975],
       [0.47394623, 0.        , 0.19622155, 0.20667051, 0.11660672,
        0.17734131, 0.09587749, 0.        , 0.17519288, 0.1723266 ,
        0.1695781 , 0.10646505],
       [0.48460054, 0.        , 0.12578684, 0.21406512, 0.11888926,
        0.17666722, 0.09296446, 0.        , 0.15371536, 0.20126005,
        0.1777169 , 0.11087762],
       [0.46593676, 0.        , 0.13426366, 0.21557214, 0.13585681,
        0.16811643, 0.09384829, 0.        , 0.15910252, 0.20045022,
        0.19072855, 0.12035987]])

In [290]:
# Scratch check: shape of the last input window with its leading axis kept
# (this is the slice later handed to predict_ahead).
X[-1:].shape

(1, 30, 12)

array([[[[0.47610027, 0.00404387, 0.18031746, ..., 0.17647074,
          0.17368537, 0.12511222]]],


       [[[0.4046338 , 0.00490309, 0.19677058, ..., 0.12904078,
          0.16026232, 0.13126871]]],


       [[[0.38509163, 0.01332764, 0.18496355, ..., 0.08822899,
          0.14247677, 0.13820067]]],


       ...,


       [[[0.26651266, 0.27709633, 0.12902212, ..., 0.18290485,
          0.23904714, 0.07319686]]],


       [[[0.2545814 , 0.29020745, 0.15405287, ..., 0.20887057,
          0.26352262, 0.07848839]]],


       [[[0.29586592, 0.31415254, 0.1614595 , ..., 0.22121522,
          0.28409946, 0.0951845 ]]]], dtype=float32)

In [322]:
# Read the averaged electrical readings, parse the Timestamp column and use
# it as the row index.
data = (
    pd.read_csv("Data/Electrical_Averaged.csv")
    .assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp']))
    .set_index('Timestamp')
)
# NOTE(review): 15 is forwarded to preprocess.remove_outliers unchanged;
# presumably an outlier threshold — confirm against that module.
building_data = preprocess.remove_outliers(data, 15)

# Scale every column into [0, 1]; the fitted scaler is reused later to map
# predictions back to the original units.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(building_data)

# Window sizes, the value written in place of NaNs, and the TensorFlow seed.
n_steps_in, n_steps_out = 30, 1
mask_value = 0
tf.random.set_seed(1)

# Checkpoint location, derived from the window sizes.
checkpoint_path = f"training_1/checkpoint__{n_steps_in}in_{n_steps_out}_out"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Callback that writes the model weights (weights only) to checkpoint_path.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
)

def generate_sequence(data, n_steps_in, n_steps_out):
    """Cut an ordered series into (input window, target window) pairs.

    For every position ``t`` from ``n_steps_in`` up to
    ``len(data) - n_steps_out`` inclusive, the input window is
    ``data[t - n_steps_in : t]`` and the target window is
    ``data[t : t + n_steps_out]``.

    Args:
        data: Array-like indexed by time step along its first axis.
        n_steps_in: Number of consecutive steps in each input window.
        n_steps_out: Number of consecutive steps in each target window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_windows, n_steps_in, ...)`` and ``y`` has shape
        ``(n_windows, n_steps_out, ...)``, where ``...`` is whatever trailing
        shape ``data`` has. When ``data`` is too short to yield a single
        pair, both arrays have ``n_windows == 0`` but keep those trailing
        dimensions, so callers can still read e.g. ``X.shape[2]``.
    """
    series = np.asarray(data)
    target_starts = range(n_steps_in, len(series) - n_steps_out + 1)

    if len(target_starts) == 0:
        # Not enough rows for one full pair: return empty arrays that still
        # carry the window and feature dimensions instead of a bare (0,).
        trailing = series.shape[1:]
        return (
            np.empty((0, n_steps_in) + trailing, dtype=series.dtype),
            np.empty((0, n_steps_out) + trailing, dtype=series.dtype),
        )

    X = np.array([series[start - n_steps_in:start] for start in target_starts])
    y = np.array([series[start:start + n_steps_out] for start in target_starts])
    return X, y

# Overwrite NaN entries in place with mask_value, the same value the Masking
# layer below is configured with.
np.putmask(scaled_data, np.isnan(scaled_data), mask_value)

# Build the supervised windows and read the feature count off the last axis.
X, y = generate_sequence(scaled_data, n_steps_in, n_steps_out)
n_features = X.shape[2]
print(scaled_data.shape, X.shape, y.shape)

# Encoder-decoder style stack: the first LSTM consumes the masked input
# window, its output is repeated once per output step, and the second LSTM
# plus a per-step Dense layer emit n_features values for each output step.
model = Sequential([
    Masking(mask_value=mask_value, input_shape=(n_steps_in, n_features)),
    LSTM(200, activation='relu'),
    RepeatVector(n_steps_out),
    LSTM(200, activation='relu', return_sequences=True),
    TimeDistributed(Dense(n_features)),
])
model.compile(optimizer='adam', loss='mse')

(3661, 12) (3631, 30, 12) (3631, 1, 12)


In [364]:
# Restore the weights
# Restore the weights
model.load_weights(f'./Weight/weight_{n_steps_in}in_{n_steps_out}_out')

# Number of steps to forecast; passed to predict_ahead as n_ahead and used
# to size the forecast's date index.
ahead = 365

def predict_ahead(X, model, n_ahead):
    """Roll ``model`` forward ``n_ahead`` steps, feeding predictions back in.

    On each iteration the model predicts from the current input window, the
    first predicted step of the first batch entry is recorded, and the window
    is slid forward by one step: its oldest step is dropped and that first
    predicted step is appended.

    Args:
        X: Seed array indexed as (batch, steps, features). It is copied and
            never modified.
        model: Object exposing ``predict(array, verbose=0)`` whose result is
            indexed as (batch, predicted_steps, features).
        n_ahead: Number of steps to generate.

    Returns:
        NumPy array with one row per generated step, each row being
        ``prediction[0, 0]`` from that iteration. For ``n_ahead <= 0`` an
        empty array of shape ``(0, features)`` is returned so the result
        stays two-dimensional.
    """
    window = np.copy(X)

    if n_ahead <= 0:
        # Nothing to generate; keep the feature axis so downstream code that
        # expects a 2-D array (e.g. an inverse transform) still works.
        return np.empty((0,) + window.shape[2:], dtype=window.dtype)

    forecast = []
    for _ in range(n_ahead):
        prediction = model.predict(window, verbose=0)
        forecast.append(prediction[0, 0])
        # Feed back only the first predicted step. Appending the whole
        # prediction while dropping a single step would lengthen the window
        # whenever the model emits more than one step per call.
        window = np.concatenate((window, prediction[:, :1]), axis=1)[:, 1:]
    return np.array(forecast)
# Dates for the forecast: start at the last observed timestamp, generate one
# extra daily period, then drop the first entry so the observed date itself
# is excluded.
time_period = pd.date_range(building_data.index[-1], periods=ahead + 1, freq="d")[1:]

# Forecast from the last input window, map the values back to the original
# units, and label rows/columns to match building_data.
yhat = pd.DataFrame(
    scaler.inverse_transform(predict_ahead(X[-1:], model, ahead)),
    index=time_period,
    columns=building_data.columns,
)
yhat.index.name = "Timestamp"

In [369]:
# Name the index "Timestamp" and write the forecast frame to CSV.
yhat.index.name = "Timestamp"
yhat.to_csv("Data/Electrical_Predict.csv")

In [368]:
# Display the forecast frame.
# NOTE(review): `yhat` is rebound to different types across cells (array,
# then DataFrame), so what this shows depends on execution order.
yhat

Unnamed: 0_level_0,Adam Joseph Lewis Center - Gross Electricity Consumption (kWh),Kohl Building - Total Electricity Use (kWh),Science Center - Electricity Use (kWh),Asia House - Residential Electricity Use (kWh),East - Electricity Use (kWh),Kahn - Total Electricity Use (kWh),Noah - Electricity Use (kWh),North - Electricity Use (kWh),Fairchild - Residential Electricity Use (kWh),Harvey - Electricity Use (kWh),Talcott - Residential Electricity Use (kWh),Lord-Saunders Electricity use (kWh)
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2023-01-10,4.844020,9.338751,85.974747,1.759958,5.365248,10.951530,1.785280,2.795058,1.059328,1.117871,0.905499,2.222209
2023-01-11,4.554914,9.348683,85.415588,1.678147,4.979685,11.297711,1.688166,3.326328,1.125216,1.103990,0.875803,2.270878
2023-01-12,4.507038,9.304446,87.322571,1.685512,4.683290,11.628461,1.696692,3.874186,1.204186,1.093908,0.814620,2.582888
2023-01-13,5.157771,9.689322,88.750023,1.669623,4.125453,12.312021,1.750597,4.394192,1.239826,1.029317,0.794851,3.003645
2023-01-14,5.074459,9.859255,89.917305,1.662609,3.689127,12.257896,1.793534,4.912844,1.214975,0.956194,0.789003,3.195107
...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-05,2.519449,9.176046,72.005424,2.557931,9.425213,35.505325,4.370117,9.346543,0.922248,2.184968,0.942454,4.409547
2024-01-06,2.399322,9.174148,72.763832,2.580471,9.274823,28.780018,4.274760,9.035376,0.792589,2.045092,0.976758,4.303084
2024-01-07,2.012581,9.180553,72.144310,2.537083,10.066600,19.062843,3.929247,8.527460,0.683538,1.804317,0.988012,4.116850
2024-01-08,1.566469,8.579676,71.852928,2.487388,10.657007,10.973708,3.571054,8.363589,0.619010,1.745108,1.005041,4.283971


In [358]:
# Last timestamp in the cleaned data; the forecast date range starts from it.
building_data.index[-1]

Timestamp('2023-01-09 00:00:00')

In [349]:
# Replace yhat's index with the default integer index, in place; drop=True
# discards the old index instead of keeping it as a column.
yhat.reset_index(drop=True, inplace=True)

In [344]:
# Wrap yhat in a DataFrame labelled with building_data's column names.
yhat = pd.DataFrame(yhat, columns = building_data.columns)

In [351]:
# Stack the history and the forecast row-wise. ignore_index=True renumbers
# the rows from 0, so the result carries no Timestamp index.
pd.concat( [building_data, yhat], axis=0, ignore_index=True) 

Unnamed: 0,Adam Joseph Lewis Center - Gross Electricity Consumption (kWh),Kohl Building - Total Electricity Use (kWh),Science Center - Electricity Use (kWh),Asia House - Residential Electricity Use (kWh),East - Electricity Use (kWh),Kahn - Total Electricity Use (kWh),Noah - Electricity Use (kWh),North - Electricity Use (kWh),Fairchild - Residential Electricity Use (kWh),Harvey - Electricity Use (kWh),Talcott - Residential Electricity Use (kWh),Lord-Saunders Electricity use (kWh)
0,4.192083,,,0.692230,5.139488,31.907547,1.650107,5.054331,0.109023,0.365547,1.084768,0.297761
1,5.782357,,,1.029615,5.830313,36.063542,1.745208,5.880209,0.166367,0.406081,1.446849,0.362760
2,5.467969,,,1.374802,6.049635,38.786458,1.913125,6.676406,0.278346,0.405599,1.481589,0.396133
3,4.394909,,,1.287357,6.200052,36.956250,1.978438,6.795990,0.298997,0.528997,1.681315,1.673138
4,4.390560,,,1.528151,6.384844,38.659375,2.145729,6.791406,0.451419,0.619505,1.756328,2.675169
...,...,...,...,...,...,...,...,...,...,...,...,...
4021,2.519449,9.176046,72.005424,2.557931,9.425213,35.505325,4.370117,9.346543,0.922248,2.184968,0.942454,4.409547
4022,2.399322,9.174148,72.763832,2.580471,9.274823,28.780018,4.274760,9.035376,0.792589,2.045092,0.976758,4.303084
4023,2.012581,9.180553,72.144310,2.537083,10.066600,19.062843,3.929247,8.527460,0.683538,1.804317,0.988012,4.116850
4024,1.566469,8.579676,71.852928,2.487388,10.657007,10.973708,3.571054,8.363589,0.619010,1.745108,1.005041,4.283971


In [347]:
# Display yhat (at this point a DataFrame with a default integer index).
yhat

Unnamed: 0,Adam Joseph Lewis Center - Gross Electricity Consumption (kWh),Kohl Building - Total Electricity Use (kWh),Science Center - Electricity Use (kWh),Asia House - Residential Electricity Use (kWh),East - Electricity Use (kWh),Kahn - Total Electricity Use (kWh),Noah - Electricity Use (kWh),North - Electricity Use (kWh),Fairchild - Residential Electricity Use (kWh),Harvey - Electricity Use (kWh),Talcott - Residential Electricity Use (kWh),Lord-Saunders Electricity use (kWh)
0,4.844020,9.338751,85.974747,1.759958,5.365248,10.951530,1.785280,2.795058,1.059328,1.117871,0.905499,2.222209
1,4.554914,9.348683,85.415588,1.678147,4.979685,11.297711,1.688166,3.326328,1.125216,1.103990,0.875803,2.270878
2,4.507038,9.304446,87.322571,1.685512,4.683290,11.628461,1.696692,3.874186,1.204186,1.093908,0.814620,2.582888
3,5.157771,9.689322,88.750023,1.669623,4.125453,12.312021,1.750597,4.394192,1.239826,1.029317,0.794851,3.003645
4,5.074459,9.859255,89.917305,1.662609,3.689127,12.257896,1.793534,4.912844,1.214975,0.956194,0.789003,3.195107
...,...,...,...,...,...,...,...,...,...,...,...,...
360,2.519449,9.176046,72.005424,2.557931,9.425213,35.505325,4.370117,9.346543,0.922248,2.184968,0.942454,4.409547
361,2.399322,9.174148,72.763832,2.580471,9.274823,28.780018,4.274760,9.035376,0.792589,2.045092,0.976758,4.303084
362,2.012581,9.180553,72.144310,2.537083,10.066600,19.062843,3.929247,8.527460,0.683538,1.804317,0.988012,4.116850
363,1.566469,8.579676,71.852928,2.487388,10.657007,10.973708,3.571054,8.363589,0.619010,1.745108,1.005041,4.283971


In [339]:
# Display yhat (at this point the inverse-transformed float32 array).
yhat

array([[ 4.84402   ,  9.338751  , 85.97475   , ...,  1.1178707 ,
         0.90549886,  2.2222087 ],
       [ 4.5549135 ,  9.348683  , 85.41559   , ...,  1.1039898 ,
         0.8758035 ,  2.2708778 ],
       [ 4.507038  ,  9.304446  , 87.32257   , ...,  1.0939077 ,
         0.8146197 ,  2.5828884 ],
       ...,
       [ 2.0125813 ,  9.180553  , 72.14431   , ...,  1.804317  ,
         0.9880116 ,  4.11685   ],
       [ 1.5664688 ,  8.579676  , 71.85293   , ...,  1.7451077 ,
         1.0050409 ,  4.2839713 ],
       [ 1.3064969 ,  8.483692  , 71.666695  , ...,  1.8959812 ,
         1.0932703 ,  4.683419  ]], dtype=float32)

In [None]:
# NOTE(review): never-executed scratch cell; `df` and `data_to_append` are
# not defined in any visible cell. DataFrame.append was deprecated in
# pandas 1.4 and removed in pandas 2.0 — use pd.concat if this is revived.
df.append(data_to_append, ignore_index = True)

In [None]:
# fit model
model.fit(X, y, epochs=200, verbose=0, callbacks=[cp_callback])
# Save the weights
model.save_weights(f'./Weight/weight_{n_steps_in}in_{n_steps_out}_out')

In [None]:
# fit model
model.fit(X, y, epochs=200, verbose=0, callbacks=[cp_callback])
# Save the weights
model.save_weights(f'./Weight/weight_{n_steps_in}in_{n_steps_out}_out')

In [225]:
# Scratch check: show the last five target windows.
y[-5:]

array([[[0.17831412, 1.        , 0.35450699, 0.22637166, 0.11449053,
         0.15388203, 0.1000351 , 1.        , 0.13888231, 0.14485467,
         0.12775868, 0.09471244],
        [0.34280627, 1.        , 0.18805297, 0.20157993, 0.12395457,
         0.16432582, 0.09742871, 1.        , 0.15512409, 0.16146338,
         0.16240223, 0.10682975]],

       [[0.34280627, 1.        , 0.18805297, 0.20157993, 0.12395457,
         0.16432582, 0.09742871, 1.        , 0.15512409, 0.16146338,
         0.16240223, 0.10682975],
        [0.47394623, 1.        , 0.19622155, 0.20667051, 0.11660672,
         0.17734131, 0.09587749, 1.        , 0.17519288, 0.1723266 ,
         0.1695781 , 0.10646505]],

       [[0.47394623, 1.        , 0.19622155, 0.20667051, 0.11660672,
         0.17734131, 0.09587749, 1.        , 0.17519288, 0.1723266 ,
         0.1695781 , 0.10646505],
        [0.48460054, 1.        , 0.12578684, 0.21406512, 0.11888926,
         0.17666722, 0.09296446, 1.        , 0.15371536, 0.20126005

In [None]:
# # split a multivariate sequence into samples
# def split_sequences(sequences, n_steps_in, n_steps_out):
#     X, y = list(), list()
#     for i in range(len(sequences)):
#         # find the end of this pattern
#         end_ix = i + n_steps_in
#         out_end_ix = end_ix + n_steps_out
#         # check if we are beyond the dataset
#         if out_end_ix > len(sequences):
#             break
#         # gather input and output parts of the pattern
#         seq_x, seq_y = sequences[i:end_ix, :], sequences[end_ix:out_end_ix, :]
#         X.append(seq_x)
#         y.append(seq_y)
#     return array(X), array(y)
 
# # define input sequence
# in_seq1 = array([10, 20, 30, 40, 50, 60, 70, 80, 90])
# in_seq2 = array([15, 25, 35, 45, 55, 65, 75, 85, 95])
# out_seq = array([in_seq1[i]+in_seq2[i] for i in range(len(in_seq1))])
# # convert to [rows, columns] structure
# in_seq1 = in_seq1.reshape((len(in_seq1), 1))
# in_seq2 = in_seq2.reshape((len(in_seq2), 1))
# out_seq = out_seq.reshape((len(out_seq), 1))
# # horizontally stack columns
# dataset = hstack((in_seq1, in_seq2, out_seq))
# # choose a number of time steps
# n_steps_in, n_steps_out = 3, 2
# # convert into input/output
# X, y = split_sequences(dataset, n_steps_in, n_steps_out)
# # the dataset knows the number of features, e.g. 2
# n_features = X.shape[2]
# print(X.shape, y.shape)
# # summarize the data
# for i in range(len(X)):
#     print(X[i], y[i])