# Multivariate Modeling Using RNN

#### (Meant to be run within Google Colab)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, SimpleRNN
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.utils import timeseries_dataset_from_array
from tensorflow.keras.callbacks import EarlyStopping

# Seed every random number generator involved in training, not just NumPy's global one.
# keras.utils.set_random_seed seeds Python's `random`, NumPy and the TensorFlow backend,
# which np.random.seed on its own does not reach.
from tensorflow.keras.utils import set_random_seed

np.random.seed(42)
set_random_seed(42)

In [2]:
#mount drive
# from google.colab import drive
# drive.mount('/content/drive')

In [4]:
# Load the campus 5 readings from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer = 'campus5.csv')

In [5]:
# Index the rows by their parsed timestamps, then remove the columns that are not used
# afterwards. Both steps modify `df` in place, so this cell is meant to run once per load.
df.index = pd.to_datetime(df.loc[:, 'Timestamp'])
df.drop(['Timestamp', 'CampusKey', 'time'], axis = 'columns', inplace = True)
df.shape

(79319, 7)

In [6]:
# `ww` ("with weather") is the slice of the frame from row position 33311 onward,
# which is where the weather information starts.
# Missing values inside that slice are filled by forward linear interpolation;
# the last expression shows how many nulls remain per column.
ww = df.iloc[33311:].interpolate(method = 'linear', limit_direction = 'forward')
ww.isna().sum()

SolarGeneration        0
ApparentTemperature    0
AirTemperature         0
DewPointTemperature    0
RelativeHumidity       0
WindSpeed              0
WindDirection          0
dtype: int64

In [7]:
# Predictors are all columns except the target, SolarGeneration.
features = [col for col in ww.columns if col != 'SolarGeneration']
X = ww.loc[:, features]
y = ww.loc[:, 'SolarGeneration']

# shuffle = False keeps the rows in their original order: the first 80% train, the last 20% test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, shuffle = False)

In [8]:
# Final training row; compare its timestamp with the first test row to check the split boundary.
X_train.iloc[len(X_train) - 1]

ApparentTemperature     14.100000
AirTemperature          17.200000
DewPointTemperature     12.200000
RelativeHumidity        72.500000
WindSpeed               18.318182
WindDirection          220.363636
Name: 2022-01-18 03:15:00, dtype: float64

In [9]:
# First test row; it should directly follow the final training row in time.
X_test.iloc[0, :]

ApparentTemperature     13.900000
AirTemperature          17.000000
DewPointTemperature     12.100000
RelativeHumidity        73.000000
WindSpeed               18.318182
WindDirection          220.363636
Name: 2022-01-18 03:30:00, dtype: float64

In [10]:
# Standardise the predictors. The scaler statistics are learned from the training rows
# only and then applied unchanged to both partitions.
ss = StandardScaler()
ss.fit(X_train)
X_train_sc = ss.transform(X_train)
X_test_sc = ss.transform(X_test)

In [11]:
def print_last(res):
  '''
  Print the final-epoch scores stored in a fitted model's history.

  res: object exposing a `history` dict with the keys 'loss', 'val_loss',
       'mse' and 'val_mse', each mapping to a per-epoch list of values.

  The loss values are printed as Mean Absolute Error and the square roots of
  the mse values as Root Mean Squared Error, all rounded to 5 decimals.
  Returns nothing.
  '''
  def final(metric):
    # Value recorded for `metric` in the last epoch.
    return res.history[metric][-1]

  print(f"Mean Absolute Error - Training: {np.round(final('loss'), 5)}")
  print(f"Mean Absolute Error - Testing: {np.round(final('val_loss'), 5)}")
  print(f"Root Mean Squared Error - Training: {np.round(np.sqrt(final('mse')), 5)}")
  print(f"Root Mean Squared Error - Testing: {np.round(np.sqrt(final('val_mse')), 5)}")

First investigating sequences 4 intervals long (1 hour)

In [12]:
# Sliding-window generators, the per-sample input shape, and the early-stopping
# callback used by the model cells that follow.
# The targets are handed over as plain NumPy arrays: the generator looks them up by
# integer position, and integer lookups on a datetime-indexed Series depend on a
# deprecated positional fallback in pandas.
train_sequences = TimeseriesGenerator(X_train_sc, np.asarray(y_train), length = 4, batch_size = 64)
test_sequences = TimeseriesGenerator(X_test_sc, np.asarray(y_test), length = 4, batch_size = 64)

# First batch -> inputs -> first window.
input_shape = train_sequences[0][0][0].shape

# Stop training once val_loss has gone 5 epochs without improving.
early_stop = EarlyStopping(monitor = 'val_loss', patience = 5)

In [13]:
# Baseline: a single 96-unit LSTM layer followed by one linear output neuron.
model = Sequential([
    LSTM(96, input_shape = input_shape),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report an RMSE.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Train silently for at most 50 epochs, validating on test_sequences each epoch.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

  super().__init__(**kwargs)
  self._warn_if_super_not_called()


Mean Absolute Error - Training: 0.95058
Mean Absolute Error - Testing: 0.75021
Root Mean Squared Error - Training: 1.76877
Root Mean Squared Error - Testing: 1.2032


In [14]:
# Two stacked LSTM layers (8 then 32 units), a small ReLU layer, and a linear output.
# The first LSTM returns the full sequence so the second one can consume it.
model = Sequential([
    LSTM(8, input_shape = input_shape, return_sequences = True),
    LSTM(32, return_sequences = False),
    Dense(8, activation = 'relu'),
    Dense(1, activation = 'linear'),
])

# Optimise MAE with Adam and track MSE alongside it.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Up to 50 silent epochs, validated on test_sequences, with the early_stop callback attached.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

Mean Absolute Error - Training: 1.05637
Mean Absolute Error - Testing: 1.09387
Root Mean Squared Error - Training: 1.90334
Root Mean Squared Error - Testing: 1.65777


In [15]:
# A small 8-unit LSTM feeding a wide dense head: 16 units, then three 512-unit
# ReLU layers, then the linear output neuron.
model = Sequential([
    LSTM(8, input_shape = input_shape, return_sequences = False),
    Dense(16, activation = 'relu'),
    *[Dense(512, activation = 'relu') for _ in range(3)],
    Dense(1, activation = 'linear'),
])

# Optimise MAE with Adam and track MSE alongside it.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Up to 50 silent epochs, validated on test_sequences, with the early_stop callback attached.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

Mean Absolute Error - Training: 1.04678
Mean Absolute Error - Testing: 1.43587
Root Mean Squared Error - Training: 1.89877
Root Mean Squared Error - Testing: 2.19031


In [16]:
# Three stacked LSTM layers (32, 32, 64 units) followed by a dense head of
# 16, 512 and 512 ReLU units and a linear output neuron.
# Only the first layer declares input_shape; the later LSTM layers take their
# input from the layer before them, so repeating the argument there is redundant.
model = Sequential()

model.add(LSTM(32, input_shape = input_shape, return_sequences = True))
model.add(LSTM(32, return_sequences = True))
model.add(LSTM(64, return_sequences = False))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(512, activation = 'relu'))
model.add(Dense(512, activation = 'relu'))
model.add(Dense(1, activation = 'linear'))

# MAE is the training loss; MSE is tracked so print_last can report an RMSE.
model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001),
              metrics = ['mse'])

# Train silently for at most 50 epochs, validating on test_sequences each epoch.
res = model.fit(train_sequences,
                validation_data = test_sequences,
                epochs = 50,
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 1.05795
Mean Absolute Error - Testing: 1.03635
Root Mean Squared Error - Training: 1.91572
Root Mean Squared Error - Testing: 1.59709


In [None]:
# Three stacked LSTM layers of growing width (32, 64, 96 units) followed by a dense
# head of 16, 512 and 512 ReLU units and a linear output neuron.
# Only the first layer declares input_shape; the later LSTM layers take their
# input from the layer before them, so repeating the argument there is redundant.
model = Sequential()

model.add(LSTM(32, input_shape = input_shape, return_sequences = True))
model.add(LSTM(64, return_sequences = True))
model.add(LSTM(96, return_sequences = False))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(512, activation = 'relu'))
model.add(Dense(512, activation = 'relu'))
model.add(Dense(1, activation = 'linear'))

# MAE is the training loss; MSE is tracked so print_last can report an RMSE.
model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001),
              metrics = ['mse'])

# Train silently for at most 50 epochs, validating on test_sequences each epoch.
res = model.fit(train_sequences,
                validation_data = test_sequences,
                epochs = 50,
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Investigating sequences of 16 intervals in length (4 hours)

In [None]:
# Rebuild the window generators with 16-step windows and batches of 96.
# Targets go in as NumPy arrays because the generator indexes them by integer
# position, which a datetime-indexed Series only supports through a deprecated
# positional fallback in pandas.
train_sequences = TimeseriesGenerator(X_train_sc, np.asarray(y_train), length = 16, batch_size = 96)
test_sequences = TimeseriesGenerator(X_test_sc, np.asarray(y_test), length = 16, batch_size = 96)

# First batch -> inputs -> first window.
input_shape = train_sequences[0][0][0].shape

In [None]:
# Single 96-unit LSTM layer with a linear output neuron.
model = Sequential([
    LSTM(96, input_shape = input_shape),
    Dense(1, activation = 'linear'),
])

# Optimise MAE with Adam and track MSE alongside it.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Up to 50 silent epochs, validated on test_sequences, with the early_stop callback attached.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
# Stacked LSTMs (8 units returning sequences, then 32 units), an 8-unit ReLU layer,
# and a linear output neuron.
model = Sequential([
    LSTM(8, input_shape = input_shape, return_sequences = True),
    LSTM(32, return_sequences = False),
    Dense(8, activation = 'relu'),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report an RMSE.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Train silently for at most 50 epochs, validating on test_sequences each epoch.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
# 8-unit LSTM followed by a dense head: 16 ReLU units, three 512-unit ReLU layers,
# and a linear output neuron.
model = Sequential([
    LSTM(8, input_shape = input_shape, return_sequences = False),
    Dense(16, activation = 'relu'),
    *[Dense(512, activation = 'relu') for _ in range(3)],
    Dense(1, activation = 'linear'),
])

# Optimise MAE with Adam and track MSE alongside it.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Up to 50 silent epochs, validated on test_sequences, with the early_stop callback attached.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
# 96-unit LSTM followed by a dense head: 16 ReLU units, three 512-unit ReLU layers,
# and a linear output neuron.
model = Sequential([
    LSTM(96, input_shape = input_shape, return_sequences = False),
    Dense(16, activation = 'relu'),
    *[Dense(512, activation = 'relu') for _ in range(3)],
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report an RMSE.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Train silently for at most 50 epochs, validating on test_sequences each epoch.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
# Three stacked LSTM layers (32, 32, 64 units) followed by a dense head of
# 16, 512 and 512 ReLU units and a linear output neuron.
# Only the first layer declares input_shape; the later LSTM layers take their
# input from the layer before them, so repeating the argument there is redundant.
model = Sequential()

model.add(LSTM(32, input_shape = input_shape, return_sequences = True))
model.add(LSTM(32, return_sequences = True))
model.add(LSTM(64, return_sequences = False))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(512, activation = 'relu'))
model.add(Dense(512, activation = 'relu'))
model.add(Dense(1, activation = 'linear'))

# MAE is the training loss; MSE is tracked so print_last can report an RMSE.
model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001),
              metrics = ['mse'])

# Train silently for at most 50 epochs, validating on test_sequences each epoch.
res = model.fit(train_sequences,
                validation_data = test_sequences,
                epochs = 50,
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Investigating sequences of 96 intervals in length (1 Day)

In [None]:
# Rebuild the window generators with 96-step windows and batches of 672.
# Targets go in as NumPy arrays because the generator indexes them by integer
# position, which a datetime-indexed Series only supports through a deprecated
# positional fallback in pandas.
train_sequences = TimeseriesGenerator(X_train_sc, np.asarray(y_train), length = 96, batch_size = 672)
test_sequences = TimeseriesGenerator(X_test_sc, np.asarray(y_test), length = 96, batch_size = 672)

# First batch -> inputs -> first window.
input_shape = train_sequences[0][0][0].shape

In [None]:
# Single 96-unit LSTM layer with a linear output neuron.
model = Sequential([
    LSTM(96, input_shape = input_shape),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report an RMSE.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Train silently for at most 50 epochs, validating on test_sequences each epoch.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
# Wider variant: a single 192-unit LSTM layer with a linear output neuron.
model = Sequential([
    LSTM(192, input_shape = input_shape),
    Dense(1, activation = 'linear'),
])

# Optimise MAE with Adam and track MSE alongside it.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Up to 50 silent epochs, validated on test_sequences, with the early_stop callback attached.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
# Stacked LSTMs (8 units returning sequences, then 32 units), an 8-unit ReLU layer,
# and a linear output neuron.
model = Sequential([
    LSTM(8, input_shape = input_shape, return_sequences = True),
    LSTM(32, return_sequences = False),
    Dense(8, activation = 'relu'),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report an RMSE.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Train silently for at most 50 epochs, validating on test_sequences each epoch.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
# 8-unit LSTM followed by a dense head: 16 ReLU units, three 512-unit ReLU layers,
# and a linear output neuron.
model = Sequential([
    LSTM(8, input_shape = input_shape, return_sequences = False),
    Dense(16, activation = 'relu'),
    *[Dense(512, activation = 'relu') for _ in range(3)],
    Dense(1, activation = 'linear'),
])

# Optimise MAE with Adam and track MSE alongside it.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Up to 50 silent epochs, validated on test_sequences, with the early_stop callback attached.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
# Three stacked LSTM layers (32, 32, 64 units) followed by a dense head of
# 16, 512 and 512 ReLU units and a linear output neuron.
# Only the first layer declares input_shape; the later LSTM layers take their
# input from the layer before them, so repeating the argument there is redundant.
model = Sequential()

model.add(LSTM(32, input_shape = input_shape, return_sequences = True))
model.add(LSTM(32, return_sequences = True))
model.add(LSTM(64, return_sequences = False))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(512, activation = 'relu'))
model.add(Dense(512, activation = 'relu'))
model.add(Dense(1, activation = 'linear'))

# MAE is the training loss; MSE is tracked so print_last can report an RMSE.
model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001),
              metrics = ['mse'])

# Train silently for at most 50 epochs, validating on test_sequences each epoch.
res = model.fit(train_sequences,
                validation_data = test_sequences,
                epochs = 50,
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

It seems univariate modeling gives much better scores, probably because the variability in the weather data compounds the variability of the solar generation.

---
# Hourly Modeling
---

In [None]:
# Aggregate the weather-covered frame to hourly means; hours with no readings are
# forward-filled from the previous hour.
# The lowercase 'h' alias is used because the uppercase 'H' alias is deprecated in pandas.
hourly = ww.resample('h').mean().ffill()

In [None]:
# Predictors are all hourly columns except the target, SolarGeneration.
features = [col for col in hourly.columns if col != 'SolarGeneration']
X = hourly.loc[:, features]
y = hourly.loc[:, 'SolarGeneration']

# shuffle = False keeps the rows in their original order: the first 80% train, the last 20% test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, shuffle = False)

In [None]:
# Refit a fresh scaler on the hourly training predictors and apply it to both partitions.
ss = StandardScaler()
ss.fit(X_train)
X_train_sc = ss.transform(X_train)
X_test_sc = ss.transform(X_test)

First investigating 12-hour sequences

In [None]:
# Rebuild the window generators with 12-step windows and batches of 48.
# Targets go in as NumPy arrays because the generator indexes them by integer
# position, which a datetime-indexed Series only supports through a deprecated
# positional fallback in pandas.
train_sequences = TimeseriesGenerator(X_train_sc, np.asarray(y_train), length = 12, batch_size = 48)
test_sequences = TimeseriesGenerator(X_test_sc, np.asarray(y_test), length = 12, batch_size = 48)

# First batch -> inputs -> first window.
input_shape = train_sequences[0][0][0].shape

In [None]:
# Single 24-unit LSTM layer with a linear output neuron.
model = Sequential([
    LSTM(24, input_shape = input_shape),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report an RMSE.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Train silently for at most 50 epochs, validating on test_sequences each epoch.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
# Single 48-unit LSTM layer with a linear output neuron.
model = Sequential([
    LSTM(48, input_shape = input_shape),
    Dense(1, activation = 'linear'),
])

# Optimise MAE with Adam and track MSE alongside it.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Up to 50 silent epochs, validated on test_sequences, with the early_stop callback attached.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
# Single 144-unit LSTM layer with a linear output neuron.
model = Sequential([
    LSTM(144, input_shape = input_shape),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report an RMSE.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Train silently for at most 50 epochs, validating on test_sequences each epoch.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

Investigating 24-hour sequences

In [None]:
# Rebuild the window generators with 24-step windows and batches of 72.
# Targets go in as NumPy arrays because the generator indexes them by integer
# position, which a datetime-indexed Series only supports through a deprecated
# positional fallback in pandas.
train_sequences = TimeseriesGenerator(X_train_sc, np.asarray(y_train), length = 24, batch_size = 72)
test_sequences = TimeseriesGenerator(X_test_sc, np.asarray(y_test), length = 24, batch_size = 72)

# First batch -> inputs -> first window.
input_shape = train_sequences[0][0][0].shape

In [None]:
# Single 24-unit LSTM layer with a linear output neuron.
model = Sequential([
    LSTM(24, input_shape = input_shape),
    Dense(1, activation = 'linear'),
])

# Optimise MAE with Adam and track MSE alongside it.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Up to 50 silent epochs, validated on test_sequences, with the early_stop callback attached.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
# Single 48-unit LSTM layer with a linear output neuron.
model = Sequential([
    LSTM(48, input_shape = input_shape),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report an RMSE.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Train silently for at most 50 epochs, validating on test_sequences each epoch.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
# Single 144-unit LSTM layer with a linear output neuron.
model = Sequential([
    LSTM(144, input_shape = input_shape),
    Dense(1, activation = 'linear'),
])

# Optimise MAE with Adam and track MSE alongside it.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Up to 50 silent epochs, validated on test_sequences, with the early_stop callback attached.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

---
# Daily Investigation
---

In [None]:
# Daily maximum of every column, taken over the weather-covered frame `ww` (the same
# source the hourly aggregate uses) rather than the full `df`, whose earlier rows
# fall outside the slice where weather information is available.
daily = ww.resample('D').max()

In [None]:
# Build the design matrix from the daily aggregate (not the 15-minute frame `ww`),
# so that this section really models daily data.
# Interior gaps are forward-filled; days that still contain missing values afterwards
# (e.g. any days before the weather columns start) are dropped.
daily_clean = daily.ffill().dropna()

features = [i for i in daily_clean.columns if i != 'SolarGeneration']
X = daily_clean[features]
y = daily_clean['SolarGeneration']

# shuffle = False keeps the rows in their original order: the first 80% train, the last 20% test.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = False, test_size = 0.2)

In [None]:
# Refit a fresh scaler on the current training predictors and apply it to both partitions.
ss = StandardScaler()
ss.fit(X_train)
X_train_sc = ss.transform(X_train)
X_test_sc = ss.transform(X_test)

First looking at 4-day sequences

In [None]:
# Rebuild the window generators with windows of 4 consecutive rows and batches of 12.
# Targets go in as NumPy arrays because the generator indexes them by integer
# position, which a datetime-indexed Series only supports through a deprecated
# positional fallback in pandas.
train_sequences = TimeseriesGenerator(X_train_sc, np.asarray(y_train), length = 4, batch_size = 12)
test_sequences = TimeseriesGenerator(X_test_sc, np.asarray(y_test), length = 4, batch_size = 12)

# First batch -> inputs -> first window.
input_shape = train_sequences[0][0][0].shape

In [None]:
# Single 10-unit LSTM layer with a linear output neuron.
model = Sequential([
    LSTM(10, input_shape = input_shape),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report an RMSE.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Train silently for at most 50 epochs, validating on test_sequences each epoch.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
# Single 30-unit LSTM layer with a linear output neuron.
model = Sequential([
    LSTM(30, input_shape = input_shape),
    Dense(1, activation = 'linear'),
])

# Optimise MAE with Adam and track MSE alongside it.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Up to 50 silent epochs, validated on test_sequences, with the early_stop callback attached.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
# Single 10-unit LSTM layer with a linear output neuron.
# NOTE(review): this is the same configuration as the earlier 10-unit LSTM cell in this
# section - confirm whether a different layer size was intended here.
model = Sequential([
    LSTM(10, input_shape = input_shape),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report an RMSE.
model.compile(optimizer = Adam(learning_rate = 0.001),
              loss = 'mae',
              metrics = ['mse'])

# Train silently for at most 50 epochs, validating on test_sequences each epoch.
res = model.fit(train_sequences,
                epochs = 50,
                validation_data = test_sequences,
                callbacks = [early_stop],
                verbose = 0)

print_last(res)

In [None]:
 model = Sequential()

model.add(SimpleRNN(7, input_shape = input_shape, return_sequences = True))
model.add(SimpleRNN(7))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)