In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.layers import LSTM, Flatten
from numpy import array

In [None]:
# Raw CSV hosted on GitHub; fetched over HTTP each time this cell runs.
url = 'https://raw.githubusercontent.com/rantao-usc/problemset6/main/ps6_trainvalid.csv'
# Load the file into a DataFrame; later cells pick columns from it by position.
df = pd.read_csv(url)

# Using multiple features

In [2]:
# split a multivariate sequence into samples
def split_sequences(sequences, n_steps, target_col=0):
  """Slice a multivariate series into sliding windows and next-step targets.

  Sample k pairs rows k .. k+n_steps-1 (the input window) with the value in
  column `target_col` of row k+n_steps (the target).

  Args:
    sequences: array-like with at least two dimensions, laid out as
      [time step, feature]. It is converted with np.asarray, so nested
      lists are accepted as well as ndarrays.
    n_steps: number of consecutive rows in each input window (must be >= 1).
    target_col: column of the row following each window to use as the
      target. Defaults to 0, which is what this function always used.

  Returns:
    (X, y): for 2-D input, X has shape (n_samples, n_steps, n_features) and
    y has shape (n_samples,), with n_samples = max(len(sequences) - n_steps, 0).
    When there are too few rows for a single window, both arrays are empty
    but keep those shapes.

  Raises:
    ValueError: if `sequences` has fewer than two dimensions or n_steps < 1.
  """
  sequences = np.asarray(sequences)
  if sequences.ndim < 2:
    raise ValueError("sequences must have shape [time steps, features]")
  if n_steps < 1:
    raise ValueError("n_steps must be a positive integer")

  # the last usable window must leave one row after it for the target
  n_samples = max(len(sequences) - n_steps, 0)
  if n_samples == 0:
    # np.stack cannot handle an empty list, so build the empty result explicitly
    X = np.empty((0, n_steps) + sequences.shape[1:], dtype=sequences.dtype)
  else:
    X = np.stack([sequences[start:start + n_steps] for start in range(n_samples)])
  # targets are simply the chosen column from row n_steps onwards;
  # copy so the result does not alias the caller's array
  y = sequences[n_steps:, target_col].copy()
  return X, y

In [5]:
from numpy import hstack
# Toy data: two input series and a third series that is their element-wise sum
in_seq1 = np.arange(10, 100, 10)
in_seq2 = in_seq1 + 5
out_seq = in_seq1 + in_seq2
# Reshape every series into a single column so they can sit side by side
in_seq1, in_seq2, out_seq = (
    series.reshape(-1, 1) for series in (in_seq1, in_seq2, out_seq)
)
# One row per time step, one column per series
dataset = hstack((in_seq1, in_seq2, out_seq))
print(dataset)
# Window length for the toy example
n_steps = 3
# Cut the table into (window, target) pairs
X, y = split_sequences(dataset, n_steps)
print(X.shape, y.shape)
# Show each window next to its target (the first column of the row that follows it)
for i, window in enumerate(X):
    print(window, y[i])

[[ 10  15  25]
 [ 20  25  45]
 [ 30  35  65]
 [ 40  45  85]
 [ 50  55 105]
 [ 60  65 125]
 [ 70  75 145]
 [ 80  85 165]
 [ 90  95 185]]
(6, 3, 3) (6,)
[[10 15 25]
 [20 25 45]
 [30 35 65]] 40
[[20 25 45]
 [30 35 65]
 [40 45 85]] 50
[[ 30  35  65]
 [ 40  45  85]
 [ 50  55 105]] 60
[[ 40  45  85]
 [ 50  55 105]
 [ 60  65 125]] 70
[[ 50  55 105]
 [ 60  65 125]
 [ 70  75 145]] 80
[[ 60  65 125]
 [ 70  75 145]
 [ 80  85 165]] 90


In [None]:
# Keep the column names at positions 1, 2 and 3 of the loaded frame (position 0 is skipped).
# NOTE(review): the saved output below lists only two names, yet later cells
# report three columns and index 'temperature' - the output looks stale; re-run to confirm.
cols = list(df)[1:4]
cols

['humidity', 'pressure']

In [None]:
# Restrict the frame to the selected columns.
# NOTE: this rebinds `df`, so re-running the `cols` cell afterwards gives a different list.
df = df[cols]

In [None]:
# (rows, columns) before rows with missing values are removed.
df.shape

(45013, 3)

In [None]:
# Drop every row that has a missing value in any remaining column.
# NOTE(review): removed rows leave gaps, so windows built later may span
# rows that were not adjacent in the raw data - confirm this is acceptable.
df = df.dropna()

In [None]:
# (rows, columns) after rows with missing values are removed.
df.shape

(44671, 3)

In [None]:
# Window length for the real data: 24 * 5 = 120 rows per sample.
# (presumably 5 days of hourly readings - TODO confirm the sampling interval)
n_steps = 24 * 5

In [None]:
# Feature sets for the two experiments. 'temperature' is listed first in both
# because split_sequences takes its target from column 0.
cols_1 = ['temperature', 'humidity']
cols_2 = ['temperature', 'pressure']

In [None]:
# Convert each feature set to a plain NumPy array (one row per time step, two columns);
# split_sequences indexes its input with array slicing.
df_1 = df[cols_1].values
df_2 = df[cols_2].values

## Using temperature and humidity

In [None]:
# Build sliding-window samples from the temperature/humidity array.
X, y = split_sequences(df_1, n_steps)
# Sanity check: X is (samples, n_steps, features), y is (samples,)
print(X.shape, y.shape)

(44551, 120, 2) (44551,)


In [None]:
# Number of features per time step (size of X's last axis); used for the model's input_shape.
n_features = X.shape[2]
n_features

2

In [None]:
# Split training and validation set
n = len(X)
train_X = X[0:int(n*0.7),:]
val_X = X[int(n*0.7):,:]

train_y = y[0:int(n*0.7)]
val_y = y[int(n*0.7):]

print("Shape of training X: {}".format(train_X.shape))
print("Shape of validation X: {}".format(val_X.shape))

print("Shape of training y: {}".format(train_y.shape))
print("Shape of validation y: {}".format(val_y.shape))

Shape of training X: (31185, 120, 2)
Shape of validation X: (13366, 120, 2)
Shape of training y: (31185,)
Shape of validation y: (13366,)


In [None]:
# Normalization
train_X_mean = np.mean(np.mean(train_X, axis=0), axis=0)
train_X_std = np.std(np.std(train_X, axis=0), axis=0)

train_y_mean = train_y.mean()
train_y_std = train_y.std()

normalized_train_X = (train_X - train_X_mean)/train_X_std
normalized_val_X = (val_X - train_X_mean)/train_X_std

normalized_train_y = (train_y - train_y_mean)/train_y_std
normalized_val_y = (val_y - train_y_mean)/train_y_std

In [None]:
# define model
RNN_model_2 = Sequential()
RNN_model_2.add(LSTM(10, activation='relu', input_shape=(n_steps, n_features)))
RNN_model_2.add(Dense(1))
optimizer = optimizers.Adam(clipvalue=0.5)
RNN_model_2.compile(optimizer=optimizer, loss='mse')
print(RNN_model_2.summary())

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 10)                520       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 11        
Total params: 531
Trainable params: 531
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# fit model
RNN_model_2.fit(normalized_train_X, normalized_train_y, epochs=100, validation_split=0.3, verbose=2)

Epoch 1/100
683/683 - 25s - loss: 3901247.0000 - val_loss: 2291.1516
Epoch 2/100
683/683 - 26s - loss: 531.4390 - val_loss: 19.0430
Epoch 3/100
683/683 - 26s - loss: 3.4406 - val_loss: 0.3437
Epoch 4/100
683/683 - 26s - loss: 0.2204 - val_loss: 0.1866
Epoch 5/100
683/683 - 26s - loss: 0.1903 - val_loss: 0.1354
Epoch 6/100
683/683 - 26s - loss: 0.1633 - val_loss: 0.3714
Epoch 7/100
683/683 - 26s - loss: 0.2896 - val_loss: 0.1366
Epoch 8/100
683/683 - 27s - loss: 0.1444 - val_loss: 0.0961
Epoch 9/100
683/683 - 26s - loss: 0.1283 - val_loss: 0.1036
Epoch 10/100
683/683 - 26s - loss: 0.1282 - val_loss: 0.0999
Epoch 11/100
683/683 - 26s - loss: 0.1257 - val_loss: 0.1105
Epoch 12/100
683/683 - 26s - loss: 0.1224 - val_loss: 0.1083
Epoch 13/100
683/683 - 25s - loss: 0.1171 - val_loss: 0.0919
Epoch 14/100
683/683 - 25s - loss: 0.1225 - val_loss: 0.0986
Epoch 15/100
683/683 - 25s - loss: 0.1223 - val_loss: 0.1050
Epoch 16/100
683/683 - 25s - loss: 0.1173 - val_loss: 0.1179
Epoch 17/100
683/683 

<tensorflow.python.keras.callbacks.History at 0x7f8a53021e50>

In [None]:
# Ground truth for evaluation: the un-normalized validation targets.
y_true = val_y
y_true.shape

(13366,)

In [None]:
# Predict on the normalized validation windows ...
normalized_y_pred = RNN_model_2.predict(normalized_val_X)
# ... then map the predictions back to the original target scale by
# inverting the z-score applied to the training targets
y_pred = train_y_mean + train_y_std * normalized_y_pred
y_pred.shape

(13366, 1)

In [None]:
# Evaluation
valScore = mean_squared_error(y_true, y_pred)
print('Mean Squared Error is: %.2f' % (valScore))

Mean Squared Error is: 1.85


## Using temperature and pressure

In [None]:
# Build sliding-window samples from the temperature/pressure array.
# NOTE: this rebinds X and y, replacing the temperature/humidity samples.
X, y = split_sequences(df_2, n_steps)
# Sanity check: X is (samples, n_steps, features), y is (samples,)
print(X.shape, y.shape)

(44551, 120, 2) (44551,)


In [None]:
# Number of features per time step (size of X's last axis); used for the model's input_shape.
n_features = X.shape[2]
n_features

2

In [None]:
# Split training and validation set
n = len(X)
train_X = X[0:int(n*0.7),:]
val_X = X[int(n*0.7):,:]

train_y = y[0:int(n*0.7)]
val_y = y[int(n*0.7):]

print("Shape of training X: {}".format(train_X.shape))
print("Shape of validation X: {}".format(val_X.shape))

print("Shape of training y: {}".format(train_y.shape))
print("Shape of validation y: {}".format(val_y.shape))

Shape of training X: (31185, 120, 2)
Shape of validation X: (13366, 120, 2)
Shape of training y: (31185,)
Shape of validation y: (13366,)


In [None]:
# Normalization
train_X_mean = np.mean(np.mean(train_X, axis=0), axis=0)
train_X_std = np.std(np.std(train_X, axis=0), axis=0)

train_y_mean = train_y.mean()
train_y_std = train_y.std()

normalized_train_X = (train_X - train_X_mean)/train_X_std
normalized_val_X = (val_X - train_X_mean)/train_X_std

normalized_train_y = (train_y - train_y_mean)/train_y_std
normalized_val_y = (val_y - train_y_mean)/train_y_std

In [None]:
# define model
from keras import optimizers

RNN_model_3 = Sequential()
RNN_model_3.add(LSTM(10, activation='relu', input_shape=(n_steps, n_features)))
RNN_model_3.add(Dense(1))
# To deal with exploding gradients 
optimizer = optimizers.Adam(clipvalue=0.5)
RNN_model_3.compile(optimizer=optimizer, loss='mse')
print(RNN_model_3.summary())

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 10)                520       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 11        
Total params: 531
Trainable params: 531
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# fit model
RNN_model_3.fit(normalized_train_X, normalized_train_y, epochs=100, validation_split=0.3, verbose=2)

Epoch 1/100
683/683 - 27s - loss: 6710080.5000 - val_loss: 70.4580
Epoch 2/100
683/683 - 26s - loss: 120.6870 - val_loss: 0.7246
Epoch 3/100
683/683 - 26s - loss: 1.1369 - val_loss: 0.7658
Epoch 4/100
683/683 - 25s - loss: 0.5751 - val_loss: 0.6275
Epoch 5/100
683/683 - 25s - loss: 0.4856 - val_loss: 0.3804
Epoch 6/100
683/683 - 25s - loss: 0.7754 - val_loss: 0.4584
Epoch 7/100
683/683 - 25s - loss: 0.4769 - val_loss: 0.3711
Epoch 8/100
683/683 - 25s - loss: 0.4673 - val_loss: 0.4875
Epoch 9/100
683/683 - 25s - loss: 0.6982 - val_loss: 0.3305
Epoch 10/100
683/683 - 25s - loss: 0.7309 - val_loss: 0.3809
Epoch 11/100
683/683 - 25s - loss: 1.1047 - val_loss: 5.7696
Epoch 12/100
683/683 - 25s - loss: 0.5251 - val_loss: 0.3425
Epoch 13/100
683/683 - 26s - loss: 0.4135 - val_loss: 0.4063
Epoch 14/100
683/683 - 26s - loss: 0.4131 - val_loss: 0.3557
Epoch 15/100
683/683 - 26s - loss: 0.3895 - val_loss: 0.3123
Epoch 16/100
683/683 - 25s - loss: 0.3893 - val_loss: 0.3366
Epoch 17/100
683/683 - 2

<tensorflow.python.keras.callbacks.History at 0x7f8a550e8910>

In [None]:
# Ground truth for evaluation: the un-normalized validation targets.
y_true = val_y
y_true.shape

(13366,)

In [None]:
# Predict on the normalized validation windows
normalized_y_pred = RNN_model_3.predict(normalized_val_X)
# Undo the target scaling with the stored training statistics - the same
# train_y_mean / train_y_std that produced normalized_train_y - so the inverse
# transform always matches the forward one instead of being recomputed here.
y_pred = normalized_y_pred * train_y_std + train_y_mean
y_pred.shape

(13366, 1)

In [None]:
# Evaluation
valScore = mean_squared_error(y_true, y_pred)
print('Mean Squared Error is: %.2f' % (valScore))

Mean Squared Error is: 2.45
