# Exercise - RNN Classification

In this notebook, we will perform a classification task using RNNs (i.e., a sequence to value prediction). We have hourly power consumption of households for 12 hours. Based on this, we will determine whether the power grid is strained (1) or not (0). 

Therefore, use the columns from `Hour 0` to `Hour 11` to predict the `target` column in the `power.csv` data set.

Hint1: Use Tutorial 1 for help.

Hint2: Don't forget to adjust the number of neurons in the input layers correctly. Otherwise, you will run into errors.

In [15]:
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import mean_squared_error


# Common imports
import numpy as np
import os
import pandas as pd

# to make this notebook's output stable across runs
np.random.seed(39)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)



# Read the Dataset

In [16]:
power = pd.read_csv('power.csv')

power.head()

Unnamed: 0,Hour 0,Hour 1,Hour 2,Hour 3,Hour 4,Hour 5,Hour 6,Hour 7,Hour 8,Hour 9,Hour 10,Hour 11,target
0,2.550633,2.5234,2.582333,2.541667,2.475733,2.476233,2.4558,2.4472,2.441733,3.146133,2.661733,2.576,1
1,1.596933,1.619567,2.473733,2.731133,2.431133,2.479667,1.6902,1.332133,1.375167,1.0509,0.5859,2.6519,1
2,0.534933,0.540467,0.575367,0.5265,0.5219,0.565333,1.426467,0.602067,0.547433,0.525067,1.2703,0.393767,0
3,1.085867,0.651233,0.6346,0.653,0.646067,0.6284,0.611067,0.612533,0.6601,0.606067,1.471867,0.834533,0
4,0.456,0.2863,0.310833,0.250933,0.277667,0.308633,0.6104,1.563533,1.421867,3.3244,3.207567,1.425433,1


In [17]:
power.shape

(1417, 13)

# Split the Data



In [18]:
# First 1000 days are for train
train = power.iloc[:1000]

# Remaining 417 days are for test
test = power.iloc[-417:]

In [19]:
train.shape

(1000, 13)

In [20]:
test.shape

(417, 13)

# Create Input and Target values

The first 12 columns (hourly data) will be input to predict the last column (i.e., target)

In [21]:
# The first 12 columns (from 0 to 11) are inputs

train_inputs = train.iloc[:,:12]

## Add one more dimension to make it ready for RNNs

In [22]:
#Create an additional dimension for train

train_x = np.array(train_inputs).reshape(1000,12,1)

train_x.shape 

(1000, 12, 1)

## Set the target

In [23]:
# The last column is TARGET

train_target = train.iloc[:,-1]

## Repeat for TEST

In [24]:
test.shape

(417, 13)

In [25]:
# The first 12 columns are inputs

test_inputs = test.iloc[:,:12]

In [26]:
#Create an additional dimension for test

test_x = np.array(test_inputs).reshape(417,12,1)

test_x.shape 

(417, 12, 1)

In [27]:
# The last column is TARGET

test_target = test.iloc[:,-1]

In [30]:
from sklearn.dummy import DummyClassifier

dummy_clf = DummyClassifier(strategy="most_frequent")

dummy_clf.fit(train_x, train_target)

In [29]:
from sklearn.metrics import accuracy_score

In [31]:
#Baseline Train Accuracy
dummy_train_pred = dummy_clf.predict(train_x)

baseline_train_acc = accuracy_score(train_target, dummy_train_pred)

print('Baseline Train Accuracy: {}' .format(baseline_train_acc))

Baseline Train Accuracy: 0.505


In [32]:
#Baseline Test Accuracy
dummy_test_pred = dummy_clf.predict(test_x)

baseline_test_acc = accuracy_score(test_target, dummy_test_pred)

print('Baseline Test Accuracy: {}' .format(baseline_test_acc))

Baseline Test Accuracy: 0.49640287769784175


# Build a normal (cross-sectional) NN

This model assumes that the data is NOT a time-series data set. It treats the data as cross-sectional and the columns being independent of each other.

In [36]:
model = keras.models.Sequential([
    
    keras.layers.Flatten(input_shape=[12, 1]),
    keras.layers.Dense(12, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
    
])

In [76]:
np.random.seed(25)
tf.random.set_seed(25)

optimizer = tf.keras.optimizers.Nadam(learning_rate=0.01)

# If multiclass, use "sparse_categorical_crossentropy" as the loss function
model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])


history = model.fit(train_x, train_target, epochs=50,
                    validation_data=(test_x, test_target))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [39]:
# evaluate the model

scores = model.evaluate(test_x, test_target, verbose=0)

scores

# In results, first is loss, second is accuracy

[0.5453658103942871, 0.7338129281997681]

In [40]:
# extract the accuracy from model.evaluate

print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


loss: 0.55
accuracy: 73.38%


# Build a simple RNN with one layer

In [41]:
n_steps = 12
n_inputs = 1


model = keras.models.Sequential([
    
    keras.layers.SimpleRNN(32, input_shape=[n_steps, n_inputs]),
    keras.layers.Dense(1, activation='sigmoid')
])

In [42]:
from tensorflow.keras.callbacks import EarlyStopping


earlystop = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')

callback = [earlystop]

In [45]:
np.random.seed(42)
tf.random.set_seed(42)

optimizer = tf.keras.optimizers.Nadam(learning_rate=0.01)

# If multiclass, use "sparse_categorical_crossentropy" as the loss function
model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])


history = model.fit(train_x, train_target, epochs=50,
                    validation_data=(test_x, test_target), callbacks=callback)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 20: early stopping


In [77]:
# evaluate the model

scores = model.evaluate(test_x, test_target, verbose=0)

scores

# In results, first is loss, second is accuracy

[0.9094948172569275, 0.7314148545265198]

In [78]:
# extract the accuracy from model.evaluate

print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


loss: 0.91
accuracy: 73.14%


In [79]:
# Predictions are probabilities.

predictions = model.predict(test_x)



In [80]:
# Rounding the probabilities determines 1 or 0

np.round(predictions)

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],

In [81]:
from sklearn.metrics import confusion_matrix

confusion_matrix(test_target, np.round(predictions))

array([[153,  57],
       [ 55, 152]], dtype=int64)

# Build a simple RNN with two or more layers

In [82]:
n_steps = 12
n_inputs = 1


model = keras.models.Sequential([
    keras.layers.SimpleRNN(32, return_sequences=True, input_shape=[n_steps, n_inputs] ),
    keras.layers.SimpleRNN(32, return_sequences=True),
    keras.layers.SimpleRNN(32), 
    keras.layers.Dense(1, activation='sigmoid')
])


In [83]:
np.random.seed(30)
tf.random.set_seed(30)

optimizer = keras.optimizers.Nadam(learning_rate=0.01)

model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])

history = model.fit(train_x, train_target, epochs=20,
                   validation_data = (test_x, test_target), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 13: early stopping


In [84]:
# evaluate the model

scores = model.evaluate(test_x, test_target, verbose=0)

scores

# In results, first is loss, second is accuracy

[0.5087302923202515, 0.7458033561706543]

In [85]:
# extract the accuracy from model.evaluate

print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


loss: 0.51
accuracy: 74.58%


# Build a LSTM with one layer

In [86]:
n_steps = 12
n_inputs = 1

model = keras.models.Sequential([
    
    keras.layers.LSTM(32, input_shape=[n_steps, n_inputs]),
    keras.layers.Dense(1, activation='sigmoid')
])

In [93]:
np.random.seed(57)
tf.random.set_seed(57)

optimizer = keras.optimizers.Nadam(learning_rate=0.0001)

model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])

history = model.fit(train_x, train_target, epochs=20,
                   validation_data = (test_x, test_target), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 16: early stopping


In [94]:
# evaluate the model

scores = model.evaluate(test_x, test_target, verbose=0)

scores

# In results, first is loss, second is accuracy

[0.5016409158706665, 0.7482014298439026]

In [95]:
# extract the accuracy from model.evaluate

print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


loss: 0.50
accuracy: 74.82%


# Build a LSTM with two or more layers

In [60]:
n_steps = 12
n_inputs = 1

model = keras.models.Sequential([
    keras.layers.LSTM(32, return_sequences=True, input_shape=[n_steps, n_inputs]),
    keras.layers.LSTM(32, return_sequences=True),
    keras.layers.LSTM(32),
    keras.layers.Dense(1, activation='sigmoid')
])

In [96]:
np.random.seed(89)
tf.random.set_seed(89)

optimizer = keras.optimizers.Nadam(learning_rate=0.001)

model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])

history = model.fit(train_x, train_target, epochs=20,
                   validation_data = (test_x, test_target), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [97]:
# evaluate the model

scores = model.evaluate(test_x, test_target, verbose=0)

scores

# In results, first is loss, second is accuracy

[0.49387261271476746, 0.7577937841415405]

In [98]:
# extract the accuracy from model.evaluate

print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


loss: 0.49
accuracy: 75.78%


# Build a GRU with one layer

In [66]:
n_steps = 12
n_inputs = 1

model = keras.models.Sequential([
    keras.layers.GRU(32, input_shape=[n_steps, n_inputs]),
    keras.layers.Dense(1, activation='sigmoid')
])

In [105]:
np.random.seed(34)
tf.random.set_seed(34)

optimizer = keras.optimizers.Nadam(learning_rate=0.01)

model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])

history = model.fit(train_x, train_target, epochs=20,
                   validation_data = (test_x, test_target), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 6: early stopping


In [106]:
# evaluate the model

scores = model.evaluate(test_x, test_target, verbose=0)

scores

# In results, first is loss, second is accuracy

[0.500237762928009, 0.7577937841415405]

In [107]:
# extract the accuracy from model.evaluate

print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


loss: 0.50
accuracy: 75.78%


# Build a GRU with two or more layers

In [108]:
n_steps = 12
n_inputs = 1

model = keras.models.Sequential([
    keras.layers.GRU(32, return_sequences=True, input_shape=[n_steps, n_inputs]),
    keras.layers.GRU(32, return_sequences=True),
    keras.layers.GRU(32),
    keras.layers.Dense(1, activation='sigmoid')
])

In [111]:
np.random.seed(75)
tf.random.set_seed(75)

optimizer = keras.optimizers.Nadam(learning_rate=0.001)

model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])

history = model.fit(train_x, train_target, epochs=20,
                   validation_data = (test_x, test_target), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 6: early stopping


In [112]:
# evaluate the model

scores = model.evaluate(test_x, test_target, verbose=0)

scores

# In results, first is loss, second is accuracy

[0.507764995098114, 0.7529975771903992]

In [113]:
# extract the accuracy from model.evaluate

print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


loss: 0.51
accuracy: 75.30%
