##  GRU

### Load data

In [20]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd

np.random.seed(1)
tf.random.set_seed(1)

In [21]:
X_test = pd.read_csv('data/bb_test_X.csv')
y_test = pd.read_csv('data/bb_test_y.csv')
X_train = pd.read_csv('data/bb_train_X.csv')
y_train = pd.read_csv('data/bb_train_y.csv')

## Data Transformation

In [22]:
#Target variables need to be an array with integer type
y_train = np.array(y_train)
y_test = np.array(y_test)

y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

In [23]:
#Check the first 10 values of the train_y data set
y_train[0:10]

array([[0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0]])

In [24]:
#Convert input variables to a 2-D array with float data type
X_train = np.array(X_train)
X_test = np.array(X_test)

X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)

In [25]:
X_train

array([[ -3.,  -3.,  -2., ...,  -5.,  -3.,  -7.],
       [  0.,   1.,  -1., ...,  10.,  12.,  15.],
       [ -3.,  -2.,  -2., ...,  14.,  12.,  12.],
       ...,
       [  2.,   6.,   8., ...,  -2.,   1.,   0.],
       [ -2.,   1.,  -1., ..., -19., -21., -14.],
       [  2.,   1.,   3., ...,   0.,   2.,   4.]], dtype=float32)

In [26]:
#Keras expects a different input format:
#Data needs to have 3 dimensions

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [27]:
X_train.shape, y_train.shape

((861, 36, 1), (861, 1))

In [28]:
X_train

array([[[ -3.],
        [ -3.],
        [ -2.],
        ...,
        [ -5.],
        [ -3.],
        [ -7.]],

       [[  0.],
        [  1.],
        [ -1.],
        ...,
        [ 10.],
        [ 12.],
        [ 15.]],

       [[ -3.],
        [ -2.],
        [ -2.],
        ...,
        [ 14.],
        [ 12.],
        [ 12.]],

       ...,

       [[  2.],
        [  6.],
        [  8.],
        ...,
        [ -2.],
        [  1.],
        [  0.]],

       [[ -2.],
        [  1.],
        [ -1.],
        ...,
        [-19.],
        [-21.],
        [-14.]],

       [[  2.],
        [  1.],
        [  3.],
        ...,
        [  0.],
        [  2.],
        [  4.]]], dtype=float32)

# Conv1D with one layer

In [29]:
n_steps = 36
n_inputs = 1

model = keras.models.Sequential([
    keras.layers.Conv1D(filters=10, kernel_size=3, strides=2, padding="valid", input_shape=[n_steps, n_inputs]),
    keras.layers.Dense(1, activation='sigmoid')
])

In [30]:
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')

np.random.seed(42)
tf.random.set_seed(42)

optimizer = keras.optimizers.Nadam(learning_rate=0.01)

model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])

history = model.fit(
    X_train, 
    y_train, 
    epochs=20,
    validation_data=(X_test, y_test),
    callbacks=[early_stop]) # we defined this early_stop callback function earlier

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 11: early stopping


In [31]:
# evaluate the model

scores = model.evaluate(X_test, y_test, verbose=0)
scores

# In results, first is loss, second is accuracy

[0.5619614720344543, 0.7003028988838196]

In [32]:
# extract the accuracy from model.evaluate
print(f"{model.metrics_names[0]:s}: {scores[0]:.2f}")
print(f"{model.metrics_names[1]:s}: {scores[1]*100:.2f}")

loss: 0.56
accuracy: 70.03


# Conv1D with more layers

In [33]:
n_steps = 36
n_inputs = 1

model = keras.models.Sequential([
    keras.layers.Conv1D(filters=10, kernel_size=3, strides=2, padding="valid", input_shape=[n_steps, n_inputs]),
    keras.layers.Conv1D(filters=20, kernel_size=3, strides=1, padding="valid", dilation_rate=2),
    keras.layers.LSTM(32, return_sequences=True),
    keras.layers.LSTM(32),
    keras.layers.Dense(1, activation='sigmoid')
])

In [34]:
np.random.seed(42)
tf.random.set_seed(42)

optimizer = keras.optimizers.Nadam(learning_rate=0.01)

model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])

history = model.fit(
    X_train, 
    y_train, 
    epochs=20,
    validation_data=(X_test, y_test), 
    callbacks=[early_stop] # we defined this early_stop callback function earlier
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 9: early stopping


In [35]:
# evaluate the model

scores = model.evaluate(X_test, y_test, verbose=0)
scores

# In results, first is loss, second is accuracy

[0.5336984992027283, 0.7344173192977905]

In [36]:
# extract the accuracy from model.evaluate

print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


loss: 0.53
accuracy: 73.44%
