## Create the machine learning algorithm



### Import the relevant libraries

In [1]:
# we must import the libraries once again since we haven't imported them in this file
import numpy as np
import tensorflow as tf

### Data

In [2]:
def _load_split(path):
    """Load one Audiobooks dataset split from an .npz archive.

    Parameters
    ----------
    path : str
        Location of an .npz file that stores its arrays under the keywords
        'inputs' and 'targets'.

    Returns
    -------
    tuple of numpy.ndarray
        (inputs, targets), with inputs cast to float64 and targets cast to int64.
    """
    # np.load keeps the archive's file handle open until it is closed; the
    # context manager releases it once the arrays have been copied out by astype.
    with np.load(path) as archive:
        # np.float / np.int were removed in NumPy 1.24, so concrete dtypes are named explicitly.
        inputs = archive['inputs'].astype(np.float64)
        # targets must be integers because the model is trained with
        # sparse_categorical_crossentropy, which expects integer class labels
        targets = archive['targets'].astype(np.int64)
    return inputs, targets


# each of the three Audiobooks datasets is loaded the same way
train_inputs, train_targets = _load_split('files/Audiobooks_data_train.npz')
validation_inputs, validation_targets = _load_split('files/Audiobooks_data_validation.npz')
test_inputs, test_targets = _load_split('files/Audiobooks_data_test.npz')

### Model

In [10]:
# Network dimensions
# NOTE(review): input_size is not passed to any layer below; presumably Keras
# infers the input width from the data on the first fit call — confirm.
input_size = 10
output_size = 2
hidden_layer_size = 50

# Model contains 2 hidden layers with 50 nodes each, followed by a softmax output layer
model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer_size, activation="relu"),
    tf.keras.layers.Dense(hidden_layer_size, activation="relu"),
    tf.keras.layers.Dense(output_size, activation="softmax")
])

# Compile the model
# sparse_categorical_crossentropy takes integer class labels as targets
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=['accuracy'])

batch_size = 100
max_epochs = 100

# Left to run for all max_epochs the model tends to overfit, so training is
# stopped once the validation loss has not improved for 2 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# validation loss; without it the model keeps the weights from the last
# (already worse) epoch, and those are what the test evaluation would see.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=2,
    restore_best_weights=True,
)

# Keep the History object so the per-epoch metrics can be inspected later
# (assigning it also avoids printing its repr as the cell output).
history = model.fit(train_inputs,
                    train_targets,
                    batch_size=batch_size,
                    epochs=max_epochs,
                    callbacks=[early_stopping],
                    validation_data=(validation_inputs, validation_targets),
                    verbose=2)

Train on 3579 samples, validate on 447 samples
Epoch 1/100
3579/3579 - 2s - loss: 0.6039 - accuracy: 0.6996 - val_loss: 0.4557 - val_accuracy: 0.8635
Epoch 2/100
3579/3579 - 0s - loss: 0.3958 - accuracy: 0.8673 - val_loss: 0.3478 - val_accuracy: 0.8747
Epoch 3/100
3579/3579 - 0s - loss: 0.3275 - accuracy: 0.8799 - val_loss: 0.3244 - val_accuracy: 0.8747
Epoch 4/100
3579/3579 - 0s - loss: 0.3021 - accuracy: 0.8877 - val_loss: 0.3097 - val_accuracy: 0.8747
Epoch 5/100
3579/3579 - 0s - loss: 0.2869 - accuracy: 0.8910 - val_loss: 0.3043 - val_accuracy: 0.8814
Epoch 6/100
3579/3579 - 0s - loss: 0.2767 - accuracy: 0.8944 - val_loss: 0.2989 - val_accuracy: 0.8859
Epoch 7/100
3579/3579 - 0s - loss: 0.2673 - accuracy: 0.8980 - val_loss: 0.2957 - val_accuracy: 0.8859
Epoch 8/100
3579/3579 - 0s - loss: 0.2617 - accuracy: 0.8980 - val_loss: 0.2983 - val_accuracy: 0.8881
Epoch 9/100
3579/3579 - 0s - loss: 0.2592 - accuracy: 0.9016 - val_loss: 0.2891 - val_accuracy: 0.8904
Epoch 10/100
3579/3579 - 0

<tensorflow.python.keras.callbacks.History at 0x29a6b2318c8>

### Test the model

In [11]:
# Score the trained model on the test data; with metrics=['accuracy'] configured at
# compile time, evaluate returns the loss followed by the accuracy.
evaluation = model.evaluate(test_inputs, test_targets)
test_loss, test_accuracy = evaluation



In [12]:
# Report the test metrics with two decimals; accuracy is shown as a percentage.
report_template = "Test Loss: {0:.2f}, Test Accuracy: {1:.2f}%"
accuracy_percent = test_accuracy*100.
print(report_template.format(test_loss, accuracy_percent))

Test Loss: 0.27, Test Accuracy: 89.96%
