## Business case - creating the machine learning algorithm

<img src='audobooks_deep learning1.png'>

### Import the libraries

In [1]:
import numpy as np
import tensorflow as tf

### Data

In [2]:
npz = np.load('Audiobooks_data_train.npz')

train_inputs = npz['inputs'].astype(np.float)   # we expect all inputs to be floats
train_targets = npz['targets'].astype(np.int)

npz = np.load('Audiobooks_data_validation.npz')
validation_inputs, validation_targets = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)

npz = np.load('Audiobooks_data_test.npz')
test_inputs, test_targets = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)


In [3]:
# check if the data are balanced
print(np.sum(train_targets), train_inputs.shape[0], np.sum(train_targets)/train_inputs.shape[0])
print(np.sum(validation_targets), validation_inputs.shape[0], np.sum(validation_targets)/validation_inputs.shape[0])
print(np.sum(test_targets), test_inputs.shape[0], np.sum(test_targets)/test_inputs.shape[0])

1785 3579 0.49874266554903607
225 447 0.5033557046979866
227 448 0.5066964285714286


In [4]:
train_inputs.shape

(3579, 10)

### Model
Outline, optimizers, loss, early stopping and training

In [5]:
## We need to choose the hyperparameters: width abd depth
INPUT_SIZE = 10
OUTPUT_SIZE = 2
HIDDEN_LAYER_SIZE = 50

model = tf.keras.Sequential([
    tf.keras.layers.Dense(HIDDEN_LAYER_SIZE, activation='relu'),
    tf.keras.layers.Dense(HIDDEN_LAYER_SIZE, activation='relu'),
    tf.keras.layers.Dense(OUTPUT_SIZE, activation='softmax')
])

model.compile(optimizer='adam',
#              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [16]:
BATCH_SIZE = 100
MAX_EPOCHS = 100
EARLY_STOPPING = tf.keras.callbacks.EarlyStopping(patience=2)

model.fit(
    train_inputs,
    train_targets,
    batch_size=BATCH_SIZE,
    epochs=MAX_EPOCHS,
    callbacks=[EARLY_STOPPING],
    validation_data=(validation_inputs, validation_targets),
    verbose=2
)

Train on 3579 samples, validate on 447 samples
Epoch 1/100
3579/3579 - 0s - loss: 0.3074 - accuracy: 0.8315 - val_loss: 0.3383 - val_accuracy: 0.8322
Epoch 2/100
3579/3579 - 0s - loss: 0.3075 - accuracy: 0.8310 - val_loss: 0.3414 - val_accuracy: 0.8389
Epoch 3/100
3579/3579 - 0s - loss: 0.3052 - accuracy: 0.8293 - val_loss: 0.3460 - val_accuracy: 0.8367


<tensorflow.python.keras.callbacks.History at 0x23f1c948a48>

## Test the model

In [17]:
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)



In [18]:
print('\nTest loss: {0:.2f}. Test Accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100))


Test loss: 0.35. Test Accuracy: 82.59%
