# Audiobooks business case

## Create the machine learning algorithm

In [1]:
import numpy as np
import tensorflow as tf
import pickle

### Data

In [2]:
def _load_split(path):
    """Read one dataset split from an .npz archive.

    Parameters
    ----------
    path : str
        Path of an archive that stores its arrays under the keys
        'inputs' and 'targets'.

    Returns
    -------
    tuple
        (inputs cast to float, targets cast to int).
    """
    # the context manager closes the archive's file handle once both arrays are extracted
    with np.load(path) as npz:
        # np.float / np.int were deprecated in NumPy 1.20 and removed in 1.24;
        # the builtins are what those aliases pointed to, so the dtypes are unchanged
        return npz['inputs'].astype(float), npz['targets'].astype(int)


# training data
train_inputs, train_targets = _load_split('Audiobooks_data_train.npz')

# validation data
validation_inputs, validation_targets = _load_split('Audiobooks_data_validation.npz')

# test data
test_inputs, test_targets = _load_split('Audiobooks_data_test.npz')

### Model Hyperparameters

In [3]:
# reproducibility: seed NumPy and TensorFlow before any weights are created, so that
# repeated runs start from the same state (as far as the underlying ops are deterministic)
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# layer sizes
input_size = 10          # features per sample (kept for reference; the layer stack in this notebook does not read it)
output_size = 2          # width of the softmax output layer, one unit per class
hidden_layer_size = 200  # units in each hidden layer

# training params
batch_size = 100
max_epochs = 100         # upper bound only; early stopping may end training sooner
# stop once the monitored quantity (Keras default: val_loss) has not improved for 2 epochs in a row
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)
custom_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

### Model

In [4]:
### Layerstack
# two ReLU hidden layers of equal width, followed by a softmax layer with one unit per class

model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(output_size, activation='softmax')
])

### Compile
# sparse categorical cross-entropy works directly on the integer class labels in the targets

model.compile(optimizer=custom_optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

### Training
# keep the returned History object: history.history holds the per-epoch loss/accuracy values,
# and binding it to a name stops its bare repr from being printed as the cell output

history = model.fit(train_inputs, # train inputs
                    train_targets, # train targets
                    batch_size=batch_size, # batch size
                    epochs=max_epochs, # epochs that we will train for (assuming early stopping doesn't kick in)
                    callbacks=[early_stopping],
                    validation_data=(validation_inputs, validation_targets), # validation data
                    verbose=2 # one summary line per epoch
                    )

Epoch 1/100
36/36 - 0s - loss: 0.4142 - accuracy: 0.8474 - val_loss: 0.2694 - val_accuracy: 0.9038
Epoch 2/100
36/36 - 0s - loss: 0.2966 - accuracy: 0.8871 - val_loss: 0.2529 - val_accuracy: 0.9105
Epoch 3/100
36/36 - 0s - loss: 0.2799 - accuracy: 0.8888 - val_loss: 0.2321 - val_accuracy: 0.9172
Epoch 4/100
36/36 - 0s - loss: 0.2654 - accuracy: 0.8980 - val_loss: 0.2267 - val_accuracy: 0.9239
Epoch 5/100
36/36 - 0s - loss: 0.2565 - accuracy: 0.9039 - val_loss: 0.2235 - val_accuracy: 0.9150
Epoch 6/100
36/36 - 0s - loss: 0.2509 - accuracy: 0.9008 - val_loss: 0.2211 - val_accuracy: 0.9195
Epoch 7/100
36/36 - 0s - loss: 0.2445 - accuracy: 0.9053 - val_loss: 0.2181 - val_accuracy: 0.9262
Epoch 8/100
36/36 - 0s - loss: 0.2451 - accuracy: 0.9072 - val_loss: 0.2288 - val_accuracy: 0.9016
Epoch 9/100
36/36 - 0s - loss: 0.2408 - accuracy: 0.9078 - val_loss: 0.2085 - val_accuracy: 0.9262
Epoch 10/100
36/36 - 0s - loss: 0.2377 - accuracy: 0.9092 - val_loss: 0.2094 - val_accuracy: 0.9329
Epoch 11/

<tensorflow.python.keras.callbacks.History at 0x1926b85ac88>

### Testing the Model

In [5]:
# score the trained model on the test split; returns [loss, accuracy]
model.evaluate(x=test_inputs, y=test_targets)



[0.2562461793422699, 0.9107142686843872]

### Saving the Model

In [6]:
# use the tf.keras built-in save function to store the model with all weights as *.h5;
# this has to run, because the prediction example further down loads 'model.h5' from disk
model.save('model.h5')

### Example: Using the Model to predict new Data (after deploying it)
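The output is returned without further formatting: each row contains the softmax probabilities of the two classes (not a one-hot vector), with column 0 = "will not buy again" and column 1 = "will buy again". The predictions here are therefore: customer 1 will most likely not buy again, while customer 2 most likely will.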

In [7]:
# restore the two serialized artifacts needed for inference.
# NOTE: pickle.load can execute arbitrary code, so only open a scaler.pickle that you created yourself
with open('scaler.pickle', 'rb') as f:
    # scaler object that, per the original note, was fitted with the whole dataset
    audiobook_scaler = pickle.load(f)
model = tf.keras.models.load_model('model.h5')

# new observations, one row per customer, with the features in exactly the same
# order as the data the model was trained with
new_inputs = [
    [1900, 0, 9, 5.87, 0, 5, 0, 1, 0, 253],
    [1620, 1600, 5.87, 5.87, 0, 5, 0, 1, 0, 253],
]

# bring the raw values onto the scale of the training data
new_inputs = audiobook_scaler.transform(new_inputs)

# predictions as an nd-array: one row per customer holding the probability of each class
# (softmax output, not hard one-hot labels)
model.predict(new_inputs)

array([[0.99759763, 0.00240239],
       [0.03611296, 0.9638871 ]], dtype=float32)