In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
import tensorflow as tf


In [None]:
col = ['ID','book_length(mins)_overall','book_length(mins)_avg','price_overall',
       'price_avg','review0/1','review10/10','minute_listened','completion','support_request', 
       'last visit minus purchase date','target']

df = pd.read_csv('Audiobooks_data.csv', names=col)
df = df.sample(frac=1) #random the row
df.reset_index(inplace=True)
df.drop(columns=['index','ID'], axis=1,inplace=True)

df.head()

In [33]:
ones_count = df['target'].sum() #this return 2237
zeros_count = 0
index_to_remove = []

for i in range(len(df)):
  if df.iloc[i,10] == 0:
    zeros_count += 1
    if zeros_count > ones_count:
      index_to_remove.append(i)

In [None]:
drop_df = df.drop(index_to_remove)
drop_df.describe() #noted that the mean of target is 0.5 = 0:1 equally

Scaled the input to have a standardize data

In [None]:
col_to_scale = ['book_length(mins)_overall','book_length(mins)_avg','price_overall',
       'price_avg','review0/1','review10/10','minute_listened','completion','support_request', 
       'last visit minus purchase date']

scaled_inputs = drop_df[col_to_scale].apply(lambda x: preprocessing.scale(x)) #scale the input in the df
scaled_inputs.head()

Transform DF into tensor for using with tensorflow

In [36]:
input_tensor = tf.convert_to_tensor(scaled_inputs)
target_tensor = tf.convert_to_tensor(drop_df['target'])

In [None]:
#just checking
input_tensor.shape

In [None]:
#just checking
target_tensor.shape

### Split the data into 3 groups
Let's slize the observation into 80 Train:10 Validation:10 Tests.

In [None]:
num_train = int(0.8 * len(input_tensor))
num_valid = int(0.1 * len(input_tensor))
num_test = len(input_tensor) - num_train - num_valid

print(num_train, num_valid, num_test) #just checking

In [60]:
# the actual split
train_input, valid_input, test_input = tf.split(input_tensor,[num_train, num_valid, num_test],axis=0)
train_target, valid_target, test_target = tf.split(target_tensor,[num_train, num_valid, num_test],axis=0)

Save the datasets into npz file

In [61]:
np.savez('Audiobooks_data_train',input=train_input, target=train_target)
np.savez('Audiobooks_data_validation',input=valid_input, target=valid_target)
np.savez('Audiobooks_data_test',input=test_input, target=test_target)

# Building the model

let's start by loading the data from npz file into variables.

In [62]:
npz = np.load('Audiobooks_data_train.npz')
train_inputs, train_targets = npz['input'], npz['target']

npz = np.load('Audiobooks_data_validation.npz')
validation_inputs, validation_targets = npz['input'], npz['target']

npz = np.load('Audiobooks_data_test.npz')
test_inputs, test_targets = npz['input'], npz['target']

The actual model

The reason we are using 'relu' activation here is that relu is more compulational efficient than sigmoid (relu derivative is either 0 or 1 while sigmoid is a exponentials)

Also the sigmoid sometime cause gradient vanishing from the multiplication of many layers of the fractions from the sigmoid derivatives. The gradient goes to zero (vanish) quite quickly.

Still, the relu need to be fix that somethime the NN with relu 'died' from having too much of zero from the output.

In [65]:
input_size =  train_inputs.shape[1] #or 10 since we have (3579,10) shape tensor as an input
output_size = 2 #we will classify into 2 groups of will buy and won't buy
hidden_layer_size = 100 #arbitrary
max_epochs = 100 #arbitary
batch_size =  100 #arbitary
early_stop = tf.keras.callbacks.EarlyStopping(patience=2)

model = tf.keras.Sequential([
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'), #1st hidden
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'), #2nd hidden
                            tf.keras.layers.Dense(hidden_layer_size, activation='tanh'), #4nd hidden

                            tf.keras.layers.Dense(output_size, activation='softmax') #since we are classifying, softmax return a total prob. of all classifications.                            
                            ])

model.compile(optimizer='adam',loss= 'sparse_categorical_crossentropy',metrics=['accuracy'])

model.fit(train_inputs,
          train_target,
          batch_size = batch_size,
          epochs = max_epochs,
          validation_data = (validation_inputs, validation_targets),
          callbacks = [early_stop],
          verbose = 2
          )

Epoch 1/100
36/36 - 1s - loss: 0.4565 - accuracy: 0.7742 - val_loss: 0.4183 - val_accuracy: 0.7204 - 639ms/epoch - 18ms/step
Epoch 2/100
36/36 - 0s - loss: 0.3807 - accuracy: 0.8089 - val_loss: 0.5168 - val_accuracy: 0.6846 - 123ms/epoch - 3ms/step
Epoch 3/100
36/36 - 0s - loss: 0.3731 - accuracy: 0.8170 - val_loss: 0.4221 - val_accuracy: 0.7248 - 117ms/epoch - 3ms/step


<keras.callbacks.History at 0x7f14dcca8f50>

### The actual testing

In [66]:
test_loss, test_accuracy = model.evaluate(test_inputs,test_targets)

