<a href="https://colab.research.google.com/github/sspaulc/sturdy-computing-machine/blob/main/Predict_chances_of_customer_buying_an_audiobook_again.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Extract Dataset
import numpy as np
import pandas as pd
from sklearn import preprocessing
import tensorflow as tf

# Read the whole comma-separated file into one 2-D float array.
raw_data = np.loadtxt(fname='/Audiobooks_data.csv', delimiter=',')


In [None]:
# The last column holds the target; column 0 is skipped (presumably an ID -- TODO confirm)
# and every column in between is used as an input feature.
targets_all = raw_data[:, -1]
unscaled_inputs_all = raw_data[:, 1:-1]


In [None]:
# Balance the dataset: count the 1-targets, then mark for removal every
# 0-target row that appears after as many zeros as there are ones have been kept.
num_one_targets = int(targets_all.sum())
zero_targets_counter = 0
indices_to_remove = []

for i, target in enumerate(targets_all):
    if target != 0:
        continue  # only zero-target rows are candidates for removal
    zero_targets_counter += 1
    if zero_targets_counter > num_one_targets:
        # This zero is surplus relative to the number of ones.
        indices_to_remove.append(i)


In [None]:
# Drop the surplus zero-target rows from the labels and the features alike,
# using the same index list so the two arrays stay row-aligned.
target_equal_priors = np.delete(arr=targets_all, obj=indices_to_remove, axis=0)
unscaled_inputs_equal_priors = np.delete(arr=unscaled_inputs_all, obj=indices_to_remove, axis=0)


In [None]:
# Standardize each feature column: centre it on its mean and scale it to unit variance.
# The keyword arguments spell out the defaults of preprocessing.scale.
scaled_inputs = preprocessing.scale(
    unscaled_inputs_equal_priors,
    axis=0,
    with_mean=True,
    with_std=True,
)

In [None]:
# Shuffle the rows so that later batches and splits do not follow the file order.
# One shared permutation of row indices keeps inputs and targets aligned.
shuffled_indices = np.arange(len(scaled_inputs))  # 0, 1, ..., n_rows - 1
np.random.shuffle(shuffled_indices)  # permutes the index array in place

shuffled_targets = target_equal_priors[shuffled_indices]
shuffled_inputs = scaled_inputs[shuffled_indices]

In [None]:
# Split the shuffled data into train (80%), validation (10%) and test (remainder).
samples_count = shuffled_inputs.shape[0]

train_samples_count = int(0.8 * samples_count)
validation_samples_count = int(0.1 * samples_count)
# The test split takes whatever is left, so the three counts always sum to samples_count.
test_samples_count = samples_count - train_samples_count - validation_samples_count

# Train: the first train_samples_count rows.
train_inputs = shuffled_inputs[:train_samples_count]
train_targets = shuffled_targets[:train_samples_count]

# Validation: the next validation_samples_count rows.
validation_inputs = shuffled_inputs[train_samples_count:train_samples_count + validation_samples_count]
validation_targets = shuffled_targets[train_samples_count:train_samples_count + validation_samples_count]

# Test: everything after the validation rows.
test_inputs = shuffled_inputs[train_samples_count + validation_samples_count:]
# Fix: the labels must be sliced from shuffled_targets. Slicing shuffled_inputs here
# stored a 2-D feature matrix as the test labels.
test_targets = shuffled_targets[train_samples_count + validation_samples_count:]

# Sanity checks: every split must have matching numbers of inputs and targets.
assert train_inputs.shape[0] == train_targets.shape[0] == train_samples_count
assert validation_inputs.shape[0] == validation_targets.shape[0] == validation_samples_count
assert test_inputs.shape[0] == test_targets.shape[0] == test_samples_count

In [None]:
# Write each split to its own .npz archive with 'inputs' and 'targets' arrays.
# The archive names (including the Audiobooks_/Audiobook_ difference) are the
# ones the loading cells read back, so they are kept exactly as they are.
for archive_name, split_inputs, split_targets in (
    ('Audiobooks_data_train', train_inputs, train_targets),
    ('Audiobook_data_validation', validation_inputs, validation_targets),
    ('Audiobook_data_test', test_inputs, test_targets),
):
    np.savez(archive_name, inputs=split_inputs, targets=split_targets)


In [None]:
#Load the training split
# The context manager closes the archive's file handle once the arrays are read.
with np.load('Audiobooks_data_train.npz') as npz:
    # Builtin float/int replace the np.float/np.int aliases, which were
    # deprecated in NumPy 1.20 and removed in 1.24; the resulting dtypes are the same.
    train_inputs = npz['inputs'].astype(float)
    train_targets = npz['targets'].astype(int)



In [None]:


# Load the validation split.
# The context manager closes the archive's file handle once the arrays are read.
with np.load('Audiobook_data_validation.npz') as npz:
    # Builtin float/int replace the np.float/np.int aliases, which were
    # deprecated in NumPy 1.20 and removed in 1.24; the resulting dtypes are the same.
    validation_inputs = npz['inputs'].astype(float)
    validation_targets = npz['targets'].astype(int)



In [None]:
# Load the test split.
# The context manager closes the archive's file handle once the arrays are read.
with np.load('Audiobook_data_test.npz') as npz:
    # Builtin float/int replace the np.float/np.int aliases, which were
    # deprecated in NumPy 1.20 and removed in 1.24; the resulting dtypes are the same.
    test_inputs = npz['inputs'].astype(float)
    test_targets = npz['targets'].astype(int)

In [None]:
#Create the ML Algorithm
input_size = 10  # not referenced in this cell; kept in case other cells use it
output_size = 2  # one output unit per class (targets are 0 or 1)
hidden_layer_size = 50

# Two ReLU hidden layers followed by a 2-way classification layer.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    # softmax (not sigmoid): sparse_categorical_crossentropy treats the outputs as
    # one probability distribution over mutually exclusive classes.
    tf.keras.layers.Dense(output_size, activation='softmax'),
])

# Integer class labels -> sparse categorical cross-entropy; accuracy is reported per epoch.
model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

batch_size = 100

# Upper bound on epochs; early stopping normally ends training sooner.
max_epochs = 100

# Stop once the monitored quantity (val_loss by default) has failed to improve
# for 2 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

model.fit(
    train_inputs,
    train_targets,
    batch_size=batch_size,
    epochs=max_epochs,
    callbacks=[early_stopping],
    validation_data=(validation_inputs, validation_targets),
    verbose=2,  # one summary line per epoch
)


Epoch 1/100
36/36 - 1s - loss: 0.5989 - accuracy: 0.6879 - val_loss: 0.5337 - val_accuracy: 0.7450
Epoch 2/100
36/36 - 0s - loss: 0.4777 - accuracy: 0.7656 - val_loss: 0.4488 - val_accuracy: 0.7763
Epoch 3/100
36/36 - 0s - loss: 0.4247 - accuracy: 0.7770 - val_loss: 0.3987 - val_accuracy: 0.8098
Epoch 4/100
36/36 - 0s - loss: 0.3955 - accuracy: 0.7921 - val_loss: 0.3827 - val_accuracy: 0.7919
Epoch 5/100
36/36 - 0s - loss: 0.3781 - accuracy: 0.8019 - val_loss: 0.3587 - val_accuracy: 0.8188
Epoch 6/100
36/36 - 0s - loss: 0.3648 - accuracy: 0.8069 - val_loss: 0.3587 - val_accuracy: 0.8188
Epoch 7/100
36/36 - 0s - loss: 0.3608 - accuracy: 0.8030 - val_loss: 0.3478 - val_accuracy: 0.8166
Epoch 8/100
36/36 - 0s - loss: 0.3522 - accuracy: 0.8097 - val_loss: 0.3443 - val_accuracy: 0.8210
Epoch 9/100
36/36 - 0s - loss: 0.3469 - accuracy: 0.8170 - val_loss: 0.3316 - val_accuracy: 0.8188
Epoch 10/100
36/36 - 0s - loss: 0.3461 - accuracy: 0.8114 - val_loss: 0.3375 - val_accuracy: 0.8233
Epoch 11/

<tensorflow.python.keras.callbacks.History at 0x7fec2b175d68>

In [None]:
# Score the trained model on the held-out test split.
# evaluate returns the loss followed by the metrics given to compile (accuracy here).
test_loss, test_accuracy = model.evaluate(x=test_inputs, y=test_targets)

ValueError: ignored