In [1]:
import numpy as np
from sklearn import preprocessing
import pandas as pd

In [2]:
raw_csv_data=np.loadtxt('Audiobooks_data.csv',delimiter=',')
raw_csv_data

array([[9.9400e+02, 1.6200e+03, 1.6200e+03, ..., 5.0000e+00, 9.2000e+01,
        0.0000e+00],
       [1.1430e+03, 2.1600e+03, 2.1600e+03, ..., 0.0000e+00, 0.0000e+00,
        0.0000e+00],
       [2.0590e+03, 2.1600e+03, 2.1600e+03, ..., 0.0000e+00, 3.8800e+02,
        0.0000e+00],
       ...,
       [3.1134e+04, 2.1600e+03, 2.1600e+03, ..., 0.0000e+00, 0.0000e+00,
        0.0000e+00],
       [3.2832e+04, 1.6200e+03, 1.6200e+03, ..., 0.0000e+00, 9.0000e+01,
        0.0000e+00],
       [2.5100e+02, 1.6740e+03, 3.3480e+03, ..., 0.0000e+00, 0.0000e+00,
        1.0000e+00]])

# Preprocessing

In [4]:
unscaled_inputs_all=raw_csv_data[:,1:-1]
target_all=raw_csv_data[:,-1]
target_all

array([0., 0., 0., ..., 0., 0., 1.])

In [5]:
## we have to balanced the data so we are taking same number of data in which target=0 as target =1,remaining data has to delete to balanced the data
num_one_targets=int(np.sum(target_all))
zero_target_counter=0
indices_to_remove=[]
for i in range(target_all.shape[0]):
    if target_all[i]==0:
        zero_target_counter+=1
        if zero_target_counter>num_one_targets:
           indices_to_remove.append(i)
#unscaled_inputs_equal_priors=np.delete(unscaled_inputs_all,indices_to_remove,axis=0)
#targets_equal_priors=np.delete(target_all,indices_to_remove,axis=0)
unscaled_inputs_equal_priors=unscaled_inputs_all
targets_equal_priors=target_all

# Standardized

In [7]:
scaled_inputs=preprocessing.scale(unscaled_inputs_equal_priors)

In [8]:
shuffled_indices=np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffled_indices)
shuffled_inputs=scaled_inputs[shuffled_indices]
shuffled_targets=targets_equal_priors[shuffled_indices]

In [9]:
samples_count=shuffled_inputs.shape[0]
train_samples_count=int(0.8*samples_count)
validation_samples_count=int(0.1*samples_count)
test_samples_count=samples_count-train_samples_count-validation_samples_count

train_inputs=shuffled_inputs[:train_samples_count]
train_targets=shuffled_targets[:train_samples_count]

validation_inputs=shuffled_inputs[train_samples_count:train_samples_count+validation_samples_count]
validation_targets=shuffled_targets[train_samples_count:train_samples_count+validation_samples_count]

test_inputs = shuffled_inputs[train_samples_count+validation_samples_count:]
test_targets = shuffled_targets[train_samples_count+validation_samples_count:]

In [10]:
np.savez('Audiobooks_data_train', inputs=train_inputs, targets=train_targets)
np.savez('Audiobooks_data_validation', inputs=validation_inputs, targets=validation_targets)
np.savez('Audiobooks_data_test', inputs=test_inputs, targets=test_targets)

# Data

In [12]:
import tensorflow as tf

In [13]:
 npz=np.load('Audiobooks_data_train.npz')
train_inputs=npz['inputs'].astype(np.float64)
train_targets=npz['targets'].astype(np.int64)

npz=np.load('Audiobooks_data_validation.npz')
validation_inputs,validation_targets=npz['inputs'].astype(np.float64),npz['targets'].astype(np.int64)

npz = np.load('Audiobooks_data_test.npz')
test_inputs, test_targets = npz['inputs'].astype(np.float64), npz['targets'].astype(np.int64)

# Model

In [32]:
data=train_inputs.shape[0]
data

11267

In [34]:
input_size=train_inputs.shape[1]
output_size=2
hidden_layer_size=100

model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), # 1st hidden layer
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), # 2nd hidden layer
    # we just make sure to activate it with softmax
    tf.keras.layers.Dense(output_size, activation='softmax') # output layer
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)
batch_size = 100
max_epochs=100
model.fit(train_inputs,
          train_targets,
          batch_size=batch_size,
          epochs=max_epochs,
          callbacks=[early_stopping],
          validation_data=(validation_inputs,validation_targets),
          verbose=2)
          

Epoch 1/100
113/113 - 2s - 20ms/step - accuracy: 0.8766 - loss: 0.3440 - val_accuracy: 0.9006 - val_loss: 0.2627
Epoch 2/100
113/113 - 0s - 3ms/step - accuracy: 0.9021 - loss: 0.2642 - val_accuracy: 0.9062 - val_loss: 0.2457
Epoch 3/100
113/113 - 0s - 3ms/step - accuracy: 0.9050 - loss: 0.2503 - val_accuracy: 0.9084 - val_loss: 0.2322
Epoch 4/100
113/113 - 0s - 3ms/step - accuracy: 0.9051 - loss: 0.2460 - val_accuracy: 0.9084 - val_loss: 0.2296
Epoch 5/100
113/113 - 0s - 3ms/step - accuracy: 0.9080 - loss: 0.2409 - val_accuracy: 0.9006 - val_loss: 0.2645
Epoch 6/100
113/113 - 0s - 4ms/step - accuracy: 0.9081 - loss: 0.2388 - val_accuracy: 0.9070 - val_loss: 0.2259
Epoch 7/100
113/113 - 0s - 3ms/step - accuracy: 0.9079 - loss: 0.2380 - val_accuracy: 0.9084 - val_loss: 0.2329
Epoch 8/100
113/113 - 0s - 4ms/step - accuracy: 0.9089 - loss: 0.2362 - val_accuracy: 0.9126 - val_loss: 0.2262


<keras.src.callbacks.history.History at 0x1f5df9d6b80>

In [16]:
## As we see validation loss is increasing sometimes this means our model is overfit so we need to early stop (one of type of callBack)

# Testing the model

In [17]:
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9254 - loss: 0.2162
