### Importing relevant packages

In [101]:
import numpy as np
from sklearn import preprocessing
import keras

### Loading Dataset

In [102]:
raw_data_with_all_columns=np.loadtxt('./Datasets/Audiobooks_data.csv',delimiter=',')

In [103]:
raw_data_with_all_columns.shape

(14084, 12)

In [104]:
raw_input_without_scaled=raw_data_with_all_columns[:,1:-1]
raw_targets_without_scaled=raw_data_with_all_columns[:,-1]

### Balancing the Dataset

In [105]:
count_ones_in_target=int(np.sum(raw_targets_without_scaled))
count_zeroes_in_target=0
index_to_be_removed=[]
for i in range(len(raw_targets_without_scaled)):
    if(raw_targets_without_scaled[i]==0):
        count_zeroes_in_target+=1
        if(count_zeroes_in_target>count_ones_in_target):
            index_to_be_removed.append(i)

unscaled_input_prior=np.delete(raw_input_without_scaled,index_to_be_removed,axis=0)
unscaled_target_prior=np.delete(raw_targets_without_scaled,index_to_be_removed,axis=0)

### Standardize the inputs

In [106]:

scaled_inputs=preprocessing.scale(unscaled_input_prior)

In [107]:
scaled_inputs

array([[ 0.21053387, -0.18888517,  1.97823887, ...,  4.80955413,
        11.83828419,  0.09415043],
       [ 1.27894497,  0.41646744, -0.39082475, ..., -0.41569922,
        -0.20183481, -0.80255852],
       [ 1.27894497,  0.41646744, -0.39082475, ..., -0.41569922,
        -0.20183481,  2.979214  ],
       ...,
       [ 1.27894497,  0.41646744, -0.39082475, ..., -0.41569922,
        -0.20183481, -0.7440775 ],
       [ 0.31737498,  1.7482432 ,  0.04679395, ..., -0.41569922,
        -0.20183481, -0.80255852],
       [ 0.31737498,  1.7482432 , -0.39082475, ..., -0.41569922,
        -0.20183481, -0.80255852]])

### Shuffle the data

In [108]:
from random import shuffle
indices=np.arange(len(scaled_inputs))
shuffle(indices)
shuffled_input=scaled_inputs[indices]
shuffled_target=unscaled_target_prior[indices]

### Dividing data into training,validation,testing

In [109]:
total_sample=len(shuffled_target)
training_sample=int(total_sample*0.8)
validation_sample=int(total_sample*0.1)
testing_sample=total_sample-training_sample-validation_sample
training_inputs=shuffled_input[:training_sample]
training_targets=shuffled_target[:training_sample]
validation_inputs=shuffled_input[training_sample:training_sample+validation_sample]
validation_targets=shuffled_target[training_sample:training_sample+validation_sample]
testing_inputs=shuffled_input[validation_sample+training_sample:]
testing_targets=shuffled_target[validation_sample+training_sample:]
print(sum(training_targets))
print(sum(testing_targets))
print(sum(validation_targets))


1759.0
225.0
253.0


### Save all the datasets in npz format

In [110]:
np.savez('./Prepossesed Dataset/audiobook_data_train',inputs=training_inputs,targets=training_targets)
np.savez('./Prepossesed Dataset/audiobook_data_validation',inputs=validation_inputs,targets=validation_targets)
np.savez('./Prepossesed Dataset/audiobook_data_testing',inputs=testing_inputs,targets=testing_targets)

FileNotFoundError: [Errno 2] No such file or directory: './Prepossesed Dataset/audiobook_data_train.npz'

### Data

In [111]:
npz=np.load('./Prepossesed_Dataset/audiobook_data_train.npz')
train_inputs=npz['inputs'].astype(np.float)
train_targets=npz['targets'].astype(np.int)



npz=np.load('./Prepossesed_Dataset/audiobook_data_validation.npz')
validation_inputs=npz['inputs'].astype(np.float)
validation_targets=npz['targets'].astype(np.int)



npz=np.load('./Prepossesed_Dataset/audiobook_data_testing.npz')
testing_inputs=npz['inputs'].astype(np.float)
testing_targets=npz['targets'].astype(np.int)

### Model

In [112]:
input_size=10
output_size=2
hidden_nodes=50
early_Stopping=keras.callbacks.EarlyStopping(patience=2)
model=keras.Sequential([
                     
                        keras.layers.Dense(hidden_nodes,activation='relu'),
                        keras.layers.Dense(hidden_nodes,activation='relu'),
                        keras.layers.Dense(output_size,activation='softmax')
])

In [113]:
model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])# By using Sparse Categorical CrossEntropy we will not have to do one hot encoding

In [114]:
model.fit(train_inputs,train_targets,batch_size=100,epochs=100,verbose=1,callbacks=[early_Stopping],validation_data=(validation_inputs,validation_targets ))

Train on 3579 samples, validate on 447 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100


<keras.callbacks.callbacks.History at 0xa94e29fbe0>

In [117]:
model.evaluate(testing_inputs,testing_targets)



[0.34201934933662415, 0.828125]