In [1]:
import numpy as np 
from sklearn import preprocessing

raw_csv_data = np.loadtxt('Audiobooks_data.csv.csv' , delimiter=',')

In [2]:
unscaled_input_all   = raw_csv_data[:,1:-1]
targets_all = raw_csv_data[:,-1]

In [3]:
no_of_zeros = 0
no_of_ones = int(np.sum(targets_all))
indices_to_remove = []

for i in range (targets_all.shape[0]):
    if targets_all[i]==0:
        no_of_zeros+=1
        if no_of_zeros>no_of_ones:
            indices_to_remove.append(i)
            
unscaled_input_zero_and_ones_balanced = np.delete(unscaled_input_all,indices_to_remove,axis=0)
targets_zero_and_ones_balanced = np.delete(targets_all,indices_to_remove,axis=0)

In [4]:
scaled_inputs = preprocessing.scale(unscaled_input_zero_and_ones_balanced)

In [5]:
shuffled_indices = np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffled_indices)

shuffled_data = scaled_inputs[shuffled_indices]
shuffled_targets = targets_zero_and_ones_balanced[shuffled_indices]

print(shuffled_data.shape)
print(shuffled_targets.shape)

(4474, 10)
(4474,)


In [6]:
no_of_samples = shuffled_data.shape[0]

train_set_no = int(0.8*no_of_samples)
validation_set_no = int(0.1*no_of_samples)
test_set_no = no_of_samples-train_set_no-validation_set_no

training_set_inputs = shuffled_data[:train_set_no]
training_targets = shuffled_targets[:train_set_no]

validation_data_inputs = shuffled_data[train_set_no:train_set_no+validation_set_no]
validation_data_targets = shuffled_targets[train_set_no:train_set_no+validation_set_no]

test_data_inputs = shuffled_data[train_set_no+validation_set_no:]
test_data_targets = shuffled_targets[train_set_no+validation_set_no:]



In [7]:
np.savez('Audiobooks_data_train',   inputs=training_set_inputs ,    targets=training_targets)
np.savez('Audiobooks_data_validate',inputs=validation_data_inputs , targets=validation_data_targets)
np.savez('Audiobooks_data_test',    inputs=test_data_inputs ,       targets=test_data_targets)

In [8]:
#BATCHING

In [9]:
class Audiobooks_Data_Batching():
    
    def __init__(self,dataset,batch_size=None):
        npz = np.load('Audiobooks_data_{0}.npz'.format(dataset))
        self.inputs  = npz['inputs'].astype(np.float) 
        self.targets = npz['targets'].astype(np.int)
        
        if batch_size==None:
            self.batch_size = self.inputs.shape[0]
        else:
            self.batch_size = batch_size
            
        self.current_batch = 0
        self.no_of_batches = self.inputs.shape[0]//self.batch_size
        
    def __next__(self):
        
        if self.current_batch >= self.no_of_batches:
            self.current_batch = 0
            raise StopIteration()
            
        batch_slice = slice(self.current_batch*self.batch_size,(self.current_batch + 1)*self.batch_size)
        input_batch  = self.inputs[batch_slice] 
        target_batch = self.targets[batch_slice]
        self.current_batch +=1
        
        classes_num = 2
        targets_one_hot_encoded = np.zeros((target_batch.shape[0],classes_num))
        targets_one_hot_encoded[range(target_batch.shape[0]),target_batch]=1
        
        return input_batch,targets_one_hot_encoded
    
    def __iter__(self):
        return self

In [10]:
import tensorflow as tf

input_size=10
hidden_layer_size=50
output_size=2

tf.reset_default_graph()

inputs = tf.placeholder(tf.float32,[None,input_size])
targets = tf.placeholder(tf.int32,[None,output_size])

weights_1 = tf.get_variable('weights_1',[input_size,hidden_layer_size])
biases_1  = tf.get_variable('biases_1',[hidden_layer_size])
outputs_1 = tf.nn.relu(tf.matmul(inputs,weights_1)+biases_1)

weights_2 = tf.get_variable('weights_2',[hidden_layer_size,hidden_layer_size])
biases_2  = tf.get_variable('biases_2',[hidden_layer_size])
outputs_2 = tf.nn.relu(tf.matmul(outputs_1,weights_2)+biases_2)


weights_3 = tf.get_variable('weights_3',[hidden_layer_size,output_size])
biases_3  = tf.get_variable('biases_3',[output_size])
output = tf.matmul(outputs_2,weights_3)+biases_3


loss = tf.nn.softmax_cross_entropy_with_logits(logits=output , labels=targets)
mean_loss = tf.reduce_mean(loss)
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(mean_loss)

output_equals_target = tf.equal(tf.arg_max(output,1),tf.arg_max(targets,1))
accuracy = tf.reduce_mean(tf.cast(output_equals_target,tf.float32))

sess = tf.InteractiveSession()
initializer = tf.global_variables_initializer()
sess.run(initializer)

batch_size = 100

prev_validation_loss = 9999999.

max_epochs = 50

train_data = Audiobooks_Data_Batching('train',batch_size)
validation_data = Audiobooks_Data_Batching('validation')


for e in range(max_epochs):
    total_loss = 0.
    
    for input_data,target_data in train_data:
        _,batch_loss = sess.run([optimizer,mean_loss],
                              feed_dict={inputs:input_data,targets:target_data})
        total_loss+=batch_loss
    average_training_loss = total_loss/max_epochs
    
    validation_loss=0.
    validation_accuracy=0.
    
    for input_data,target_data in validation_data:
        validation_loss,validation_accuracy = sess.run([mean_loss,accuracy],
                              feed_dict={inputs:input_data,targets:target_data})
    
    print('Epoch : '+str(e+1)+
              ' Training Loss : '+'{0:.3f}'.format(average_training_loss)+
              ' Validation Loss : '+'{0:.3f}'.format(validation_loss)+
              ' Validation Accuracy : '+'{0:.3f}%'.format(validation_accuracy*100.))
    
    
    if validation_loss > prev_validation_loss:
        break
    prev_validation_loss = validation_loss     

print('End of training !!!')        

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
W1005 12:57:48.950674 21788 deprecation.py:506] From C:\Users\sharma ji\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\ops\init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of

Epoch : 1 Training Loss : 0.470 Validation Loss : 0.538 Validation Accuracy : 72.036%
Epoch : 2 Training Loss : 0.340 Validation Loss : 0.462 Validation Accuracy : 75.615%
Epoch : 3 Training Loss : 0.299 Validation Loss : 0.420 Validation Accuracy : 79.195%
Epoch : 4 Training Loss : 0.280 Validation Loss : 0.393 Validation Accuracy : 80.313%
Epoch : 5 Training Loss : 0.268 Validation Loss : 0.377 Validation Accuracy : 80.761%
Epoch : 6 Training Loss : 0.261 Validation Loss : 0.365 Validation Accuracy : 81.208%
Epoch : 7 Training Loss : 0.255 Validation Loss : 0.357 Validation Accuracy : 81.879%
Epoch : 8 Training Loss : 0.252 Validation Loss : 0.350 Validation Accuracy : 82.103%
Epoch : 9 Training Loss : 0.248 Validation Loss : 0.346 Validation Accuracy : 82.103%
Epoch : 10 Training Loss : 0.246 Validation Loss : 0.341 Validation Accuracy : 82.550%
Epoch : 11 Training Loss : 0.244 Validation Loss : 0.338 Validation Accuracy : 83.221%
Epoch : 12 Training Loss : 0.242 Validation Loss : 0

In [15]:
#TESTING
test_data = Audiobooks_Data_Batching('test')

for input_data,target_data in test_data:
    acc = sess.run([accuracy],
                   feed_dict={inputs:input_data,targets:target_data})

print('Test accuracy is {0:.3f}%'.format(acc[0]*100))

Test accuracy is 83.259%
