# Audiobooks Example

## Create methods that will batch the data

In [1]:
import numpy as np

class Bank_Acounts_Data_Reader():
    """Iterate over an ``AI_HACK_data_<dataset>.npz`` file in fixed-size batches.

    The archive must contain an ``inputs`` array and a ``targets`` array of
    integer class labels. Each iteration step returns
    ``(inputs_batch, targets_one_hot)``. Samples that do not fill a complete
    batch at the end of the data are dropped. When the iterator is exhausted
    it rewinds itself, so the same object can be looped over once per epoch.

    Parameters
    ----------
    dataset : str
        Suffix substituted into the file name ``AI_HACK_data_{dataset}.npz``.
    batch_size : int, optional
        Number of samples per batch. ``None`` (the default) puts the whole
        dataset into a single batch.
    classes_num : int, optional
        Width of the one-hot target vectors. Defaults to 2.

    Raises
    ------
    ValueError
        If ``batch_size`` is given and is not a positive number.
    """

    def __init__(self, dataset, batch_size=None, classes_num=2):
        if batch_size is not None and batch_size <= 0:
            raise ValueError('batch_size must be a positive integer or None')

        # The context manager closes the underlying zip archive; astype()
        # returns fresh arrays, so nothing references the file afterwards.
        with np.load('AI_HACK_data_{0}.npz'.format(dataset)) as npz:
            # np.float / np.int were removed in NumPy 1.24; use explicit
            # fixed-width dtypes instead.
            self.inputs = npz['inputs'].astype(np.float64)
            self.targets = npz['targets'].astype(np.int64)

        sample_count = self.inputs.shape[0]
        self.batch_size = sample_count if batch_size is None else batch_size
        self.classes_num = classes_num
        self.curr_batch = 0
        # Floor division: a trailing partial batch is never served. The guard
        # covers an empty dataset loaded with batch_size=None.
        self.batch_count = sample_count // self.batch_size if self.batch_size else 0

    def __next__(self):
        """Return the next ``(inputs_batch, targets_one_hot)`` pair.

        Raises ``StopIteration`` after the last full batch and resets the
        cursor so that the next ``for`` loop starts from the beginning.
        """
        if self.curr_batch >= self.batch_count:
            self.curr_batch = 0
            raise StopIteration()

        start = self.curr_batch * self.batch_size
        batch_slice = slice(start, start + self.batch_size)
        inputs_batch = self.inputs[batch_slice]
        targets_batch = self.targets[batch_slice]
        self.curr_batch += 1

        # One-hot encode: row i gets a 1 in column targets_batch[i].
        rows = targets_batch.shape[0]
        targets_one_hot = np.zeros((rows, self.classes_num))
        targets_one_hot[np.arange(rows), targets_batch] = 1

        return inputs_batch, targets_one_hot

    def __iter__(self):
        """The reader is its own iterator."""
        return self

## Create the machine learning algorithm

In [7]:
import tensorflow as tf

# Network dimensions: each sample is a vector of `input_size` features, the
# network emits `output_size` values per sample, and both hidden layers are
# `hidden_layer_size` units wide.
input_size = 31
output_size = 2
hidden_layer_size = 2000

# Discard the current default graph before building a new one.
# NOTE(review): presumably this is here so that re-running the cell does not
# clash with variables of the same names created by an earlier run - confirm.
tf.reset_default_graph()

# Feed points for a batch of samples and their one-hot targets. The leading
# dimension is None, so the batch size is not fixed by the graph.
# NOTE(review): `targets` is declared int32, but the data reader in this
# notebook builds its one-hot arrays with np.zeros (floating point) - confirm
# that the implicit conversion at feed time is intended.
inputs = tf.placeholder(tf.float32,[None,input_size])
targets = tf.placeholder(tf.int32,[None,output_size])


# Hidden layer 1: ReLU(inputs x weights_1 + biases_1). No initializer is passed
# to tf.get_variable, so whatever TensorFlow uses by default applies.
# Note that biases_1 is declared with shape [1, hidden_layer_size] while the
# later biases are rank 1; all of them are simply added to the matmul result.
weights_1 = tf.get_variable("weights_1",[input_size,hidden_layer_size])
biases_1 = tf.get_variable("biases_1",[1,hidden_layer_size])
outputs_1 = tf.nn.relu(tf.matmul(inputs,weights_1)+biases_1)

# Hidden layer 2: ReLU(outputs_1 x weights_2 + biases_2).
weights_2 = tf.get_variable("weights_2",[hidden_layer_size,hidden_layer_size])
biases_2 = tf.get_variable("biases_2",[hidden_layer_size])
outputs_2 = tf.nn.relu(tf.matmul(outputs_1,weights_2)+biases_2)


# Output layer. NOTE(review): the numbering jumps from 2 to 6 - presumably a
# leftover from a deeper variant of this network; confirm before renaming.
weights_6 = tf.get_variable("weights_6",[hidden_layer_size,output_size])
biases_6 = tf.get_variable("biases_6",[output_size])

# No activation here: `outputs` is passed as `logits` to the loss below.
outputs = tf.matmul(outputs_2,weights_6)+biases_6

# Softmax cross-entropy between the logits and the one-hot targets.
loss = tf.nn.softmax_cross_entropy_with_logits(logits=outputs,labels=targets)

# Scalar objective: the mean of `loss` over everything that was fed.
mean_loss = tf.reduce_mean(loss)

# Running `optimize` performs one Adam update that minimizes `mean_loss`.
# NOTE(review): the learning rate is 1e-6 - confirm this is deliberate.
optimize = tf.train.AdamOptimizer(learning_rate=0.000001).minimize(mean_loss)

# True wherever the arg-max along axis 1 of the outputs equals that of the
# targets, i.e. the predicted class index matches the one-hot target's index.
out_equals_target = tf.equal(tf.argmax(outputs,1),tf.argmax(targets,1))

# Fraction of matches: the booleans are cast to float32 and averaged.
accuracy = tf.reduce_mean(tf.cast(out_equals_target,tf.float32))  

# Session used by every later cell through the global `sess`.
# NOTE(review): no visible cell ever closes this session.
sess = tf.InteractiveSession()

# Op that initializes all global variables created above.
initializer = tf.global_variables_initializer()

sess.run(initializer)

# Mini-batch size used for the training set.
batch_size = 100

# Upper bound on the number of epochs; early stopping may end training sooner.
max_epochs = 50

# Validation loss of the previous epoch. Starting at +inf guarantees that the
# first epoch can never trigger early stopping.
prev_validation_loss = float('inf')

# Training data is served in mini-batches; validation data as a single batch
# (no batch_size is passed).
train_data = Bank_Acounts_Data_Reader('train',batch_size)
validation_data = Bank_Acounts_Data_Reader('validation')

for epoch_counter in range(max_epochs):

    # Training pass: one optimizer step per batch, accumulating the batch
    # losses so their mean can be reported for the epoch.
    curr_epoch_loss = 0.
    train_batches = 0
    for input_batch, target_batch in train_data:
        _, batch_loss = sess.run([optimize, mean_loss],
                    feed_dict={inputs:input_batch, targets:target_batch})
        curr_epoch_loss += batch_loss
        train_batches += 1

    # Guard against a reader that yielded no batches at all.
    curr_epoch_loss /= max(train_batches, 1)

    # Validation pass: forward only (`optimize` is not run). Metrics are
    # averaged over all batches, so the result stays correct if the
    # validation reader is ever given a batch size.
    validation_loss = 0.
    validation_accuracy = 0.
    validation_batches = 0
    for input_batch, target_batch in validation_data:
        batch_loss, batch_accuracy = sess.run([mean_loss, accuracy],
                    feed_dict={inputs:input_batch, targets:target_batch})
        validation_loss += batch_loss
        validation_accuracy += batch_accuracy
        validation_batches += 1

    validation_loss /= max(validation_batches, 1)
    validation_accuracy /= max(validation_batches, 1)

    # Exactly one report line per epoch.
    print('Epoch '+str(epoch_counter+1)+
     '. Training loss: '+'{0:.3f}'.format(curr_epoch_loss)+
     '. Validation loss: '+'{0:.3f}'.format(validation_loss)+
     '. Validation accuracy: '+'{0:.2f}'.format(validation_accuracy *100.)+'%')

    # Early stopping: stop as soon as the validation loss goes up.
    if validation_loss > prev_validation_loss :
        break
    prev_validation_loss = validation_loss
print('End of training.')
        





Epoch 1. Training loss: 0.611. Validation loss: 0.531. Validation accuracy: 85.03%
Epoch 2. Training loss: 0.483. Validation loss: 0.461. Validation accuracy: 85.03%
Epoch 3. Training loss: 0.432. Validation loss: 0.426. Validation accuracy: 85.03%
Epoch 4. Training loss: 0.402. Validation loss: 0.401. Validation accuracy: 85.03%
Epoch 5. Training loss: 0.379. Validation loss: 0.382. Validation accuracy: 85.03%
Epoch 6. Training loss: 0.362. Validation loss: 0.366. Validation accuracy: 85.63%
Epoch 7. Training loss: 0.348. Validation loss: 0.354. Validation accuracy: 86.31%
Epoch 8. Training loss: 0.337. Validation loss: 0.344. Validation accuracy: 86.52%
Epoch 9. Training loss: 0.328. Validation loss: 0.336. Validation accuracy: 86.95%
Epoch 10. Training loss: 0.321. Validation loss: 0.330. Validation accuracy: 87.20%
Epoch 11. Training loss: 0.316. Validation loss: 0.325. Validation accuracy: 87.41%
Epoch 12. Training loss: 0.311. Validation loss: 0.321. Validation accuracy: 87.54%
E

## Test

In [3]:
# Reader over the held-out test split; without a batch size the whole split
# is served as one batch.
test_data = Bank_Acounts_Data_Reader(dataset='test')

In [4]:

 for input_batch, target_batch in test_data:
        test_accuracy = sess.run([accuracy],
                    feed_dict={inputs:input_batch, targets:target_batch})
        
test_accuracy_percent = test_accuracy[0]*100.
    
print('Test accuracy: '+'{0:.2f}'.format(test_accuracy_percent)+'%')


Test accuracy: 88.82%


## AI Hack test

In [5]:
# Predict a class for every row of the unlabeled hack-test set.
# The context manager closes the archive once the inputs have been copied out.
with np.load('AI_HACK_data_hack_test.npz') as test_npz:
    # np.float was removed in NumPy 1.24; float64 is the dtype it aliased.
    test_inputs = test_npz['inputs'].astype(np.float64)

# Only the arg-max over the output columns (the predicted class index) is
# needed, so the raw outputs are no longer fetched and thrown away.
predictions = sess.run(tf.argmax(outputs,1),feed_dict={inputs:test_inputs})
predictions

array([1, 1, 0, ..., 0, 0, 0], dtype=int64)

In [6]:
# Build the submission file: one row per test sample, holding its identifier
# followed by the predicted class, written as comma-separated text.
# NOTE(review): allow_pickle=True lets np.load unpickle stored objects, which
# can execute arbitrary code - only load this archive from a trusted source.
uniquid_npz = np.load('AI_HACK_data_hack_test_uniquid.npz', allow_pickle=True)

# Turn both arrays into single-column 2-D arrays so they can sit side by side.
uniquid = np.reshape(uniquid_npz['uniquid'], (-1, 1))
predictions = np.reshape(predictions, (-1, 1))

# Horizontal stack: column 0 is the identifier, column 1 the prediction.
submissions = np.hstack((uniquid, predictions))
np.savetxt('submissions.csv', submissions, fmt='%s', delimiter=',')