<a href="https://colab.research.google.com/github/theroyalraj/Deep-Learning/blob/master/Traffic_volume_DNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np

In [5]:
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).
/gdrive


In [0]:
class traffic():
    # Dataset is a mandatory arugment, while the batch_size is optional
    # If you don't input batch_size, it will automatically take the value: None
    def __init__(self, dataset, batch_size = None):
    
        # The dataset that loads is one of "train", "validation", "test".
        # e.g. if I call this class with x('train',5), it will load 'Audiobooks_data_train.npz' with a batch size of 5.
        npz = np.load('/gdrive/My Drive/Colab Notebooks/comp_data_{0}.npz'.format(dataset))
        
        # Two variables that take the values of the inputs and the targets. Inputs are floats, targets are integers
        self.inputs, self.targets = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)
        
        # Counts the batch number, given the size you feed it later
        # If the batch size is None, we are either validating or testing, so we want to take the data in a single batch
        if batch_size is None:
            self.batch_size = self.inputs.shape[0]
        else:
            self.batch_size = batch_size
        self.curr_batch = 0
        self.batch_count = self.inputs.shape[0] // self.batch_size
    
    # A method which loads the next batch
    def __next__(self):
        if self.curr_batch >= self.batch_count:
            self.curr_batch = 0
            raise StopIteration()
            
        # You slice the dataset in batches and then the "next" function loads them one after the other
        batch_slice = slice(self.curr_batch * self.batch_size, (self.curr_batch + 1) * self.batch_size)
        inputs_batch = self.inputs[batch_slice]
        targets_batch = self.targets[batch_slice]
        self.curr_batch += 1
        
        # One-hot encode the targets. In this example it's a bit superfluous since we have a 0/1 column 
        classes_num = 7280
        targets_one_hot = np.zeros((targets_batch.shape[0], classes_num))
        targets_one_hot[range(targets_batch.shape[0]), targets_batch] = 1
        
        # The function will return the inputs batch and the one-hot encoded targets
        return inputs_batch, targets_one_hot

    def __iter__(self):
        return self

In [19]:
import tensorflow as tf

# Input size depends on the number of input variables. We have 10 of them
input_size = 12
# Output size is 2, as we one-hot encoded the targets.
output_size = 7280
# Choose a hidden_layer_size
hidden_layer_size = 200

# Reset the default graph, so you can fiddle with the hyperparameters and then rerun the code.
tf.reset_default_graph()

# Create the placeholders
inputs = tf.placeholder(tf.float32, [None, input_size])
targets = tf.placeholder(tf.int32, [None, output_size])

# Outline the model. We will create a net with 2 hidden layers
weights_1 = tf.get_variable("weights_1", [input_size, hidden_layer_size])
biases_1 = tf.get_variable("biases_1", [hidden_layer_size])
outputs_1 = tf.nn.relu(tf.matmul(inputs, weights_1) + biases_1)

weights_2 = tf.get_variable("weights_2", [hidden_layer_size, hidden_layer_size])
biases_2 = tf.get_variable("biases_2", [hidden_layer_size])
outputs_2 = tf.nn.relu(tf.matmul(outputs_1, weights_2) + biases_2)

weights_3 = tf.get_variable("weights_3", [hidden_layer_size, hidden_layer_size])
biases_3 = tf.get_variable("biases_3", [hidden_layer_size])
outputs_3 = tf.nn.relu(tf.matmul(outputs_2, weights_3) + biases_3)

weights_4 = tf.get_variable("weights_4", [hidden_layer_size, output_size])
biases_4 = tf.get_variable("biases_4", [output_size])
# We will incorporate the softmax activation into the loss, as in the previous example
outputs = tf.matmul(outputs_3, weights_4) + biases_4

# Use the softmax cross entropy loss with logits
loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=outputs, labels=targets)
mean_loss = tf.reduce_mean(loss)

# Get a 0 or 1 for every input indicating whether it output the correct answer
out_equals_target = tf.equal(tf.argmax(outputs, 1), tf.argmax(targets, 1))
accuracy = tf.reduce_mean(tf.cast(out_equals_target, tf.float32))

# Optimize with Adam
optimize = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(mean_loss)

# Create a session
sess = tf.InteractiveSession()

# Initialize the variables
initializer = tf.global_variables_initializer()
sess.run(initializer)

# Choose the batch size
batch_size = 50

# Set early stopping mechanisms
max_epochs = 100
prev_validation_loss = float('inf')

# Load the first batch of training and validation, using the class we created. 
train_data = traffic('train', batch_size)
validation_data = traffic('validation')

# Create the loop for epochs 
for epoch_counter in range(max_epochs):
    
    # Set the epoch loss to 0, and make it a float
    curr_epoch_loss = 0.
    
   
    for input_batch, target_batch in train_data:
        _, batch_loss = sess.run([optimize, mean_loss], 
            feed_dict={inputs: input_batch, targets: target_batch})
        
        #Record the batch loss into the current epoch loss
        curr_epoch_loss += batch_loss
    
    # Find the mean curr_epoch_loss
    # batch_count is a variable, defined in the Audiobooks_Data_Reader class
    curr_epoch_loss /= train_data.batch_count
    
    # Set validation loss and accuracy for the epoch to zero
    validation_loss = 0.
    validation_accuracy = 0.
    
    # Use the same logic of the code to forward propagate the validation set
    # There will be a single batch, as the class was created in this way
    for input_batch, target_batch in validation_data:
        validation_loss, validation_accuracy = sess.run([mean_loss, accuracy],
            feed_dict={inputs: input_batch, targets: target_batch})
    
    # Print statistics for the current epoch
    print('Epoch '+str(epoch_counter+1)+
          '. Training loss: '+'{0:.4f}'.format(curr_epoch_loss)+
          '. Validation loss: '+'{0:.4f}'.format(validation_loss)+
          '. Validation accuracy: '+'{0:.3f}'.format(validation_accuracy * 100.)+'%')
    
    # Trigger early stopping if validation loss begins increasing.
    if validation_loss > prev_validation_loss:
        print("\n------Early Stopping------\n")
        break
        
    # Store this epoch's validation loss to be used as previous in the next iteration.
    prev_validation_loss = validation_loss
    
print('\nEnd of training.')
validation_accuracy * 100



Epoch 1. Training loss: 2.9219. Validation loss: 1.3618. Validation accuracy: 56.504%
Epoch 2. Training loss: 1.3200. Validation loss: 1.2614. Validation accuracy: 58.667%
Epoch 3. Training loss: 1.2441. Validation loss: 1.2118. Validation accuracy: 59.674%
Epoch 4. Training loss: 1.2013. Validation loss: 1.1798. Validation accuracy: 60.504%
Epoch 5. Training loss: 1.1722. Validation loss: 1.1597. Validation accuracy: 60.533%
Epoch 6. Training loss: 1.1519. Validation loss: 1.1449. Validation accuracy: 61.274%
Epoch 7. Training loss: 1.1373. Validation loss: 1.1341. Validation accuracy: 61.570%
Epoch 8. Training loss: 1.1263. Validation loss: 1.1262. Validation accuracy: 61.570%
Epoch 9. Training loss: 1.1179. Validation loss: 1.1202. Validation accuracy: 61.541%
Epoch 10. Training loss: 1.1111. Validation loss: 1.1153. Validation accuracy: 61.511%
Epoch 11. Training loss: 1.1055. Validation loss: 1.1112. Validation accuracy: 61.778%
Epoch 12. Training loss: 1.1007. Validation loss: 1.

64.32592868804932

In [20]:
test_data = traffic('test')

for inputs_batch, targets_batch in test_data:
    test_accuracy = sess.run([accuracy],
                     feed_dict={inputs: inputs_batch, targets: targets_batch})


test_accuracy_percent = test_accuracy[0] * 100.

# Print the test accuracy
print('Test accuracy: '+'{0:.2f}'.format(test_accuracy_percent)+'%')

Test accuracy: 64.98%


In [21]:
prediction=tf.argmax(targets,1)
best = sess.run([prediction],{inputs: inputs_batch, targets: targets_batch})
print(best)

[array([1, 1, 3, ..., 0, 4, 5])]


In [30]:
best

[array([1, 1, 3, ..., 0, 4, 5])]

In [0]:
import pandas as pd

In [0]:
arr=pd.DataFrame(best)

In [33]:
arr

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,3335,3336,3337,3338,3339,3340,3341,3342,3343,3344,3345,3346,3347,3348,3349,3350,3351,3352,3353,3354,3355,3356,3357,3358,3359,3360,3361,3362,3363,3364,3365,3366,3367,3368,3369,3370,3371,3372,3373,3374
0,1,1,3,2,0,6,0,5,8,1,1,1,0,5,1,1,1,0,6,1,0,6,1,1,1,6,0,8,0,5,8,6,6,0,4,0,1,5,6,0,...,10,1,8,1,6,5,5,0,1,0,1,10,1,0,0,1,1,0,8,5,1,5,8,1,2,1,5,10,5,1,0,1,0,4,1,8,1,0,4,5
