# A Deep Learning Model with Batch Normalization

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow import keras

Let us load the input data and put it in a DataFrame.

In [2]:
# Gather the peak / width / N features from the 2-D, 3-D and 4-D Minkowski
# files into one table; the label of every sample is the dimension of the file
# it came from.
Input_Data = {'Peak':[],'Width': [], 'N': [] }
Labels = []
for i in range(2, 5):
    # Each .npy file is indexed by string keys below, i.e. it holds a pickled
    # dict wrapped in a 0-d object array. NumPy >= 1.16.3 refuses to unpickle
    # unless allow_pickle=True is passed explicitly; .item() unwraps the dict.
    # Unpickling can execute arbitrary code: only load files you trust.
    Data = np.load('%d-D_Minkowski.npy'%i, allow_pickle=True).item()
    # Labels are sized from 'peak' alone, so a length mismatch inside one file
    # would silently misalign features and labels; fail loudly instead.
    assert len(Data['peak']) == len(Data['width']) == len(Data['N']), \
        'feature lists in %d-D_Minkowski.npy differ in length' % i
    Input_Data['Peak'].extend(Data['peak'])
    Input_Data['Width'].extend(Data['width'])
    Input_Data['N'].extend(Data['N'])
    Labels += [i]*len(Data['peak'])
df = pd.DataFrame.from_dict(Input_Data)

In [3]:
# Network dimensions and optimizer step size used when the graph is built.
# Three input features (Peak, Width, N) and three output classes.
input_size = output_size = 3
# All three hidden layers share the same width.
l1_nodes = l2_nodes = l3_nodes = 512
learning_rate = 1e-3

We want to create a deep learning graph of the following shape:

![title](batch.png)

In [4]:
# Builds the TF1 static graph:
#   input -> [batch norm -> dense + ReLU] x 3 -> batch norm -> linear output,
# followed by the softmax cross-entropy cost, the Adam training op and a scalar
# summary. The module-level names X, Y, predictions, cost, optimizer and
# summary created here are the handles the training cell runs and feeds.
#
# NOTE(review): none of the tf.layers.batch_normalization calls below pass
# `training`, and the train op has no control dependency on
# tf.GraphKeys.UPDATE_OPS. The TF1 docs list `training=False` (inference mode,
# normalizing with the moving statistics) as the default. If normalizing with
# batch statistics during training was intended, a training flag fed from the
# training loop plus an UPDATE_OPS dependency would be needed -- confirm.
#input
with tf.variable_scope('input'):
    # One row per sample with input_size features; the batch dimension is left open.
    X = tf.placeholder(tf.float32, shape = [None, input_size])

#Batch Normalization 1
with tf.variable_scope('batch1'):
    batch1_out = tf.layers.batch_normalization(X)
 
#layers
# Each hidden layer is relu(input . weight + biases); weights and biases are
# drawn from tf.random_normal_initializer() with its default arguments.
# `weight` and `biases` are rebound in every scope, so after this cell they
# refer to the output layer's variables only.
with tf.variable_scope('layer1'):
    weight = tf.get_variable(name = 'weight1', shape = [input_size, l1_nodes], 
                             initializer = tf.random_normal_initializer())
    biases = tf.get_variable(name = 'bias1', shape = [l1_nodes], initializer = tf.random_normal_initializer())
    l1_output = tf.nn.relu(tf.matmul(batch1_out, weight)+biases)

#Batch Normalization 2
with tf.variable_scope('batch2'):
    batch2_out = tf.layers.batch_normalization(l1_output)
    
with tf.variable_scope('layer2'):
    weight = tf.get_variable(name = 'weight2', shape = [l1_nodes, l2_nodes], 
                             initializer = tf.random_normal_initializer())
    biases = tf.get_variable(name = 'bias2', shape = [l2_nodes], initializer = tf.random_normal_initializer())
    l2_output = tf.nn.relu(tf.matmul(batch2_out, weight)+biases)

#Batch Normalization 3
with tf.variable_scope('batch3'):
    batch3_out = tf.layers.batch_normalization(l2_output)    
    
with tf.variable_scope('layer3'):
    weight = tf.get_variable(name = 'weight3', shape = [l2_nodes, l3_nodes], 
                             initializer = tf.random_normal_initializer())
    biases = tf.get_variable(name = 'bias3', shape = [l3_nodes], initializer = tf.random_normal_initializer())
    l3_output = tf.nn.relu(tf.matmul(batch3_out, weight)+biases)

#Batch Normalization 4
with tf.variable_scope('batch4'):
    batch4_out = tf.layers.batch_normalization(l3_output)       

#Output
# No activation here: `predictions` holds the raw logits that the cost below
# passes to the softmax cross-entropy op.
with tf.variable_scope('output'):
    weight = tf.get_variable(name = 'weight4', shape = [l3_nodes, output_size], 
                             initializer = tf.random_normal_initializer())
    biases = tf.get_variable(name = 'bias4', shape = [output_size], initializer = tf.random_normal_initializer())
    predictions = tf.matmul(batch4_out, weight)+biases
    
#cost
with tf.variable_scope('cost'):
    # Targets have one column per class; the training cells feed one-hot rows.
    Y = tf.placeholder(tf.int32, shape = [None, output_size])
    # Mean softmax cross-entropy over the rows that are fed.
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits = predictions, labels = Y))
    # Alternative kept for reference: sparse (integer-label) cross-entropy via keras.
    #cost = tf.reduce_mean(keras.backend.sparse_categorical_crossentropy(Y, predictions, from_logits = True))
    
#training
with tf.variable_scope('train'):
    # Running `optimizer` performs one Adam update of `cost` on whatever is fed to X and Y.
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    
#logging
with tf.variable_scope('logging'):
    # Register the cost as a scalar summary; `summary` is the merged op the
    # training cell evaluates and writes to its FileWriters.
    tf.summary.scalar('current_cost', cost)
    summary = tf.summary.merge_all()

In [5]:
#Data Manipulation
# Hold out 10% of the samples for testing; random_state is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    df.values, np.array(Labels), test_size = .1, random_state = 0)

n_classes = 3


def _one_hot(labels):
    """Return one n_classes-long 0/1 row per label, with the 1 at index label - 2."""
    return [[int(slot == label - 2) for slot in range(n_classes)] for label in labels]


y_cat_train = _one_hot(y_train)
y_cat_test = _one_hot(y_test)

In [6]:
def model(X_train, X_test, y_train, y_test, n_epochs = 1000):
    """Train the module-level graph from scratch and return its test accuracy.

    Opens a fresh session, re-initializes every variable, and runs `n_epochs`
    optimizer steps, each one feeding the whole training set. About ten times
    per run the training and testing cost are printed and their summaries are
    written under logs_4batch/training and logs_4batch/testing.

    Args:
        X_train, y_train: features and one-hot targets fed for the updates.
        X_test, y_test: features and one-hot targets used for evaluation only.
        n_epochs: number of optimizer steps to run.

    Returns:
        The fraction of test rows whose arg-max prediction equals the arg-max
        of the target row.

    Relies on the module-level graph handles X, Y, optimizer, cost, summary and
    predictions. Note that every call adds its own accuracy ops to the graph.
    """
    import sys  # local import: only needed for the in-place progress line

    # Report about ten times per run. Clamp to 1 so runs shorter than ten
    # epochs do not take a modulo by zero; `//` keeps the interval an integer
    # under both Python 2 and Python 3.
    report_every = max(1, n_epochs // 10)

    with tf.Session() as sess:
        #initializing the variables
        sess.run(tf.global_variables_initializer())

        training_writer = tf.summary.FileWriter('logs_4batch/training', sess.graph)
        testing_writer = tf.summary.FileWriter('logs_4batch/testing', sess.graph)

        try:
            for epoch in range(n_epochs):
                sess.run(optimizer, feed_dict = {X: X_train, Y: y_train})

                # '\r' rewinds to the start of the line so the counter
                # overwrites itself instead of scrolling.
                sys.stdout.write('\r epoch: %d ' % (epoch + 1))
                sys.stdout.flush()

                if (epoch + 1) % report_every == 0:
                    [training_cost, training_summary] = sess.run([cost, summary], feed_dict = {
                        X: X_train, Y: y_train})
                    [testing_cost, testing_summary] = sess.run([cost, summary], feed_dict = {
                        X: X_test, Y: y_test})
                    print('%s %s' % (training_cost, testing_cost))

                    training_writer.add_summary(training_summary, epoch)
                    testing_writer.add_summary(testing_summary, epoch)
        finally:
            # Flush and release the event files even if training is interrupted.
            training_writer.close()
            testing_writer.close()

        print('done!')

        final_training_cost = sess.run(cost, feed_dict = {X: X_train, Y: y_train})
        final_testing_cost = sess.run(cost, feed_dict = {X: X_test, Y: y_test})
        # Report the freshly computed final costs, not the values left over
        # from the last periodic report.
        print('final training cost:  %s final testing cost:  %s' % (
            final_training_cost, final_testing_cost))

        correct = tf.equal(tf.argmax(predictions, 1), tf.argmax(Y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        return accuracy.eval({X:np.array(X_test),Y:np.array(y_test)})

In [7]:
# Five independent train/evaluate cycles; each model() call re-initializes the
# variables, so `a` collects five separate test accuracies.
a = [model(X_train, X_test, y_cat_train, y_cat_test) for run in range(5)]

 epoch: 100 2085.1104 2008.4938                             
 epoch: 200                                                   6572.543 6930.2886
 epoch: 300 4559.6064 3854.0845                              
 epoch: 400                                                   1105.857 902.0296
 epoch: 500 201.03676 127.407364                             
 epoch: 600                                                   25.391928 0.0
 epoch: 700 112.47049 69.6575                                
 epoch: 800                                                   21.806019 0.0
 epoch: 900 817.1382 921.26733                               
 epoch: 1000                                                  314.89578 370.89496
done!
final training cost:  314.89578 final testing cost:  370.89496
 epoch: 100                                                 1479.4156 1513.4423
 epoch: 200 2419.9336 2464.1577                              
 epoch: 300                                                   1826.3113 1668.4958
 ep

In [8]:
# Mean test accuracy over the repeated runs collected in `a`.
np.mean(a)

0.9217777

In [10]:
# One more run from freshly initialized variables (model() re-initializes them
# on every call); the returned test accuracy is this cell's displayed output.
model(X_train, X_test, y_cat_train, y_cat_test)

 epoch: 100                                                1915.5895 1923.0067
 epoch: 200 2304.0083 2382.5986                              
 epoch: 300 2640.3196 2447.615                                
 epoch: 400                                                   1065.8821 985.0082
 epoch: 500 6052.9717 5511.4204                              
 epoch: 600 435.0472 417.61813                                
 epoch: 700                                                       106.02537 137.3419
 epoch: 800 68.53164 73.600945                                   
 epoch: 900                                                   67.05923 71.35722
 epoch: 1000 37.0286 21.198671                                                          
done!
final training cost:  37.0286 final testing cost:  21.198671


0.98

We can see that the loss curve decreases pretty nicely:

![title](batched.png)