<a href="https://colab.research.google.com/github/raiakash203/Machine-Learning/blob/Dimension-Reduction/MLP(Multi_Layer_Perceptron).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## MLP Initialization

In [0]:
!pip install tensorflow==1.14

In [0]:
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST dataset through the TF tutorial helper; "MNIST" is the
# directory argument handed to the helper and one_hot=True requests one-hot
# label vectors. Later cells read mnist.train (next_batch, num_examples) and
# mnist.test (images, labels).
mnist=input_data.read_data_sets("MNIST", one_hot=True)

In [0]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [0]:
def plot_dynamics(x,y,y_1,ax,ticks,title,colors=['b']):
  """Draw the train/test loss curves on `ax` and refresh its figure.

  Everything is done through `ax` and the figure that owns it, so the
  function does not depend on a notebook-global `fig` nor on which axes
  pyplot currently considers "current".

  Args:
    x: sequence of x positions (the epochs recorded so far).
    y: train-loss values, same length as `x` (drawn in blue).
    y_1: test-loss values, same length as `x` (drawn in red).
    ax: the Matplotlib axes to draw on.
    ticks: y-axis tick positions.
    title: axes title; only applied on the first call (when len(x) == 1).
    colors: unused; kept so existing callers remain valid.
  """
  ax.plot(x,y,'b',label='Train Loss')
  ax.plot(x,y_1,'r',label='Test Loss')
  # Legend and title are only set up once, on the first recorded point, so
  # that repeated calls do not rebuild them on every epoch.
  if len(x)==1:
    ax.legend()
    ax.set_title(title)
  ax.set_yticks(ticks)
  # Redraw the figure that actually contains `ax`.
  ax.figure.canvas.draw()

In [0]:
# Network parameters: layer widths used when building the weight and bias
# variables of the multi-layer perceptron.
n_hidden_1 = 512  # number of neurons in the 1st hidden layer
n_hidden_2 = 128 # number of neurons in the 2nd hidden layer
n_input = 784 # number of input features per MNIST example
n_classes = 10 # number of MNIST output labels


In [0]:
# Graph inputs. The leading dimension is None so that any batch size can be
# fed; the trailing dimensions come from the network parameters instead of
# being repeated as literals, so they cannot drift from the weight shapes.
x = tf.placeholder(tf.float32,[None,n_input])
y_ = tf.placeholder(tf.float32,[None,n_classes])

# Keep-probabilities intended for dropout (hidden layers / input layer).
keep_prob = tf.placeholder(tf.float32)
keep_prob_inputs = tf.placeholder(tf.float32)

In [0]:
# Weights initialization
#
# With sigmoid activation units we use Xavier/Glorot weight initialization,
# whose standard deviation depends on each layer's fan_in and fan_out:
#
#   sigma = sqrt(2 / (fan_in + fan_out + 1)),   W ~ N(0, sigma)
#
# With the layer sizes 784 -> 512 -> 128 -> 10 this gives approximately
#   h1  : fan_in=784, fan_out=512 -> sigma ~= 0.039
#   h2  : fan_in=512, fan_out=128 -> sigma ~= 0.056
#   out : fan_in=128, fan_out=10  -> sigma ~= 0.120
#
# The values are computed from the layer sizes rather than typed in by hand,
# so they stay correct if the network parameters are changed.

def _glorot_stddev(fan_in, fan_out):
  """Return sqrt(2 / (fan_in + fan_out + 1)) for one weight matrix."""
  return (2.0 / (fan_in + fan_out + 1)) ** 0.5

weights_sgd = {
    'h1' : tf.Variable(tf.random.normal([n_input,n_hidden_1],stddev=_glorot_stddev(n_input,n_hidden_1),mean=0)),
    'h2' : tf.Variable(tf.random.normal([n_hidden_1,n_hidden_2],stddev=_glorot_stddev(n_hidden_1,n_hidden_2),mean=0)),
    'out' : tf.Variable(tf.random.normal([n_hidden_2,n_classes],stddev=_glorot_stddev(n_hidden_2,n_classes),mean=0))

}

# With ReLU activation units we generally use He initialization, whose
# standard deviation depends only on each layer's fan_in:
#
#   sigma = sqrt(2 / (fan_in + 1)),   W ~ N(0, sigma)
#
# With the layer sizes 784 -> 512 -> 128 -> 10 this gives approximately
#   h1  : fan_in=784 -> sigma ~= 0.050
#   h2  : fan_in=512 -> sigma ~= 0.062
#   out : fan_in=128 -> sigma ~= 0.125
#
# The previous hand-typed values (0.062, 0.125, 0.120) did not follow this
# formula: the first two used the layer's fan_out and the last one was the
# Glorot value. They are now computed from fan_in directly.

def _he_stddev(fan_in):
  """Return sqrt(2 / (fan_in + 1)) for one weight matrix."""
  return (2.0 / (fan_in + 1)) ** 0.5

weights_relu = {
    'h1' : tf.Variable(tf.random.normal([n_input,n_hidden_1],stddev=_he_stddev(n_input),mean=0)),
    'h2' : tf.Variable(tf.random.normal([n_hidden_1,n_hidden_2],stddev=_he_stddev(n_hidden_1),mean=0)),
    'out' : tf.Variable(tf.random.normal([n_hidden_2,n_classes],stddev=_he_stddev(n_hidden_2),mean=0))

}

# Bias initialization: one randomly drawn bias vector per layer, keyed the
# way the model function looks them up ('b1', 'b2', 'out').
_bias_widths = (
    ('b1', n_hidden_1),
    ('b2', n_hidden_2),
    ('out', n_classes),
)

biases = {
    key : tf.Variable(tf.random.normal([width]))
    for key, width in _bias_widths
}

In [0]:
# Training hyper-parameters shared by the training cells below.
train_epooch = 15  # number of passes over the training set
learning_rate = 0.001  # step size handed to both optimizers
batch_size = 100  # examples per mini-batch drawn with next_batch
display_step = 1  # print the costs every `display_step` epochs

# Model1 : input(784) - sigmoid(512) - sigmoid(128) - softmax(10)

In [0]:
#Create Model

def multilayer_perceptron(x,weights,biases):
  """Build a two-hidden-layer sigmoid MLP and return its output logits.

  The output layer is linear on purpose: the result is meant to be passed as
  `logits` to a softmax cross-entropy loss, which applies the softmax itself.
  Squashing the scores with a sigmoid first would confine them to (0, 1) and
  distort the loss. The arg-max of the logits is the predicted class.

  Args:
    x: input tensor; its last dimension must match the rows of weights['h1'].
    weights: dict with the weight matrices under 'h1', 'h2' and 'out'.
    biases: dict with the bias vectors under 'b1', 'b2' and 'out'.

  Returns:
    The unscaled output scores, one column per column of weights['out'].

  Side effects:
    Prints the static shapes of every layer while the graph is being built.
  """
  print('X: ',x.get_shape(),'W[h1] : ',weights['h1'].get_shape(),'b[h1]: ',biases['b1'].get_shape())

  #Hidden layer with sigmoid activation
  layer_1 = tf.add(tf.matmul(x,weights['h1']),biases['b1'])
  layer_1 = tf.nn.sigmoid(layer_1)
  print('layer_1: ',layer_1.get_shape(),'W[h2] : ',weights['h2'].get_shape(),'b[h2]: ',biases['b2'].get_shape())

  #Hidden layer with sigmoid activation
  layer_2 = tf.add(tf.matmul(layer_1,weights['h2']),biases['b2'])
  layer_2 = tf.nn.sigmoid(layer_2)
  print('layer_2: ',layer_2.get_shape(),'W[out] : ',weights['out'].get_shape(),'b[out]: ',biases['out'].get_shape())

  #Output layer: linear (no activation), returns raw logits
  out_layer = tf.add(tf.matmul(layer_2,weights['out']),biases['out'])
  print('out_layer: ',out_layer.get_shape())

  return out_layer


## Model1 + AdamOptimizer

In [0]:
# Forward pass of Model1 on the input placeholder, using the Glorot-style
# weights and the shared biases.
y_sgd = multilayer_perceptron(x,weights_sgd,biases)

# Mean softmax cross-entropy over the fed batch. The op applies the softmax
# itself, so the tensor passed as `logits` is expected to hold unscaled scores.
xent_per_example = tf.nn.softmax_cross_entropy_with_logits(logits=y_sgd, labels=y_)
cost_sgd = tf.reduce_mean(xent_per_example)

# Two alternative train ops minimizing the same cost with the same learning
# rate; each training cell runs exactly one of them.
adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
optimizer_adam = adam.minimize(cost_sgd)
plain_sgd = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
optimizer_sgdc = plain_sgd.minimize(cost_sgd)

# Values that do not change between epochs are computed once, up front.
total_batch = mnist.train.num_examples // batch_size
test_feed = {x: mnist.test.images, y_: mnist.test.labels}
plot_title = 'input-Sigmoid(512)-Sigmoid(128)-Sigmoid(Output(10))-AdamOptimizer'
plot_ticks = np.arange(1.3,1.8,step=0.04)

# Accuracy: fraction of examples whose arg-max prediction matches the label.
correct_prediction = tf.equal(tf.argmax(y_sgd,1),tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

with tf.Session() as session:
  # Start from freshly initialized variables.
  tf.global_variables_initializer().run()
  fig,ax = plt.subplots(1,1)
  ax.set_xlabel('Epoochs')
  ax.set_ylabel('SoftMax Cross Entropy loss')
  xs,ytrs,ytes = [],[],[]
  for epooch in range(train_epooch):
    train_avg_err = 0

    #Loop over all batches
    for i in range(total_batch):
      batch_xs,batch_ys = mnist.train.next_batch(batch_size)

      # Only the train op and the scalar cost are fetched; pulling the whole
      # weight dict back to Python on every step served no purpose.
      _,c = session.run([optimizer_adam,cost_sgd], feed_dict = {x:batch_xs,y_:batch_ys})
      train_avg_err += c/total_batch

    # Evaluate on the full test set once per epoch, after the epoch's updates,
    # instead of after every single mini-batch.
    test_avg_err = session.run(cost_sgd, feed_dict = test_feed)

    xs.append(epooch)
    ytrs.append(train_avg_err)
    ytes.append(test_avg_err)
    # The call for the last epoch already leaves the final curves on the
    # axes, so no extra plotting call is needed after the loop.
    plot_dynamics(xs,ytrs,ytes,ax,plot_ticks,plot_title)

    if epooch%display_step==0:
      print('Epooch: {}, Train_cost: {}, Test_cost: {}'.format(epooch,train_avg_err,test_avg_err))

  #Final Accuracy
  print("Accuracy-----> ",accuracy.eval(test_feed))

## Model1 + GradientDescentOptimizer

In [0]:
# Values that do not change between epochs are computed once, up front.
total_batch = mnist.train.num_examples // batch_size
test_feed = {x: mnist.test.images, y_: mnist.test.labels}
# This cell runs the gradient-descent train op, so the title names that
# optimizer (it previously still said "AdamOptimizer").
plot_title = 'input-Sigmoid(512)-Sigmoid(128)-Sigmoid(Output(10))-GradientDescentOptimizer'
plot_ticks = np.arange(1.3,1.8,step=0.04)

# Accuracy: fraction of examples whose arg-max prediction matches the label.
correct_prediction = tf.equal(tf.argmax(y_sgd,1),tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

with tf.Session() as session:
  # Re-initialize every variable so this run starts from scratch rather than
  # from the state left by a previous training cell.
  tf.global_variables_initializer().run()
  fig,ax = plt.subplots(1,1)
  ax.set_xlabel('Epoochs')
  ax.set_ylabel('SoftMax Cross Entropy loss')
  xs,ytrs,ytes = [],[],[]
  for epooch in range(train_epooch):
    train_avg_err = 0

    #Loop over all batches
    for i in range(total_batch):
      batch_xs,batch_ys = mnist.train.next_batch(batch_size)

      # Only the train op and the scalar cost are fetched; pulling the whole
      # weight dict back to Python on every step served no purpose.
      _,c = session.run([optimizer_sgdc,cost_sgd], feed_dict = {x:batch_xs,y_:batch_ys})
      train_avg_err += c/total_batch

    # Evaluate on the full test set once per epoch, after the epoch's updates,
    # instead of after every single mini-batch.
    test_avg_err = session.run(cost_sgd, feed_dict = test_feed)

    xs.append(epooch)
    ytrs.append(train_avg_err)
    ytes.append(test_avg_err)
    # The call for the last epoch already leaves the final curves on the
    # axes, so no extra plotting call is needed after the loop.
    plot_dynamics(xs,ytrs,ytes,ax,plot_ticks,plot_title)

    if epooch%display_step==0:
      print('Epooch: {}, Train_cost: {}, Test_cost: {}'.format(epooch,train_avg_err,test_avg_err))

  #Final Accuracy
  print("Accuracy-----> ",accuracy.eval(test_feed))