<a href="https://colab.research.google.com/github/rajnagar07/Anomaly-detection-and-Time-Series/blob/main/mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from __future__ import absolute_import,division,print_function
import tensorflow as tf
import numpy as np


In [2]:
# --- MNIST dataset parameters ---
num_classes = 10         # number of output classes (digits 0 through 9)
num_feature = 28 * 28    # input features per image: 28 x 28 pixels = 784

# --- Training parameters ---
learning_rate = 0.001    # step size handed to the optimizer
training_step = 3000     # number of mini-batches consumed by the training loop
batch_size = 256         # examples per mini-batch
display_step = 100       # report loss/accuracy every this many steps

# --- Neural network parameters ---
n_hidden_1 = 128         # neurons in the 1st hidden layer
n_hidden_2 = 256         # neurons in the 2nd hidden layer

In [3]:
# Prepare MNIST data.

# Fetch the dataset through the Keras helper
from tensorflow.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# For both splits, in one pass:
#   1. flatten every image into a 1-D vector of num_feature (28 * 28 = 784) values
#   2. convert to float32
#   3. normalize pixel values from [0, 255] to [0, 1]
x_train = x_train.reshape(-1, num_feature).astype(np.float32) / 255.
x_test = x_test.reshape(-1, num_feature).astype(np.float32) / 255.

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [4]:
# Build the training input pipeline with the tf.data API.
# The stages run in the order written: repeat, shuffle, batch, prefetch.
train_data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .repeat()            # cycle over the data indefinitely
    .shuffle(5000)       # shuffle using a 5000-element buffer
    .batch(batch_size)   # group examples into mini-batches
    .prefetch(1)         # keep one batch prepared ahead of the consumer
)

In [5]:
# Trainable parameters of the network.
# Every tensor below is drawn from the same RandomNormal initializer.
random_normal = tf.initializers.RandomNormal()

# Weight matrices, one per layer, shaped [inputs, outputs].
# (The name `weigths` is spelled this way throughout the notebook.)
weigths = dict(
    h1=tf.Variable(random_normal([num_feature, n_hidden_1])),
    h2=tf.Variable(random_normal([n_hidden_1, n_hidden_2])),
    output=tf.Variable(random_normal([n_hidden_2, num_classes])),
)

# Bias vectors, one entry per unit of the corresponding layer.
biases = dict(
    b1=tf.Variable(random_normal([n_hidden_1])),
    b2=tf.Variable(random_normal([n_hidden_2])),
    output=tf.Variable(random_normal([num_classes])),
)

In [6]:
# Sanity check: print the shape of every weight matrix and bias vector
for label, variable in (
    ("shape W_1 : ", weigths["h1"]),
    ("shape W_2 : ", weigths["h2"]),
    ("shape W_out : ", weigths["output"]),
    ("shape B1 : ", biases["b1"]),
    ("shape B2 : ", biases["b2"]),
    ("shape b_out : ", biases["output"]),
):
  print(label, variable.shape)

shape W_1 :  (784, 128)
shape W_2 :  (128, 256)
shape W_out :  (256, 10)
shape B1 :  (128,)
shape B2 :  (256,)
shape b_out :  (10,)


In [7]:
import tensorflow as tf

# Model definition
def neural_net(x):
  """Forward pass of the two-hidden-layer fully connected network.

  Reads the module-level `weigths` and `biases` dictionaries.

  Args:
    x: batch of flattened inputs; its last dimension must match the first
      dimension of weigths['h1'].

  Returns:
    The softmax of the output layer, i.e. one probability per class for
    every input row.
  """
  # 1st hidden layer: affine transform followed by ReLU
  hidden_1 = tf.nn.relu(tf.matmul(x, weigths['h1']) + biases['b1'])

  # 2nd hidden layer: affine transform followed by ReLU
  hidden_2 = tf.nn.relu(tf.matmul(hidden_1, weigths['h2']) + biases['b2'])

  # Output layer: one logit per class
  logits = tf.matmul(hidden_2, weigths['output']) + biases['output']

  # Softmax turns the logits into a probability distribution
  return tf.nn.softmax(logits)

In [8]:
# #cross-entropy loss function
# def cross_entropy(y_pred,y_true):
#   #encode label to a one hot encode
#   y_true = tf.one_hot(y_true,depth=num_classes)

#   #clip prediction values to avoid log(0) error

#   y_pred = tf.clip_by_value(y_pred,1e-9,1.)

#   # compute cross-entropy loss
#   return tf.reduce_mean(-tf.reduce_sum(y_true*tf.math.log(y_pred)))

# Cross-entropy loss
def cross_entropy(y_pred, y_true):
  """Cross-entropy between predicted probabilities and integer labels.

  Args:
    y_pred: predicted class probabilities, one row per example.
    y_true: integer class labels; one-hot encoded here with depth num_classes.

  Returns:
    A scalar tensor. reduce_sum is called without an axis, so it adds up the
    contributions of every example in the batch; the outer reduce_mean then
    operates on that scalar and leaves it unchanged.
  """
  # NOTE(review): the value is a batch total, not a per-example mean, so it
  # scales with batch_size. Switching to a per-example mean would also require
  # re-tuning learning_rate -- confirm before changing.
  targets = tf.one_hot(y_true, depth=num_classes)

  # Keep probabilities away from 0 so that log() stays finite
  safe_probs = tf.clip_by_value(y_pred, 1e-9, 1.)

  log_likelihood = tf.reduce_sum(targets * tf.math.log(safe_probs))
  return tf.reduce_mean(-log_likelihood)

# Accuracy metric
def accuracy(y_pred, y_true):
  """Fraction of examples whose highest-scoring class equals the true label.

  Args:
    y_pred: per-class scores, one row per example.
    y_true: integer class labels (cast to int64 for the comparison).

  Returns:
    The mean of the 0/1 correctness indicators as a float32 tensor.
  """
  # The predicted class is the index of the largest score in each row (argmax)
  predicted_labels = tf.argmax(y_pred, axis=1)
  true_labels = tf.cast(y_true, tf.int64)
  hits = tf.cast(tf.equal(predicted_labels, true_labels), tf.float32)
  return tf.reduce_mean(hits, axis=-1)

# Stochastic gradient descent optimizer, configured with the learning rate set above
optimizer = tf.optimizers.SGD(learning_rate=learning_rate)


In [9]:
#optimizing process

def run_optimization(x,y):
  """Perform one gradient-descent update on a single mini-batch.

  Updates the module-level `weigths` and `biases` variables in place through
  the module-level `optimizer`.

  Args:
    x: batch of flattened input images, passed to neural_net.
    y: integer class labels for the batch, passed to cross_entropy.
  """
  # Only the forward pass and the loss need to be recorded for automatic
  # differentiation, so nothing else lives inside the tape context.
  with tf.GradientTape() as g:
    pred = neural_net(x)
    loss = cross_entropy(pred, y)

  # Variables to update during back-propagation: all weights, then all biases
  trainable_variables = list(weigths.values()) + list(biases.values())

  # Compute the gradient of the loss with respect to every variable
  gradients = g.gradient(loss, trainable_variables)

  # Update W and b following the gradients
  optimizer.apply_gradients(zip(gradients, trainable_variables))


In [None]:
# Run training for the configured number of steps
for step, (batch_x, batch_y) in enumerate(train_data.take(training_step), start=1):
  # One optimization step updates W and b from this batch
  run_optimization(batch_x, batch_y)

  # Only report metrics every display_step steps
  if step % display_step != 0:
    continue

  # Re-evaluate the freshly updated model on the batch it was just trained on
  pred = neural_net(batch_x)
  loss = cross_entropy(pred, batch_y)
  acc = accuracy(pred, batch_y)

  print(f"step : {step},loss : {loss},accuracy : {acc}")

step : 100,loss : 85.8126220703125,accuracy : 0.9140625
step : 200,loss : 37.466487884521484,accuracy : 0.96875
step : 300,loss : 43.9863166809082,accuracy : 0.953125
step : 400,loss : 29.249509811401367,accuracy : 0.97265625
step : 500,loss : 29.703659057617188,accuracy : 0.96875
step : 600,loss : 22.629322052001953,accuracy : 0.98046875
step : 700,loss : 12.136271476745605,accuracy : 0.9921875
step : 800,loss : 21.695171356201172,accuracy : 0.97265625
step : 900,loss : 25.268321990966797,accuracy : 0.98046875
step : 1000,loss : 18.890419006347656,accuracy : 0.9765625
step : 1100,loss : 14.622906684875488,accuracy : 0.98046875
step : 1200,loss : 7.242950439453125,accuracy : 1.0
step : 1300,loss : 13.866117477416992,accuracy : 0.9921875
step : 1400,loss : 6.49835205078125,accuracy : 1.0
step : 1500,loss : 8.250457763671875,accuracy : 0.99609375
step : 1600,loss : 8.844024658203125,accuracy : 0.99609375
step : 1700,loss : 8.596818923950195,accuracy : 0.9921875
step : 1800,loss : 8.10069

In [None]:
# Evaluate the trained model on the held-out test set
pred = neural_net(x_test)
test_acc = accuracy(pred, y_test)
print(f"test acc : {test_acc}")

In [None]:
#visual prediction
import matplotlib.pyplot as plt



In [None]:
# Show the first n_images test digits together with the model's prediction
n_images = 50
# shape = [n_images, 784]
test_images = x_test[:n_images]

# shape = [n_images, 10]: one probability per class for every image
preddictions = neural_net(test_images)

# Convert to NumPy once and take the most probable class of every row,
# instead of converting the whole tensor again on each loop iteration
predicted_labels = np.argmax(preddictions.numpy(), axis=1)

# Display each image followed by the model's prediction
for i in range(n_images):
  # 'gray' is the canonical colormap name; the 'grey' spelling is an alias
  # that older Matplotlib releases do not recognise
  plt.imshow(np.reshape(test_images[i], [28, 28]), cmap='gray')
  plt.show()
  print(f"model prediction : {predicted_labels[i]}")
