<a href="https://colab.research.google.com/github/mahera02/machine_learning/blob/master/ML_Problem_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Loading the MNIST data set through Keras**

In [0]:
import keras
from keras.datasets import mnist
import numpy as np
#from keras.utils import to_categorical
import matplotlib as plt

(train_images_original, train_labels_original), (test_images_original, test_labels_original) = mnist.load_data()

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


**Preparing the image data and Preparing the train labels ( converting intergers to binary)**

In [0]:
train_images = train_images_original.reshape((60000, 28 * 28))
train_images = train_images.astype('float32') / 255
train_images = train_images.T

test_images = test_images_original.reshape((10000, 28 * 28))
test_images = test_images.astype('float32') / 255
test_images = test_images.T

def to_categorical(labels,num_classes):
    result = []
    for i in labels:
      t = np.zeros(num_classes)
      t[i] = 1
      result.append(t)
    result = np.array(result)
    result = result.reshape(labels.shape[0],num_classes)
    return result
  
train_labels = to_categorical(train_labels_original,10)
test_labels = to_categorical(test_labels_original,10)

train_labels = train_labels.T
test_labels = test_labels.T

print(train_images.shape)
print(test_images.shape)
print(train_labels_original.shape)
print(test_labels_original.shape)

(784, 60000)
(784, 10000)
(60000,)
(10000,)


**Initialize the parameters epochs,batch size and learning rate for training the data set**

In [0]:
learn_rate = 0.05
batch_size = 32
epochs = 20

**Method Definitions for logistic regression, softmax and categorical cross entropy **




In [0]:
# initialize weights and bias
def init():
  W = np.random.randn(10,784)*0.01
  b = np.zeros(shape=(10, 1))
  return W,b

def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    return np.exp(x) / np.sum(np.exp(x), axis = 0)
  
# forward propagation
def forward_propagation(weight,bias,x):
  logist_reg = np.dot(weight,np.transpose(x)) + (bias)
  result = softmax(logist_reg)
  return result  
  
# Function to calculate categorical cross entropy loss
def categorical_cross_entropy(Y, Y_new):
    return -np.mean(Y * np.log(Y_new))
  

def get_mini_batches(train_images,train_labels,batch_size):
  for i in range(0, train_images.shape[0]- batch_size+1, batch_size):
    last_slice = slice(i, i + batch_size)
    yield train_images[last_slice],train_labels[last_slice]

**Training the network using mini-batch stochastic gradient**

In [0]:
#  Model with no hidden layer
  weight,bias = init()
  
  #training
  for i in range(epochs):
     
     #using mini batch stochastic gradient descent
     for batch in get_mini_batches(train_images.T, train_labels.T, batch_size):
 
        train_images_batch, train_labels_batch = batch
        train_images_batch = train_images_batch.T
        train_labels_batch = train_labels_batch.T
  
        m = train_images_batch.shape[0]
        
        #forward propogation
        result = forward_propagation(weight,bias,train_images_batch.T)

        loss = categorical_cross_entropy(train_labels_batch, result)
        
        #back propogation
        dZ = result-train_labels_batch

        dweight = (1/m) * np.dot(dZ, train_images_batch.T)
        dbias = (1/m) * np.sum(dZ, axis=1, keepdims=True)

        weight = weight - learn_rate * dweight
        bias = bias - learn_rate * dbias
     print("Loss at Epoch ", i, " ", loss)
    
  result1 = forward_propagation(weight,bias,train_images.T)
  
  result2 = forward_propagation(weight,bias,test_images.T)
  
  # creating one hot encoding of the softmax ouptut
  train_labels_prediction = np.zeros_like(train_labels.T)
  train_labels_prediction[np.arange(len(result1.T)), result1.T.argmax(1)] = 1
  
  test_labels_prediction = np.zeros_like(test_labels.T)
  test_labels_prediction[np.arange(len(result2.T)), result2.T.argmax(1)] = 1
  
  
  # Training and Testing Accuracy
  print("Training accuracy : {} %".format(100 - np.mean(np.abs(train_labels_prediction - train_labels.T)) * 100))
  print("Testing accuracy  : {} %".format(100 - np.mean(np.abs(test_labels_prediction - test_labels.T)) * 100))

Loss at Epoch  0   0.08763154948870833
Loss at Epoch  1   0.059088294272153064
Loss at Epoch  2   0.04659512130376015
Loss at Epoch  3   0.03940860233539873
Loss at Epoch  4   0.034700702532337854
Loss at Epoch  5   0.031367122410666005
Loss at Epoch  6   0.028879307208277825
Loss at Epoch  7   0.02695027420082428
Loss at Epoch  8   0.025410047890178145
Loss at Epoch  9   0.02415129354083183
Loss at Epoch  10   0.02310280462663893
Loss at Epoch  11   0.022215452443724356
Loss at Epoch  12   0.021454239555601818
Loss at Epoch  13   0.020793563385120145
Loss at Epoch  14   0.0202142683881674
Loss at Epoch  15   0.01970174359325572
Loss at Epoch  16   0.019244656275625365
Loss at Epoch  17   0.01883408632393832
Loss at Epoch  18   0.018462920641917742
Loss at Epoch  19   0.018125420758855544
Training accuracy : 98.014 %
Testing accuracy  : 98.144 %
