<a href="https://colab.research.google.com/github/mahera02/machine_learning/blob/master/ML_Problem_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Loading the MNIST data set through Keras**

In [1]:
import keras
from keras.datasets import mnist
from keras.utils import to_categorical
import numpy as np
import matplotlib as plt

# Load MNIST through Keras as two (images, labels) pairs: one for the training
# split and one for the test split. The "_original" arrays are kept untouched
# by the image-preparation cell, which derives new arrays from them.
(train_images_original, train_labels_original), (test_images_original, test_labels_original) = mnist.load_data()

Using TensorFlow backend.


printing original values

In [2]:
# Cell display: shapes of both image arrays and of the test labels. Note that
# the training labels are shown as the array itself, not as its shape.
train_images_original.shape, train_labels_original, test_images_original.shape, test_labels_original.shape

((60000, 28, 28),
 array([5, 0, 4, ..., 5, 6, 8], dtype=uint8),
 (10000, 28, 28),
 (10000,))

**Preparing the image data**

In [0]:
# Flatten every image into one row, divide the pixel values by 255 and then
# transpose, so that each COLUMN is one sample (features x samples). That is
# the layout initialize_zeros/minibatchsgd/prediction index into.
# The sample count is read from the array (and the feature count inferred with
# -1) instead of hard-coding 60000 / 10000, so the cell also works on a subset.
train_images = train_images_original.reshape(train_images_original.shape[0], -1)
train_images = train_images.astype('float32') / 255
train_images = train_images.T

test_images = test_images_original.reshape(test_images_original.shape[0], -1)
test_images = test_images.astype('float32') / 255
test_images = test_images.T

# The label arrays are 1-D, so transposing them changes nothing; the previous
# `train_labels_original.T` / `test_labels_original.T` re-assignments were
# no-ops and the labels are simply used as loaded.

In [4]:
# Sanity check of the prepared arrays, one shape per line:
# train images, test images, train labels, test labels.
print(
    *(arr.shape for arr in (train_images, test_images, train_labels_original, test_labels_original)),
    sep="\n",
)

(784, 60000)
(784, 10000)
(60000,)
(10000,)


**Preparing the train and test labels (converting integers to one-hot binary vectors)**

In [5]:
# One-hot encode the integer labels into 10 columns (one per digit).
# NOTE(review): despite the "predict_" prefix, these arrays start out holding
# the TRUE labels; the training cell later overwrites them column by column
# with each digit classifier's predictions.
predict_train_labels_original = to_categorical(train_labels_original,10)
predict_test_labels_original = to_categorical(test_labels_original,10)
# Last expression of the cell: displays the shape of the encoded train labels.
predict_train_labels_original.shape


(60000, 10)

printing new values

In [0]:
#train_images.shape, train_labels.shape, test_images.shape, test_labels.shape

**Initialize the hyperparameters (epochs, batch size and learning rate) for training on the data set**

In [0]:
# Step size applied to every weight/bias update in minibatchsgd.
learn_rate = 0.1
# Number of samples per mini-batch; minibatchsgd reads this module-level name.
batch_size = 50
# Number of full passes over the training data.
epochs = 20

**Method definitions for logistic regression, sigmoid and mini-batch stochastic gradient descent using binary cross-entropy loss**

In [0]:
def initialize_zeros(size):
  """Return the starting parameters for one logistic-regression classifier.

  Args:
    size: number of rows of the weight column vector (one per input feature).

  Returns:
    (weight, bias): a (size, 1) array of zeros and the integer 0.
  """
  return np.zeros((size, 1)), 0
  
  
#Logistic Regression using binary cross entropy loss

def binary_entropy_loss_function(weight,bias,x,y):
  """Gradients of the logistic-regression model for one batch.

  Despite the name, no loss value is computed or returned: only the gradients
  with respect to the weight and the bias. They are summed over the batch, not
  averaged.

  Args:
    weight: weight column vector; its transpose is multiplied with `x`.
    bias: bias added to every linear output.
    x: batch of samples stored column-wise (features x samples).
    y: targets for the batch, broadcast against the sigmoid output.

  Returns:
    dict with keys "dweight" and "dbias".
  """
  logits = np.dot(np.transpose(weight), x) + bias
  # Difference between the predicted probabilities and the targets; both
  # gradients are built from it.
  error = sigmoid(logits) - y
  return {
      "dweight": np.dot(x, error.T),
      "dbias": np.sum(error),
  }


#input to the sigmoid function (our algorithm’s prediction using Logistic regression  e.g. z= wx + b)

def sigmoid(z):
  """Element-wise logistic function 1 / (1 + e^(-z)) for a scalar or array."""
  exp_neg = np.exp(-z)
  return 1.0 / (1.0 + exp_neg)

  
# Implementing mini-batch stochastic gradient descent for back propagation

def minibatchsgd(weight,bias,train_images,train_labels,epochs,learn_rate,batch_size=None):
  """Train one logistic-regression classifier with mini-batch gradient descent.

  Batches are visited in the same order in every epoch (no shuffling), and each
  update uses the batch-summed gradients from binary_entropy_loss_function.

  Args:
    weight: initial weight column vector.
    bias: initial bias.
    train_images: samples stored column-wise (features x samples).
    train_labels: targets, one per sample.
    epochs: number of passes over the training data.
    learn_rate: step size of each update.
    batch_size: samples per mini-batch. When None (the default) the
      module-level `batch_size` is used, which is what this function always
      did before the parameter existed.

  Returns:
    (initials, gradients):
      initials  -- {"weight": trained weight, "bias": trained bias}
      gradients -- {"dweight": ..., "dbias": ...} from the last processed
                   batch; both are None when no batch was processed
                   (epochs == 0 or fewer samples than one batch).
  """
  if batch_size is None:
    # Backward compatible fallback to the notebook-level setting.
    batch_size = globals()["batch_size"]

  # Loop-invariant: put samples on the first axis once so batches can be sliced
  # by row (np.transpose returns views, no data is copied).
  images_by_row = np.transpose(train_images)
  labels_by_row = np.transpose(train_labels)

  # Defined up front so the return below cannot hit an unbound local when the
  # loops never execute.
  gradient_weight = None
  gradient_bias = None

  for _ in range(epochs):
    for train_images_batch, train_labels_batch in get_mini_batches(images_by_row, labels_by_row, batch_size):
      # Back to column-wise layout, as binary_entropy_loss_function expects.
      gradient = binary_entropy_loss_function(weight,bias,np.transpose(train_images_batch),np.transpose(train_labels_batch))
      gradient_weight = gradient["dweight"]
      gradient_bias = gradient["dbias"]
      weight = weight - learn_rate*gradient_weight
      bias = bias - learn_rate*gradient_bias

  initials = {"weight":weight,"bias":bias}
  gradients = {"dweight":gradient_weight ,"dbias":gradient_bias}

  return initials,gradients


def get_mini_batches(train_images,train_labels,batch_size):
  """Yield consecutive (images, labels) slices of exactly `batch_size` rows.

  Slicing is along the first axis of both arrays. A trailing remainder shorter
  than `batch_size` is not yielded.
  """
  last_start = train_images.shape[0] - batch_size
  for start in range(0, last_start + 1, batch_size):
    stop = start + batch_size
    yield train_images[start:stop], train_labels[start:stop]
    

# predicting 0/1 labels by thresholding the sigmoid output at 0.5

def prediction(weight, bias, train_images):
  """Classify every sample (column) of `train_images` as 1 or 0.

  Args:
    weight: trained weights; reshaped to a (features, 1) column vector.
    bias: trained bias added to every linear output.
    train_images: samples stored column-wise (features x samples). Despite the
      parameter name this is also called with the test images.

  Returns:
    Float array of shape (1, samples): 1.0 where the sigmoid output is
    strictly greater than 0.5, otherwise 0.0.
  """
  m = train_images.shape[1]
  weight = weight.reshape(train_images.shape[0], 1)
  activation = sigmoid(np.dot(weight.T, train_images) + bias)

  # Vectorized threshold on the single output row; replaces a per-sample
  # Python loop and yields the same (1, m) float array.
  return (activation[0] > 0.5).astype(np.float64).reshape(1, m)

**Training the network to find the classifier with maximum accuracy**

In [9]:
# Network model for 10 classifiers: one binary (one-vs-rest) logistic-regression
# classifier is trained per digit.

# Integer class labels for both splits. The end of this cell rebinds
# `train_labels_original` / `test_labels_original` to one-hot matrices, so on a
# re-run the integer labels are recovered with argmax instead of comparing
# digits against one-hot rows.
train_label_ids = train_labels_original if train_labels_original.ndim == 1 else np.argmax(train_labels_original, axis=1)
test_label_ids = test_labels_original if test_labels_original.ndim == 1 else np.argmax(test_labels_original, axis=1)

test_acc_list = list()
for digit_classifier in range(0,10):
  # Binary targets for the current digit: 1 where the label equals the digit,
  # 0 everywhere else (np.where builds new arrays, the labels are not modified).
  modified_train_label = np.where(train_label_ids == digit_classifier, 1, 0)
  modified_test_label = np.where(test_label_ids == digit_classifier, 1, 0)

  # Train this digit's classifier from zero-initialised weight and bias.
  weight, bias = initialize_zeros(train_images.shape[0])
  initials,gradients = minibatchsgd(weight, bias, train_images, modified_train_label, epochs, learn_rate)
  weight = initials["weight"]
  bias = initials["bias"]

  # Predict 0/1 labels for both splits with the trained weight and bias.
  train_labels_prediction = prediction(weight, bias, train_images)
  test_labels_prediction = prediction(weight, bias, test_images)

  # Store the predictions in this digit's column of the (samples, 10) matrices.
  predict_train_labels_original[:,[digit_classifier]] = train_labels_prediction.T
  predict_test_labels_original[:,[digit_classifier]] = test_labels_prediction.T

  # Per-digit accuracy: 100 minus the percentage of mismatching 0/1 labels.
  print("Digit " + str(digit_classifier) +" train accuracy : {} %".format(100 - np.mean(np.abs(train_labels_prediction - modified_train_label)) * 100))
  test_acc = 100 - np.mean(np.abs(test_labels_prediction - modified_test_label)) * 100
  test_acc_list.append(test_acc)
  print("Digit " + str(digit_classifier) +" test accuracy : " + str(test_acc))


# One-hot encode the true labels so they can be compared with the prediction
# matrices (the "_original" names are rebound, as before).
train_labels_original = keras.utils.to_categorical(train_label_ids, 10)
test_labels_original = keras.utils.to_categorical(test_label_ids, 10)
print(train_labels_original.shape)
print(predict_train_labels_original.shape)

# Overall figures: element-wise agreement across all 10 binary outputs of every
# sample. NOTE(review): this is not the fraction of images whose digit is
# identified correctly.
print("Overall train accuracy: {} %".format(100 - np.mean(np.abs(predict_train_labels_original - train_labels_original)) * 100))
print("Overall test accuracy: {} %".format(100 - np.mean(np.abs(predict_test_labels_original - test_labels_original)) * 100))
# Index of the digit classifier with the highest test accuracy.
print('Classifier with Strongest output is:', np.argmax(test_acc_list))


Digit 0 train accuracy : 98.99333333333334 %
Digit 0 test accuracy : 98.96
Digit 1 train accuracy : 98.91666666666667 %
Digit 1 test accuracy : 98.94
Digit 2 train accuracy : 96.75666666666666 %
Digit 2 test accuracy : 96.17
Digit 3 train accuracy : 93.64166666666667 %
Digit 3 test accuracy : 93.13
Digit 4 train accuracy : 97.895 %
Digit 4 test accuracy : 97.86
Digit 5 train accuracy : 96.345 %
Digit 5 test accuracy : 96.49
Digit 6 train accuracy : 98.45166666666667 %
Digit 6 test accuracy : 98.27
Digit 7 train accuracy : 98.28166666666667 %
Digit 7 test accuracy : 98.25
Digit 8 train accuracy : 94.48833333333333 %
Digit 8 test accuracy : 94.11
Digit 9 train accuracy : 94.14333333333333 %
Digit 9 test accuracy : 94.25
(60000, 10)
(60000, 10)
Overall train accuracy: 96.79133333265781 %
Overall test accuracy: 96.64300009608269 %
Classifier with Strongest output is: 0
