<a href="https://colab.research.google.com/github/kushshah777/MachineLearningAssignment1/blob/master/Answer1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import keras
from keras.datasets import mnist
from keras import backend as K


In [0]:
def initialize(dimension): #initially the weight and bias are set to zero when the input is given to the neural network
  weight=np.zeros(shape=(dimension,1))
  bias=0
  return weight, bias
def activationsigmoid(x): #sigmoid is used as activation function here as we are interested to know if the input image is YES/NO belonging to a particular class (0-9)
  return 1.0/(1.0+ np.exp(-x))
def sigmoidprime(x): #sigmoid prime is used for calculating the derivatives of sigmoid function because at the output layer we have sigmoid as activation function so
  return activationsigmoid(x)*(1-activationsigmoid(-x))
def minibatches(X,Y,size): # used to calculate minibatches
  for i in range(0,X.shape[0]-size+1,size):
    a=slice(i,i+size)
    yield X[a],Y[a]
def mse(weight,bias,X,Y): # this is used to get the mean squared error which is nothing but the loss function of the neural network
  sample=X.shape[1]
  A=activationsigmoid(np.dot(weight.T,X)+bias)
  cost=np.mean((Y-A) **2)
  dweight= (1/sample) * np.dot(X, (2*(A-Y) * A *(1-A)).T) # formula as per discussed in class
  dbias= (1/sample) * np.sum(2 * (A-Y) * A * (1-A))
  gradients = {"dweight" : dweight, "dbias" : dbias}
  cost= np.squeeze(cost)
  return gradients, cost
def backprop(weight,bias, X, Y, epochs, learningrate): #This function gives the method of backpropogation to  do gradiant descent
  totalcost=[]
  for i in range(epochs):
    for b in minibatches(X.T,Y.T,32): # we calculate the mini batches by this function
      x_batch, y_batch = b
      gradients, cost = mse(weight, bias, x_batch.T, y_batch.T)
      dweight=gradients["dweight"]
      dbias=gradients["dbias"]
      weight=dweight * (weight- learningrate)
      bias= dbias * (bias - learningrate)
    totalcost.append(cost)
  weightbias={"weight": weight, "bias" : bias}
  dweightdbias={"dbias": dbias, "dweight" : dweight}
  return weightbias, dweightdbias, totalcost

def prediction(weight, bias, X): #function to predict the values of image according to classifier
  noofsamples=X.shape[1]
  Y_pred=np.zeros((1, noofsamples))
  weight= weight.reshape(X.shape[0], 1) # it contains confidence value which can be either 0 or 1 for 60000 images
  A= activationsigmoid(np.dot(weight.T,X)+ bias)
  for i in range(A.shape[1]):
    if A[0,i] <=0.5:
      Y_pred[0,i]=1
    else:
      Y_pred[0,i]=0
  return Y_pred

In [0]:
def networkarch(X_train,Y_train,X_test,Y_test,iterations=50, learningrate=0.3):
  trainedmodels={}
  categorical_train_y=Y_train;
  categorical_test_y=Y_test;
  categorical_train_y=keras.utils.to_categorical(categorical_train_y,10)  # we have 10 classes so we make categories of the label tests and train data
  categorical_test_y=keras.utils.to_categorical(categorical_test_y,10)
  
  for i in range(0,10):
    modelno=i
    y_train_model=np.array(Y_train);
    y_train_model=np.where(y_train_model == modelno, 1, 0) #we train the model to take either 0 or 1 (yes/no) value of y train and y test
    
    y_test_model=np.array(Y_test);
    y_test_model=np.where(y_test_model == modelno, 1, 0)
    
    weight, bias=initialize(X_train.shape[0])
    weightbias, dweightdbias, totalcost= backprop(weight, bias, X_train, y_train_model,iterations,learningrate)
    weight=weightbias["weight"]
    bias= weightbias["bias"]
    
    y_train_predicted=prediction(weight,bias,X_train) #we use weight, bias to predict the values of training input images 
    y_test_predicted=prediction(weight, bias, X_test)
    
    categorical_train_y[:,[i]] = y_train_predicted.T #storing indexes that are corrosponding to categorical data from y_train
    categorical_test_y[:,[i]] = y_test_predicted.T
    
    trainaccuracy=np.mean(np.abs(y_train_predicted-y_train_model))
    testaccuracy=np.mean(np.abs(y_test_predicted-y_test_model))
    print(" Classifier "+ str(i) +"s train accuracy is "+str(trainaccuracy*100))
    print("Classifier "+ str(i) +"s test accuracy is "+str(testaccuracy*100))
    d={"Y_prediction_train": y_train_predicted, "Y_prediction_test" : y_test_predicted, "weight": weight, "bias" : bias, "totalcost": totalcost,"learningrate": learningrate}
    trainedmodels[i] = d
  print("overall train accuracy is "+str(np.mean(categorical_train_y-keras.utils.to_categorical(Y_train,10))* 100))
  print("overall test accuracy is "+str(np.mean(categorical_test_y-keras.utils.to_categorical(Y_test,10))* 100))
  

In [0]:
(x_train,y_train), (x_test,y_test ) = mnist.load_data() # loading data from mnist dataset
rows=28
columns=28
if K.image_data_format() == 'channels_first':          #checking if the data format is channel first, if so we add 1 before rows and columns.
  x_train=x_train.reshape(x_train.shape[0],1,rows, columns)
  x_test=x_test.reshape(x_test.shape[0],1,rows,columns)
  inputshape=(1,rows,columns)
else:
  x_train=x_train.reshape(x_train.shape[0],rows,columns,1)
  x_test=x_test.reshape(x_test.shape[0],rows,columns,1)
  inputshape=(rows,columns,1)

In [0]:
x_train = x_train / 255.0     #normalizing the data
x_test = x_test / 255.0
x_train = x_train.reshape(x_train.shape[0], -1).T  #scaling the data to fit our neuralnetwork
x_test = x_test.reshape(x_test.shape[0], -1).T
y_test = y_test.T
y_train = y_train.T

In [12]:
models = networkarch(x_train, y_train, x_test, y_test, iterations =12, learningrate= 0.1)

 Classifier 0s train accuracy is 90.12833333333333
Classifier 0s test accuracy is 90.2
 Classifier 1s train accuracy is 88.76333333333334
Classifier 1s test accuracy is 88.64999999999999
 Classifier 2s train accuracy is 90.07
Classifier 2s test accuracy is 89.68
 Classifier 3s train accuracy is 89.78166666666667
Classifier 3s test accuracy is 89.9
 Classifier 4s train accuracy is 90.26333333333334
Classifier 4s test accuracy is 90.18
 Classifier 5s train accuracy is 90.96499999999999
Classifier 5s test accuracy is 91.08000000000001
 Classifier 6s train accuracy is 90.13666666666667
Classifier 6s test accuracy is 90.42
 Classifier 7s train accuracy is 89.55833333333332
Classifier 7s test accuracy is 89.72
 Classifier 8s train accuracy is 90.24833333333333
Classifier 8s test accuracy is 90.25999999999999
 Classifier 9s train accuracy is 90.08500000000001
Classifier 9s test accuracy is 89.91
overall train accuracy is 89.99999761581421
overall test accuracy is 89.99999761581421
