MNIST from scratch

In [3]:
from sklearn.datasets import fetch_openml
import numpy as np
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical

# Fetch the 'mnist_784' dataset (version 1) from OpenML as a (features, labels) pair.
x, y = fetch_openml('mnist_784', version=1, return_X_y=True)

# Depending on the installed scikit-learn version, fetch_openml may return
# pandas objects instead of NumPy arrays. The per-sample loops further down
# iterate with zip(x, y), which needs row-wise ndarray iteration, so coerce
# both to plain arrays (a no-op when they already are arrays).
x = np.asarray(x)
y = np.asarray(y)

# Scale features by 1/255 (presumably 8-bit pixel intensities -> [0, 1];
# confirm against the dataset description) and store them as float32.
x = (x / 255).astype('float32')

# One-hot encode the labels. The explicit int cast covers labels that arrive
# as strings or objects rather than integers.
y = to_categorical(y.astype(int))

# Hold out 15% of the samples for validation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.15, random_state = 42)

In [11]:



class DeepNeuralNetwork:
  """Small fully connected classifier: input -> sigmoid -> ReLU -> softmax.

  The network has exactly three weight matrices and no bias terms, and is
  trained one sample at a time with plain gradient descent.

  Attributes:
    sizes: layer widths; only the first four entries are read
      (input, hidden 1, hidden 2, output).
    epochs: number of full passes over the training data made by ``train``.
    l_rate: step size applied to every weight update.
    params: dict holding the weights 'W1'..'W3'. ``forward_pass`` also
      caches activations 'A0'..'A3' and pre-activations 'Z1'..'Z3' in it.
    history: dict with 'train_accuracy' and 'val_accuracy' lists holding
      the per-epoch accuracies of the most recent ``train`` call.
  """

  def __init__(self, sizes, epochs = 10, l_rate = 0.01):
    """Store the hyper-parameters and draw the initial random weights."""
    self.sizes = sizes
    self.epochs = epochs
    self.l_rate = l_rate
    self.history = {'train_accuracy': [], 'val_accuracy': []}

    self.params = self.initialization()

  def initialization(self):
    """Return a dict with freshly drawn weight matrices 'W1', 'W2', 'W3'.

    Each matrix has shape (units_out, units_in) and is sampled from a
    standard normal scaled by sqrt(1 / units_out).
    """
    input_layer = self.sizes[0]
    hidden_1 = self.sizes[1]
    hidden_2 = self.sizes[2]
    output_layer = self.sizes[3]

    # The draw order (W1, W2, W3) is kept fixed so seeded runs stay reproducible.
    params = {
        'W1': np.random.randn(hidden_1, input_layer) * np.sqrt(1. / hidden_1),
        'W2': np.random.randn(hidden_2, hidden_1) * np.sqrt(1. / hidden_2),
        'W3': np.random.randn(output_layer, hidden_2) * np.sqrt(1. / output_layer),
    }

    return params

  def sigmoid(self, x, derivative=False):
    """Logistic function 1 / (1 + exp(-x)), or its derivative, element-wise.

    Both branches are expressed through z = exp(-|x|), which lies in (0, 1]
    and therefore cannot overflow. Evaluating exp(-x) directly overflows for
    large negative x and turns the derivative into inf / inf = NaN.
    """
    z = np.exp(-np.abs(x))
    if derivative:
      # sigmoid'(x) is an even function, so it can be written in |x| alone.
      return z / (1 + z) ** 2
    # For x >= 0 use 1 / (1 + e^-x); for x < 0 use the equivalent e^x / (1 + e^x).
    return np.where(x >= 0, 1 / (1 + z), z / (1 + z))

  def softmax(self, x, derivative=False):
    """Softmax along axis 0, or the element-wise term s * (1 - s).

    With ``derivative=True`` only s * (1 - s) is returned for each entry,
    i.e. the diagonal of the softmax Jacobian; the off-diagonal terms are
    not computed.
    """
    # Subtract the per-column maximum before exponentiating so large logits
    # do not overflow. For a 1-D input this is simply the global maximum.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    probs = exps / np.sum(exps, axis=0, keepdims=True)
    if derivative:
      return probs * (1 - probs)
    return probs

  def relu(self, x, derivative=False):
    """Return max(x, 0), or the boolean mask x > 0 when ``derivative`` is set."""
    if derivative:
      return x>0
    return np.maximum(x,0)

  def forward_pass(self, x_train):
    """Propagate ``x_train`` through the network and return the softmax output.

    Every intermediate value is cached in ``self.params`` under 'A0', 'Z1',
    'A1', 'Z2', 'A2', 'Z3', 'A3' so that ``backward_pass`` can reuse it.
    """
    params = self.params

    params['A0'] = x_train

    # Layer 1: linear map followed by sigmoid. The transpose is a no-op for
    # a 1-D sample.
    params['Z1'] = np.dot(params['W1'], params['A0'].T)
    params['A1'] = self.sigmoid(params['Z1'])

    # Layer 2: linear map followed by ReLU.
    params['Z2'] = np.dot(params['W2'], params['A1'])
    params['A2'] = self.relu(params['Z2'])

    # Output layer: linear map followed by softmax.
    params['Z3'] = np.dot(params['W3'], params['A2'])
    params['A3'] = self.softmax(params['Z3'])

    return params['A3']

  def backward_pass(self, y_train, output):
    """Return the weight gradients for the sample last seen by ``forward_pass``.

    Args:
      y_train: target vector for the sample (same length as ``output``).
      output: network output returned by ``forward_pass`` for that sample.

    Returns:
      Dict mapping 'W1', 'W2', 'W3' to arrays shaped like the weights.

    The output error term is 2 * (output - y_train) / len(output) times the
    element-wise softmax derivative; it is then propagated backwards through
    the ReLU and sigmoid layers using the values cached in ``self.params``.
    """
    params = self.params
    change_w = {}

    error = 2 * (output - y_train) / output.shape[0] * self.softmax(params['Z3'], derivative=True)
    change_w['W3'] = np.outer(error, params['A2'])

    error = np.dot(params['W3'].T, error) * self.relu(params['Z2'], derivative=True)
    change_w['W2'] = np.outer(error, params['A1'])

    error = np.dot(params['W2'].T, error) * self.sigmoid(params['Z1'], derivative=True)
    change_w['W1'] = np.outer(error, params['A0'])

    return change_w

  def update_network_parameters(self, changes_to_w):
    """Apply one gradient-descent step in place: W -= l_rate * dW."""
    for key, value in changes_to_w.items():
      self.params[key] -= self.l_rate * value

  def compute_accuracy(self, x_val, y_val):
    """Return the fraction of samples whose argmax prediction matches the target.

    Samples are evaluated one at a time, so the activations cached in
    ``self.params`` are overwritten by each call.
    """
    hits = [
        np.argmax(self.forward_pass(sample)) == np.argmax(target)
        for sample, target in zip(x_val, y_val)
    ]
    return np.mean(hits)

  def train(self, x_train, y_train, x_val, y_val):
    """Run ``self.epochs`` passes of per-sample gradient descent.

    After every epoch the training accuracy and then the validation accuracy
    are printed (in that order) and appended to ``self.history``. The method
    returns None.
    """
    train_accuracy = []
    val_accuracy = []
    # Expose the per-epoch curves on the instance rather than discarding them.
    self.history = {'train_accuracy': train_accuracy, 'val_accuracy': val_accuracy}

    for iteration in range(self.epochs):
      for x,y in zip(x_train, y_train):
        output = self.forward_pass(x)
        changes_to_w = self.backward_pass(y, output)
        self.update_network_parameters(changes_to_w)

      # First printed line per epoch: accuracy on the training set.
      accuracy = self.compute_accuracy(x_train, y_train)
      train_accuracy.append(accuracy)
      print(f'Epoch: {iteration},  Accuracy: {accuracy*100}')
      # Second printed line per epoch: accuracy on the validation set.
      accuracy = self.compute_accuracy(x_val, y_val)
      val_accuracy.append(accuracy)
      print(f'Epoch: {iteration},  Accuracy: {accuracy*100}')

  def predict(self, x):
    """Print and return the index of the largest network output for ``x``."""
    label = np.argmax(self.forward_pass(x))
    print('prediction is :', label)
    return label








In [12]:
# Layer widths: input, first hidden, second hidden, output.
layer_sizes = [784, 128, 64, 10]

# Build the network with the constructor's default epoch count and learning
# rate, then fit it; accuracies are printed after every epoch.
dnn = DeepNeuralNetwork(sizes=layer_sizes)
dnn.train(x_train, y_train, x_val=x_val, y_val=y_val)

Epoch: 0,  Accuracy: 79.31596638655462
Epoch: 0,  Accuracy: 79.36190476190477
Epoch: 1,  Accuracy: 86.21008403361344
Epoch: 1,  Accuracy: 85.81904761904762
Epoch: 2,  Accuracy: 88.38823529411765
Epoch: 2,  Accuracy: 87.99047619047619
Epoch: 3,  Accuracy: 89.74117647058823
Epoch: 3,  Accuracy: 89.59047619047618
Epoch: 4,  Accuracy: 90.72436974789916
Epoch: 4,  Accuracy: 90.55238095238096
Epoch: 5,  Accuracy: 91.44873949579832
Epoch: 5,  Accuracy: 91.31428571428572
Epoch: 6,  Accuracy: 92.09747899159663
Epoch: 6,  Accuracy: 91.92380952380952
Epoch: 7,  Accuracy: 92.59495798319328
Epoch: 7,  Accuracy: 92.39047619047619
Epoch: 8,  Accuracy: 92.97983193277311
Epoch: 8,  Accuracy: 92.56190476190476
Epoch: 9,  Accuracy: 93.27731092436974
Epoch: 9,  Accuracy: 92.92380952380952
