In [None]:
import numpy as np
from keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every sample into one column (result is features x samples), then
# divide by 255 -- presumably the inputs are 8-bit pixel intensities, which
# would map them into [0, 1].
X_train = X_train.reshape(len(X_train), -1).T / 255.0
X_test = X_test.reshape(len(X_test), -1).T / 255.0

# activation functions
def sigmoid(z):
    """Return the element-wise logistic sigmoid 1 / (1 + exp(-z)).

    The result is computed in a numerically stable way: the exponential is
    only ever evaluated on non-positive values, so very negative inputs no
    longer overflow inside ``np.exp``.

    Args:
        z: A scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``z``.
    """
    z = np.asarray(z)
    if z.dtype.kind in "biu":
        # Negating unsigned integers wraps around; work in floating point.
        z = z.astype(float)

    # exp(-|z|) lies in (0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(z))
    # For z >= 0 this is the textbook formula; for z < 0 it is the
    # algebraically equivalent exp(z) / (1 + exp(z)).
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves arrays of higher rank untouched.
    return result[()]

def softmax(z):
    """Apply softmax down axis 0, so every column sums to one.

    Each column has its maximum subtracted before exponentiating, which
    leaves the result unchanged but keeps ``np.exp`` from overflowing.
    """
    column_peak = np.max(z, axis=0, keepdims=True)
    exponentials = np.exp(z - column_peak)
    column_total = np.sum(exponentials, axis=0, keepdims=True)
    return exponentials / column_total

def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and ``z``."""
    rectified = np.maximum(0, z)
    return rectified

def random_init(layers_dim):
    """Create weights and biases for a fully connected network.

    NumPy's global random generator is reseeded with 0 on every call, so the
    returned values are reproducible (and later draws from the global
    generator are affected as well).

    Args:
        layers_dim: Layer widths, input layer first.

    Returns:
        Dict with keys ``"W1".."WL"`` and ``"b1".."bL"``. ``Wi`` has shape
        ``(layers_dim[i], layers_dim[i-1])`` and holds standard-normal draws
        scaled by ``sqrt(2 / layers_dim[i-1])`` (He initialisation); ``bi`` is
        a zero column of shape ``(layers_dim[i], 1)``.
    """
    np.random.seed(0)

    params = {}
    fan_pairs = zip(layers_dim[:-1], layers_dim[1:])
    for layer, (fan_in, fan_out) in enumerate(fan_pairs, start=1):
        scale = np.sqrt(2. / fan_in)
        params[f"W{layer}"] = np.random.randn(fan_out, fan_in) * scale
        params[f"b{layer}"] = np.zeros((fan_out, 1))
    return params

def forward(X, params):
    """Run the three-layer forward pass (ReLU, ReLU, softmax).

    Args:
        X: Input matrix that ``params["W1"]`` is left-multiplied onto.
        params: Dict holding ``W1``/``b1``, ``W2``/``b2`` and ``W3``/``b3``.

    Returns:
        Tuple ``(y_pred, cache)``. ``y_pred`` is the softmax output of the
        last layer; ``cache`` stores the input under ``"X"``, the
        pre-activations under ``"Z1".."Z3"`` and the activations under
        ``"A1".."A3"``.
    """
    # First hidden layer.
    pre1 = params["W1"] @ X + params["b1"]
    act1 = relu(pre1)

    # Second hidden layer.
    pre2 = params["W2"] @ act1 + params["b2"]
    act2 = relu(pre2)

    # Output layer.
    logits = params["W3"] @ act2 + params["b3"]
    probabilities = softmax(logits)

    cache = dict(
        X=X,
        A1=act1, A2=act2, A3=probabilities,
        Z1=pre1, Z2=pre2, Z3=logits,
    )
    return probabilities, cache

# Cross Entropy Loss
def crossentropy(y_pred, y_true):
    """Mean cross-entropy between predictions and targets.

    The per-sample loss is the negated sum over axis 0 of
    ``y_true * log(y_pred + 1e-8)``; the small constant keeps the logarithm
    finite when a predicted value is exactly zero. The returned value is the
    mean of those per-sample losses.
    """
    log_likelihood = y_true * np.log(y_pred + 1e-8)
    per_sample = -np.sum(log_likelihood, axis=0)
    return np.mean(per_sample)

def relu_derivative(Z):
    """Return a boolean mask that is True wherever ``Z`` is strictly positive.

    Multiplying by this mask is equivalent to multiplying by the ReLU
    derivative (1 for positive inputs, 0 otherwise).
    """
    is_positive = Z > 0
    return is_positive

def backward(y_true, cache, params):
    """Back-propagate through the three-layer network.

    Args:
        y_true: Target matrix; its second dimension is used as the number of
            samples ``m``.
        cache: Dict produced by ``forward`` (keys ``X``, ``A1``-``A3``,
            ``Z1``-``Z3``).
        params: Dict of weights; only ``W2`` and ``W3`` are read here.

    Returns:
        Dict with the gradients ``dW1``-``dW3`` and ``db1``-``db3``, each
        already divided by ``m``.
    """
    m = y_true.shape[1]
    inv_m = 1/m

    # Output layer: softmax + cross-entropy gives (prediction - target).
    delta3 = cache["A3"] - y_true
    scaled3 = inv_m * delta3
    grad_w3 = scaled3 @ cache["A2"].T
    grad_b3 = np.sum(scaled3, axis=1, keepdims=True)

    # Second hidden layer.
    upstream2 = params["W3"].T @ delta3
    delta2 = upstream2 * relu_derivative(cache["Z2"])
    scaled2 = inv_m * delta2
    grad_w2 = scaled2 @ cache["A1"].T
    grad_b2 = np.sum(scaled2, axis=1, keepdims=True)

    # First hidden layer.
    upstream1 = params["W2"].T @ delta2
    delta1 = upstream1 * relu_derivative(cache["Z1"])
    scaled1 = inv_m * delta1
    grad_w1 = scaled1 @ cache["X"].T
    grad_b1 = np.sum(scaled1, axis=1, keepdims=True)

    return {
        "dW3": grad_w3, "db3": grad_b3,
        "dW2": grad_w2, "db2": grad_b2,
        "dW1": grad_w1, "db1": grad_b1,
    }

def onehot(y, classes):
    """One-hot encode integer labels as columns.

    Args:
        y: NumPy array of integer class labels.
        classes: Number of rows (classes) in the encoding.

    Returns:
        Float array of shape ``(classes, y.size)`` with a 1 at row ``y[j]``
        of column ``j`` and 0 elsewhere.
    """
    n_samples = y.size
    encoded = np.zeros((classes, n_samples))
    columns = np.arange(n_samples)
    encoded[y, columns] = 1
    return encoded

# parameters
# Layer widths: input, two hidden layers, output.
layers_dim = [784, 128, 64, 10]
# Training labels as a one-hot matrix with 10 rows (one per class).
y_train_oh = onehot(y_train, classes=10)

# training
def train(lr, X, y_true, layers_dim, epochs, batch_size=64):
    """Train the network with mini-batch gradient descent.

    Every epoch the sample columns are reshuffled and processed in
    consecutive mini-batches. The trailing, possibly smaller, batch is
    included, so no sample is skipped and a data set with fewer than
    ``batch_size`` samples still produces updates.

    Args:
        lr: Learning rate applied to every gradient step.
        X: Input matrix with one sample per column.
        y_true: Target matrix with one sample per column, aligned with ``X``.
        layers_dim: Layer widths passed on to ``random_init``.
        epochs: Number of passes over the data.
        batch_size: Number of samples per mini-batch (default 64).

    Returns:
        The dict of trained parameters.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Random initialization of w, b
    params = random_init(layers_dim)

    m = X.shape[1]
    for epoch in range(epochs):
        # Shuffle on each epoch; inputs and targets share one permutation.
        perm = np.random.permutation(m)
        X_shuffled = X[:, perm]
        y_shuffled = y_true[:, perm]

        # Sum of the per-batch mean losses over this epoch.
        epoch_loss = 0.0
        for start in range(0, m, batch_size):
            # Slicing past the end is clamped, which yields the short
            # final batch when m is not a multiple of batch_size.
            end = start + batch_size
            X_batch = X_shuffled[:, start:end]
            y_batch = y_shuffled[:, start:end]

            # Forward pass and loss.
            y_pred, cache = forward(X_batch, params)
            epoch_loss += crossentropy(y_pred, y_batch)

            # Backward pass.
            gradients = backward(y_batch, cache, params)

            # Gradient step: the gradient of "W1" is stored under "dW1", etc.
            for name in params:
                params[name] -= lr * gradients["d" + name]
        print(f"Epochs : {epoch+1}, Loss: {epoch_loss:.4f}")
    return params

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Fit the network: learning rate 0.1 for 100 epochs.
trained_params = train(
    lr=0.1,
    X=X_train,
    y_true=y_train_oh,
    layers_dim=layers_dim,
    epochs=100,
)

def accuracy(X, y_true, params):
    """Return the fraction of samples whose predicted class equals ``y_true``.

    The predicted class of a sample is the row index of the largest value in
    its column of the ``forward`` output.
    """
    probabilities, _cache = forward(X, params)
    predicted_labels = np.argmax(probabilities, axis=0)
    hits = predicted_labels == y_true
    return np.mean(hits)

# Report accuracy on the test split first, then on the training split.
acc = accuracy(X=X_test, y_true=y_test, params=trained_params)
print("Test Accuracy:", 100 * acc, "%")

acc = accuracy(X=X_train, y_true=y_train, params=trained_params)
print("Train Accuracy:", 100 * acc, "%")

Epochs : 1, Loss: 300.6419
Epochs : 2, Loss: 139.7600
Epochs : 3, Loss: 100.4822
Epochs : 4, Loss: 78.2120
Epochs : 5, Loss: 64.3613
Epochs : 6, Loss: 53.5817
Epochs : 7, Loss: 45.2224
Epochs : 8, Loss: 38.4811
Epochs : 9, Loss: 32.2295
Epochs : 10, Loss: 27.7540
Epochs : 11, Loss: 22.7725
Epochs : 12, Loss: 20.3815
Epochs : 13, Loss: 16.8596
Epochs : 14, Loss: 14.3125
Epochs : 15, Loss: 11.3613
Epochs : 16, Loss: 9.4168
Epochs : 17, Loss: 8.4446
Epochs : 18, Loss: 6.4401
Epochs : 19, Loss: 4.9158
Epochs : 20, Loss: 4.2946
Epochs : 21, Loss: 3.5673
Epochs : 22, Loss: 2.9634
Epochs : 23, Loss: 2.6185
Epochs : 24, Loss: 2.3308
Epochs : 25, Loss: 2.0620
Epochs : 26, Loss: 1.8381
Epochs : 27, Loss: 1.7116
Epochs : 28, Loss: 1.4964
Epochs : 29, Loss: 1.3686
Epochs : 30, Loss: 1.2535
Epochs : 31, Loss: 1.1411
Epochs : 32, Loss: 1.0821
Epochs : 33, Loss: 1.0131
Epochs : 34, Loss: 0.9404
Epochs : 35, Loss: 0.8881
Epochs : 36, Loss: 0.8413
Epochs : 37, Loss: 0.7945
Epochs : 38, Loss: 0.7577
Epo