In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
import random

In [2]:
# Load SVHN dataset
data = scipy.io.loadmat("train_32x32.mat")
X_train = data['X']
Y_train = data['y']

data = scipy.io.loadmat("test_32x32.mat")
X_test = data['X']
Y_test = data['y']

In [3]:
# Flatten the images

X_train = X_train.reshape(X_train.shape[0]*X_train.shape[1]*X_train.shape[2], -1)
X_test = X_test.reshape(X_test.shape[0]*X_test.shape[1]*X_test.shape[2], -1)

# Normalize the images

X_train = X_train/255.
X_test = X_test/255.

# Convert labels to one-hot encoding

Y_train = np.eye(10)[Y_train.flatten()-1].T
Y_test = np.eye(10)[Y_test.flatten()-1].T

# Print the shape of the data
print("X_train shape:", X_train.shape)
print("Y_train shape:", Y_train.shape)
print("X_test shape:", X_test.shape)
print("Y_test shape:", Y_test.shape)

X_train shape: (3072, 73257)
Y_train shape: (10, 73257)
X_test shape: (3072, 26032)
Y_test shape: (10, 26032)


## Here we do the following splitting a dataset into training and testing sets, and then reshaping the arrays.

In [4]:
# assigning the arrays to the variables 
a = X_train.T
b = X_test.T
c = Y_train.T
d = Y_test.T

#lists to store the data
X_train = []
X_test = []
Y_train = []
Y_test = []

#selecting a subset of the data to use as the training and testing sets.
# where the first 1000 rows of a to X_train, the first 350 rows of b to X_test, the first 1000 rows of c to Y_train, and the first 350 rows of d to Y_test
for i in range(1000):
    X_train.append(a[i])
    
for i in range(400):
    X_test.append(b[i])
    
for i in range(1000):
    Y_train.append(c[i])
    
for i in range(400):
    Y_test.append(d[i])
    
X_train = np.array(X_train)
X_test = np.array(X_test)
Y_train = np.array(Y_train)
Y_test = np.array(X_test)
# transposing the arrays back to their original shape.
X_train = X_train.T
X_test = X_test.T
Y_train = Y_train.T
Y_test = Y_test.T

X_train = X_train
Y_train = Y_train
X_test = X_test
Y_test = Y_test

# Print the shape of the data
print("X_train shape:", X_train.shape)
print("Y_train shape:", Y_train.shape)
print("X_test shape:", X_test.shape)
print("Y_test shape:", Y_test.shape)

X_train shape: (3072, 1000)
Y_train shape: (10, 1000)
X_test shape: (3072, 400)
Y_test shape: (3072, 400)


## defining the activation functions which will be used for classification if needed

In [5]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def softmax(x):
    expX = np.exp(x)
    return expX/np.sum(expX, axis = 0)

In [6]:
def sigmoid_derivative(x):
    s = sigmoid(x)
    return s * (1 - s)

## Xavier weight initialization

In [7]:
# Define Xavier weight initialization function
def xavier_init(n_in, n_out):
    # return np.random.normal(0, np.sqrt(2/(n_in + n_out)), (n_in, n_out))
    return np.random.randn(n_in, n_out) * np.sqrt(1/n_in)
def xavier_init(n_in, n_out):
    return np.random.randn(n_out, int(n_in / np.sqrt(n_in)))


## initializing the parameters of a neural network with three hidden layers.

In [8]:
def initialize_parameters(n_x, neurons_hidden, n_y):
    np.random.seed(2)
    w1 = np.random.randn(neurons_hidden, n_x) * np.sqrt(1 / n_x)
    b1 = np.zeros((neurons_hidden, 1))
    w2 = np.random.randn(neurons_hidden, neurons_hidden) * np.sqrt(1 /neurons_hidden)
    b2 = np.zeros((neurons_hidden, 1))
    w3 = np.random.randn(n_y, neurons_hidden) * np.sqrt(1 /neurons_hidden)
    b3 = np.zeros((n_y, 1))
    parameters = {"w1": w1, 
                  "b1": b1, 
                  "w2": w2, 
                  "b2": b2,
                  "w3": w3, 
                  "b3": b3}
    return parameters

## forward_propagation that performs the forward propagation step of a neural network with three hidden layers. Computing the activations of the hidden layers and output layer.forward_cache containing the pre-activations z1, z2, and z3, as well as the activations a1, a2, and a3, and returns it

In [9]:
def forward_propagation(x, parameters):
    
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    w3 = parameters['w3']
    b3 = parameters['b3']
    
    
    z1 = np.dot(w1, x) + b1
    a1 = sigmoid(z1)
    
    z2 = np.dot(w2, a1) + b2
    a2 = sigmoid(z2)
    
    z3 = np.dot(w3, a2) + b3
    a3 = softmax(z3)
    
    
    forward_cache = {
        "z1" : z1,
        "a1" : a1,
        "z2" : z2,
        "a2" : a2,
        "z3" : z3,
        "a3" : a3
    }
    
    return forward_cache

defines a cost function to measure the error between the predicted output A2 and the actual output Y. It calculates the cross-entropy loss by computing the negative log-likelihood of the predicted output A2 given the actual output Y

In [10]:
def cost_function(A2, Y):
    
    m = Y.shape[1]
    
    logprobs_1 = np.multiply(np.log(A2), Y)
    logprobs_2 = np.multiply(np.log(1 - A2),(1 - Y))
    logprobs = logprobs_1 + logprobs_2
    cost = - (1/m) * np.sum(logprobs)
    
    cost = float(np.squeeze(cost))
    
    return cost

defines a function backward_propagation that computes the gradients of the cost function with respect to the parameters of the neural network using the backpropagation algorithm

In [11]:
def backward_propagation(x, y, parameters, forward_cache):
    
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    w3 = parameters['w3']
    b3 = parameters['b3']
    
    a1 = forward_cache['a1']
    a2 = forward_cache['a2']
    a3 = forward_cache['a3']
    
    m = x.shape[1]
    
    dz3 = (a3 - y)
    dw3 = (1/m)*np.dot(dz3, a2.T)
    db3 = (1/m)*np.sum(dz3, axis = 1, keepdims = True)
    
    dz2 = (1/m)*np.dot(w3.T, dz3)*sigmoid_derivative(a2)
    dw2 = (1/m)*np.dot(dz2, a1.T)
    db2 = (1/m)*np.sum(dz2, axis = 1, keepdims = True)
    
    dz1 = (1/m)*np.dot(w2.T, dz2)*sigmoid_derivative(a1)
    dw1 = (1/m)*np.dot(dz1, x.T)
    db1 = (1/m)*np.sum(dz1, axis = 1, keepdims = True)
    
    gradients = {
        "dw1" : dw1,
        "db1" : db1,
        "dw2" : dw2,
        "db2" : db2,
        "dw3" : dw3,
        "db3" : db3
    }
    
    return gradients

defines a function update_parameters_sgd that updates the values of the parameters of a neural network using stochastic gradient descent (SGD). The function first extracts the current values of the parameters and gradients from the corresponding dictionaries. It then updates the parameters using the formula parameter = parameter - learning_rate * gradient

In [12]:
def update_parameters_sgd(parameters, gradients, learning_rate):
    
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    w3 = parameters['w3']
    b3 = parameters['b3']
    
    dw1 = gradients['dw1']
    db1 = gradients['db1']
    dw2 = gradients['dw2']
    db2 = gradients['db2']
    dw3 = gradients['dw3']
    db3 = gradients['db3']
    
    w1 = w1 - learning_rate*dw1
    b1 = b1 - learning_rate*db1
    w2 = w2 - learning_rate*dw2
    b2 = b2 - learning_rate*db2
    w3 = w3 - learning_rate*dw3
    b3 = b3 - learning_rate*db3
    
    parameters = {
        "w1" : w1,
        "b1" : b1,
        "w2" : w2,
        "b2" : b2,
        "w3" : w3,
        "b3" : b3
    }
    
    return parameters

updates the parameters of a neural network using the RMSprop optimization algorithm. The function takes four input arguments: Parametrs, gradients, learning_rate, epsilon and The RMSprop algorithm updates the weights and biases using a moving average of the squared gradients.the algorithm updates the weights and biases using the formula:

W = W - learning_rate * dW / (sqrt(s_dW) + epsilon)


b = b - learning_rate * db / (sqrt(s_db) + epsilon)


In [13]:
def update_parameters_rmsprop(parameters, gradients, learning_rate, epsilon=1e-8):
    
    # Retrieve the parameters from the dictionary
    w1 = parameters["w1"]
    b1 = parameters["b1"]
    w2 = parameters["w2"]
    b2 = parameters["b2"]
    w3 = parameters["w3"]
    b3 = parameters["b3"]
    
    # Retrieve the gradients from the dictionary
    dw1 = gradients["dw1"]
    db1 = gradients["db1"]
    dw2 = gradients["dw2"]
    db2 = gradients["db2"]
    dw3 = gradients["dw3"]
    db3 = gradients["db3"]
    
    s_dW1 = np.zeros_like(dw1)
    s_db1 = np.zeros_like(db1)
    s_dW2 = np.zeros_like(dw2)
    s_db2 = np.zeros_like(db2)
    s_dW3 = np.zeros_like(dw3)
    s_db3 = np.zeros_like(db3)
    
    beta = 0.9
    
    s_dW1 = beta * s_dW1 + (1 - beta) * dw1 ** 2
    s_db1 = beta * s_db1 + (1 - beta) * db1 ** 2
    s_dW2 = beta * s_dW2 + (1 - beta) * dw2 ** 2
    s_db2 = beta * s_db2 + (1 - beta) * db2 ** 2
    s_dW3 = beta * s_dW3 + (1 - beta) * dw3 ** 2
    s_db3 = beta * s_db3 + (1 - beta) * db3 ** 2
    
    w1 -= learning_rate * dw1 / (np.sqrt(s_dW1) + epsilon)
    b1 -= learning_rate * db1 / (np.sqrt(s_db1) + epsilon)
    w2 -= learning_rate * dw2 / (np.sqrt(s_dW2) + epsilon)
    b2 -= learning_rate * db2 / (np.sqrt(s_db2) + epsilon)
    w3 -= learning_rate * dw3 / (np.sqrt(s_dW3) + epsilon)
    b3 -= learning_rate * db3 / (np.sqrt(s_db3) + epsilon)
    
    parameters = {"w1": w1, 
                  "b1": b1, 
                  "w2": w2, 
                  "b2": b2, 
                  "w3": w3, 
                  "b3": b3}
    
    return parameters

defines a function called model that trains a neural network with a specified number of hidden neurons, learning rate, activation function, and number of iterations. The function takes in input features x and target labels y and returns the trained parameters and the cost of the network over the iterations.

In [16]:
def model(x, y, neurons_hidden, learning_rate, iterations, activation):
    
    n_x = x.shape[0]
    n_y = y.shape[0]
    
    cost_list = []
    
    parameters = initialize_parameters(n_x, neurons_hidden, n_y)
    
    for i in range(iterations):
        
        forward_cache = forward_propagation(x, parameters)
        
        cost = cost_function(forward_cache['a3'], y)
        
        gradients = backward_propagation(x, y, parameters, forward_cache)
        
        if (activation == "SGD"):
            parameters = update_parameters_sgd(parameters, gradients, learning_rate)
        if (activation == "RMS_prop"):
            parameters = update_parameters_rmsprop(parameters, gradients, learning_rate, epsilon=1e-8)
        
        cost_list.append(cost)
        
        if(i%(iterations/10) == 0):
            print("Cost after", i, "iterations is :", cost)
        
    return parameters, cost_list

training a neural network using the RMSProp optimizer, with a hidden layer of 1200 neurons, a learning rate of 0.001, and 250 iterations of training.

In [17]:
iterations = 250
neurons_hidden = 1200
learning_rate = 0.001

Parameters, Cost_list = model(X_train, Y_train, neurons_hidden = neurons_hidden, learning_rate = learning_rate, iterations = iterations, activation="RMS_prop")

Cost after 0 iterations is : 3.3769013263821077
Cost after 25 iterations is : 3.4381587045488358
Cost after 50 iterations is : 3.6818816512653374
Cost after 75 iterations is : 3.388904649692596
Cost after 100 iterations is : 3.5908095208082256
Cost after 125 iterations is : 3.36527803877349
Cost after 150 iterations is : 3.540197363680468
Cost after 175 iterations is : 3.3501992787701385
Cost after 200 iterations is : 3.502662252602062
Cost after 225 iterations is : 3.3326132340288193


function accuracy calculates the accuracy of the predictions made by a neural network model on a given input data and corresponding labels.

In [18]:
def accuracy(inp, labels, parameters):
    forward_cache = forward_propagation(inp, parameters)
    a_out = forward_cache['a3']   # containes propabilities with shape(10, 1)
    
    a_out = np.argmax(a_out, 0)  # 0 represents row wise 
    
    labels = np.argmax(labels, 0)
    accuracy = np.mean(a_out == labels)*100
    return accuracy

calling the accuracy() function to compute the accuracy of the trained neural network model on both the train and test datasets.

In [19]:
print("Accuracy of Train Dataset", accuracy(X_train, Y_train, Parameters), "%")
print("Accuracy of Test Dataset", accuracy(X_test, Y_test, Parameters), "%")

Accuracy of Train Dataset 13.600000000000001 %
Accuracy of Test Dataset 0.5 %


Resources: few websites where they built neural network from scratch and few github resources also