# MACHINE LEARNING LAB ASSIGNMENT


# Neural Network Assignment


### NAME     : **MOHIT TALREJA**

### ROLL NO. : **177237**

In [1]:
from sklearn.datasets import fetch_openml
from keras.utils.np_utils import to_categorical
import numpy as np
from sklearn.model_selection import train_test_split
import time

### Loading the MNIST Dataset

In [2]:
features, labels = fetch_openml('mnist_784', version=1, return_X_y=True)

## Preprocessing

Normalizing the feature set:

Each entry is a value between 0 and 255. It is brought to a value between 0 and 1 so as to avoid issues with activation function calculations. (Overflowing when taken directly as it is)

In [3]:
features = (features/255).astype('float32')

Converting the labels to 1-hot encoded format so as to allow for simple subtraction in delta calculation step at the outputs of the network.

In [4]:
labels = to_categorical(labels)

Splitting into training and testing parts

In [5]:
X_train, X_test, Y_train, Y_test = train_test_split(features, labels, test_size=0.15, random_state=37)

### Neural Network Construction

**Number of Neurons in Input Layer** = 28 x 28 = 784

**Number of Hidden Layers** = 2

**Number of Neurons in Output Layer** = Number of prediction labels = 10

**Weighted Sum:**
This is the scalar dot product of the 2 vectors, weights and features.

$ z = \sum_{i=0}^{n}w_{i}.x_{i}$

**Activation Function for Hidden Layer Units**: 
The sigmoid activation function takes input the weighted sum and computes the following - 

$f(z) = \frac{1}{1+e^{-z}}$

**Activation Function for Output Layer Units**:
The softmax activation function takes input the weighted sum of outputs from hidden layer units and computes the following - 

$softmax(z) = \frac{exp(z)}{\sum_{j}^{ }exp(z_j)}$

This softmax gives us the probability of each prediction label.

**Forward Propogation**

Hidden layer 1 takes input the feature tuple, computes weighted sum, applies sigmoid activation to calculate the output.

Hidden layer 2 takes input as the outputs from hidden layer 1, computes weighted sum, applies sigmoid activation to calculate the output.

Output layer takes input as the outputs from hidden layer 2, computes weighted sum, applies softmax activation to calculate prediction of each label digit.

**Backward Propogation**

Calculates the delta (change) values of network parameters:

Output layer's delta value is calculated with difference between actual labelled outputs (1-hot encoded vector) and the prediciton values from forward propogation step along with the softmax derivative of it's output.

Hidden layer 1's delta value is calculated with weighted sum of delta values of output layer along with the sigmoid derivative of it's output.

Hidden layer 2's delta value is calculated with weighted sum of delta values of hidden layer 1 along with the sigmoid derivative of it's output.

**Training the Model**

Repeat for EPOCH iterations of the dataset:

Forward propogate features

Backpropogate errors

Update weights (stochastic update)

In [2]:
class NeuralNetwork():
    #Initializing the network
    def __init__(self, sizes, epochs=50, lRate=0.01):
        self.sizes = sizes # Number of neurons in each layer
        self.epochs = epochs
        self.lRate = lRate # Learning rate
        self.parameters = self.initParameters()

    def sigmoid(self, x, derivative=False):
        if derivative:
            return (np.exp(-x))/((np.exp(-x)+1)**2)
        return 1/(1 + np.exp(-x))

    def softmax(self, x, derivative=False):
        exps = np.exp(x - x.max())
        if derivative:
            return exps / np.sum(exps, axis=0) * (1 - exps / np.sum(exps, axis=0))
        return exps / np.sum(exps, axis=0)
    
    # Initializing the weights and biases of the network
    def initParameters(self):
        parameters = {}
        for i in range(1,len(self.sizes)):
            #parameters['W'+str(i)] = np.full((self.sizes[i],self.sizes[i-1]),0.1)
            parameters['W'+str(i)] = np.random.randn(self.sizes[i], self.sizes[i-1]) * np.sqrt(1. / self.sizes[i])

        """
        input_layer=self.sizes[0]
        hidden_1=self.sizes[1]
        hidden_2=self.sizes[2]
        output_layer=self.sizes[3]

        # Initializing all weights to values between 0 and 1 
        parameters = {
            'W1':np.random.randn(hidden_1, input_layer) * np.sqrt(1. / hidden_1),
            'W2':np.random.randn(hidden_2, hidden_1) * np.sqrt(1. / hidden_2),
            'W3':np.random.randn(output_layer, hidden_2) * np.sqrt(1. / output_layer)
        }
        """
        return parameters

    def forwardPropogate(self, X_train):
        parameters = self.parameters
        # A0 denotes input to hidden layer 1, which is the feature vector
        parameters['A0'] = X_train
        for i in range(1,len(self.sizes)):
            parameters['Z'+str(i)] = np.dot(parameters['W'+str(i)], parameters['A'+str(i-1)])
            parameters['A'+str(i)] = self.sigmoid(parameters['Z'+str(i)])
        """
        # Z1 is weighted sum of inputs
        parameters['Z1'] = np.dot(parameters["W1"], parameters['A0'])
        # A1 denotes input to hidden layer 2, which is the output of hidden layer 1
        parameters['A1'] = self.sigmoid(parameters['Z1'])

        # Z2 is weighted sum of outputs from hidden layer 1
        parameters['Z2'] = np.dot(parameters["W2"], parameters['A1'])
        # A2 denotes input to output layer, which is the output of hidden layer 2
        parameters['A2'] = self.sigmoid(parameters['Z2'])

        # Z3 is weighted sum of outputs from hidden layer 2
        parameters['Z3'] = np.dot(parameters["W3"], parameters['A2'])
        # A3 denotes final prediction
        parameters['A3'] = self.softmax(parameters['Z3'])
        """
        return parameters['A'+str(len(self.sizes)-1)]

    def backwardPropogate(self, y_train, output):
        
        parameters = self.parameters
        deltaW = {}

        j = len(self.sizes)-1
        # Output layer's delta
        #delta = 2 * (output - y_train) / output.shape[0] * self.softmax(parameters['Z'+str(j)], derivative=True)
        delta = output*([1 for _ in range(len(output))]-output)*(output-y_train)
        # Weight change value for output layer
        

        while(j>0):
          # weights and delta have the same shape, so we need to perform transpose on weights to calculate dot product
          # Hidden layer 1's delta is weighted sum of delta values of output layer's neurons
            deltaW['W'+str(j)] = np.outer(delta, parameters['A'+str(j-1)])
            if(j>1):
                delta = np.dot(parameters['W'+str(j)].T, delta) * self.sigmoid(parameters['Z'+str(j-1)], derivative=True)
            j-=1
          
        """
        # Hidden layer 1's delta is weighted sum of delta values of output layer's neurons
        delta = np.dot(parameters['W3'].T, delta) * self.sigmoid(parameters['Z2'], derivative=True)
        # Weight change value for hidden layer 1
        deltaW['W2'] = np.outer(delta, parameters['A1'])

        # Hidden layer 2's delta is weighted sum of delta values of hidden layer 1's neurons
        delta = np.dot(parameters['W2'].T, delta) * self.sigmoid(parameters['Z1'], derivative=True)
        # Weight change value for hidden layer 2
        deltaW['W1'] = np.outer(delta, parameters['A0'])
        """
        return deltaW

    def updateParameters(self, deltaW):
        
        for key, value in deltaW.items():
            self.parameters[key] -= self.lRate * value

    def calculateAccuracy(self, X_test, Y_test):
        predictions = []

        for x, y in zip(X_test, Y_test):
            output = self.forwardPropogate(x)
            pred = np.argmax(output)
            predictions.append(pred == np.argmax(y))
        
        return np.mean(predictions)

    def train(self, X_train, y_train, X_test, Y_test):
        start_time = time.time()
        for iteration in range(self.epochs):
            for x,y in zip(X_train, y_train):
                output = self.forwardPropogate(x)
                deltaW = self.backwardPropogate(y, output)
                self.updateParameters(deltaW)
            
            #print(self.parameters)
            accuracy = self.calculateAccuracy(X_test, Y_test)
            print('Epoch: {0}, Time Spent: {1:.2f}s, Accuracy: {2:.2f}%'.format(
                iteration+1, time.time() - start_time, accuracy * 100
            ))

**Number of neurons:** 

Input layer = 784

Hidden layer 1 = 32

Hidden layer 2 = 16

Output layer = 10

In [12]:
nnObj = NeuralNetwork(sizes=[784, 128, 64, 32, 10],epochs=10)
nnObj.train(X_train, Y_train, X_test, Y_test)

Epoch: 1, Time Spent: 62.05s, Accuracy: 19.06%
Epoch: 2, Time Spent: 123.93s, Accuracy: 28.04%
Epoch: 3, Time Spent: 185.79s, Accuracy: 33.66%
Epoch: 4, Time Spent: 248.10s, Accuracy: 39.74%
Epoch: 5, Time Spent: 310.03s, Accuracy: 59.26%
Epoch: 6, Time Spent: 371.74s, Accuracy: 65.73%
Epoch: 7, Time Spent: 433.65s, Accuracy: 74.04%
Epoch: 8, Time Spent: 496.30s, Accuracy: 80.55%
Epoch: 9, Time Spent: 557.90s, Accuracy: 84.50%
Epoch: 10, Time Spent: 625.49s, Accuracy: 86.64%


### Loading Handwritten Character Dataset

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import os
os.environ['KAGGLE_CONFIG_DIR'] = "/content/gdrive/My Drive/Kaggle"

In [None]:
#changing the working directory
%cd /content/gdrive/My Drive/Kaggle

/content/gdrive/My Drive/Kaggle


In [None]:
!kaggle datasets download -d sachinpatel21/az-handwritten-alphabets-in-csv-format

Downloading az-handwritten-alphabets-in-csv-format.zip to /content/gdrive/My Drive/Kaggle
 96% 177M/185M [00:07<00:00, 21.5MB/s]
100% 185M/185M [00:07<00:00, 24.4MB/s]


In [None]:
!ls

az-handwritten-alphabets-in-csv-format.zip  kaggle.json


In [None]:
#unzipping the zip files and deleting the zip files
!unzip \*.zip  && rm *.zip

Archive:  az-handwritten-alphabets-in-csv-format.zip
  inflating: A_Z Handwritten Data.csv  
  inflating: A_Z Handwritten Data/A_Z Handwritten Data.csv  


In [None]:
!ls

'A_Z Handwritten Data'	'A_Z Handwritten Data.csv'   kaggle.json


In [None]:
df = pd.read_csv('A_Z Handwritten Data.csv')
print(df.shape)

(372450, 785)


Taking subset of the dataset (first 60000 rows)

In [None]:
df = np.array(df)
X = df[:60000,1:785]
Y = df[:60000,0:1]

In [None]:
print(X.shape)
print(Y.shape)
print(np.unique(Y))

(60000, 784)
(60000, 1)
[0 1 2 3 4]


Normalizing feature set entries and converting labels to 1-hot encoded form (same as above done for MNIST dataset)

In [None]:
X = (X/255).astype('float32')
Y = to_categorical(Y)


In [None]:
CX_train, CX_test, CY_train, CY_test = train_test_split(X, Y, test_size=0.15, random_state=37)

**Number of neurons:** 

Input layer = 784

Hidden layer 1 = 64

Hidden layer 2 = 32

Output layer = 5

In [None]:
nnObjChar = NeuralNetwork(sizes=[784, 64, 32, 5],epochs=5)
nnObjChar.train(CX_train, CY_train, CX_test, CY_test)

Epoch: 1, Time Spent: 30.26s, Accuracy: 88.08%
Epoch: 2, Time Spent: 60.59s, Accuracy: 91.08%
Epoch: 3, Time Spent: 90.86s, Accuracy: 93.29%
Epoch: 4, Time Spent: 121.14s, Accuracy: 94.41%
Epoch: 5, Time Spent: 151.34s, Accuracy: 94.78%


### Adaline Multi Layer Network for learning XOR Logic

In [3]:
X_XOR = np.array([[0,0],[0,1],[1,0],[1,1]])
Y_XOR = [0,1,1,0]

In [4]:
nnObjChar = NeuralNetwork(sizes=[2, 2, 2, 1],epochs=5)
nnObjChar.train(X_XOR, Y_XOR, X_XOR, Y_XOR)

{'W1': array([[ 0.19387601, -0.08574695,  0.00645916],
       [ 0.07529335, -1.7797719 , -1.73207747]]), 'W2': array([[ 0.65755297,  0.54557789],
       [ 0.46994111, -0.5705033 ]]), 'W3': array([[ 0.18529898, -1.14961815]]), 'A0': array([1, 1, 1]), 'Z1': array([ 0.11458822, -3.43655601]), 'A1': array([0.52861575, 0.03117233]), 'Z2': array([0.36459979, 0.23063436]), 'A2': array([0.59015345, 0.55740436]), 'Z3': array([-0.53144734]), 'A3': array([0.37017938])}
Epoch: 1, Time Spent: 0.00s, Accuracy: 100.00%
{'W1': array([[ 0.19387601, -0.08574695,  0.00645916],
       [ 0.07529335, -1.7797719 , -1.73207747]]), 'W2': array([[ 0.65755297,  0.54557789],
       [ 0.46994111, -0.5705033 ]]), 'W3': array([[ 0.18529898, -1.14961815]]), 'A0': array([1, 1, 1]), 'Z1': array([ 0.11458822, -3.43655601]), 'A1': array([0.52861575, 0.03117233]), 'Z2': array([0.36459979, 0.23063436]), 'A2': array([0.59015345, 0.55740436]), 'Z3': array([-0.53144734]), 'A3': array([0.37017938])}
Epoch: 2, Time Spent: 0.00s