
#### Q1. (i) In this simple assignment, you will implement a single-layer perceptron algorithm. You will need to write it from SCRATCH in a language of your choice (use of pre-built machine learning libraries is not allowed, though you can use basic linear algebra routines from numpy and scipy).
#### (ii) Then use the MNIST dataset to find the accuracy that the single-layer perceptron you have implemented gives. Note that the MNIST dataset requires classification into one of 10 categories (digits). You may assume that the predicted class is the one whose corresponding output neuron has the highest output. Use 5-fold cross-validation.

## Creating the Single Layer Perceptron

In [None]:
# Importing Libraries

import numpy as np
import scipy as sp
import math
import random
import time

# Creating class

class SLP():
    """Single-layer perceptron with one output neuron per class.

    Every output neuron is connected to every input feature plus a bias.
    Training minimises the sum of squared errors between the neuron outputs
    and one-hot encoded targets using the delta rule.

    Parameters
    ----------
    nodes : int
        Number of output neurons (i.e. number of classes). Must be >= 1.
    types : str
        'online' updates the weights after every sample; 'batch' applies one
        update per epoch using the mean gradient over all samples.
    alpha : float
        Learning rate. Must be > 0.
    max_iter : int
        Maximum number of epochs. Must be > 0.
    tol : float
        Training stops once the epoch's summed squared error is <= tol.
    activation : str
        One of 'relu', 'linear', 'binsig' (logistic) or 'bipsig'
        (bipolar sigmoid). Case-insensitive.

    Attributes
    ----------
    w_arr : numpy.ndarray of shape (n_features + 1, nodes) after fit
        Row 0 holds the bias of every neuron, rows 1.. hold feature weights.
    iters : int
        Number of epochs run by the last call to fit.
    J : float
        Summed squared error measured during the last epoch.
    """

    _ACTIVATIONS = ('relu', 'binsig', 'bipsig', 'linear')
    _LEARNING_MODES = ('online', 'batch')

    def __init__(self, nodes, types = 'online', alpha=0.001, max_iter=500, tol=0.0001, activation='relu'):
        # ValueError subclasses Exception, so callers catching Exception still work.
        if activation.lower() not in self._ACTIVATIONS:
            raise ValueError("Activation should be either Relu, Linear or Binary/Bipolar Sigmoid")
        if types.lower() not in self._LEARNING_MODES:
            raise ValueError("Learning should be either Online or Batch")
        if alpha<=0 or max_iter<=0 or tol<0 or nodes<1:
            raise ValueError("alpha, max_iter and nodes must be positive and tol must be non-negative.")
        self.types = types
        self.alpha = alpha
        self.max_iter = max_iter
        self.tol = tol
        self.activation = activation
        self.nodes = nodes
        # Kept for backward compatibility; never populated by this class.
        self.X_train = []
        self.y_train = []
        self.w_arr = []
        self.iters = 0
        self.J = 100000

    def activation_function(self, weighted_sum):
        """Apply the configured activation to a scalar or an array of net inputs."""
        kind = self.activation.lower()
        if kind == 'linear':
            return weighted_sum
        if kind == 'relu':
            return np.maximum(0, weighted_sum)
        # Clipping keeps exp() from overflowing; beyond +/-100 the sigmoid is
        # already indistinguishable from its asymptote (1 on the right, 0 on the left).
        logistic = 1 / (1 + np.exp(-np.clip(weighted_sum, -100, 100)))
        if kind == 'binsig':
            return logistic
        # Bipolar sigmoid: the logistic curve rescaled to the range (-1, 1).
        return 2 * logistic - 1

    def delJ(self, y_pred):
        """Return the activation derivative for each neuron output in y_pred.

        The derivative is expressed in terms of the activation *output*, so it
        must be given the raw neuron outputs (not a one-hot prediction).
        Returns an array with the same shape as y_pred.
        """
        outputs = np.asarray(y_pred, dtype=float)
        kind = self.activation.lower()
        if kind == 'linear':
            return np.ones_like(outputs)
        if kind == 'relu':
            # Slope is 1 on the active side and 0 where the unit is switched off.
            return (outputs > 0).astype(float)
        if kind == 'binsig':
            return outputs * (1 - outputs)
        # Bipolar sigmoid: f'(x) = (1 + f(x)) * (1 - f(x)) / 2
        return (1 - outputs ** 2) / 2

    def predict(self, X_test):
        """Return the output of every neuron for every row of X_test.

        The result is an array of shape (n_samples, nodes); the predicted
        class of a row is the index of its largest output.

        Raises
        ------
        RuntimeError if fit has not been called.
        ValueError if X_test is not 2-D with the number of features seen in fit.
        """
        if len(self.w_arr) == 0:
            raise RuntimeError("fit must be called before predict")
        samples = np.asarray(X_test, dtype=float)
        if samples.ndim != 2 or samples.shape[1] != self.w_arr.shape[0] - 1:
            raise ValueError("X_test must be a 2 dimensional array with the same number of features used in fit")
        return self.forward_pass(samples)

    @staticmethod
    def _as_labels(values):
        """Convert per-neuron outputs or one-hot rows (2-D) to class labels; pass 1-D labels through."""
        arr = np.asarray(values)
        if arr.ndim == 2:
            return arr.argmax(axis=1)
        return arr

    def score(self, y_test, y_pred):
        """Return the fraction of samples whose predicted class equals the true class.

        Both arguments may be 1-D label sequences or 2-D arrays (one-hot
        targets / per-neuron outputs), which are reduced with argmax.
        """
        actual = self._as_labels(y_test)
        predicted = self._as_labels(y_pred)
        if actual.shape != predicted.shape:
            raise ValueError("y_test and y_pred must contain the same number of samples")
        if actual.size == 0:
            raise ValueError("Cannot compute a score on zero samples")
        return float(np.mean(actual == predicted))

    def forward_pass(self, X=None):
        """Compute neuron outputs for X (one sample or a 2-D batch, without bias column).

        Called without arguments it does nothing and returns None, which is
        what the original placeholder did.
        """
        if X is None:
            return None
        samples = np.asarray(X, dtype=float)
        # Row 0 of w_arr is the bias, so no column of ones has to be inserted into X.
        net_input = samples @ self.w_arr[1:] + self.w_arr[0]
        return self.activation_function(net_input)

    def weight_adjust(self, X=None, errors=None, outputs=None):
        """Apply one delta-rule update to w_arr.

        X are the inputs (one sample or a batch), errors is target - output
        and outputs are the raw neuron outputs for X. For a batch the mean
        gradient over its rows is used. Called without arguments it does
        nothing, which is what the original placeholder did.
        """
        if X is None or errors is None or outputs is None:
            return None
        samples = np.atleast_2d(np.asarray(X, dtype=float))
        errors = np.atleast_2d(np.asarray(errors, dtype=float))
        outputs = np.atleast_2d(np.asarray(outputs, dtype=float))
        # Gradient descent on sum((t - y)^2) moves the weights along
        # +(t - y) * f'(net) * x, so the update is added, not subtracted.
        delta = errors * self.delJ(outputs)
        step = self.alpha / samples.shape[0]
        self.w_arr[1:] += step * (samples.T @ delta)
        self.w_arr[0] += step * delta.sum(axis=0)
        return None

    def fit(self, X_train, y_train):
        """Train the perceptron on X_train (n_samples, n_features) and integer labels y_train.

        Labels must lie in [0, nodes). Weights are re-initialised on every
        call, so the same instance can be fitted repeatedly. Prints the
        elapsed training time.

        Raises
        ------
        ValueError for a non 2-D or empty X_train, mismatched lengths,
        non-integer labels or labels outside [0, nodes).
        """
        t1 = time.time()
        samples = np.asarray(X_train, dtype=float)
        labels = np.asarray(y_train)

        if samples.ndim != 2:
            raise ValueError(f"X cannot be a {samples.ndim} dimensional array.")
        if samples.shape[0] == 0:
            raise ValueError("X must contain at least one sample.")
        if labels.ndim != 1 or labels.shape[0] != samples.shape[0]:
            raise ValueError("y must be a 1 dimensional array with one label per row of X.")
        if not np.issubdtype(labels.dtype, np.integer):
            raise ValueError("y must contain integer class labels.")
        if labels.min() < 0 or labels.max() >= self.nodes:
            raise ValueError("Class labels must lie between 0 and nodes - 1.")

        # One-hot targets: one column per output neuron.
        targets = np.zeros((labels.size, self.nodes))
        targets[np.arange(labels.size), labels] = 1

        # Initial weights in [-0.25, 0.25], drawn node by node from the `random`
        # module so that random.seed() controls them. Row 0 is the bias.
        n_inputs = samples.shape[1] + 1
        initial = [[random.randint(-250, 250) / 1000 for _ in range(n_inputs)]
                   for _ in range(self.nodes)]
        self.w_arr = np.array(initial, dtype=float).T.copy()

        self.iters = 0
        self.J = float("inf")
        online = self.types.lower() == 'online'

        # Machine is learning
        while self.iters < self.max_iter and self.J > self.tol:
            if online:
                J_val = 0.0
                for row, target in zip(samples, targets):
                    outputs = self.forward_pass(row)
                    errors = target - outputs
                    J_val += float(errors @ errors)
                    # The raw outputs (not a one-hot argmax) drive the update;
                    # argmax is only used when turning outputs into a class.
                    self.weight_adjust(row, errors, outputs)
            else:
                outputs = self.forward_pass(samples)
                errors = targets - outputs
                J_val = float(np.sum(errors ** 2))
                self.weight_adjust(samples, errors, outputs)
            self.iters += 1
            self.J = J_val
        t2 = time.time()
        print("Time taken by algorithm:", t2 - t1)
    

## Creating the Cross Validation class

In [None]:
class cross_validate():
    """K-fold splitter producing boolean train/test masks over contiguous folds.

    Parameters
    ----------
    splits : int
        Number of folds. Must be >= 1.
    """

    def __init__(self, splits):
        if splits < 1:
            raise ValueError("splits must be a positive integer")
        self.splits = splits

    def split(self, X):
        """Return a list of [train_mask, test_mask] pairs, one per fold.

        Each mask is a boolean array with one entry per row of X. Folds are
        contiguous and unshuffled. When the number of rows is not divisible by
        the number of folds, the first (n_samples % splits) folds receive one
        extra row, so every row is in exactly one test fold.

        Raises
        ------
        ValueError if there are fewer rows than folds.
        """
        n_samples = X.shape[0]
        if n_samples < self.splits:
            raise ValueError("Cannot have more splits than samples")

        base_size, remainder = divmod(n_samples, self.splits)
        indices = []
        start = 0
        for fold in range(self.splits):
            # Spread the remainder over the leading folds instead of dropping it.
            stop = start + base_size + (1 if fold < remainder else 0)
            test_mask = np.zeros(n_samples, dtype=bool)
            test_mask[start:stop] = True
            indices.append([~test_mask, test_mask])
            start = stop
        return indices


## Testing our algorithm

In [None]:
from keras.datasets import mnist

# Load MNIST and merge the predefined train/test parts: the folds are made by cross_validate.
(trainX, trainy), (testX, testy) = mnist.load_data()
X = np.concatenate([trainX, testX], axis=0)
y = np.concatenate([trainy, testy], axis=0)

# Flatten each 28x28 image to a 784 vector and scale pixels from 0-255 to 0-1,
# otherwise the weighted sums are so large that the sigmoid saturates.
X = X.reshape(X.shape[0], X.shape[1]*X.shape[2]) / 255.0

cv = cross_validate(5)
scores = []

for train, test in cv.split(X):
    X_train, X_test = X[train], X[test]
    y_train, y_test = y[train], y[test]
    # A fresh model per fold so that no weights leak from one fold into the next.
    slp = SLP(10, types = 'online', alpha=0.001, max_iter=300, tol=0.001, activation='binsig')
    slp.fit(X_train, y_train)
    # The predicted digit is the output neuron with the highest activation.
    y_pred = np.argmax(slp.predict(X_test), axis=1)
    # Accuracy = share of held-out images whose predicted digit matches the label.
    score = float(np.mean(y_pred == y_test))
    scores.append(score)

scores

#### Q2. For this part you can use scikit-learn (or any other pre-packaged library) and its built-in implementation of a multi-layer perceptron. Use the same dataset as above. Use 5-fold cross-validation as above, and compare your results with what you get with the single-layer perceptron.


In [1]:
import numpy as np
from tensorflow.keras.utils import to_categorical
from keras.datasets import mnist
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.preprocessing import OneHotEncoder

In [2]:
# Fetch MNIST, which ships as separate train and test parts
(trainX, trainy), (testX, testy) = mnist.load_data()

# k-fold cross validation builds its own splits, so both parts are stacked into one dataset
X = np.concatenate((trainX, testX))
y = np.concatenate((trainy, testy))
print("X:", X.shape[0], "y:", y.shape[0])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
X: 70000 y: 70000


In [3]:
# Creating model
# Logistic (binary sigmoid) activation is used because its output lies between 0 and 1.
# hidden_layer_sizes is written as a one-element tuple: (10) is just the integer 10,
# while (10,) states explicitly that there is a single hidden layer of 10 units.

clf = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(10,), max_iter=5000, tol=0.00001, activation='logistic')

In [4]:
# Preprocessing - flattening X to two dimensions
# X arrives as a 3D array: axis 0 indexes the examples and axes 1 and 2 are the
# pixel rows and columns of each 28x28 image.
# Each image is unrolled into a single vector of 784 values.

# Source that helped explain why the unflattened input did not work:
# https://medium.com/analytics-vidhya/multi-layer-perceptron-using-keras-on-mnist-dataset-for-digit-classification-problem-relu-a276cbf05e97

# -1 lets NumPy work out the flattened length (rows * columns) itself
X = X.reshape(len(X), -1)
X.shape

(70000, 784)

In [5]:
# 5-fold cross validation: refit the classifier on every training split
# and record its accuracy on the matching held-out split

scores = []
kf = KFold(n_splits=5)
for train, test in kf.split(X):
    X_train, y_train = X[train], y[train]
    X_test, y_test = X[test], y[test]
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # matrix = metrics.confusion_matrix(y_test, y_pred)
    score = clf.score(X_test, y_test)
    scores += [score]

scores

[0.8924285714285715,
 0.8806428571428572,
 0.887,
 0.8877142857142857,
 0.9139285714285714]