In [11]:
import cloudpickle as pickle
import numpy as np
import pandas as pd
import math
from sklearn.model_selection import train_test_split
from sklearn import datasets, linear_model
import scipy.sparse
import copy
import random
import matplotlib 
import matplotlib.pyplot as plt

In [12]:
def load_mnist(data_file="mnist.data", test_size=0.10, random_state=0):
    mnist = pickle.load(open(data_file, "rb"))
    return train_test_split(mnist['data'], mnist['target'], test_size=test_size,
                            random_state=random_state)

In [13]:
def load(file_name):
    file_name = "mnist.data"
    return load_mnist(data_file=file_name, test_size=0.2, random_state=42)


In [14]:
#COST FUNCTION

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def sigmoid_prime(z):
    return z * (1 - z)


def softmax(z):
    z -= np.max(z)
    sm = (np.exp(z).T / np.sum(np.exp(z), axis=1)).T
    return sm


def softmax_prime(z):
    return


def relu(z):
    return np.maximum(z, 0)


def relu_prime(z):
    dz = np.ones_like(z)
    dz[z < 0] = 0
    return dz



In [15]:
#WEIGHTS INITIALIZATION FUNCTION
def relu_weight(m, n):
    np.random.seed(0)
    return np.random.rand(m, n) * np.sqrt(2) / np.sqrt(m)


def xavier(m, n):
    np.random.seed(0)
    return np.random.rand(m, n) / np.sqrt(m)


def he(m, n):
    np.random.seed(0)
    return np.random.rand(m, n) * np.sqrt(2) / np.sqrt(m + n)



In [16]:
class Layers(object):

    def __init__(self, n_in, n_out=10, activation_function="relu"):
        self.n_in = n_in
        self.n_out = n_out
        self.set_activation_functions(act_function_name=activation_function)

    def set_activation_functions(self, act_function_name="relu"):
        if act_function_name == "relu":
            self.activation_function = relu
            self.function = relu_prime
            self.set_weight_function(weight_name="he")
        elif act_function_name == "sigmoid":
            self.activation_function = sigmoid
            self.function = sigmoid_prime
            self.set_weight_function(weight_name="xavier")
        elif act_function_name == "softmax":
            self.activation_function = softmax
            self.function = softmax_prime
            self.set_weight_function(weight_name="he")

    def set_weight_function(self, weight_name):
        if weight_name == "relu":
            self.weight_function = relu_weight
        elif weight_name == "xavier":
            self.weight_function = xavier
        elif weight_name == "he":
            self.weight_function = he


In [17]:
class Neural_Network(object):
    
    def __init__(self, n_in=784, n_out=10, l_rate=0.1):

        self.n_in = n_in
        self.n_out = n_out
        self.initial_lrate = l_rate
        self.l_rate = l_rate

        self.weights = []
        self.biases = []
        self.previous_weights = []
        self.previous_biases = []
        self.layers = []
        self.losses = []
        

    def layer(self, activation_function="relu", n_neurons=4):
        if len(self.layers) <= 0:
            n_previous_neurons = self.n_in
        else:
            n_previous_neurons = self.layers[-1].n_out

        L = Layers(n_in=n_previous_neurons, n_out=n_neurons, activation_function=activation_function)
        self.layers.append(L)
        
    
    def backpropogation_weights(self):
        self.previous_weights.append(self.weights)
        self.previous_biases.append(self.biases)
    
    
    def forward_propagation(self, X):
        a = [X]
        for l in range(len(self.layers)):
            z = a[l].dot(self.weights[l]) + self.biases[l]
            activation = self.layers[l].activation_function(z)
            a.append(activation)

        return a
    
    def backpropagation(self, x,y_mat,a):
        m = x.shape[0]
        output = a[-1]
        
        loss = (-1 / m) * np.sum(y_mat * np.log(output))
        
        deltas = []
        delta = y_mat - output
        deltas.append(delta)
        
        for l in range(len(self.layers)-1):
            prime = self.layers[-2 - l].function(a[-2 - l])
            w = self.weights[-1-l]
            delta = np.dot(delta, w.T) * prime
            deltas.append(delta)
        
        prev_weights = self.previous_weights.pop(0)
        prev_biases = self.previous_biases.pop(0)

        
        for l in range(len(self.layers)-1):
            dw = (2/m) * np.dot(a[l].T,deltas[-1-l])
            self.weights[l] += self.l_rate * dw
            
            db = (1/m) * np.sum(deltas[-1-l], axis=0, keepdims=True)
            self.biases[l] += self.l_rate * db
   
        self.backpropogation_weights()
        return loss
    
    
    def initialize_weights(self):
        for i in range(len(self.layers)):
            n_cur_layer_neurons = self.layers[i].n_out
            n_prev_layer_neurons = self.layers[i].n_in

            weights = self.layers[i].weight_function(n_prev_layer_neurons, n_cur_layer_neurons)
            self.weights.append(weights)

            biases = np.zeros((1, n_cur_layer_neurons))
            self.biases.append(biases)

    def train(self, x, y, n_epoch=10000):
        
        self.initialize_weights()
        self.backpropogation_weights()
        
        y_mat = self.oneHotIt(y)
        
        for i in range(n_epoch):
            a = self.forward_propagation(x)
            loss = self.backpropagation(x,y_mat,a)
                       
            if i%1000==0:
                print('Iteration: {0}  --  Loss: {1}'.format(i,loss))
                self.losses.append([i,loss])
            
    #Encode Target Label IDs to one hot vector of size m where m is the number of unique labels
    def oneHotIt(self, Y):
        m = Y.shape[0]
        label = scipy.sparse.csr_matrix((np.ones(m), (Y, np.array(range(m)))))
        label = np.array(label.todense()).T
        return label
    
    def Predicted_value(self, x):
        probs = self.forward_propagation(x)[-1]
        preds = np.argmax(probs,axis=1)      
        return probs,preds

    def Accuracy(self, x,y):
        prob,predicted_val = self.Predicted_value(x)
        accuracy = sum(predicted_val == y)/(float(len(y)))
        percentage = accuracy*100
        return percentage
    
    def loss_graph(self):
        errors = np.array(self.losses)
        plt.plot(errors[:, 0], errors[:, 1], 'r--')
        plt.title("(MNIST data) Loss vs Epoch (Learning rate = 0.1)")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.show()
        


In [None]:
# mnist data set
train_img, test_img, train_lbl, test_lbl = load(file_name="mnist.data")
    
X_train = train_img[:1000] / 255.
y_train = train_lbl[:1000].astype(int)
X_test = test_img[:100] / 255.
y_test = test_lbl[:100].astype(int)

n_in = X_train.shape[1]

nn = Neural_Network(n_in=n_in, n_out=10)

#Randomly assigning activation functions to the hidden layers
#List = ["relu", "sigmoid"]

#activation_func = random.choice(List)
nn.layer(activation_function = "relu", n_neurons=80)
nn.layer(activation_function= "sigmoid", n_neurons=30)
nn.layer(activation_function="softmax", n_neurons=10)

nn.train(X_train, y_train, n_epoch=1000000)

train_accuracy = nn.Accuracy(X_train,y_train)
test_accuracy = nn.Accuracy(X_test,y_test)


Iteration: 0  --  Loss: 2.3446453510897127
Iteration: 1000  --  Loss: 2.3445033146973207
Iteration: 2000  --  Loss: 2.3365099513521788
Iteration: 3000  --  Loss: 2.2973107344388857
Iteration: 4000  --  Loss: 2.188965270203473
Iteration: 5000  --  Loss: 2.1308601673673477
Iteration: 6000  --  Loss: 2.0797318712518362
Iteration: 7000  --  Loss: 2.0617118088804403
Iteration: 8000  --  Loss: 2.0517631904424256
Iteration: 9000  --  Loss: 2.0442793006363567
Iteration: 10000  --  Loss: 2.0296736352116325
Iteration: 11000  --  Loss: 2.0207333550847557
Iteration: 12000  --  Loss: 2.0146353805204016
Iteration: 13000  --  Loss: 1.9889714170790358
Iteration: 14000  --  Loss: 1.9826319363324205
Iteration: 15000  --  Loss: 1.9785517604277758
Iteration: 16000  --  Loss: 1.9752583185358348
Iteration: 17000  --  Loss: 1.9502953191914139
Iteration: 18000  --  Loss: 1.933426569118058
Iteration: 19000  --  Loss: 1.9289386622132751
Iteration: 20000  --  Loss: 1.9262288735762496
Iteration: 21000  --  Loss: 

Iteration: 174000  --  Loss: 1.679626252206125
Iteration: 175000  --  Loss: 1.679417867686571
Iteration: 176000  --  Loss: 1.6791580091105474
Iteration: 177000  --  Loss: 1.6789474104006412
Iteration: 178000  --  Loss: 1.6787467554162727
Iteration: 179000  --  Loss: 1.6785876566040756
Iteration: 180000  --  Loss: 1.6784344268324827
Iteration: 181000  --  Loss: 1.6782891931526562
Iteration: 182000  --  Loss: 1.6758145979528274
Iteration: 183000  --  Loss: 1.6652778326487925
Iteration: 184000  --  Loss: 1.6642279605149257
Iteration: 185000  --  Loss: 1.6636723038710262
Iteration: 186000  --  Loss: 1.663220266186211
Iteration: 187000  --  Loss: 1.6627964298977562
Iteration: 188000  --  Loss: 1.6624288486565943
Iteration: 189000  --  Loss: 1.6621153234166086
Iteration: 190000  --  Loss: 1.661839172460695
Iteration: 191000  --  Loss: 1.6615305026354814
Iteration: 192000  --  Loss: 1.6612896662211774
Iteration: 193000  --  Loss: 1.6610846643440746
Iteration: 194000  --  Loss: 1.6608605667908

Iteration: 346000  --  Loss: 1.6505865250341198
Iteration: 347000  --  Loss: 1.6505327337433677
Iteration: 348000  --  Loss: 1.6505054105169696
Iteration: 349000  --  Loss: 1.65048621823733
Iteration: 350000  --  Loss: 1.6504684656806816
Iteration: 351000  --  Loss: 1.6504471711072743
Iteration: 352000  --  Loss: 1.6504263882808563
Iteration: 353000  --  Loss: 1.6504092946078466
Iteration: 354000  --  Loss: 1.6503933082980493
Iteration: 355000  --  Loss: 1.6503761121929934
Iteration: 356000  --  Loss: 1.6503555844573297
Iteration: 357000  --  Loss: 1.6503359388225765
Iteration: 358000  --  Loss: 1.650283606747492
Iteration: 359000  --  Loss: 1.6502569862294325
Iteration: 360000  --  Loss: 1.650239571966855
Iteration: 361000  --  Loss: 1.650220622512632
Iteration: 362000  --  Loss: 1.6501927830595429
Iteration: 363000  --  Loss: 1.6501676137818087
Iteration: 364000  --  Loss: 1.6501496915886038
Iteration: 365000  --  Loss: 1.650134219329742
Iteration: 366000  --  Loss: 1.650119772852224

Iteration: 518000  --  Loss: 1.6476234541950538
Iteration: 519000  --  Loss: 1.6476103064025525
Iteration: 520000  --  Loss: 1.6476004250744076
Iteration: 521000  --  Loss: 1.647592504830633
Iteration: 522000  --  Loss: 1.647585758582177
Iteration: 523000  --  Loss: 1.6475797552749551
Iteration: 524000  --  Loss: 1.6475742751288287
Iteration: 525000  --  Loss: 1.6475691864453583
Iteration: 526000  --  Loss: 1.6475644025551814
Iteration: 527000  --  Loss: 1.6475598680793002
Iteration: 528000  --  Loss: 1.6475555388537186
Iteration: 529000  --  Loss: 1.6475513741299632
Iteration: 530000  --  Loss: 1.64754732590922
Iteration: 531000  --  Loss: 1.6475432968906039
Iteration: 532000  --  Loss: 1.6475388275376048
Iteration: 533000  --  Loss: 1.6474894008014778
Iteration: 534000  --  Loss: 1.6474709755805281
Iteration: 535000  --  Loss: 1.647465011353433
Iteration: 536000  --  Loss: 1.6474604585394994
Iteration: 537000  --  Loss: 1.6474564744103148
Iteration: 538000  --  Loss: 1.64745280003302

Iteration: 690000  --  Loss: 1.6460344658966173
Iteration: 691000  --  Loss: 1.6460274576561096
Iteration: 692000  --  Loss: 1.6460192642204028
Iteration: 693000  --  Loss: 1.6460137796384027
Iteration: 694000  --  Loss: 1.6459928916579376
Iteration: 695000  --  Loss: 1.6459649911744914
Iteration: 696000  --  Loss: 1.645951275773155
Iteration: 697000  --  Loss: 1.6459408541530993
Iteration: 698000  --  Loss: 1.6459335201703658
Iteration: 699000  --  Loss: 1.6459284902122469
Iteration: 700000  --  Loss: 1.6459239172526572
Iteration: 701000  --  Loss: 1.6459109485639387
Iteration: 702000  --  Loss: 1.6458888789067783
Iteration: 703000  --  Loss: 1.645881011212623
Iteration: 704000  --  Loss: 1.645868978579821
Iteration: 705000  --  Loss: 1.6458631707056173
Iteration: 706000  --  Loss: 1.6458589611645007
Iteration: 707000  --  Loss: 1.6458554275378778
Iteration: 708000  --  Loss: 1.6458522816933507
Iteration: 709000  --  Loss: 1.6458493930642872
Iteration: 710000  --  Loss: 1.645846679259

Iteration: 862000  --  Loss: 1.6219839823026612
Iteration: 863000  --  Loss: 1.6219793847051682
Iteration: 864000  --  Loss: 1.6219429684609465
Iteration: 865000  --  Loss: 1.6219062143208038
Iteration: 866000  --  Loss: 1.6218976197068737
Iteration: 867000  --  Loss: 1.6218915050654472
Iteration: 868000  --  Loss: 1.6218863817617122
Iteration: 869000  --  Loss: 1.621881806980642
Iteration: 870000  --  Loss: 1.6218775812436672
Iteration: 871000  --  Loss: 1.6218735972228837
Iteration: 872000  --  Loss: 1.6218697781805023
Iteration: 873000  --  Loss: 1.6218660614487042
Iteration: 874000  --  Loss: 1.621862407877711
Iteration: 875000  --  Loss: 1.6218587890653189
Iteration: 876000  --  Loss: 1.621855187410631
Iteration: 877000  --  Loss: 1.6218516030999062
Iteration: 878000  --  Loss: 1.6218480352495093
Iteration: 879000  --  Loss: 1.6218444716253584
Iteration: 880000  --  Loss: 1.6218408610049486
Iteration: 881000  --  Loss: 1.6218370682828966
Iteration: 882000  --  Loss: 1.621832523639

In [None]:
print('Training Accuracy: {0:0.2f} %'.format(train_accuracy))
print('Test Accuracy: {0:0.2f} %'.format(test_accuracy))
nn.loss_graph()