In [1]:
import numpy as np 
import matplotlib.pyplot as plt
from keras.datasets import mnist
import math
from sklearn import preprocessing
from sklearn import metrics 
import seaborn as sns 
import sys

2021-11-11 11:30:31.427027: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/peshal/opencv/build/lib
2021-11-11 11:30:31.427065: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Seed NumPy's legacy global RNG so that the np.random.normal weight
# initialisation performed when Dense layers are constructed is reproducible.
np.random.seed(42)

In [3]:
# Load the MNIST dataset through Keras as (inputs, labels) pairs for the
# training split and the test split.
(X_train, Y_train),(X_test,Y_test)=mnist.load_data()

In [4]:
def data_preprocessing(X,Y):
    """Scale and flatten the inputs, and binarize the labels.

    Parameters
    ----------
    X : ndarray
        Input samples; the first axis indexes samples. Values are divided by
        255 and shifted by -0.5, then every sample is flattened to one row.
    Y : array-like
        Class labels, encoded with ``sklearn.preprocessing.LabelBinarizer``.

    Returns
    -------
    tuple
        ``(X_flat, Y_encoded)`` where ``X_flat`` is a float32 array of shape
        ``(len(X), -1)`` and ``Y_encoded`` is the binarized label matrix.

    Notes
    -----
    A fresh ``LabelBinarizer`` is fitted on ``Y`` at every call, so the column
    layout depends on the labels present in that particular ``Y``.
    """
    scaled = X.astype(np.float32) / 255 - 0.5
    flattened = scaled.reshape((len(scaled), -1))
    encoder = preprocessing.LabelBinarizer()
    encoded = encoder.fit_transform(Y)
    return flattened, encoded

In [5]:
# Preprocess both splits with data_preprocessing (inputs scaled and flattened,
# labels binarized).
# NOTE: the results are bound back to the same names, so this cell is not
# idempotent: re-running it without re-running the load cell first would
# preprocess data that has already been preprocessed.
X_train,Y_train=data_preprocessing(X_train,Y_train)
X_test,Y_test=data_preprocessing(X_test,Y_test)

In [6]:
class Layer:
    """Base class for the network layers defined in this notebook.

    Subclasses override ``forward`` and ``backward``. The base implementation
    of ``forward`` is the identity.
    """
    def __init__(self):
        pass
    def forward(self,input_):
        """Return ``input_`` unchanged.

        The original code assigned to ``output`` but returned ``output_``,
        which raised ``NameError`` on every call.
        """
        output_=input_
        return output_
    def backward(self,input_,grad_output):
        """Return ``grad_output^T . input_`` averaged over ``len(grad_output)``.

        Parameters
        ----------
        input_ : ndarray
            Values associated with this layer for the current batch.
        grad_output : ndarray
            Gradient arriving from the following layer.
        """
        output_=np.dot(np.transpose(grad_output),input_)/len(grad_output)
        return output_

In [46]:
class Relu(Layer):
    """Rectified linear activation: negative entries become 0."""
    def __init__(self):
        pass
    def forward(self,input_):
        """Return ``input_`` where it is >= 0 and 0 elsewhere."""
        output_=np.where(input_>=0,input_,0)
        return output_
    def backward(self,input_,grad_output):
        """Scale ``grad_output`` by the ReLU derivative.

        The module-level ``backward`` helper passes the stored activation
        (this layer's *output*) as ``input_``. That output is never negative,
        so testing ``>= 0`` would yield a derivative of 1 everywhere and turn
        the layer into an identity during back-propagation. Testing ``> 0``
        zeroes the gradient for units that were clamped to 0, and is also a
        valid sub-gradient choice if pre-activation values are passed instead.
        """
        output_=np.where(input_>0,1,0)
        return output_*grad_output

class Sigmoid(Layer):
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``."""
    def __init__(self):
        pass
    def forward(self,input_):
        """Apply the sigmoid element-wise to ``input_``."""
        denominator=1+np.exp(-input_)
        return 1/denominator
    def backward(self,input_,grad_output):
        """Scale ``grad_output`` by ``s * (1 - s)``.

        ``input_`` is used directly as ``s``, i.e. it is treated as the value
        already produced by ``forward`` (the module-level ``backward`` helper
        passes the stored activation here).
        """
        local_slope=input_*(1-input_)
        return local_slope*grad_output

class Tanh(Layer):
    """Hyperbolic tangent activation."""
    def __init__(self):
        pass
    def forward(self,input_):
        """Apply tanh element-wise.

        Uses ``np.tanh`` rather than the explicit
        ``(e^x - e^-x) / (e^x + e^-x)`` ratio: for large ``|x|`` the explicit
        form overflows to ``inf / inf`` and returns NaN, and it evaluates four
        exponentials per element.
        """
        output_=np.tanh(input_)
        return output_
    def backward(self,input_,grad_output):
        """Scale ``grad_output`` by ``1 - t**2``.

        ``input_`` is used directly as ``t``, i.e. it is treated as the value
        already produced by ``forward`` (the module-level ``backward`` helper
        passes the stored activation here).
        """
        output_=(1-input_**2)
        return output_*grad_output

class Softmax(Layer):
    """Row-wise softmax activation."""
    def __init__(self):
        pass
    def forward(self,input_):
        """Return the softmax of each row of a 2-D ``input_``.

        The row maximum is subtracted before exponentiating. Softmax is
        invariant to that shift, and it keeps every exponent <= 0 so large
        logits no longer overflow to ``inf / inf`` (NaN).
        """
        logits=np.asarray(input_)
        shifted=logits-np.max(logits,axis=1,keepdims=True)
        exponential_of_input=np.exp(shifted)
        sum_along_row=np.sum(exponential_of_input,axis=1,keepdims=True)
        return exponential_of_input/sum_along_row
    def backward(self,input_,grad_output):
        """Pass ``grad_output`` through unchanged.

        NOTE(review): presumably this relies on the loss gradient already
        being expressed with respect to the pre-softmax values (as the
        ``p - y`` form of ``categorical_cross_entropy_gradient`` suggests);
        confirm before pairing this layer with a different loss.
        """
        return grad_output


In [47]:
class Dense(Layer):
    """Fully connected layer whose bias is stored as column 0 of ``weights``.

    Parameters
    ----------
    input_nodes : int
        Number of input features (one extra column is added for the bias).
    output_nodes : int
        Number of output units.
    learning_rate : float
        Step size applied to the weight update inside ``backward``.
    """
    def __init__(self,input_nodes,output_nodes,learning_rate):
        self.input_nodes=input_nodes+1
        self.output_nodes=output_nodes
        # Normal initialisation with std sqrt(2 / (fan_out + fan_in)), where
        # fan_in already counts the bias column.
        fan_sum=output_nodes+self.input_nodes
        self.weights=np.random.normal(loc=0.0,scale=np.sqrt(2/fan_sum),size=(output_nodes,self.input_nodes))
        self.learning_rate=learning_rate

    def _with_bias_column(self,input_):
        """Return a copy of ``input_`` with a leading column of ones."""
        n_rows=len(input_)
        n_cols=len(input_[0])
        augmented=np.ones((n_rows,n_cols+1))
        augmented[:,1:]=input_
        return augmented

    def forward(self,input_):
        """Return ``[1, input_] . weights^T`` for a 2-D batch ``input_``."""
        return np.dot(self._with_bias_column(input_),self.weights.T)

    def backward(self,input_,grad_output):
        """Update the weights and return the gradient w.r.t. ``input_``.

        The gradient for the previous layer is computed from the weights as
        they were *before* the update, and excludes the bias column. The
        weight gradient is averaged over ``len(grad_output)``.
        """
        augmented=self._with_bias_column(input_)
        grad_input=np.dot(grad_output,self.weights[:,1:])
        weight_grad=np.dot(np.transpose(grad_output),augmented)/len(grad_output)
        self.weights=self.weights-self.learning_rate*weight_grad
        return grad_input

In [48]:
def MSE(Yactual,Ypredict):
    """Mean squared error averaged over samples and then over columns.

    Parameters
    ----------
    Yactual, Ypredict : array-like, 2-D
        Target and predicted values of identical shape.

    Returns
    -------
    float
        Sum of squared differences divided by the number of rows and then by
        the number of columns.
    """
    squared_error=np.square(np.asarray(Yactual)-np.asarray(Ypredict))
    per_column=squared_error.sum(axis=0)/len(squared_error)
    return per_column.sum()/len(per_column)

def MSE_gradient(Yactual,Ypredict):
    """Return the raw residual ``Ypredict - Yactual``.

    No scaling factor is applied here (``MSE`` divides by the row and column
    counts, this function does not).
    """
    residual=Ypredict-Yactual
    return residual

In [49]:
def categorical_cross_entropy(Yactual,Ypredict):
    """Average negative log-probability assigned to the true class.

    Parameters
    ----------
    Yactual : array-like, 2-D
        Target rows; the true class of each row is taken as its argmax.
    Ypredict : sequence of rows
        Predicted per-class values for each sample.

    Returns
    -------
    float
        ``-sum(log(p_true)) / len(Ypredict)``; a probability <= 0 is replaced
        by 1e-14 before the logarithm is taken.
    """
    true_classes=np.argmax(Yactual,axis=1)
    floor=0.00000000000001
    total=0
    for row_index,row in enumerate(Ypredict):
        prob=row[true_classes[row_index]]
        total-=math.log(floor if prob<=0 else prob)
    return total/len(Ypredict)
    
def categorical_cross_entropy_gradient(Yactual,Ypredict):
    """Return a copy of ``Ypredict`` with 1 subtracted at each true class.

    The true class of every row is the argmax of the matching ``Yactual``
    row. ``Ypredict`` itself is not modified.
    """
    true_classes=np.argmax(Yactual,axis=1)
    grad_output=np.copy(Ypredict)
    row_indices=np.arange(len(grad_output))
    # One entry per row is touched, so the in-place fancy-index update is safe.
    grad_output[row_indices,true_classes]-=1
    return grad_output

In [50]:
def cross_entropy(Yactual,Ypredict):
    """Element-wise binary cross-entropy averaged over rows, then columns.

    Vectorised replacement for a Python double loop that called ``math.log``
    once per element; the clipping rule is unchanged.

    Parameters
    ----------
    Yactual : array-like, 2-D
        Target values (0/1 indicators).
    Ypredict : array-like, 2-D
        Predicted probabilities, same shape as ``Yactual``.

    Returns
    -------
    float
        ``-[y*log(p) + (1-y)*log(1-p)]`` summed per column, divided by the
        number of rows, then averaged over the columns. ``p`` and ``1-p`` are
        replaced by 1e-14 wherever they are <= 0 so the logarithm stays finite.
    """
    targets=np.asarray(Yactual,dtype=float)
    probs=np.asarray(Ypredict,dtype=float)
    floor=0.00000000000001
    prob_true=np.where(probs<=0,floor,probs)
    prob_false=1-probs
    prob_false=np.where(prob_false<=0,floor,prob_false)
    per_element=-targets*np.log(prob_true)-(1-targets)*np.log(prob_false)
    per_column=np.sum(per_element,axis=0)/len(per_element)
    return np.sum(per_column)/len(per_column)


def cross_entropy_gradient(Yactual,Ypredict):
    """Gradient of the binary cross-entropy with respect to the predictions.

    Computes ``(p - y) / (p * (1 - p))`` element-wise.

    Predictions are clipped into ``[1e-11, 1 - 1e-11]`` first. Guarding only
    the exact values 0 and 1 left tiny (e.g. subnormal) probabilities
    untouched, and those still drove the division to ``inf``. The targets are
    used as given: they never appear in a denominator, so nudging them away
    from 0/1 only added error.

    Parameters
    ----------
    Yactual : array-like
        Target values.
    Ypredict : array-like
        Predicted probabilities, broadcast-compatible with ``Yactual``.

    Returns
    -------
    ndarray
        The element-wise gradient; inputs are not modified.
    """
    eps=0.00000000001
    targets=np.asarray(Yactual,dtype=float)
    probs=np.clip(np.asarray(Ypredict,dtype=float),eps,1-eps)
    return (probs-targets)/(probs*(1-probs))

In [51]:
# implementation of feed forward
def forward(network,input_):
    """Run ``input_`` through every layer and collect the activations.

    Parameters
    ----------
    network : sequence
        Layers exposing ``forward(x)``, applied in order.
    input_ : array-like
        Batch fed to the first layer.

    Returns
    -------
    list
        ``[input_, out_2, out_4, ...]``: the original input followed by the
        output of every second layer. With an odd number of layers the final
        layer's output is not recorded.
    """
    current=np.copy(input_)
    activation=[input_]
    for position,layer in enumerate(network,start=1):
        current=layer.forward(current)
        # Only the output of each (layer, activation) pair is stored.
        if position%2==0:
            activation.append(current)
    return activation
# implementation of back propagation 
def backward(network,input_,grad_output):
    """Back-propagate ``grad_output`` through ``network`` from last to first.

    Parameters
    ----------
    network : sequence
        Layers exposing ``backward(values, grad)``.
    input_ : sequence
        Activations as returned by ``forward`` (input first, then the output
        of every second layer).
    grad_output : array-like
        Gradient of the loss with respect to the network output.

    Notes
    -----
    Walking backwards, the 1st, 3rd, ... layers visited receive the same
    stored activation as the layer visited right after them; the cursor into
    ``input_`` moves one step earlier before each 2nd, 4th, ... layer.
    Nothing is returned; layers apply their own updates inside ``backward``.
    """
    cursor=-1
    for depth,layer in enumerate(reversed(network),start=1):
        if depth%2==0:
            cursor-=1
        grad_output=layer.backward(input_[cursor],grad_output)

In [52]:
#training function 
def train(network,X_train,Y_train,epochs=100,batches=10):
    """Train ``network`` with mini-batch gradient descent.

    Parameters
    ----------
    network : sequence
        Layers consumed by the module-level ``forward`` / ``backward`` helpers.
    X_train : ndarray, 2-D
        Training inputs, one row per sample.
    Y_train : ndarray, 2-D
        Binarized targets, one row per sample.
    epochs : int
        Number of passes over the training data.
    batches : int
        Number of batches the data is split into per epoch (not the batch
        size). The last batch also receives any remainder samples.

    Raises
    ------
    ValueError
        If ``batches`` is smaller than 1.

    Notes
    -----
    After every epoch the loss and accuracy of the *last* processed batch are
    printed. The loss is ``cross_entropy`` and its gradient
    ``cross_entropy_gradient``.
    """
    if batches<1:
        raise ValueError("batches must be a positive integer")
    n_samples=len(X_train)
    batch_size=n_samples//batches
    for _ in range(epochs):
        mean_error=0
        accuracy=0.0
        for batch in range(batches):
            start=batch*batch_size
            # The final batch runs to the end so that the remainder left by the
            # integer division is trained on as well.
            end=n_samples if batch==batches-1 else start+batch_size
            if start==end:
                # More batches than samples: nothing to train on in this slot.
                continue
            X_train_batch=X_train[start:end,:]
            Y_train_batch=Y_train[start:end,:]
            activation_output=forward(network=network,input_=X_train_batch)
            predictions=activation_output[-1]
            # Loss and its gradient with respect to the network output.
            mean_error=cross_entropy(Y_train_batch,predictions)
            grad_output=cross_entropy_gradient(Y_train_batch,predictions)
            # Back-propagation; layers update their own weights.
            backward(network=network,input_=activation_output,grad_output=grad_output)
            Y_predict=np.argmax(predictions,axis=1)
            Y_expected=np.argmax(Y_train_batch,axis=1)
            # The mean of the boolean matches stays valid when a batch is
            # entirely right or entirely wrong; indexing the counts returned by
            # np.unique raised IndexError in that case.
            accuracy=np.mean(Y_predict==Y_expected)*100
        print("error: "+str(mean_error)+"  accuracy : "+str(accuracy)+"%")

In [53]:
# testing function 
def test(network,X_test,Y_test):
    """Evaluate ``network`` on a held-out set and report the results.

    Prints the cross-entropy loss and accuracy, prints scikit-learn's
    classification report, and draws the confusion matrix as a seaborn
    heatmap. Class labels 0-9 are assumed.

    Parameters
    ----------
    network : sequence
        Layers consumed by the module-level ``forward`` helper.
    X_test : ndarray, 2-D
        Test inputs, one row per sample.
    Y_test : ndarray, 2-D
        Binarized targets, one row per sample.
    """
    activation_output=forward(network=network,input_=X_test)
    predictions=activation_output[-1]
    mean_error=cross_entropy(Y_test,predictions)
    Y_predict=np.argmax(predictions,axis=1)
    Y_true=np.argmax(Y_test,axis=1)
    # The mean of the boolean matches stays valid when every prediction is right
    # (or wrong); indexing the counts returned by np.unique raised IndexError.
    accuracy=np.mean(Y_predict==Y_true)*100
    print("error: "+str(mean_error)+"  accuracy : "+str(accuracy)+"%")
    class_labels=[0,1,2,3,4,5,6,7,8,9]
    cf_matrix=metrics.confusion_matrix(Y_true,Y_predict,labels=class_labels)
    print(metrics.classification_report(Y_true,Y_predict,labels=class_labels,zero_division=0))
    sns.set(rc={'figure.figsize':(11.7,8.27)})
    # Integer format keeps the cell annotations as plain counts instead of the
    # default two-significant-digit rendering (e.g. 9.7e+02).
    sns.heatmap(cf_matrix,annot=True,fmt="d")
    

In [86]:
# Step size shared by every Dense layer.
learning_rate=2
# Network with sigmoid activations: 784 -> 15 -> 11 -> 10.
# Dense layers are built in the same order as before, so they draw from the
# global NumPy RNG in the same sequence.
network=[
    Dense(input_nodes=784,output_nodes=15,learning_rate=learning_rate),
    Sigmoid(),
    Dense(input_nodes=15,output_nodes=11,learning_rate=learning_rate),
    Sigmoid(),
    Dense(input_nodes=11,output_nodes=10,learning_rate=learning_rate),
    Sigmoid(),
]

In [None]:
# Train with the defaults of train(): epochs=100 and the data split into 10 batches.
train(network=network,X_train=X_train,Y_train=Y_train)

In [None]:
# Evaluate the trained network on the test split: prints loss, accuracy and a
# classification report, and draws the confusion-matrix heatmap.
test(network=network,X_test=X_test,Y_test=Y_test)