In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from keras.datasets import fashion_mnist
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
import seaborn as sns

In [2]:
# Load the Fashion-MNIST train/test splits, then divide every pixel value by 255
# (presumably to bring the images into the [0, 1] range -- confirm the raw dtype).
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train, x_test = x_train / 255, x_test / 255

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [8]:

class Neural_network:
    """Fully connected feed-forward classifier with a softmax output layer.

    The layer widths are ``[input_dim] + hidden_layers * [hidden_layers_size]
    + [output_dim]``.  This class defines parameter initialisation, the
    activation functions, a single-sample forward pass and two gradient
    helpers.  The optimizer-related hyper-parameters are stored on the
    instance but are not used by any method defined here.
    """

    def __init__(self,x_train,y_train,input_dim,hidden_layers_size,hidden_layers,output_dim,batch_size=32,epochs=1,activation_func="sigmoid"
           ,learning_rate=6e-3 ,decay_rate=0.9,beta=0.9,beta1=0.9,beta2=0.99,optimizer="nesterov",weight_init="random"):
        """Split the data, store hyper-parameters and initialise parameters.

        Args:
            x_train, y_train: Samples and labels. 10% is held out (stratified
                on ``y_train``) as ``self.x_cv`` / ``self.y_cv``.
            input_dim: Width of the input layer.
            hidden_layers_size: Width of every hidden layer.
            hidden_layers: Number of hidden layers.
            output_dim: Width of the (softmax) output layer.
            batch_size: Stored as ``self.batch``.
            epochs, learning_rate, decay_rate, beta, beta1, beta2, optimizer:
                Stored as attributes of the same name.
            activation_func: Hidden-layer activation used by
                ``forward_propagation``: "sigmoid", "tanh" or "relu".
            weight_init: "random" draws weights and biases from a normal
                distribution with mean 0 and standard deviation 0.5; any other
                value uses standard deviation ``sqrt(2 / (fan_in * fan_out))``.
        """
        # Hold out 10% of the data, stratified by label, with a fixed split seed.
        self.x_train, self.x_cv, self.y_train, self.y_cv = train_test_split(
            x_train, y_train, test_size=0.10, random_state=100, stratify=y_train
        )

        # Seeds NumPy's *global* RNG so the parameter draws below are reproducible.
        np.random.seed(10)

        # Placeholder table keyed "W0".."Wk" then "b0".."bk"; each entry starts
        # out holding its own index.
        # NOTE(review): this creates hidden_layers + 2 entries per prefix while
        # only hidden_layers + 1 weight matrices exist -- confirm against the
        # code that consumes `self.gradient`.
        self.gradient = {}
        for i in range(hidden_layers + 2):
            self.gradient["W" + str(i)] = i
        for i in range(hidden_layers + 2):
            self.gradient["b" + str(i)] = i

        self.input_dim = input_dim
        self.hidden_layers = hidden_layers
        self.hidden_layers_size = hidden_layers_size
        self.output_dim = output_dim

        self.batch = batch_size
        self.epochs = epochs
        self.activation_func = activation_func
        self.learning_rate = learning_rate
        self.decay_rate = decay_rate
        self.optimizer = optimizer
        self.weight_init = weight_init
        self.beta = beta
        self.beta1 = beta1
        self.beta2 = beta2

        self.layers = [self.input_dim] + self.hidden_layers*[self.hidden_layers_size] + [self.output_dim]

        self.activations = []
        self.activation_gradients = []
        self.weights_gradients = []
        self.biases_gradients = []
        self.weights = []
        self.biases = []

        for fan_in, fan_out in zip(self.layers[:-1], self.layers[1:]):
            if self.weight_init == 'random':
                std = 0.5
            else:
                # NOTE(review): Glorot/Xavier initialisation normally uses
                # fan_in + fan_out here, not the product -- confirm intent.
                std = np.sqrt(2/(fan_in*fan_out))
            # Draw order (weights, then biases, layer by layer) is kept fixed so
            # the seeded initial parameters stay the same.
            self.weights.append(np.random.normal(0, std, (fan_in, fan_out)))
            self.biases.append(np.random.normal(0, std, (fan_out)))

            self.activations.append(np.zeros(fan_in))
            self.activation_gradients.append(np.zeros(fan_out))
            self.weights_gradients.append(np.zeros((fan_in, fan_out)))
            # Separate buffer: sharing one array with activation_gradients would
            # let an in-place update of either list silently change the other.
            self.biases_gradients.append(np.zeros(fan_out))
        self.activations.append(np.zeros(self.layers[-1]))

    def sigmoid(self,activations):
        """Element-wise logistic function, saturated outside [-40, 40].

        Inputs below -40 map to exactly 0.0 and inputs above 40 to exactly 1.0.
        Returns a float array with the same shape as the input.
        """
        z = np.asarray(activations, dtype=float)
        # Clip before exponentiating so extreme inputs cannot overflow np.exp.
        res = 1/(1+np.exp(-np.clip(z, -40, 40)))
        res = np.where(z < -40, 0.0, res)
        res = np.where(z > 40, 1.0, res)
        return res

    def tanh(self,activations):
        """Element-wise hyperbolic tangent as a float array.

        ``np.tanh`` saturates to +/-1.0 for large magnitudes without overflow.
        """
        return np.tanh(np.asarray(activations, dtype=float))

    def relu(self,activations):
        """Element-wise rectifier: positive entries pass through, others become 0."""
        z = np.asarray(activations)
        # `z > 0` (rather than np.maximum) maps NaN entries to 0.
        return np.where(z > 0, z, 0)

    def softmax(self,activations):
        """Softmax over the first axis, computed in a numerically stable way.

        The maximum is subtracted before exponentiation. This leaves the result
        mathematically unchanged but prevents overflow for large inputs.
        An empty input is returned as an empty array.
        """
        z = np.asarray(activations, dtype=float)
        if z.size == 0:
            return z
        exps = np.exp(z - np.max(z, axis=0, keepdims=True))
        return exps / np.sum(exps, axis=0, keepdims=True)

    def _activate(self, pre_activation):
        """Apply the configured hidden-layer activation.

        Raises:
            ValueError: if ``self.activation_func`` is not "sigmoid", "tanh"
                or "relu".
        """
        if self.activation_func == "sigmoid":
            return self.sigmoid(pre_activation)
        if self.activation_func == "tanh":
            return self.tanh(pre_activation)
        if self.activation_func == "relu":
            return self.relu(pre_activation)
        raise ValueError(
            "Unsupported activation_func %r; expected 'sigmoid', 'tanh' or 'relu'"
            % (self.activation_func,)
        )

    def forward_propagation(self,x,y,weights,biases):
        """Run one sample through the network and return its loss.

        Every layer's activation is stored in ``self.activations`` (index 0 is
        the input ``x`` itself, the last entry is the softmax output).

        Args:
            x: Input vector for the first layer.
            y: Index of the target class in the output vector.
            weights: Per-layer weight matrices; layer ``i`` computes
                ``weights[i].T @ activation + biases[i]``.
            biases: Per-layer bias vectors.

        Returns:
            ``-log2`` of the softmax probability assigned to class ``y``.

        Raises:
            ValueError: if there is at least one hidden layer and
                ``self.activation_func`` is not a supported name.
        """
        out = len(self.layers) - 2  # index of the output layer's parameters
        self.activations[0] = x
        for i in range(out):
            pre_activation = np.matmul(weights[i].T, self.activations[i]) + biases[i]
            self.activations[i+1] = self._activate(pre_activation)
        logits = np.matmul(weights[out].T, self.activations[out]) + biases[out]
        self.activations[out+1] = self.softmax(logits)
        # Base-2 logarithm is kept as in the original loss definition.
        return -(np.log2(self.activations[-1][y]))

    def grad_w(self,i):
        """Return the outer product of ``activations[i]`` and ``activation_gradients[i]``.

        The result has one row per entry of ``activations[i]`` and one column
        per entry of ``activation_gradients[i]``.  Presumably this is the weight
        gradient of layer ``i`` once a backward pass (not part of this class as
        shown) has filled ``activation_gradients``.
        """
        return np.outer(self.activations[i], self.activation_gradients[i])

    def grad_b(self,i):
        """Return ``activation_gradients[i]`` (the same array object, not a copy)."""
        return self.activation_gradients[i]

In [9]:
# Network with 784 inputs, two hidden layers of 64 units and 10 outputs;
# every other hyper-parameter keeps its default.
nn = Neural_network(
    x_train,
    y_train,
    input_dim=784,
    hidden_layers_size=64,
    hidden_layers=2,
    output_dim=10,
)

In [11]:
# One forward pass on the first training image, flattened to 784 values.
# The displayed value is the returned -log2 probability of that image's label.
nn.forward_propagation(
    x_train[0].reshape(784),
    y_train[0],
    nn.weights,
    nn.biases,
)

4.107910471974991

In [12]:
# Output-layer (softmax) activations stored by the most recent forward_propagation call.
nn.activations[-1]

array([1.65284993e-02, 8.88077068e-02, 1.08257016e-03, 1.29130983e-02,
       6.06017162e-01, 5.44504811e-03, 3.38257565e-02, 1.77357286e-01,
       2.71822417e-05, 5.79956912e-02])