In [None]:
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import wandb

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from keras.datasets import fashion_mnist
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
import seaborn as sns

In [None]:
# Load the Fashion-MNIST train/test splits and divide the pixel values by 255.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train, x_test = x_train / 255, x_test / 255

In [None]:

class Neural_network:
    def __init__(self,x_train,y_train,input_dim,hidden_layers_size,hidden_layers,output_dim,batch_size=32,epochs=1,activation_func="sigmoid"
           ,learning_rate=6e-3 ,decay_rate=0.9,beta=0.9,beta1=0.9,beta2=0.99,optimizer="nesterov",weight_init="random"):
        """Configure the network, initialise its parameters and train it.

        Construction has a side effect: once the parameters exist, the optimizer
        selected by ``optimizer`` is run on the training split, so training has
        already happened when ``__init__`` returns.

        Args:
            x_train, y_train: Samples and integer class labels. 10% is held out
                (stratified on the labels) as ``self.x_cv`` / ``self.y_cv``.
            input_dim: Length of one flattened input sample.
            hidden_layers_size: Number of units in every hidden layer.
            hidden_layers: Number of hidden layers.
            output_dim: Number of output classes.
            batch_size: Samples accumulated before each parameter update.
            epochs: Number of passes over the training split.
            activation_func: Hidden activation: "sigmoid", "tanh" or "relu".
            learning_rate: Step size used by every optimizer.
            decay_rate: Momentum coefficient used by "momentum" and "nesterov".
            beta: Decay of the squared-gradient average in "rmsprop".
            beta1, beta2: Moment decay rates used by "adam" and "nadam".
            optimizer: Key into ``self.optimizer_list``.
            weight_init: "random" draws from a normal with std 0.5; any other
                value uses Xavier/Glorot scaling.

        Raises:
            KeyError: If ``optimizer`` is not a key of ``self.optimizer_list``.
        """
        self.x_train,self.x_cv,self.y_train,self.y_cv = train_test_split(x_train, y_train, test_size=0.10, random_state=100,stratify=y_train)

        # Seeds NumPy's global RNG so parameter initialisation is repeatable.
        np.random.seed(10)

        # Placeholder table keyed "W<i>" / "b<i>"; no method of this class reads it.
        # It is kept (same keys, same order) in case external code inspects it.
        self.gradient = {}
        for prefix in ("W", "b"):
            for i in range(hidden_layers+2):
                self.gradient[prefix+str(i)] = i

        self.input_dim = input_dim
        self.hidden_layers = hidden_layers
        self.hidden_layers_size = hidden_layers_size
        self.output_dim = output_dim

        self.batch = batch_size
        self.epochs = epochs
        self.activation_func = activation_func
        self.learning_rate = learning_rate
        self.decay_rate = decay_rate
        self.optimizer = optimizer
        self.weight_init = weight_init
        self.beta = beta
        self.beta1 = beta1
        self.beta2 = beta2

        # Layer widths from input to output, e.g. [784, 32, 32, 10].
        self.layers = [self.input_dim] + self.hidden_layers*[self.hidden_layers_size] + [self.output_dim]
        self.optimizer_list={'gradient_descent':self.gradient_descent,'sgd':self.sgd,'nesterov':self.nesterov,'nadam':self.nadam,'adam':self.adam,'momentum':self.momentum,'rmsprop':self.rmsprop}

        self.activations = []
        self.activation_gradients = []
        self.weights_gradients = []
        self.biases_gradients = []
        self.weights = []
        self.biases = []
        for fan_in, fan_out in zip(self.layers[:-1], self.layers[1:]):
            if self.weight_init == 'random':
                std = 0.5
            else:
                # Xavier/Glorot normal: the variance is 2 / (fan_in + fan_out).
                # Dividing by the product instead makes the weights far too small.
                std = np.sqrt(2/(fan_in+fan_out))
            # Draw order (weights, then biases, per layer) is kept so that the
            # seeded "random" initialisation reproduces the same parameters.
            self.weights.append(np.random.normal(0,std,(fan_in,fan_out)))
            self.biases.append(np.random.normal(0,std,(fan_out)))
            self.activations.append(np.zeros(fan_in))
            # Separate buffers: these two lists must never share an array object,
            # because the bias accumulator is updated in place during backprop.
            self.activation_gradients.append(np.zeros(fan_out))
            self.weights_gradients.append(np.zeros((fan_in,fan_out)))
            self.biases_gradients.append(np.zeros(fan_out))
        self.activations.append(np.zeros(self.layers[-1]))

        # Training runs as part of construction.
        self.optimizer_list[optimizer](self.x_train,self.y_train)
            

    def sigmoid(self,activations):
        res = []
        for z in activations:
            if z<-40:
                res.append(0.0)
            elif z>40:
                res.append(1.0)
            else:
                res.append(1/(1+np.exp(-z)))
        res=np.asarray(res)
        return res

    def tanh(self,activations):
        res = []
        for z in activations:
            if z<-20:
                res.append(-1.0)
            elif z>20:
                res.append(1.0)
            else:
                temp=(np.exp(z) - np.exp(-z))/(np.exp(z) + np.exp(-z))
                res.append(temp)
        res=np.asarray(res)
        return res

    def relu(self,activations):
        res = []
        for i in activations:
            if i>0:
                res.append(i)
            else:
                res.append(0)
        res=np.asarray(res)
        return res

    def softmax(self,activations):
        tot = 0
        res=[]
        for z in activations:
            tot += np.exp(z)
        res=np.asarray([np.exp(z)/tot for z in activations])
        return res

    def forward_propagation(self,x,y,weights,biases):
        n = len(self.layers)
        pre_activation=[]
        for i in range(n-2):
            pre_activation.append(i)
        self.activations[0] = x
        for i in range(n-2):
            if self.activation_func == "sigmoid":
                s=self.sigmoid(np.matmul(weights[i].T,self.activations[i])+biases[i])
                self.activations[i+1] =s
            elif self.activation_func == "tanh":
                t=self.tanh(np.matmul(weights[i].T,self.activations[i])+biases[i])
                self.activations[i+1] =t
            elif self.activation_func == "relu":
                r=self.relu(np.matmul(weights[i].T,self.activations[i])+biases[i])
                self.activations[i+1] = r
        temp=self.softmax(np.matmul(weights[n-2].T,self.activations[n-2])+biases[n-2])
        self.activations[n-1] = temp      
        return -(np.log2(self.activations[-1][y]))


    def grad_w(self,i):
        gw=np.matmul(self.activations[i].reshape((-1,1)),self.activation_gradients[i].reshape((1,-1)))
        return gw


    def grad_b(self,i):
        gb=self.activation_gradients[i]
        return gb


    def backward_propagation(self,x,y,weights,biases):
        y_onehot = np.zeros(self.output_dim)
        y_onehot[y] = 1
        self.activation_gradients[-1] =  -1*(y_onehot - self.activations[-1])
        n = len(self.layers)
        for i in range(n-2,-1,-1):
            gw=self.grad_w(i)
            self.weights_gradients[i] += gw
            gb= self.grad_b(i)
            self.biases_gradients[i] +=gb
            if i!=0:
                val1=self.activation_gradients[i]
                value = np.matmul(weights[i],val1)
                if self.activation_func == "sigmoid":
                    val= value * self.activations[i] * (1-self.activations[i])
                    self.activation_gradients[i-1] = val
                elif self.activation_func == "tanh":
                    val=value * (1-np.square(self.activations[i]))
                    self.activation_gradients[i-1] = val
                elif self.activation_func == "relu":
                    res = []
                    for k in self.activations[i]:
                        ans=1.0 if k>0 else 0.0
                        res.append(ans)
                    res = np.asarray(res)
                    self.activation_gradients[i-1] = value * res

    def gradient_descent(self,x_train,y_train):
        grads=[]
        for i in (self.weights_gradients):
            grads.append(i)
        for i in range(self.epochs):
            print('Epoch---',i+1,end=" ")
            loss = 0
            val_loss = 0
            wg=[]
            for i in (self.weights_gradients):
                wg.append(0*i)
            self.weights_gradients = wg
            bg=[]
            for i in (self.biases_gradients):
              bg.append(0*i)
            self.biases_gradients =bg
            index = 1
            for x,y in zip(x_train,y_train):
                x = x.ravel()
                val=self.forward_propagation(x,y,self.weights,self.biases)
                loss += val
                self.backward_propagation(x,y,self.weights,self.biases)
                temp=index % self.batch
                if temp == 0 or index == x_train.shape[0]:
                    n=len(self.weights)
                    for j in range(n):
                        w_g=self.learning_rate * self.weights_gradients[j]
                        self.weights[j] -= w_g
                        b_g=self.learning_rate * self.biases_gradients[j]
                        self.biases[j] -= b_g
                    wg=[]
                    for i in (self.weights_gradients):
                      wg.append(0*i)
                    self.weights_gradients = wg
                    bg=[]
                    for i in (self.biases_gradients):
                      bg.append(0*i)
                    self.biases_gradients =bg
                index += 1 
            for x,y in zip(self.x_cv,self.y_cv):
               x=x.ravel()
               temp=self.forward_propagation(x,y,self.weights,self.biases)
               val_loss+=temp
            temp1=self.calculate_accuracy(x_train,y_train)
            acc=round(temp1,3)
            temp2=self.calculate_accuracy(self.x_cv,self.y_cv)
            val_acc=round(temp2,3)
            print('  loss = ',loss/x_train.shape[0],'  accuracy = ',acc,'   validation loss= ',val_loss/self.x_cv.shape[0],'  validation accuaracy= ',val_acc)

    def sgd(self,x_train,y_train):
        grads=[]
        for i in (self.weights_gradients):
            grads.append(i)
        t=self.epochs
        for i in range(t):
            print('Epoch---',i+1,end=" ")
            loss = 0
            val_loss=0
            index = 1
            for x,y in zip(x_train,y_train):
                x = x.ravel()
                val=self.forward_propagation(x,y,self.weights,self.biases)
                loss += val
                self.backward_propagation(x,y,self.weights,self.biases)
                temp=index % self.batch
                if  temp== 0 or index == x_train.shape[0]:
                    lst=[0*i for i in (self.weights_gradients)]
                    for j in range(len(self.weights)):
                        temp=self.learning_rate * self.weights_gradients[j]
                        self.weights[j] -= temp
                        self.biases[j] -= self.learning_rate * self.biases_gradients[j]
                    wg=[]
                    for i in (self.weights_gradients):
                      wg.append(0*i)
                    self.weights_gradients = wg
                    bg=[]
                    for i in (self.biases_gradients):
                      bg.append(0*i)
                    self.biases_gradients =bg
                index +=1   
            for x,y in zip(self.x_cv,self.y_cv):
               x=x.ravel()
               temp=self.forward_propagation(x,y,self.weights,self.biases)
               val_loss+=temp
            cal_acc=self.calculate_accuracy(x_train,y_train)
            acc=round(cal_acc,3)
            cal_acc_cv=self.calculate_accuracy(self.x_cv,self.y_cv)
            val_acc=round(cal_acc_cv,3)
            wandb.log({'train_loss':loss/x_train.shape[0],'train_accuracy':acc,'val_loss':val_loss/self.x_cv.shape[0],'val_accuracy':val_acc})
            print('  loss = ',loss/x_train.shape[0],'  accuracy = ',acc,'   validation loss= '
                  ,val_loss/self.x_cv.shape[0],'  validation accuaracy= ',val_acc)
            
    def momentum(self,x_train,y_train):
        prev_gradients_w=[]
        temp1=[]
        for i in (self.weights_gradients):
            temp1.append(0*i)
        prev_gradients_w=temp1
        prev_gradients_b=[]
        temp2=[]
        for i in (self.biases_gradients):
            temp2.append(0*i)
        prev_gradients_b=temp2
        n=self.epochs

        for i in range(n):
            print('Epoch---',i+1,end=" ")
            loss = 0
            val_loss=0
            wg=[]
            for i in (self.weights_gradients):
              wg.append(0*i)
            self.weights_gradients = wg
            bg=[]
            for i in (self.biases_gradients):
              bg.append(0*i)
            self.biases_gradients=bg
            index = 1
            for x,y in zip(x_train,y_train):
                x = x.ravel()
                val=self.forward_propagation(x,y,self.weights,self.biases)
                loss += val
                self.backward_propagation(x,y,self.weights,self.biases)
                temp=index % self.batch
                if  temp== 0 or index == x_train.shape[0]:
                    lst=[0*i for i in (self.weights_gradients)]
                    for j in range(len(self.weights)):
                        v1=self.learning_rate * self.weights_gradients[j]
                        v_w =(self.decay_rate * prev_gradients_w[j] +v1)
                        v2= self.learning_rate * self.biases_gradients[j]
                        v_b = (self.decay_rate * prev_gradients_b[j] + v2)
                        self.weights[j] -= v_w
                        self.biases[j] -= v_b
                        prev_gradients_w[j] = v_w
                        prev_gradients_b[j] = v_b
                    wg=[]
                    for i in (self.weights_gradients):
                      wg.append(0*i)
                    self.weights_gradients = wg
                    bg=[]
                    for i in (self.biases_gradients):
                      bg.append(0*i)
                    self.biases_gradients=bg
                index +=1
            for x,y in zip(self.x_cv,self.y_cv):
               x=x.ravel()
               val=self.forward_propagation(x,y,self.weights,self.biases)
               val_loss+=val

            cal_acc=self.calculate_accuracy(x_train,y_train)
            acc=round(cal_acc,3)
            cal_acc_cv=self.calculate_accuracy(self.x_cv,self.y_cv)
            val_acc=round(cal_acc_cv,3)
            wandb.log({'train_loss':loss/x_train.shape[0],'train_accuracy':acc,'val_loss':val_loss/self.x_cv.shape[0],'val_accuracy':val_acc})
            print('  loss = ',loss/x_train.shape[0],'  accuracy = ',acc,'   validation loss= '
                  ,val_loss/self.x_cv.shape[0],'  validation accuaracy= ',val_acc)


    def nesterov(self,x_train,y_train):
        prev_gradients_w=[]
        temp1=[]
        for i in (self.weights_gradients):
            temp1.append(0*i)
        prev_gradients_w=temp1
        prev_gradients_b=[]
        temp2=[]
        for i in (self.biases_gradients):
            temp2.append(0*i)
        prev_gradients_b=temp2

        n=self.epochs
        for i in range(n):
            print('Epoch---',i+1,end=" ")
            loss = 0
            val_loss=0
            for j in range(len(self.weights)):
              temp=self.weights[j] -  (self.decay_rate * prev_gradients_w[j])
              self.weights[j]=temp
              self.biases[j] =self.biases[j] -  self.decay_rate * prev_gradients_b[j]
            wg=[]
            for i in (self.weights_gradients):
              wg.append(0*i)
            self.weights_gradients = wg
            bg=[]
            for i in (self.biases_gradients):
              bg.append(0*i)
            self.biases_gradients=bg
            index = 1
            for x,y in zip(x_train,y_train):
                x = x.ravel()
                val=self.forward_propagation(x,y,self.weights,self.biases)
                loss += val
                self.backward_propagation(x,y,self.weights,self.biases)
                temp=index % self.batch
                if temp == 0 or index == x_train.shape[0]:
                    lst=[0*i for i in (self.weights_gradients)]
                    for j in range(len(self.weights)):
                        temp1=self.decay_rate * prev_gradients_w[j] + self.learning_rate*self.weights_gradients[j]
                        prev_gradients_w[j] =temp1
                        temp2= self.decay_rate * prev_gradients_b[j] + self.learning_rate*self.biases_gradients[j]               
                        prev_gradients_b[j] =  temp2
                                        
                        self.weights[j] -= prev_gradients_w[j]
                        self.biases[j] -= prev_gradients_b[j]
                    weights = [self.weights[j] -  self.decay_rate * prev_gradients_w[j] for j in range(len(self.weights))]
                    biases = [self.biases[j] -  self.decay_rate * prev_gradients_b[j] for j in range(len(self.biases))]
                    wg=[]
                    for i in (self.weights_gradients):
                       wg.append(0*i)
                    self.weights_gradients = wg
                    bg=[]
                    for i in (self.biases_gradients):
                      bg.append(0*i)
                    self.biases_gradients=bg
                index += 1
            for x,y in zip(self.x_cv,self.y_cv):
               x=x.ravel()
               val=self.forward_propagation(x,y,self.weights,self.biases)
               val_loss+=val
            cal_acc=self.calculate_accuracy(x_train,y_train)
            acc=round(cal_acc,3)
            cal_acc_cv=self.calculate_accuracy(self.x_cv,self.y_cv)
            val_acc=round(cal_acc_cv,3)
            wandb.log({'train_loss':loss/x_train.shape[0],'train_accuracy':acc,'val_loss':val_loss/self.x_cv.shape[0],'val_accuracy':val_acc})
            print('  loss = ',loss/x_train.shape[0],'  accuracy = ',acc,'   validation loss= '
                  ,val_loss/self.x_cv.shape[0],'  validation accuaracy= ',val_acc)
            
    def rmsprop(self,x_train,y_train):
        prev_gradients_w=[]
        temp1=[]
        for i in (self.weights_gradients):
            temp1.append(0*i)
        prev_gradients_w=temp1
        prev_gradients_b=[]
        temp2=[]
        for i in (self.biases_gradients):
            temp2.append(0*i)
        prev_gradients_b=temp2
        eps = 1e-2
        n=self.epochs
        for i in range(n):
            print('Epoch---',i+1,end=" ")
            loss = 0
            val_loss=0
            wg=[]
            for i in (self.weights_gradients):
              wg.append(0*i)
            self.weights_gradients = wg
            bg=[]
            for i in (self.biases_gradients):
              bg.append(0*i)
            self.biases_gradients=bg 
            index = 1
            for x,y in zip(x_train,y_train):
                x = x.ravel()
                val=self.forward_propagation(x,y,self.weights,self.biases)
                loss += val
                self.backward_propagation(x,y,self.weights,self.biases)
                condt=index%self.batch
                if condt == 0 or index == x_train.shape[0]:
                    for j in range(len(self.weights)):
                        t1=(1-self.beta) * np.square(self.weights_gradients[j])
                        v_w = (self.beta * prev_gradients_w[j] +t1)
                        t2=(1-self.beta) * np.square(self.biases_gradients[j])
                        v_b = (self.beta * prev_gradients_b[j] +t2)
                        denom_w=(self.weights_gradients[j] /(np.sqrt(v_w + eps)))
                        self.weights[j] -= self.learning_rate * denom_w
                        denom_b=(self.biases_gradients[j] /(np.sqrt(v_b + eps)))
                        self.biases[j] -= self.learning_rate * denom_b
                        prev_gradients_w[j] = v_w
                        prev_gradients_b[j] = v_b
                    wg=[]
                    for i in (self.weights_gradients):
                      wg.append(0*i)
                    self.weights_gradients=wg
                    bg=[]
                    for i in (self.biases_gradients):
                      bg.append(0*i)
                    self.biases_gradients=bg
                index +=1
            for x,y in zip(self.x_cv,self.y_cv):
               x=x.ravel()
               val=self.forward_propagation(x,y,self.weights,self.biases)
               val_loss+=val

            cal_acc=self.calculate_accuracy(x_train,y_train)
            acc=round(cal_acc,3)
            cal_acc_cv=self.calculate_accuracy(self.x_cv,self.y_cv)
            val_acc=round(cal_acc_cv,3)
            wandb.log({'train_loss':loss/x_train.shape[0],'train_accuracy':acc,'val_loss':val_loss/self.x_cv.shape[0],'val_accuracy':val_acc})
            print('  loss = ',loss/x_train.shape[0],'  accuracy = ',acc,'   validation loss= '
                  ,val_loss/self.x_cv.shape[0],'  validation accuaracy= ',val_acc)


    def adam(self,x_train,y_train):
        m_prev_gradients_w=[]
        temp1=[]
        for i in (self.weights_gradients):
            temp1.append(0*i)
        m_prev_gradients_w=temp1
        m_prev_gradients_b=[]
        temp2=[]
        for i in (self.biases_gradients):
            temp2.append(0*i)
        m_prev_gradients_b=temp2

        v_prev_gradients_w=[]
        temp3=[]
        for i in (self.weights_gradients):
            temp3.append(0*i)
        v_prev_gradients_w=temp3
        v_prev_gradients_b=[]
        temp4=[]
        for i in (self.biases_gradients):
            temp4.append(0*i)
        v_prev_gradients_b=temp4
        iter = 1
        n=self.epochs
        for i in range(n):
            print('Epoch---',i+1,end=" ")
            loss = 0
            val_loss=0
            eps = 1e-2
            wg=[]
            for i in (self.weights_gradients):
              wg.append(0*i)
            self.weights_gradients = wg
            bg=[]
            for i in (self.biases_gradients):
              bg.append(0*i)
            self.biases_gradients=bg 
            index = 1
            for x,y in zip(x_train,y_train):
                x = x.ravel()
                val=self.forward_propagation(x,y,self.weights,self.biases)
                loss +=val 
                self.backward_propagation(x,y,self.weights,self.biases)
                condt=index%self.batch
                if condt == 0 or index == x_train.shape[0]:
                    s=len(self.weights)
                    for j in range(s):
                        p1=(1-self.beta1) * self.weights_gradients[j]
                        m_w = (self.beta1 * m_prev_gradients_w[j]) + p1
                        p2=(1-self.beta1) * self.biases_gradients[j]
                        m_b = (self.beta1 * m_prev_gradients_b[j]) + p2
                        p3=(1-self.beta2) * np.square(self.weights_gradients[j])
                        v_w = (self.beta2 * v_prev_gradients_w[j]) + p3
                        p4=(1-self.beta2) * np.square(self.biases_gradients[j])
                        v_b = (self.beta2 * v_prev_gradients_b[j]) + p4
                        denom1=(1-(self.beta1)**iter)
                        m_hat_w = (m_w)/ denom1
                        m_hat_b = (m_b)/denom1
                        denom2=(1-(self.beta2)**iter)
                        v_hat_w = (v_w)/ denom2
                        v_hat_b = (v_b)/denom2
                        t1=(m_hat_w/(np.sqrt(v_hat_w + eps)))
                        self.weights[j] -= self.learning_rate * t1
                        t2=(m_hat_b/(np.sqrt(v_hat_b + eps)))
                        self.biases[j] -= self.learning_rate * t2
                        v1=m_prev_gradients_w[j]
                        m_prev_gradients_w[j] = m_w
                        m_prev_gradients_b[j] = m_b
                        v2=v_prev_gradients_w[j]
                        v_prev_gradients_w[j] = v_w
                        v_prev_gradients_b[j] = v_b
                        wg=[]
                        for i in (self.weights_gradients):
                           wg.append(0*i)
                        self.weights_gradients = wg
                        bg=[]
                        for i in (self.biases_gradients):
                          bg.append(0*i)
                        self.biases_gradients=bg
                    iter += 1
                index +=1
            for x,y in zip(self.x_cv,self.y_cv):
               x=x.ravel()
               val=self.forward_propagation(x,y,self.weights,self.biases)
               val_loss+=val
            cal_acc=self.calculate_accuracy(x_train,y_train)
            acc=round(cal_acc,3)
            cal_acc_cv=self.calculate_accuracy(self.x_cv,self.y_cv)
            val_acc=round(cal_acc_cv,3)
            wandb.log({'train_loss':loss/x_train.shape[0],'train_accuracy':acc,'val_loss':val_loss/self.x_cv.shape[0],'val_accuracy':val_acc})
            print('  loss = ',loss/x_train.shape[0],'  accuracy = ',acc,'   validation loss= '
                  ,val_loss/self.x_cv.shape[0],'  validation accuaracy= ',val_acc)
        

    def nadam(self,x_train,y_train):
        m_prev_gradients_w=[]
        temp1=[]
        for i in (self.weights_gradients):
            temp1.append(0*i)
        m_prev_gradients_w=temp1
        m_prev_gradients_b=[]
        temp2=[]
        for i in (self.biases_gradients):
            temp2.append(0*i)
        m_prev_gradients_b=temp2

        v_prev_gradients_w=[]
        temp3=[]
        for i in (self.weights_gradients):
            temp3.append(0*i)
        v_prev_gradients_w=temp3
        v_prev_gradients_b=[]
        temp4=[]
        for i in (self.biases_gradients):
            temp4.append(0*i)
        v_prev_gradients_b=temp4
        iter = 1
        n=self.epochs
        for i in range(n):
            print('Epoch---',i+1,end=" ")
            loss = 0
            val_loss=0
            eps = 1e-2
            wg=[]
            for i in (self.weights_gradients):
              wg.append(0*i)
            self.weights_gradients = wg
            bg=[]
            for i in (self.biases_gradients):
              bg.append(0*i)
            self.biases_gradients=bg 
            index = 1
            for x,y in zip(x_train,y_train):
                x = x.ravel()
                val=self.forward_propagation(x,y,self.weights,self.biases)
                loss += val
                self.backward_propagation(x,y,self.weights,self.biases)
                condt=index % self.batch
                if condt == 0 or index == x_train.shape[0]:
                    s=len(self.weights)
                    for j in range(s):
                        p1=(1-self.beta1) * self.weights_gradients[j]
                        m_w = (self.beta1 * m_prev_gradients_w[j]) + p1
                        p2=(1-self.beta1) * self.biases_gradients[j]
                        m_b = (self.beta1 * m_prev_gradients_b[j]) + p2
                        p3=(1-self.beta2) * np.square(self.weights_gradients[j])
                        v_w = (self.beta2 * v_prev_gradients_w[j]) + p3
                        p4=(1-self.beta2) * np.square(self.biases_gradients[j])
                        v_b = (self.beta2 * v_prev_gradients_b[j]) + p4
                        denom1=(1-(self.beta1)**iter)
                        m_hat_w = (m_w)/ denom1
                        m_hat_b = (m_b)/denom1
                        denom2=(1-(self.beta2)**iter)
                        v_hat_w = (v_w)/ denom2
                        v_hat_b = (v_b)/denom2
                        t3=(1-self.beta1) * self.weights_gradients[j]
                        m_dash_w = self.beta1 * m_hat_w + t3
                        t4=(1-self.beta1) * self.biases_gradients[j]
                        m_dash_b = self.beta1 * m_hat_b + t4
                        t1=(m_dash_w/(np.sqrt(v_hat_w + eps)))
                        self.weights[j] -= self.learning_rate * t1
                        t2=(m_dash_b/(np.sqrt(v_hat_b + eps)))
                        self.biases[j] -= self.learning_rate * t2
                        v1=m_prev_gradients_w[j]
                        m_prev_gradients_w[j] = m_w
                        v2=m_prev_gradients_b[j]
                        m_prev_gradients_b[j] = m_b
                        v_prev_gradients_w[j] = v_w
                        v_prev_gradients_b[j] = v_b
                        wg=[]
                        for i in (self.weights_gradients):
                           wg.append(0*i)
                        self.weights_gradients = wg
                        bg=[]
                        for i in (self.biases_gradients):
                          bg.append(0*i)
                        self.biases_gradients=bg
                    iter += 1
                index +=1
            for x,y in zip(self.x_cv,self.y_cv):
               x=x.ravel()
               val=self.forward_propagation(x,y,self.weights,self.biases)
               val_loss+=val
            cal_acc=self.calculate_accuracy(x_train,y_train)
            acc=round(cal_acc,3)
            cal_acc_cv=self.calculate_accuracy(self.x_cv,self.y_cv)
            val_acc=round(cal_acc_cv,3)
            wandb.log({'train_loss':loss/x_train.shape[0],'train_accuracy':acc,'val_loss':val_loss/self.x_cv.shape[0],'val_accuracy':val_acc})
            print('  loss = ',loss/x_train.shape[0],'  accuracy = ',acc,'   validation loss= '
                  ,val_loss/self.x_cv.shape[0],'  validation accuaracy= ',val_acc)
    
    def calculate_accuracy(self,X,Y):
        count = 0
        n=len(X)
        for i in range(n):
            if self.predict(X[i]) == Y[i]:
                count+=1
        res=count/n
        return res

    def predict(self,x):
        n=len(self.layers)
        x = x.ravel()
        self.activations[0] = x
        for i in range(n-2):
            if self.activation_func == "sigmoid":
                val=self.sigmoid(np.matmul(self.weights[i].T,self.activations[i])+self.biases[i])
                self.activations[i+1] = val
            elif self.activation_func == "tanh":
                val=self.tanh(np.matmul(self.weights[i].T,self.activations[i])+self.biases[i])
                self.activations[i+1] = val
            elif self.activation_func == "relu":
                val=self.relu(np.matmul(self.weights[i].T,self.activations[i])+self.biases[i])
                self.activations[i+1] = val

        self.activations[n-1] = self.softmax(np.matmul(self.weights[n-2].T,self.activations[n-2])+self.biases[n-2])

        return np.argmax(self.activations[-1])

In [None]:
# W&B sweep definition: Bayesian search that maximises the logged `val_accuracy`.
# Each entry under 'parameters' lists the discrete values the sweep may pick.
sweep_config = {
    'method': 'bayes',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        'epochs': {'values': [3, 5, 7, 10]},
        'weight_decay': {'values': [0.1, 0.0, 0.8, 0.9]},
        'batch_size': {'values': [32, 64, 128]},
        'learning_rate': {'values': [5e-3, 2e-3, 6e-3, 5e-4]},
        'hidden_layers': {'values': [1, 2, 3]},
        'optimizer': {'values': ['sgd', 'momentum', 'nesterov', 'adam', 'rmsprop', 'nadam']},
        'hidden_layers_size': {'values': [16, 32, 64]},
        'activation': {'values': ['sigmoid', 'tanh', 'relu']},
        'weight_init': {'values': ['random', 'xavier']},
    },
}

In [None]:
def train():
    """Run one W&B-tracked training job with the active run's hyperparameters.

    Starts a W&B run seeded with `config_defaults`, names the run after its
    key hyperparameters, and constructs a `Neural_network` on the module-level
    `x_train` / `y_train` arrays. The instance is not kept; presumably the
    constructor performs the training and metric logging itself (confirm
    against the class definition).

    Takes no arguments so it can be passed directly to `wandb.agent`.
    """
    # Fallback hyperparameters. 'weight_decay' must be present here because
    # it is read from the config below; without a default, calling train()
    # outside a sweep fails on the missing key.
    config_defaults={
        'epochs':5,
        'batch_size':16,
        'learning_rate':1e-3,
        'weight_decay':0.0,
        'activation':'relu',
        'optimizer':'nadam',
        'hidden_layers_size':32,
        'hidden_layers':3,
        'weight_init':'xavier' }

    wandb.init(project = 'DL_Assignment1' , config=config_defaults)
    config=wandb.config
    wandb.run.name = 'op_{}_act_{}_lr_{}_layer_{}_bth_{}'.format(config.optimizer,config.activation,config.learning_rate,config.hidden_layers,config.batch_size)

    Neural_network(
        x_train,
        y_train,
        len(x_train[0].ravel()),       # input_dim: flattened size of one sample
        config.hidden_layers_size,
        config.hidden_layers,
        max(y_train)+1,                # output_dim: labels are taken to be 0..max
        batch_size=config.batch_size,
        epochs=config.epochs,
        activation_func=config.activation,
        learning_rate=config.learning_rate,
        # The sweep's 'weight_decay' is fed to the constructor's decay_rate slot.
        decay_rate=config.weight_decay,
        beta=0.9,
        beta1=0.9,
        beta2=0.99,
        optimizer=config.optimizer,
        weight_init=config.weight_init,
    )

In [None]:
# Register the sweep with W&B; the returned id is what agents attach to.
sweep_id = wandb.sweep(
    sweep_config,
    entity="cs22m069",
    project="a1_collab",
)

Create sweep with ID: smdt8wfp
Sweep URL: https://wandb.ai/cs22m069/a1_collab/sweeps/smdt8wfp


In [None]:
# Start an agent in this kernel that calls `train` for 5 sweep runs.
wandb.agent(
    sweep_id,
    train,
    count=5,
)

[34m[1mwandb[0m: Agent Starting Run: 73qm6qtj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_layers_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.006
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier
[34m[1mwandb[0m: Currently logged in as: [33mcs22m069[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch--- 1   loss =  3.233579227279614   accuracy =  0.378    validation loss=  3.222621707702519   validation accuaracy=  0.374
Epoch--- 2   loss =  3.2186653632373714   accuracy =  0.386    validation loss=  3.21722277383914   validation accuaracy=  0.383
Epoch--- 3   loss =  3.2148855692845495   accuracy =  0.39    validation loss=  3.214658437509127   validation accuaracy=  0.387
Epoch--- 4   loss =  3.212933774020543   accuracy =  0.391    validation loss=  3.213144495488344   validation accuaracy=  0.389
Epoch--- 5   loss =  3.211672411813361   accuracy =  0.393    validation loss=  3.21209942008972   validation accuaracy=  0.391
Epoch--- 6   loss =  3.2107573257749524   accuracy =  0.395    validation loss=  3.2113164539997996   validation accuaracy=  0.393
Epoch--- 7   loss =  3.2100483126569412   accuracy =  0.396    validation loss=  3.2106975670074793   validation accuaracy=  0.394


VBox(children=(Label(value='0.001 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.523884…

0,1
train_accuracy,▁▄▆▆▇██
train_loss,█▄▂▂▁▁▁
val_accuracy,▁▄▆▆▇██
val_loss,█▅▃▂▂▁▁

0,1
train_accuracy,0.396
train_loss,3.21005
val_accuracy,0.394
val_loss,3.2107


[34m[1mwandb[0m: Agent Starting Run: chx7c8hb with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_layers_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.006
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch--- 1   loss =  3.2603192038542974   accuracy =  0.103    validation loss=  3.2531188077657403   validation accuaracy=  0.102
Epoch--- 2   loss =  3.250716608459228   accuracy =  0.104    validation loss=  3.2497041237497197   validation accuaracy=  0.104
Epoch--- 3   loss =  3.248275895373287   accuracy =  0.109    validation loss=  3.2479299015386913   validation accuaracy=  0.111


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train_accuracy,▁▂█
train_loss,█▂▁
val_accuracy,▁▃█
val_loss,█▃▁

0,1
train_accuracy,0.109
train_loss,3.24828
val_accuracy,0.111
val_loss,3.24793


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9c94zhxd with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 2
[34m[1mwandb[0m: 	hidden_layers_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weight_init: random


Epoch--- 1   loss =  2.579216802399296   accuracy =  0.732    validation loss=  1.2452753791951696   validation accuaracy=  0.722
Epoch--- 2   loss =  1.0349468457685094   accuracy =  0.779    validation loss=  0.9842813962749339   validation accuaracy=  0.771
Epoch--- 3   loss =  0.8775525235473229   accuracy =  0.803    validation loss=  0.8797170852044014   validation accuaracy=  0.792
Epoch--- 4   loss =  0.8005389231793493   accuracy =  0.815    validation loss=  0.8206188140497328   validation accuaracy=  0.802
Epoch--- 5   loss =  0.7501892958099687   accuracy =  0.824    validation loss=  0.7816635092368283   validation accuaracy=  0.813
Epoch--- 6   loss =  0.7135122369483275   accuracy =  0.831    validation loss=  0.7506809575266029   validation accuaracy=  0.821
Epoch--- 7   loss =  0.6848822102096442   accuracy =  0.837    validation loss=  0.7287889783077691   validation accuaracy=  0.825
Epoch--- 8   loss =  0.6618466161836251   accuracy =  0.84    validation loss=  0.71

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train_accuracy,▁▄▅▆▇▇▇███
train_loss,█▂▂▂▁▁▁▁▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
train_accuracy,0.848
train_loss,0.62665
val_accuracy,0.835
val_loss,0.68429


[34m[1mwandb[0m: Agent Starting Run: 48w0ku0h with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_layers_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch--- 1   loss =  37.198882051837366   accuracy =  0.591    validation loss=  16.67666517588   validation accuaracy=  0.591
Epoch--- 2   loss =  13.181829432703783   accuracy =  0.672    validation loss=  11.265379009198567   validation accuaracy=  0.671
Epoch--- 3   loss =  9.722038349202181   accuracy =  0.705    validation loss=  9.161490686844195   validation accuaracy=  0.708
Epoch--- 4   loss =  8.128663165100058   accuracy =  0.723    validation loss=  8.039594337752954   validation accuaracy=  0.722
Epoch--- 5   loss =  7.069094040387443   accuracy =  0.736    validation loss=  7.294501503053387   validation accuaracy=  0.734
Epoch--- 6   loss =  6.2889971701063   accuracy =  0.748    validation loss=  6.702065939985256   validation accuaracy=  0.737
Epoch--- 7   loss =  5.705162292064604   accuracy =  0.76    validation loss=  6.292271516791289   validation accuaracy=  0.744


0,1
train_accuracy,▁▄▆▆▇██
train_loss,█▃▂▂▁▁▁
val_accuracy,▁▅▆▇███
val_loss,█▄▃▂▂▁▁

0,1
train_accuracy,0.76
train_loss,5.70516
val_accuracy,0.744
val_loss,6.29227


[34m[1mwandb[0m: Agent Starting Run: u16jk1us with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	hidden_layers: 2
[34m[1mwandb[0m: 	hidden_layers_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: 	weight_init: random


Epoch--- 1   loss =  16.00787044575227   accuracy =  0.628    validation loss=  5.1851300276211   validation accuaracy=  0.623
Epoch--- 2   loss =  3.9780494396150217   accuracy =  0.7    validation loss=  3.2838425478140256   validation accuaracy=  0.693
Epoch--- 3   loss =  2.803557401720445   accuracy =  0.733    validation loss=  2.5659712101733714   validation accuaracy=  0.725
Epoch--- 4   loss =  2.293663614783741   accuracy =  0.754    validation loss=  2.219787454100948   validation accuaracy=  0.746
Epoch--- 5   loss =  1.9971209312868314   accuracy =  0.768    validation loss=  1.992894226303337   validation accuaracy=  0.761
Epoch--- 6   loss =  1.7905732537034094   accuracy =  0.777    validation loss=  1.8097494442721784   validation accuaracy=  0.772
Epoch--- 7   loss =  1.6296774749298886   accuracy =  0.784    validation loss=  1.6721680263023202   validation accuaracy=  0.777


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train_accuracy,▁▄▆▇▇██
train_loss,█▂▂▁▁▁▁
val_accuracy,▁▄▆▇▇██
val_loss,█▄▃▂▂▁▁

0,1
train_accuracy,0.784
train_loss,1.62968
val_accuracy,0.777
val_loss,1.67217
