Install required packages

In [None]:
!pip install wandb

Import libraries

In [None]:
import numpy as np
from keras.datasets import fashion_mnist
import wandb 
from sklearn.model_selection import train_test_split

In [None]:
# Authenticate this session with Weights & Biases. In the recorded run below
# this prompted for an API key and stored it in the user's netrc file.
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

Loss functions, activation functions and their derivatives

In [None]:
def _positive_sigmoid(x):
    """Sigmoid for x >= 0; exp(-x) cannot overflow on this half-line."""
    return 1 / (1 + np.exp(-x))


def _negative_sigmoid(x):
    """Sigmoid for x < 0, rewritten so that only exp(x) <= 1 is evaluated."""
    exp = np.exp(x)
    return exp / (exp + 1)


def sig(x):
    """Numerically stable element-wise logistic sigmoid.

    Each element is routed to the formulation that cannot overflow for its
    sign (see the two helpers above).

    Args:
        x: array-like of pre-activations. Integer and boolean inputs are
            promoted to float64 first; previously the result buffer inherited
            the integer dtype and every output was truncated to 0 or 1.

    Returns:
        ndarray with the same shape as ``x`` and a floating dtype.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # np.empty_like would otherwise allocate an integer result buffer.
        x = x.astype(np.float64)
    positive = x >= 0
    negative = ~positive
    result = np.empty_like(x)
    result[positive] = _positive_sigmoid(x[positive])
    result[negative] = _negative_sigmoid(x[negative])
    return result

def d_sig(x):
    """Derivative of the logistic sigmoid at ``x``: sig(x) * (1 - sig(x))."""
    activation = sig(x)
    return activation * (1 - activation)

def tanh(x):
    """Element-wise hyperbolic tangent (thin wrapper around ``np.tanh``)."""
    activated = np.tanh(x)
    return activated

def d_tanh(x):
    """Derivative of tanh at ``x``, i.e. 1 - tanh(x)^2, element-wise."""
    t = np.tanh(x)
    return 1 - np.multiply(t, t)

def relu(z):
    """Leaky ReLU: ``z`` where z > 0, otherwise ``0.01 * z``.

    The previous mask-multiply form evaluated ``0 * inf`` for infinite
    pre-activations and therefore returned NaN; selecting with ``np.where``
    keeps +inf as +inf and -inf as -inf.

    Args:
        z: scalar or array-like of pre-activations.

    Returns:
        Array of the same shape as ``z`` (a NumPy scalar for scalar input).
    """
    z = np.asarray(z)
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # n-d results untouched.
    return np.where(z > 0, z, 0.01 * z)[()]

def der_relu(z):
    """Slope of the leaky ReLU: 1 where z > 0, 0.01 where z < 0, 0 elsewhere."""
    slope_for_negative = np.where(z < 0, 0.01, 0.0)
    return np.where(z > 0, 1.0, slope_for_negative)

def softmax(x):
    """Softmax over the whole array ``x``.

    The maximum and the normalising sum are both taken over every element
    (no axis argument), so ``x`` is treated as a single score vector.
    The scores are shifted by ``-max + 500.0`` before exponentiation; the
    largest exponent is therefore always exp(500).
    # NOTE(review): the +500 offset presumably keeps very small
    # probabilities from underflowing to zero - confirm intent.
    """
    shifted = x - np.max(x) + 500.0
    exps = np.exp(shifted)
    total = exps.sum()
    return exps / total

def soft_derv(y, x):
    """Return ``softmax(x) - y``.

    Args:
        y: target output (one-hot).
        x: pre-activation values of the output layer.
    """
    return softmax(x) - y
    
# Loss Functions 
def cross_enloss(y, yh):
    """Cross-entropy loss of one prediction against a one-hot target.

    Args:
        y: one-hot target, as a flat vector or a column/row vector.
        yh: predicted probabilities in the same layout as ``y``.

    Returns:
        ``-log(yh[k] + 1e-300)`` as a scalar, where ``k`` is the position of
        the largest entry of ``y``.

    The previous version indexed the result with ``[0]`` and so only worked
    for column vectors; both inputs are flattened here so 1-D vectors work
    as well.
    """
    target_class = int(np.argmax(y))
    probs = np.asarray(yh).ravel()
    tiny = 1e-300  # keeps log() finite when the predicted probability is 0
    return -np.log(probs[target_class] + tiny)

def mse(y, yh):
    """Mean of the squared element-wise differences between ``y`` and ``yh``."""
    residual = np.subtract(y, yh)
    return np.mean(np.multiply(residual, residual))

In [None]:
#-- Helper Functions --------------------------
def oneHotEnc(Y, num_class):
    """One-hot encode integer labels, one column per sample.

    Args:
        Y: array of class labels; ``Y.shape[0]`` is the number of samples.
        num_class: number of classes (rows of the result).

    Returns:
        Float array of shape ``(num_class, Y.shape[0])`` with a single 1.0
        in each column at the row given by that sample's label.
    """
    n_samples = int(Y.shape[0])
    encoded = np.zeros((num_class, n_samples))
    for column, label in enumerate(Y):
        encoded[int(label), column] = 1.0
    return encoded

#### The Layer class: initializes and maintains each layer's parameters

In [None]:
# The layer class
class Layer:
    """One fully connected layer: weights, bias and an activation pair.

    Attributes set by the constructor:
        op: True when this is the output layer (changes how calc_grad works).
        W: weight matrix of shape (neurons, inputs).
        b: bias column of shape (neurons, 1).
        f, der: activation function and its derivative, looked up in actFun.
    """

    # activation name -> (activation function, derivative function)
    actFun = {
        'tanh': (tanh, d_tanh),
        'sigmoid': (sig, d_sig),
        'softmax': (softmax, soft_derv),
        'relu': (relu, der_relu)
        }

    def __init__(self, inputs, neurons, weight_initializer, activation, op=False):
        """Create the layer with randomly initialised parameters.

        Args:
            inputs: number of input features.
            neurons: number of units in this layer.
            weight_initializer: 'xavier' scales the N(0, 1) weights by
                1/sqrt(inputs); any other value keeps plain N(0, 1) weights.
            activation: key of ``actFun``.
            op: True for the output layer.

        Raises:
            ValueError: if ``activation`` is not a key of ``actFun``
                (previously this surfaced as an opaque unpacking TypeError).
        """
        if activation not in self.actFun:
            raise ValueError(
                "unknown activation %r; expected one of %s"
                % (activation, sorted(self.actFun))
            )
        self.op = op
        self.W = np.random.randn(neurons, inputs)
        if weight_initializer == 'xavier':
            self.W = self.W/np.sqrt(inputs)
        self.b = np.random.randn(neurons,1)
        self.f, self.der = self.actFun[activation]

    def feed(self, inps):
        """Forward pass that does not store anything on the layer."""
        return self.f(np.matmul(self.W, inps) + self.b)

    def feedforward(self, inps):
        """Forward pass that caches inputs, pre-activation and activation.

        The cached ``inps`` and ``preAct`` are read later by calc_grad.
        """
        self.inps = inps
        self.preAct = np.matmul(self.W, self.inps) + self.b
        self.Act = self.f(self.preAct)
        return self.Act

    def calc_grad(self, delA, y = np.array([[]]) ):
        """Back-propagate through this layer for the last feedforward call.

        Args:
            delA: derivative of the error w.r.t. this layer's activations
                (ignored for the output layer).
            y: target column; only used when ``self.op`` is True, in which
                case the delta comes from ``self.der(y, preAct)``.

        Returns:
            (dW, db, dA_prev): gradients for W and b, and the derivative of
            the error w.r.t. the previous layer's activations.
        """
        if self.op == True:
            delPA = self.der(y, self.preAct)
        else:
            delPA = np.multiply(self.der(self.preAct), delA)

        # Force a column vector so the outer product below yields W's shape.
        delPA = delPA.reshape(len(delPA), 1)
        dW = np.matmul(delPA, self.inps.T)
        db = delPA
        dA_prev = np.matmul(self.W.T, delPA)

        return dW, db, dA_prev

#### The network class: initializes a neural network and trains it with various optimizers

In [None]:
class network:

    def __init__(self, n_layers, neurons, actfn, weight_initializer, X, Y, Xval, Yval):
        self.X = X
        self.Y = Y
        self.Xval = Xval
        self.Yval = Yval
        
        # initialize all the layers of the network
        self.layers = [ Layer( X.shape[0], neurons, weight_initializer, actfn) ]
        for i in range(1, n_layers):
            if i == (n_layers-1):
                self.layers.append( Layer(neurons, Y.shape[0], weight_initializer, 'softmax',True) )  #set layers of network
            else:
                self.layers.append( Layer(neurons, neurons, weight_initializer, actfn) )  #set layers of network

    # mini-batch gradient descent
    def batch(self, epochs=10, bat_size=16, loss_type = 'cross', l2 = 0, eta=0.01):
        losses = []
        for epoch in range(epochs):
            loss = 0.0
            DW = []     #List of W_grads of all layer
            DB = []     #List of b_grads of all layer
            for layer in self.layers:
                DW.append(np.zeros((layer.W.shape[0], layer.W.shape[1] )) )
                DB.append(np.zeros((layer.b.shape[0],1)) )  

            data_points = 0
            for x,y in zip(self.X.transpose(),self.Y.transpose()):
                #feedforward
                y = y.reshape(len(y),1)
                yh = x.reshape(len(x),1)
                
                for layer in self.layers:
                    yh = layer.feedforward(yh)
                
                delA = yh

                # gradient calculation
                for i in range(len(self.layers)-1, -1, -1):                 
                    if (self.layers[i].op==True):
                        dW,db,delA = self.layers[i].calc_grad(delA, y)
                    else:
                        dW,db,delA = self.layers[i].calc_grad(delA)

                    DW[i] = DW[i]+dW
                    DB[i] = DB[i]+db

                data_points += 1
                # mini-batch update ----------------------------------------
                if(data_points%bat_size==0):
                    for i in range(len(self.layers)-1, -1, -1):
                        nw = (eta*DW[i])/bat_size
                        nb = (eta*DB[i])/bat_size
                        self.layers[i].W =  (1-eta*l2)*self.layers[i].W - nw
                        self.layers[i].b =  self.layers[i].b - nb           
                    DW.clear()
                    DB.clear()
                    for layer in self.layers:
                        DW.append(np.zeros((layer.W.shape[0], layer.W.shape[1] )) )
                        DB.append(np.zeros((layer.b.shape[0],1)) ) 
            
            
            vacc, vloss = self.metric(self.Xval,self.Yval,loss_type)
            acc, loss = self.metric(self.X,self.Y,loss_type)
            losses.append(loss)
            wandb.log({"epoch": epoch+1, 
                       "loss": loss,
                       "accuracy": acc,
                       "val_error": vloss,
                       "val_accuracy": vacc})
            
            print("%d Complete. Loss = %f, acc = %f total loss=%f" %(epoch+1, loss, acc, vloss))

        print("Update complete. Total epochs = %d" %(epochs,))
        return losses
        
    # stochastic gradient descent    
    def sgd(self, epochs, loss_type = 'cross', l2 = 0, eta = 0.01):
        losses = []
        for epoch in range(epochs):
            loss = 0.0
            for x,y in zip(self.X.transpose(),self.Y.transpose()):
                #feedforward----------------------------------
                y = y.reshape(len(y),1)
                yh = x.reshape(len(x),1)
                for layer in self.layers:
                    yh = layer.feedforward(yh)
                
                delA = yh
                #Backpropagation---------------------------
                for layer in reversed(self.layers):
                    if(layer.op==True):
                        dW,db,delA = layer.calc_grad(delA, y)
                    else:
                        dW,db,delA = layer.calc_grad(delA)
                    # updates for each data point
                    layer.W = (1-eta*l2)*layer.W - (dW*eta)
                    layer.b = layer.b - (db*eta)

            vacc, vloss = self.metric(self.Xval,self.Yval,loss_type)
            acc, loss = self.metric(self.X,self.Y,loss_type)
            losses.append(loss)
            wandb.log({"epoch": epoch+1, 
                       "loss": loss,
                       "accuracy": acc,
                       "val_error": vloss,
                       "val_accuracy": vacc})
            
            print("%d Complete. Loss = %f, acc = %f total loss=%f" %(epoch+1, loss, acc, vloss))

        print("Update complete. Total epochs = %d" %(epochs,))
        return losses

    def momentum(self, epochs = 10, bat_size = 16, loss_type = 'cross', l2 = 0, eta = 0.01, gamma = 0.9):

        losses = []
        VW = []           #momentum parameter for W
        VB = []           #Momentum parameter for b
        data_points = 0
        DW = []     #List of W_grads of all layer
        DB = []     #List of b_grads of all layer
        for layer in self.layers:
            VW.append(np.zeros((layer.W.shape[0], layer.W.shape[1] )) )
            VB.append(np.zeros((layer.b.shape[0],1)))
            DW.append(np.zeros((layer.W.shape[0], layer.W.shape[1] )) )
            DB.append(np.zeros((layer.b.shape[0],1)) ) 

        for epoch in range(epochs):
            loss = 0.0
            for x,y in zip(self.X.transpose(),self.Y.transpose()):
                #feedforward ----------------------------------
                data_points += 1
                y = y.reshape(len(y),1)
                yh = x.reshape(len(x),1)
                for layer in self.layers:
                    yh = layer.feedforward(yh)

                delA = yh

                # gradient calculation ----------------------------
                for i in range(len(self.layers)-1, -1, -1):                   
                    if(self.layers[i].op==True):
                        dW,db,delA = self.layers[i].calc_grad(delA, y)
                    else:
                        dW,db,delA = self.layers[i].calc_grad(delA)

                    DW[i] = DW[i]+dW
                    DB[i] = DB[i]+db
                       
                # MomentumGD update ----------------------------------------
                if(data_points==bat_size):
                    data_points=0
                    for i in range(len(self.layers)-1, -1, -1):
                        VW[i] = (gamma*VW[i])+ ((eta*DW[i])/bat_size)
                        VB[i] = (gamma*VB[i])+ ((eta*DB[i])/bat_size)
                        self.layers[i].W =  (1-eta*l2)*self.layers[i].W - VW[i]
                        self.layers[i].b =  self.layers[i].b - VB[i]
                    DW.clear()
                    DB.clear()
                    for layer in self.layers:
                        DW.append(np.zeros((layer.W.shape[0], layer.W.shape[1] )) )
                        DB.append(np.zeros((layer.b.shape[0],1)) ) 


            vacc,vloss = self.metric(self.Xval,self.Yval,loss_type)
            acc,loss = self.metric(self.X,self.Y,loss_type)
            losses.append(loss)
            wandb.log({"epoch": epoch+1, 
                       "loss": loss,
                       "accuracy": acc,
                       "val_error": vloss,
                       "val_accuracy": vacc})
            
            print("%d Complete. Loss = %f, acc = %f total loss=%f" %(epoch+1, loss, acc, vloss))

        print("Update complete. Total epochs = %d" %(epochs,))
        return losses
    
    # Nesterov accelerated gradient descent
    def nag(self, epochs = 10, bat_size = 16, loss_type = 'cross', l2 = 0, eta = 0.01, gamma = 0.9):
        losses = []
        pVW = []          # prev momentum parameter for W
        pVB = []          # prev Momentum parameter for b
        VW = []           # momentum parameter for W
        VB = []           # Momentum parameter for b
        W_old =[]
        b_old = []
        for layer in self.layers:
            VW.append(np.zeros((layer.W.shape[0], layer.W.shape[1] )) )
            VB.append(np.zeros((layer.b.shape[0],1)))
            pVW.append(np.zeros((layer.W.shape[0], layer.W.shape[1] )) )
            pVB.append(np.zeros((layer.b.shape[0],1)))
            W_old.append(np.zeros((layer.W.shape[0], layer.W.shape[1] )) )
            b_old.append(np.zeros((layer.b.shape[0],1)))

        data_points = 0
        DW = []     #List of W_grads of all layer
        DB = []     #List of b_grads of all layer
        for i in range(len(self.layers)):
            DW.append(np.zeros((self.layers[i].W.shape[0], self.layers[i].W.shape[1] )) )
            DB.append(np.zeros((self.layers[i].b.shape[0],1)) )
            W_old[i] = self.layers[i].W
            b_old[i] = self.layers[i].b

        for epoch in range(epochs):
            loss = 0.0
            for x,y in zip(self.X.transpose(),self.Y.transpose()):
                #feedforward ----------------------------------
                data_points += 1
                y = y.reshape(len(y),1)
                yh = x.reshape(len(x),1)
                
                for layer in self.layers:
                    yh = layer.feedforward(yh)

                delA = yh

                # gradient calculation ----------------------------
                for i in range(len(self.layers)-1, -1, -1):                   
                    if(self.layers[i].op==True):
                        dW,db,delA = self.layers[i].calc_grad(delA, y)
                    else:
                        dW,db,delA = self.layers[i].calc_grad(delA)

                    DW[i] = DW[i]+dW
                    DB[i] = DB[i]+db
                       
                # NAG updates ----------------------------------------
                if(data_points==bat_size):
                    data_points=0
                    for i in range(len(self.layers)-1, -1, -1):
                        VW[i] = (gamma*pVW[i])+ ((eta*DW[i])/bat_size)
                        VB[i] = (gamma*pVB[i])+ ((eta*DB[i])/bat_size)
                        
                        self.layers[i].W =  (1-eta*l2)*W_old[i] - gamma*pVW[i]
                        self.layers[i].b =  b_old[i] - gamma*pVB[i]
                        
                        W_old[i] =  W_old[i] - VW[i]
                        b_old[i] =  b_old[i] - VB[i]
                    
                    pVW = VW
                    pVB = VB
                        
                        
                    DW.clear()
                    DB.clear()
                    for layer in self.layers:
                        DW.append(np.zeros((layer.W.shape[0], layer.W.shape[1] )) )
                        DB.append(np.zeros((layer.b.shape[0],1)) )

            vacc,vloss = self.metric(self.Xval,self.Yval,loss_type)
            acc,loss = self.metric(self.X,self.Y,loss_type)
            losses.append(loss)
            wandb.log({"epoch": epoch+1, 
                       "loss": loss,
                       "accuracy": acc,
                       "val_error": vloss,
                       "val_accuracy": vacc})
            
            print("%d Complete. Loss = %f, acc = %f total loss=%f" %(epoch+1, loss, acc, vloss))
            
        for i in range(len(self.layers)):
            self.layers[i].W =  W_old[i]
            self.layers[i].b =  b_old[i]
            
        print("Update complete. Total epochs = %d" %(epochs,))
        return losses

    # RMSProp
    def rmsprop(self, epochs = 10, batch_size = 16, loss_type = 'cross', l2 = 0, eta = 0.01, beta = 0.9, epsilon = 1e-8):
            
        epsW = []
        epsB = []
        nW = []
        nB = []
        losses = []
        DW = []     #List of W_grads of all layer
        DB = []     #List of b_grads of all layer
        seen = 0

        for layer in self.layers:
            epsW.append(np.full((layer.W.shape[0], layer.W.shape[1]),epsilon))
            epsB.append(np.full((layer.b.shape[0],1),epsilon))
            nW.append(np.zeros((layer.W.shape[0], layer.W.shape[1])))
            nB.append(np.zeros((layer.b.shape[0],1)))
            DW.append(np.zeros((layer.W.shape[0], layer.W.shape[1])))
            DB.append(np.zeros((layer.b.shape[0],1)))

        for epoch in range(epochs):
            loss = 0.0
            for x,y in zip(self.X.transpose(),self.Y.transpose()):
                seen += 1
                #feedforward
                y = y.reshape(len(y),1)
                yh = x.reshape(len(x),1)
                
                for layer in self.layers:
                    yh = layer.feedforward(yh)
               
                delA = yh

                #gradient calculation
                for i in range(len(self.layers)-1, -1, -1):                 
                    if (self.layers[i].op==True):
                        dW,db,delA = self.layers[i].calc_grad(delA, y)
                    else:
                        dW,db,delA = self.layers[i].calc_grad(delA)

                    DW[i] = DW[i]+dW
                    DB[i] = DB[i]+db

                if (seen == batch_size):
                    DW[i] = DW[i]/batch_size
                    DB[i] = DB[i]/batch_size
                    seen = 0
                    #rmsprop update
                    for i in range(len(self.layers)-1, -1, -1):
                        nW[i] = (beta*nW[i]) + (1 - beta)*(np.square(DW[i]))
                        nB[i] = (beta*nB[i]) + (1 - beta)*(np.square(DB[i]))

                        self.layers[i].W =  (1-eta*l2)*self.layers[i].W - np.multiply((eta/np.sqrt(nW[i] + epsW[i])),DW[i])
                        self.layers[i].b =  self.layers[i].b - np.multiply((eta/np.sqrt(nB[i] + epsB[i])),DB[i]) 
                    DW.clear()
                    DB.clear()
                    for layer in self.layers:
                        DW.append(np.zeros((layer.W.shape[0], layer.W.shape[1] )))
                        DB.append(np.zeros((layer.b.shape[0],1))) 

            vacc,vloss = self.metric(self.Xval,self.Yval,loss_type)
            acc, loss = self.metric(self.X,self.Y,loss_type)
            losses.append(loss)
            '''wandb.log({"epoch": epoch+1, 
                       "loss": loss,
                       "accuracy": acc,
                       "val_error": vloss,
                       "val_accuracy": vacc})'''
            
            print("%d Complete. Loss = %f, acc = %f total loss=%f" %(epoch+1, loss, acc, vloss))

        print("Update complete. Total epochs = %d" %(epochs,))
        return losses

    #Adam 
    def adam(self, epochs = 10, batch_size = 16, loss_type = 'cross', l2 = 0, eta = 0.01, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8):
        epsW = []
        epsB = []
        vW = []
        vB = []
        mW = []
        mB = []
        losses = []
        DW = []     #List of W_grads of all layer
        DB = []     #List of b_grads of all layer
        seen = 0

        for layer in self.layers:
            epsW.append(np.full((layer.W.shape[0], layer.W.shape[1]),epsilon))
            epsB.append(np.full((layer.b.shape[0],1),epsilon))
            mW.append(np.zeros((layer.W.shape[0], layer.W.shape[1])))
            mB.append(np.zeros((layer.b.shape[0],1)))
            vW.append(np.zeros((layer.W.shape[0], layer.W.shape[1])))
            vB.append(np.zeros((layer.b.shape[0],1)))
            DW.append(np.zeros((layer.W.shape[0], layer.W.shape[1])))
            DB.append(np.zeros((layer.b.shape[0],1)))

        for epoch in range(epochs):
            loss = 0.0
            for x,y in zip(self.X.transpose(),self.Y.transpose()):
                seen += 1
                #feedforward
                y = y.reshape(len(y),1)
                yh = x.reshape(len(x),1)
                
                for layer in self.layers:
                    yh = layer.feedforward(yh)
                
                delA = yh

                #gradient calculation
                for i in range(len(self.layers)-1, -1, -1):                 
                    if (self.layers[i].op==True):
                        dW,db,delA = self.layers[i].calc_grad(delA, y)
                    else:
                        dW,db,delA = self.layers[i].calc_grad(delA)

                    DW[i] = DW[i]+dW
                    DB[i] = DB[i]+db

                if (seen == batch_size):
                    seen = 0
                    DW[i] = DW[i]/batch_size
                    DB[i] = DB[i]/batch_size
                    #adam update
                    f1 = 1 - (beta1**(epoch+1))
                    f2 = 1 - (beta2**(epoch+1))
                    for i in range(len(self.layers)-1, -1, -1):
                        mW[i] = (beta1 * mW[i]) + (1 - beta1)*DW[i]
                        mB[i] = (beta1 * mB[i]) + (1 - beta1)*DB[i]
                        vW[i] = (beta2 * vW[i]) + (1 - beta2)*(np.square(DW[i]))
                        vB[i] = (beta2 * vB[i]) + (1 - beta2)*(np.square(DB[i]))
                        
                        self.layers[i].W =  (1-eta*l2)*self.layers[i].W - np.multiply((eta/np.sqrt((vW[i] / f2) + epsW[i])),mW[i]/ f1)
                        self.layers[i].b =  self.layers[i].b - np.multiply((eta/np.sqrt((vB[i]/ f2) + epsB[i])),mB[i]/ f1) 
                    DW.clear()
                    DB.clear()
                    for layer in self.layers:
                        DW.append(np.zeros((layer.W.shape[0], layer.W.shape[1] )))
                        DB.append(np.zeros((layer.b.shape[0],1))) 


            vacc, vloss = self.metric(self.Xval,self.Yval,loss_type)
            acc, loss = self.metric(self.X,self.Y,loss_type)
            losses.append(loss)
            wandb.log({"epoch": epoch+1, 
                       "loss": loss,
                       "accuracy": acc,
                       "val_error": vloss,
                       "val_accuracy": vacc})
            
            print("%d Complete. Loss = %f, acc = %f total loss=%f" %(epoch+1, loss, acc, vloss))

        print("Update complete. Total epochs = %d" %(epochs,))
        return losses

    #Nadam
    def nadam(self, epochs = 10, batch_size = 16, loss_type = 'cross', l2 = 0, eta = 0.01, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8):    
        epsW = []
        epsB = []
        vW = []
        vB = []
        mW = []
        mB = []
        losses = []
        DW = []     #List of W_grads of all layer
        DB = []     #List of b_grads of all layer
        seen = 0

        for layer in self.layers:
            epsW.append(np.full((layer.W.shape[0], layer.W.shape[1]),epsilon))
            epsB.append(np.full((layer.b.shape[0],1),epsilon))
            mW.append(np.zeros((layer.W.shape[0], layer.W.shape[1])))
            mB.append(np.zeros((layer.b.shape[0],1)))
            vW.append(np.zeros((layer.W.shape[0], layer.W.shape[1])))
            vB.append(np.zeros((layer.b.shape[0],1)))
            DW.append(np.zeros((layer.W.shape[0], layer.W.shape[1])))
            DB.append(np.zeros((layer.b.shape[0],1)))

        for epoch in range(epochs):
            loss2 = 0.0
            for x,y in zip(self.X.transpose(),self.Y.transpose()):
                seen += 1
                #feedforward
                y = y.reshape(len(y),1)
                yh = x.reshape(len(x),1)
                
                for layer in self.layers:
                    yh = layer.feedforward(yh)

                delA = yh

                #gradient calculation
                for i in range(len(self.layers)-1, -1, -1):                 
                    if (self.layers[i].op==True):
                        dW,db,delA = self.layers[i].calc_grad(delA, y)
                    else:
                        dW,db,delA = self.layers[i].calc_grad(delA)

                    DW[i] = DW[i]+dW
                    DB[i] = DB[i]+db

                if (seen == batch_size):
                    seen = 0
                    DW[i] = DW[i]/batch_size
                    DB[i] = DB[i]/batch_size
                    #nadam update
                    f1 = 1 - (beta1**(epoch+1))
                    f2 = 1 - (beta2**(epoch+1))
                    for i in range(len(self.layers)-1, -1, -1):
                        mW[i] = (beta1 * mW[i]) + (1 - beta1)*DW[i]
                        mB[i] = (beta1 * mB[i]) + (1 - beta1)*DB[i]
                        vW[i] = (beta2 * vW[i]) + (1 - beta2)*(np.square(DW[i]))
                        vB[i] = (beta2 * vB[i]) + (1 - beta2)*(np.square(DB[i]))
                        
                        self.layers[i].W =  (1-eta*l2)*self.layers[i].W - np.multiply((eta/np.sqrt((vW[i] * beta2 / f2) + epsW[i])),((mW[i] * beta1) + (1-beta1)*DW[i])/f1)
                        self.layers[i].b =  self.layers[i].b - np.multiply((eta/np.sqrt((vB[i] * beta2 / f2) + epsB[i])), ((mB[i] * beta1) + (1-beta1)*DB[i])/f1)
                    DW.clear()
                    DB.clear()
                    for layer in self.layers:
                        DW.append(np.zeros((layer.W.shape[0], layer.W.shape[1] )))
                        DB.append(np.zeros((layer.b.shape[0],1))) 

            vacc,vloss = self.metric(self.Xval,self.Yval,loss_type)
            acc, loss = self.metric(self.X,self.Y,loss_type)
            losses.append(loss)
            wandb.log({"epoch": epoch+1, 
                       "loss": loss,
                       "accuracy": acc,
                       "val_error": vloss,
                       "val_accuracy": vacc})
            
            print("%d Complete. Loss = %f, acc = %f total loss=%f" %(epoch+1, loss, acc, vloss))

        print("Update complete. Total epochs = %d" %(epochs,))
        return losses

    # to calculate the predicted y value
    def classify(self, X):
        pred = []
        for x in X.transpose():
            yh = x.reshape(len(x),1)
            
            #feeding forward without saving
            for layer in self.layers:
                yh = layer.feed(yh)
                
            pred.append(yh.T)
        
        return pred

    # to calculate accuracy and loss
    def metric(self,x,y,loss_type):
        pred = self.classify(x)
        count = 0
        for i in range(y.shape[1]):
            if(np.argmax(pred[i]) == np.argmax(y.T[i])):
                count += 1
        acc = (count*100)/y.shape[1]
        loss = 0
        if loss_type == 'mse':
            for i in range(0, y.shape[1]):
                loss = loss + mse(y.T[i],pred[i].T)
        else:
            for i in range(0, y.shape[1]):
                index = np.argmax(y.T[i])
                loss = loss + (-np.log((1e-300)+float(pred[i][0][index])))
        loss = loss/y.shape[1]
        return acc, loss

#### Data generation and pre-processing

In [None]:
def data_generation():
    """Load Fashion-MNIST and return a 90/10 train/validation split.

    Images are flattened to one row per image and scaled by 1/255 before
    splitting. The official test split was previously reshaped and scaled
    as well but never used or returned; that dead work is removed.

    Returns:
        (X_train, ytrain_oneHot, X_val, yval_oneHot): inputs are transposed
        so that each column is one sample, and labels are one-hot encoded
        with 10 classes, one column per sample.
    """
    # Only the training split is used here.
    (trainX, trainy), _ = fashion_mnist.load_data()

    # Flatten each image and scale pixel values.
    trainX = trainX.reshape(trainX.shape[0], -1) / 255.0

    X_train, X_val, y_train, y_val = train_test_split(trainX, trainy, test_size=0.1, random_state=45)
    ytrain_oneHot = oneHotEnc(y_train, 10)
    yval_oneHot = oneHotEnc(y_val, 10)
    return X_train.transpose(), ytrain_oneHot, X_val.transpose(), yval_oneHot

#### Set up the sweep configuration for wandb

In [None]:
# Hyper-parameter search space handed to wandb: random search that
# maximises the logged "val_accuracy" metric.
sweep_config = {
    "name": "WandB sweep",
    "method": "random",  # grid
    "metric": {
        "name": "val_accuracy",
        "goal": "maximize",
    },
    "parameters": {
        "epochs": {"values": [5, 10]},
        "no_hidden_layers": {"values": [3, 4, 5]},
        "weight_decay": {"values": [0, 0.0005, 0.5]},
        "neurons": {"values": [32, 64, 128]},
        "learning_rate": {"values": [0.001, 0.0001]},
        "optimizer": {"values": ["sgd", "momentumgd", "NAG", "rmsprop", "adam", "nadam"]},
        "batch_size": {"values": [16, 32, 64]},
        "loss_func": {"values": ["cross_entropy"]},
        "weight_initializer": {"values": ["random", "xavier"]},
        "activation": {"values": ["tanh", "sigmoid", "relu"]},
    },
}

#### Train the model using the sweep config

In [None]:
def train():
    """Run a single wandb trial: build a network from the run config and train it.

    Steps:
      1. Start a wandb run seeded with ``config_defaults`` (a sweep agent
         overrides these with the values it sampled).
      2. Name the run after its hyperparameters.
      3. Load the data with ``data_generation()`` and construct ``network``.
      4. Train with the optimizer method selected by ``config.optimizer``.

    Returns:
        Whatever the selected optimizer method of ``network`` returns.

    Raises:
        ValueError: If ``config.optimizer`` is not one of
            'sgd', 'momentumgd', 'NAG', 'rmsprop', 'adam', 'nadam'.
            (Previously an unknown name fell through every ``if`` and
            surfaced as an ``UnboundLocalError`` on ``return losses``.)
    """
    config_defaults = {
        'epochs': 10,
        'no_hidden_layers': 4,
        'weight_decay': 0.0005,
        'neurons': 16,
        'learning_rate': 0.001,
        'optimizer': 'nadam',
        'batch_size': 16,
        'loss_func': 'cross_entropy',
        'weight_initializer': 'random',
        'activation': 'sigmoid',
    }

    wandb.init(config=config_defaults)
    config = wandb.config

    # Encode every hyperparameter in the run name so runs are identifiable
    # in the wandb UI without opening their config.
    wandb.run.name = "ep_{}_nhl_{}_hls_{}_eta_{}_opt_{}_bs_{}_loss_{}_winit_{}_act_{}_wdecay_{}".format(
        config.epochs,
        config.no_hidden_layers,
        config.neurons,
        config.learning_rate,
        config.optimizer,
        config.batch_size,
        config.loss_func,
        config.weight_initializer,
        config.activation,
        config.weight_decay,
    )

    X_train, ytrain_oneHot, X_val, y_val = data_generation()

    # NOTE(review): the first argument is no_hidden_layers + 1 -- presumably
    # the total layer count including the output layer; confirm against
    # network.__init__.
    fist = network(
        config.no_hidden_layers + 1,
        config.neurons,
        config.activation,
        config.weight_initializer,
        X_train,
        ytrain_oneHot,
        X_val,
        y_val,
    )

    # Optimizers that share the (epochs, batch_size, loss, weight_decay, lr)
    # call signature, keyed by the name used in the sweep configuration.
    minibatch_optimizers = {
        'adam': fist.adam,
        'nadam': fist.nadam,
        'rmsprop': fist.rmsprop,
        'NAG': fist.nag,
        'momentumgd': fist.momentum,
    }

    if config.optimizer == 'sgd':
        # sgd is the only optimizer that is called without a batch size.
        losses = fist.sgd(config.epochs, config.loss_func, config.weight_decay, config.learning_rate)
    elif config.optimizer in minibatch_optimizers:
        losses = minibatch_optimizers[config.optimizer](
            config.epochs,
            config.batch_size,
            config.loss_func,
            config.weight_decay,
            config.learning_rate,
        )
    else:
        raise ValueError(
            "Unknown optimizer {!r}; expected one of {}".format(
                config.optimizer, ['sgd'] + list(minibatch_optimizers)
            )
        )

    return losses


#### Get the sweep id and call the train function via wandb

In [None]:
# Register the sweep described by sweep_config under the "Ass1_sweep" project;
# the returned id is what wandb.agent() uses to fetch trial configurations.
sweep_id = wandb.sweep(sweep_config, project="Ass1_sweep")

Create sweep with ID: 4w1yusga
Sweep URL: https://wandb.ai/pandp/Ass1_sweep/sweeps/4w1yusga


In [None]:
# Launch the sweep agent: it calls train() once per trial, for at most 40 trials.
wandb.agent(sweep_id, train, count = 40)

[34m[1mwandb[0m: Agent Starting Run: 1en8u6zq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	no_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentumgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initializer: xavier


1 Complete. Loss = 0.580728, acc = 79.155556 total loss=0.576575
2 Complete. Loss = 0.494764, acc = 82.174074 total loss=0.495142
3 Complete. Loss = 0.455553, acc = 83.590741 total loss=0.459375
4 Complete. Loss = 0.432554, acc = 84.142593 total loss=0.441739
5 Complete. Loss = 0.409650, acc = 85.137037 total loss=0.422368
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▅▆▇█
epoch,▁▃▅▆█
loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_error,█▄▃▂▁

0,1
accuracy,85.13704
epoch,5.0
loss,0.40965
val_accuracy,84.91667
val_error,0.42237


[34m[1mwandb[0m: Agent Starting Run: j3rrulhz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	no_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: NAG
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initializer: random




1 Complete. Loss = nan, acc = 10.042593 total loss=nan
2 Complete. Loss = nan, acc = 10.042593 total loss=nan
3 Complete. Loss = nan, acc = 10.042593 total loss=nan
4 Complete. Loss = nan, acc = 10.042593 total loss=nan
5 Complete. Loss = nan, acc = 10.042593 total loss=nan
6 Complete. Loss = nan, acc = 10.042593 total loss=nan
7 Complete. Loss = nan, acc = 10.042593 total loss=nan
8 Complete. Loss = nan, acc = 10.042593 total loss=nan
9 Complete. Loss = nan, acc = 10.042593 total loss=nan
10 Complete. Loss = nan, acc = 10.042593 total loss=nan
Update complete. Total epochs = 10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
loss,
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_error,

0,1
accuracy,10.04259
epoch,10.0
loss,
val_accuracy,9.61667
val_error,


[34m[1mwandb[0m: Agent Starting Run: q0atgz5q with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	no_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: NAG
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initializer: random


1 Complete. Loss = 1.765258, acc = 48.192593 total loss=1.800205
2 Complete. Loss = 1.316893, acc = 57.755556 total loss=1.357874
3 Complete. Loss = 1.140066, acc = 62.237037 total loss=1.194607
4 Complete. Loss = 1.049452, acc = 64.677778 total loss=1.102013
5 Complete. Loss = 0.993153, acc = 66.196296 total loss=1.048169
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▅▆▇█
epoch,▁▃▅▆█
loss,█▄▂▂▁
val_accuracy,▁▅▆▇█
val_error,█▄▂▂▁

0,1
accuracy,66.1963
epoch,5.0
loss,0.99315
val_accuracy,64.85
val_error,1.04817


[34m[1mwandb[0m: Agent Starting Run: x8bmej7c with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	no_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initializer: xavier


1 Complete. Loss = 1.610121, acc = 38.168519 total loss=1.610184
2 Complete. Loss = 1.493866, acc = 39.037037 total loss=1.492123
3 Complete. Loss = 1.487624, acc = 38.935185 total loss=1.485740
4 Complete. Loss = 1.498044, acc = 38.809259 total loss=1.496238
5 Complete. Loss = 1.511466, acc = 38.750000 total loss=1.509650
6 Complete. Loss = 1.525134, acc = 38.650000 total loss=1.523460
7 Complete. Loss = 1.537701, acc = 38.557407 total loss=1.536152
8 Complete. Loss = 1.549216, acc = 38.483333 total loss=1.547817
9 Complete. Loss = 1.559180, acc = 38.448148 total loss=1.557848
10 Complete. Loss = 1.567181, acc = 38.383333 total loss=1.565981
Update complete. Total epochs = 10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁█▇▆▆▅▄▄▃▃
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▁▁▂▂▃▄▅▅▆
val_accuracy,▁▇█▇▇▆▆▅▅▄
val_error,█▁▁▂▂▃▄▄▅▆

0,1
accuracy,38.38333
epoch,10.0
loss,1.56718
val_accuracy,38.51667
val_error,1.56598


[34m[1mwandb[0m: Agent Starting Run: e54i0g9t with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	no_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initializer: random


1 Complete. Loss = 0.721829, acc = 76.922222 total loss=0.722206
2 Complete. Loss = 0.633511, acc = 80.174074 total loss=0.635057
3 Complete. Loss = 0.594637, acc = 81.685185 total loss=0.596157
4 Complete. Loss = 0.599832, acc = 81.724074 total loss=0.601331
5 Complete. Loss = 0.610024, acc = 81.605556 total loss=0.611470
6 Complete. Loss = 0.616349, acc = 81.512963 total loss=0.617933
7 Complete. Loss = 0.621604, acc = 81.398148 total loss=0.623292
8 Complete. Loss = 0.625363, acc = 81.261111 total loss=0.627111
9 Complete. Loss = 0.628154, acc = 81.124074 total loss=0.629831
10 Complete. Loss = 0.629876, acc = 80.955556 total loss=0.631571
Update complete. Total epochs = 10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▆█████▇▇▇
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▃▁▁▂▂▂▃▃▃
val_accuracy,▁▅██▇▇▇▆▆▆
val_error,█▃▁▁▂▂▃▃▃▃

0,1
accuracy,80.95556
epoch,10.0
loss,0.62988
val_accuracy,80.25
val_error,0.63157


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 46fd1e5m with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	no_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentumgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initializer: random


1 Complete. Loss = 8.975863, acc = 51.296296 total loss=9.204681
2 Complete. Loss = 3.122574, acc = 57.587037 total loss=3.469079
3 Complete. Loss = 1.999773, acc = 60.701852 total loss=2.293549
4 Complete. Loss = 1.533061, acc = 61.285185 total loss=1.768744
5 Complete. Loss = 1.260617, acc = 64.742593 total loss=1.455336
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▄▆▆█
epoch,▁▃▅▆█
loss,█▃▂▁▁
val_accuracy,▁▄▆▆█
val_error,█▃▂▁▁

0,1
accuracy,64.74259
epoch,5.0
loss,1.26062
val_accuracy,63.88333
val_error,1.45534


[34m[1mwandb[0m: Agent Starting Run: l9zmdjqi with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	no_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentumgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initializer: xavier


1 Complete. Loss = 1.799422, acc = 48.924074 total loss=1.799896
2 Complete. Loss = 1.351762, acc = 60.318519 total loss=1.352829
3 Complete. Loss = 1.114588, acc = 66.529630 total loss=1.116181
4 Complete. Loss = 0.959695, acc = 71.125926 total loss=0.962117
5 Complete. Loss = 0.854776, acc = 73.033333 total loss=0.857497
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▄▆▇█
epoch,▁▃▅▆█
loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_error,█▅▃▂▁

0,1
accuracy,73.03333
epoch,5.0
loss,0.85478
val_accuracy,73.15
val_error,0.8575


[34m[1mwandb[0m: Agent Starting Run: mb270bjb with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	no_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initializer: xavier


1 Complete. Loss = 2.229519, acc = 26.935185 total loss=2.230807
2 Complete. Loss = 1.680398, acc = 29.488889 total loss=1.681766
3 Complete. Loss = 1.428579, acc = 43.307407 total loss=1.424924
4 Complete. Loss = 1.266772, acc = 49.416667 total loss=1.260921
5 Complete. Loss = 1.132973, acc = 58.205556 total loss=1.127443
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▂▅▆█
epoch,▁▃▅▆█
loss,█▄▃▂▁
val_accuracy,▁▂▅▆█
val_error,█▅▃▂▁

0,1
accuracy,58.20556
epoch,5.0
loss,1.13297
val_accuracy,58.18333
val_error,1.12744


[34m[1mwandb[0m: Agent Starting Run: f2pfi0yr with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	no_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initializer: xavier


1 Complete. Loss = 2.302710, acc = 10.022222 total loss=2.302984
2 Complete. Loss = 2.302722, acc = 10.022222 total loss=2.303042
3 Complete. Loss = 2.302720, acc = 10.022222 total loss=2.303040
4 Complete. Loss = 2.302718, acc = 10.022222 total loss=2.303038
5 Complete. Loss = 2.302716, acc = 10.022222 total loss=2.303036
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▁▁▁▁
epoch,▁▃▅▆█
loss,▁█▇▆▄
val_accuracy,▁▁▁▁▁
val_error,▁███▇

0,1
accuracy,10.02222
epoch,5.0
loss,2.30272
val_accuracy,9.8
val_error,2.30304


[34m[1mwandb[0m: Agent Starting Run: rq7gd7rt with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	no_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initializer: random


1 Complete. Loss = 5.581534, acc = 28.833333 total loss=5.757979
2 Complete. Loss = 4.569163, acc = 35.720370 total loss=4.670863
3 Complete. Loss = 4.011333, acc = 39.622222 total loss=4.098948
4 Complete. Loss = 3.637485, acc = 42.390741 total loss=3.739167
5 Complete. Loss = 3.370486, acc = 44.524074 total loss=3.473433
6 Complete. Loss = 3.163812, acc = 46.150000 total loss=3.267831
7 Complete. Loss = 2.997604, acc = 47.603704 total loss=3.100486
8 Complete. Loss = 2.857624, acc = 48.772222 total loss=2.957752
9 Complete. Loss = 2.731002, acc = 49.875926 total loss=2.820419
10 Complete. Loss = 2.621125, acc = 50.770370 total loss=2.711466
Update complete. Total epochs = 10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▃▄▅▆▇▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▆▄▃▃▂▂▂▁▁
val_accuracy,▁▃▅▅▆▇▇▇██
val_error,█▆▄▃▃▂▂▂▁▁

0,1
accuracy,50.77037
epoch,10.0
loss,2.62113
val_accuracy,49.48333
val_error,2.71147


[34m[1mwandb[0m: Agent Starting Run: nxfup9lq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	no_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initializer: random


1 Complete. Loss = 288.054210, acc = 58.144444 total loss=289.872722
2 Complete. Loss = 255.582484, acc = 62.820370 total loss=257.782489
3 Complete. Loss = 238.770630, acc = 65.272222 total loss=241.357611
4 Complete. Loss = 228.579010, acc = 66.725926 total loss=230.389769
5 Complete. Loss = 221.686419, acc = 67.698148 total loss=224.939853
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▄▆▇█
epoch,▁▃▅▆█
loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_error,█▅▃▂▁

0,1
accuracy,67.69815
epoch,5.0
loss,221.68642
val_accuracy,67.31667
val_error,224.93985


[34m[1mwandb[0m: Agent Starting Run: 6m58biii with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	no_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initializer: xavier


1 Complete. Loss = 2.302855, acc = 10.042593 total loss=2.303693
2 Complete. Loss = 2.302011, acc = 10.042593 total loss=2.302847
3 Complete. Loss = 2.300574, acc = 10.042593 total loss=2.301408
4 Complete. Loss = 2.296516, acc = 10.042593 total loss=2.297341
5 Complete. Loss = 2.257216, acc = 19.970370 total loss=2.257948
6 Complete. Loss = 1.633620, acc = 29.805556 total loss=1.637532
7 Complete. Loss = 1.340318, acc = 45.283333 total loss=1.335515
8 Complete. Loss = 1.194640, acc = 49.492593 total loss=1.187082
9 Complete. Loss = 1.109878, acc = 53.098148 total loss=1.101040
10 Complete. Loss = 1.043943, acc = 57.305556 total loss=1.034969
Update complete. Total epochs = 10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▁▁▁▂▄▆▇▇█
epoch,▁▂▃▃▄▅▆▆▇█
loss,█████▄▃▂▁▁
val_accuracy,▁▁▁▁▂▄▆▇▇█
val_error,█████▄▃▂▁▁

0,1
accuracy,57.30556
epoch,10.0
loss,1.04394
val_accuracy,57.43333
val_error,1.03497


[34m[1mwandb[0m: Agent Starting Run: u5iqzkmw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	no_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initializer: random


1 Complete. Loss = 0.979896, acc = 65.170370 total loss=1.049174
2 Complete. Loss = 0.809130, acc = 69.759259 total loss=0.890376
3 Complete. Loss = 0.734191, acc = 72.314815 total loss=0.816147
4 Complete. Loss = 0.681381, acc = 74.485185 total loss=0.767922
5 Complete. Loss = 0.650015, acc = 75.625926 total loss=0.721186
6 Complete. Loss = 0.630746, acc = 76.651852 total loss=0.704611
7 Complete. Loss = 0.606417, acc = 77.505556 total loss=0.691693
8 Complete. Loss = 0.589795, acc = 78.324074 total loss=0.670466
9 Complete. Loss = 0.580723, acc = 78.546296 total loss=0.652468
10 Complete. Loss = 0.568147, acc = 79.383333 total loss=0.647609
Update complete. Total epochs = 10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▃▅▆▆▇▇▇██
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▃▄▆▆▇▇███
val_error,█▅▄▃▂▂▂▁▁▁

0,1
accuracy,79.38333
epoch,10.0
loss,0.56815
val_accuracy,78.68333
val_error,0.64761


[34m[1mwandb[0m: Agent Starting Run: krskyf2p with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	no_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentumgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initializer: xavier


1 Complete. Loss = 1.802900, acc = 47.166667 total loss=1.802728
2 Complete. Loss = 1.313300, acc = 61.416667 total loss=1.311435
3 Complete. Loss = 1.110403, acc = 63.551852 total loss=1.108439
4 Complete. Loss = 1.014184, acc = 65.825926 total loss=1.012426
5 Complete. Loss = 0.959949, acc = 67.685185 total loss=0.958094
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▆▇▇█
epoch,▁▃▅▆█
loss,█▄▂▁▁
val_accuracy,▁▆▇▇█
val_error,█▄▂▁▁

0,1
accuracy,67.68519
epoch,5.0
loss,0.95995
val_accuracy,67.73333
val_error,0.95809


[34m[1mwandb[0m: Agent Starting Run: fumum1s9 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	no_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: NAG
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initializer: xavier


1 Complete. Loss = 1.975768, acc = 43.359259 total loss=1.975883
2 Complete. Loss = 1.228067, acc = 60.031481 total loss=1.229327
3 Complete. Loss = 0.936234, acc = 68.838889 total loss=0.932094
4 Complete. Loss = 0.774768, acc = 73.050000 total loss=0.767902
5 Complete. Loss = 0.687649, acc = 75.140741 total loss=0.681116
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▅▇██
epoch,▁▃▅▆█
loss,█▄▂▁▁
val_accuracy,▁▅▇██
val_error,█▄▂▁▁

0,1
accuracy,75.14074
epoch,5.0
loss,0.68765
val_accuracy,75.48333
val_error,0.68112


[34m[1mwandb[0m: Agent Starting Run: 1ubisnlp with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	no_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initializer: random


1 Complete. Loss = 0.846544, acc = 71.064815 total loss=0.856238
2 Complete. Loss = 0.731184, acc = 74.746296 total loss=0.739038
3 Complete. Loss = 0.679119, acc = 76.420370 total loss=0.688461
4 Complete. Loss = 0.647650, acc = 77.414815 total loss=0.658383
5 Complete. Loss = 0.625352, acc = 78.088889 total loss=0.637436
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▅▆▇█
epoch,▁▃▅▆█
loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_error,█▄▃▂▁

0,1
accuracy,78.08889
epoch,5.0
loss,0.62535
val_accuracy,77.23333
val_error,0.63744


[34m[1mwandb[0m: Agent Starting Run: u5sraa5f with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	no_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initializer: xavier


1 Complete. Loss = 0.707073, acc = 75.720370 total loss=0.710506
2 Complete. Loss = 0.546985, acc = 80.770370 total loss=0.553236
3 Complete. Loss = 0.481498, acc = 83.122222 total loss=0.489907
4 Complete. Loss = 0.442904, acc = 84.383333 total loss=0.453493
5 Complete. Loss = 0.415349, acc = 85.503704 total loss=0.426867
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▅▆▇█
epoch,▁▃▅▆█
loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_error,█▄▃▂▁

0,1
accuracy,85.5037
epoch,5.0
loss,0.41535
val_accuracy,85.2
val_error,0.42687


[34m[1mwandb[0m: Agent Starting Run: 0z6cwb2g with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	no_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: NAG
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initializer: xavier


1 Complete. Loss = 2.346164, acc = 9.998148 total loss=2.349272
2 Complete. Loss = 2.308874, acc = 9.998148 total loss=2.310227
3 Complete. Loss = 2.303551, acc = 10.011111 total loss=2.304160
4 Complete. Loss = 2.302741, acc = 10.011111 total loss=2.303052
5 Complete. Loss = 2.302612, acc = 10.011111 total loss=2.302806
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▁███
epoch,▁▃▅▆█
loss,█▂▁▁▁
val_accuracy,██▁▁▁
val_error,█▂▁▁▁

0,1
accuracy,10.01111
epoch,5.0
loss,2.30261
val_accuracy,9.9
val_error,2.30281


[34m[1mwandb[0m: Agent Starting Run: o8p0v8ue with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	no_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentumgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initializer: xavier


1 Complete. Loss = 2.303953, acc = 10.022222 total loss=2.304927
2 Complete. Loss = 2.303982, acc = 10.022222 total loss=2.304981
3 Complete. Loss = 2.303996, acc = 10.022222 total loss=2.305001
4 Complete. Loss = 2.304010, acc = 10.022222 total loss=2.305005
5 Complete. Loss = 2.303917, acc = 10.022222 total loss=2.304890
6 Complete. Loss = 2.303947, acc = 10.022222 total loss=2.304944
7 Complete. Loss = 2.303961, acc = 10.022222 total loss=2.304964
8 Complete. Loss = 2.303974, acc = 10.022222 total loss=2.304967
9 Complete. Loss = 2.303882, acc = 10.022222 total loss=2.304854
10 Complete. Loss = 2.303912, acc = 10.022222 total loss=2.304908
Update complete. Total epochs = 10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
loss,▅▆▇█▃▅▅▆▁▃
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_error,▄▇██▃▅▆▆▁▄

0,1
accuracy,10.02222
epoch,10.0
loss,2.30391
val_accuracy,9.8
val_error,2.30491


[34m[1mwandb[0m: Agent Starting Run: vqutvwuk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	no_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initializer: random


1 Complete. Loss = 5.026422, acc = 18.640741 total loss=5.127769
2 Complete. Loss = 3.986701, acc = 23.494444 total loss=4.059979
3 Complete. Loss = 3.401449, acc = 27.033333 total loss=3.475701
4 Complete. Loss = 2.980264, acc = 29.892593 total loss=3.058200
5 Complete. Loss = 2.652894, acc = 32.627778 total loss=2.723030
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▃▅▇█
epoch,▁▃▅▆█
loss,█▅▃▂▁
val_accuracy,▁▃▅▆█
val_error,█▅▃▂▁

0,1
accuracy,32.62778
epoch,5.0
loss,2.65289
val_accuracy,31.55
val_error,2.72303


[34m[1mwandb[0m: Agent Starting Run: 6uo9yx8s with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	no_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initializer: random


1 Complete. Loss = 75.068157, acc = 45.974074 total loss=77.165963
2 Complete. Loss = 31.124625, acc = 49.742593 total loss=33.276867
3 Complete. Loss = 16.786353, acc = 52.200000 total loss=18.319211
4 Complete. Loss = 11.572142, acc = 54.994444 total loss=12.713593
5 Complete. Loss = 8.035520, acc = 57.459259 total loss=8.912675
6 Complete. Loss = 6.444348, acc = 60.737037 total loss=7.221396
7 Complete. Loss = 5.460833, acc = 61.312963 total loss=5.892906
8 Complete. Loss = 4.743285, acc = 62.633333 total loss=5.110193
9 Complete. Loss = 3.277595, acc = 65.246296 total loss=3.764331
10 Complete. Loss = 2.690475, acc = 67.794444 total loss=3.131254
Update complete. Total epochs = 10


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▂▃▄▅▆▆▆▇█
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▂▂▂▁▁▁▁▁
val_accuracy,▁▂▃▄▅▆▆▆▇█
val_error,█▄▂▂▂▁▁▁▁▁

0,1
accuracy,67.79444
epoch,10.0
loss,2.69048
val_accuracy,67.43333
val_error,3.13125


[34m[1mwandb[0m: Agent Starting Run: 0hc3ywuh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	no_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initializer: random


1 Complete. Loss = 0.910467, acc = 76.109259 total loss=0.910833
2 Complete. Loss = 1.091682, acc = 68.940741 total loss=1.095323
3 Complete. Loss = 1.099572, acc = 65.866667 total loss=1.104895
4 Complete. Loss = 1.121250, acc = 64.688889 total loss=1.127051
5 Complete. Loss = 1.144485, acc = 64.224074 total loss=1.150409
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,█▄▂▁▁
epoch,▁▃▅▆█
loss,▁▆▇▇█
val_accuracy,█▄▂▁▁
val_error,▁▆▇▇█

0,1
accuracy,64.22407
epoch,5.0
loss,1.14449
val_accuracy,63.7
val_error,1.15041


[34m[1mwandb[0m: Agent Starting Run: hbuflwow with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	no_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initializer: random


1 Complete. Loss = 184.913582, acc = 72.457407 total loss=186.673945
2 Complete. Loss = 177.887675, acc = 72.392593 total loss=185.673048
3 Complete. Loss = 144.475141, acc = 75.875926 total loss=153.360228
4 Complete. Loss = 166.861820, acc = 70.611111 total loss=173.950967
5 Complete. Loss = 101.464600, acc = 77.296296 total loss=106.569773
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▃▃▇▁█
epoch,▁▃▅▆█
loss,█▇▅▆▁
val_accuracy,▄▃▆▁█
val_error,██▅▇▁

0,1
accuracy,77.2963
epoch,5.0
loss,101.4646
val_accuracy,76.71667
val_error,106.56977


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vlqidg5h with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	no_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initializer: xavier


1 Complete. Loss = 0.738306, acc = 74.787037 total loss=0.734144
2 Complete. Loss = 0.633528, acc = 77.659259 total loss=0.628487
3 Complete. Loss = 0.591173, acc = 78.970370 total loss=0.586439
4 Complete. Loss = 0.564661, acc = 79.874074 total loss=0.560356
5 Complete. Loss = 0.545494, acc = 80.524074 total loss=0.541559
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▅▆▇█
epoch,▁▃▅▆█
loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_error,█▄▃▂▁

0,1
accuracy,80.52407
epoch,5.0
loss,0.54549
val_accuracy,80.5
val_error,0.54156


[34m[1mwandb[0m: Agent Starting Run: 88l82pte with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	no_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_initializer: xavier


1 Complete. Loss = 2.303184, acc = 10.022222 total loss=2.303952
2 Complete. Loss = 2.303180, acc = 10.022222 total loss=2.303948
3 Complete. Loss = 2.303176, acc = 10.022222 total loss=2.303944
4 Complete. Loss = 2.303172, acc = 10.022222 total loss=2.303940
5 Complete. Loss = 2.303169, acc = 10.022222 total loss=2.303936
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▁▁▁▁
epoch,▁▃▅▆█
loss,█▆▄▃▁
val_accuracy,▁▁▁▁▁
val_error,█▆▄▃▁

0,1
accuracy,10.02222
epoch,5.0
loss,2.30317
val_accuracy,9.8
val_error,2.30394


[34m[1mwandb[0m: Agent Starting Run: b5htpv1b with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	no_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_initializer: random


1 Complete. Loss = 350.740889, acc = 49.079630 total loss=351.708039
2 Complete. Loss = 311.310760, acc = 54.790741 total loss=315.988158
3 Complete. Loss = 290.246988, acc = 57.820370 total loss=294.113487
4 Complete. Loss = 275.638990, acc = 59.946296 total loss=277.496523
5 Complete. Loss = 264.448655, acc = 61.505556 total loss=264.508209
Update complete. Total epochs = 5


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▄▆▇█
epoch,▁▃▅▆█
loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_error,█▅▃▂▁

0,1
accuracy,61.50556
epoch,5.0
loss,264.44865
val_accuracy,61.51667
val_error,264.50821


[34m[1mwandb[0m: Agent Starting Run: 5ptk0y83 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_func: cross_entropy
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	no_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: momentumgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_initializer: xavier


#### Plotting the confusion matrix using wandb

In [29]:
# Display names for the ten Fashion-MNIST classes, listed in label order
# (position in the list == integer class label).
# Reference: https://github.com/zalandoresearch/fashion-mnist/blob/master/README.md#Labels
labels = [
    'Top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

#Confusion Matrix
def cm_show():
    """Train one fixed configuration and log its validation confusion matrix.

    Starts a wandb run, builds ``network(6, 64, 'tanh', 'xavier', ...)`` on the
    data from ``data_generation()``, trains it with
    ``rmsprop(5, 64, 'cross', 0.5, 0.0001)``, classifies the validation set and
    logs a confusion matrix (true vs. predicted class index, named via the
    module-level ``labels``) to wandb. Returns nothing.
    """
    wandb.init()
    X_train, ytrain_oneHot, X_val, y_val = data_generation()
    fist = network(6, 64, 'tanh', 'xavier', X_train, ytrain_oneHot, X_val, y_val)

    # The value returned by rmsprop is not needed here.
    fist.rmsprop(5, 64, 'cross', 0.5, 0.0001)

    outputs = fist.classify(X_val)
    # y_val is one-hot with one column per sample, so its transpose is
    # indexed per sample like `outputs`.
    targets = y_val.T
    sample_ids = range(len(outputs))
    y_pred = [np.argmax(outputs[k]) for k in sample_ids]
    y_true = [np.argmax(targets[k]) for k in sample_ids]

    chart_key = "ep=5_nhl=5_hls=64_eta=.0001_opt='rmsprop'_bs=64_loss='cross'_winit='xavier'_act='tanh'_wdecay=0.5"
    wandb.log({chart_key: wandb.sklearn.plot_confusion_matrix(y_true, y_pred, labels)})
    

In [30]:
# Train the fixed configuration and upload its confusion matrix to wandb.
cm_show()

VBox(children=(Label(value=' 0.01MB of 0.01MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

1 Complete. Loss = 0.669864, acc = 77.898148 total loss=0.669412
2 Complete. Loss = 0.501574, acc = 82.559259 total loss=0.499425
3 Complete. Loss = 0.450500, acc = 84.166667 total loss=0.451900
4 Complete. Loss = 0.430050, acc = 84.707407 total loss=0.435804
5 Complete. Loss = 0.406261, acc = 85.650000 total loss=0.413926
Update complete. Total epochs = 5
