In [1]:
from keras.datasets import fashion_mnist
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
(trainx,trainy),(testx,testy)=fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [3]:
#onehot encoder
def onehot_encoder(Y):
  Y_encoded = np.zeros((10, Y.shape[0]))
  for train_point in range(Y.shape[0]):
      class_label = Y[train_point]
      Y_encoded[class_label,train_point] = 1.0
  return Y_encoded

In [4]:
num_examples_train=int(0.9*trainx.shape[0])
num_examples_val=int(0.1*trainx.shape[0])
num_examples_test=testx.shape[0]


In [5]:
x_train, x_val, y_train, y_val = train_test_split(trainx, trainy, test_size=0.1,shuffle=True)

In [6]:
X_train=x_train.reshape(x_train.shape[0],28*28).T
X_val=x_val.reshape(x_val.shape[0],28*28).T
X_test=testx.reshape(testx.shape[0],28*28).T
X_train=X_train/255
X_val=X_val/255
X_test=X_test/255


In [7]:
Y_train=onehot_encoder(y_train)
Y_val=onehot_encoder(y_val)
Y_test=onehot_encoder(testy)


In [8]:
!pip install wandb


Collecting wandb
  Downloading wandb-0.12.10-py2.py3-none-any.whl (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 5.3 MB/s 
Collecting GitPython>=1.0.0
  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)
[K     |████████████████████████████████| 181 kB 35.9 MB/s 
Collecting shortuuid>=0.5.0
  Downloading shortuuid-1.0.8-py3-none-any.whl (9.5 kB)
Collecting yaspin>=1.0.0
  Downloading yaspin-2.1.0-py3-none-any.whl (18 kB)
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.5.6-py2.py3-none-any.whl (144 kB)
[K     |████████████████████████████████| 144 kB 45.3 MB/s 
Collecting gitdb<5,>=4.0.1
  Downloading gitdb-4.0.9-py3-none-any.whl (63 kB)
[K     |████████████████████████████████| 63 kB 1.7 MB/s 
[?25hCollecting smmap<6,>=3.0.1
  Downloading smmap-5.0.0-py3-none-any.whl (24 kB)
Building wheels for co

In [9]:
import numpy as np
import wandb

In [10]:
def sigmoid(z):
    return 1.0 / (1 + np.exp(-(z)))


def tanh(z):
    return np.tanh(z)


def sin(z):
    return np.sin(z)


def relu(z):
    return np.maximum(0, z) 


def softmax(Z):
    return np.exp(Z) / np.sum(np.exp(Z))


def grad_sigmoid(z):
    return  (1.0 / (1 + np.exp(-(z))))*(1 -  1.0 / (1 + np.exp(-(z))))

def grad_tanh(z):
    return 1 - np.tanh(z) ** 2

def grad_relu(z):
    if z>0:
      return 1
    else:
      return 0


In [11]:
 #initializers
def xavier_initializer(op,ip):
  std = np.sqrt(2 / (ip + op))
  return np.random.normal(0, std, size=(op, ip))

def random_initializer(op,ip):
  return np.random.normal(0, 1, size=(op, ip))

In [13]:
class neural_network2:

  def __init__(
      self, 
      num_hidden_layers,
      num_hidden_neurons, 
      X_train, 
      Y_train,  
      num_examples_train, 
      X_val, 
      Y_val, 
      num_examples_val,    
      optimizer,
      batch_size,
      lamda,
      learning_rate,
      max_epochs,
      activation,
      initializer,
      loss):
    #initalize neural nw layers
    self.layers = [784]+num_hidden_layers*[num_hidden_neurons]+[10]

    #initilialize num of examples
    self.num_examples_train = num_examples_train
    self.num_examples_val = num_examples_val
        
    #initlialize datset
    self.X_train=X_train
    self.X_val=X_val
    self.Y_train=Y_train
    self.Y_val=Y_val
   

    #loss fn
    self.loss_function = loss
    
    
    #setup initializer
    if initializer=="random":
      self.initializer = random_initializer
    elif initializer=="xavier":
      self.initializer=xavier_initializer
    else:
      raise Exception('this is wrong initializer')
    
    #setup activations
    if activation=="sigmoid":
      self.activation=sigmoid
      self.grad_activation=grad_sigmoid
    elif activation=="tanh":
      self.activation=tanh
      self.grad_activation=grad_tanh
    elif activation=="relu":
      self.activation=relu
      self.grad_activation=grad_relu
    else:
      raise Exception('this is wrong activation')

    #optimizers
    if optimizer=="sgd":
      self.optimizer=self.sgd
    elif optimizer=="mgd":
      self.optimizer=self.mgd
    elif optimizer=="nag":
      self.optimizer=self.nag
    elif optimizer=="rmsprop":
      self.optimizer=self.rmsprop
    elif optimizer=="adam":
      self.optimizer=self.adam
    elif optimizer=="nadam":
      self.optimizer=self.nadam
    else:
      raise Exception("wrong optimizer")
         

    #hyperparameters
    self.max_epochs = max_epochs
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.lamda=lamda
    
    #initialize weights
    self.W, self.b = self.Neuralnet_init(self.layers)




  # loss fn
  def cross_entropy_loss(self, Y_true, Y_pred):
      loss=np.sum(-(Y_true*np.log(Y_pred)))
      return loss

  def L2_loss(self, lamda):
    return (lamda/2) * np.sum([np.linalg.norm((self.W[i + 1]**2))  for i in range(len(self.W))])

  def mse_loss(self, Y_true, Y_pred):
      loss = np.sum(0.5*(Y_true - Y_pred) ** 2)
      return loss

  #accuracy and predict
  def accuracy(self, Y_true, Y_pred, num_examples):
    acc = 0
    for i in range(num_examples):
      if (np.argmax(Y_true[:, i])==np.argmax(Y_pred[:, i])):
        acc+=1
    accuracy = acc / num_examples
    return accuracy

  def predict(self,X,num_examples):
    Y_pred = []        
    for i in range(num_examples):
      Y, H, A = self.forwardPropagate(X[:, i].reshape(784, 1),self.W,self.b)
      Y_pred.append(Y.reshape(10,))
    Y_pred = np.array(Y_pred).transpose()
    return Y_pred
 
  #neural network weights and biases initializer
  def Neuralnet_init(self,layer_neurons):
      num_layers=len(self.layers)
      W = {}
      b = {}
      for i in range(0, num_layers - 1):
          W[i + 1] = self.initializer(layer_neurons[i + 1], layer_neurons[i])
          b[i + 1] = np.zeros((layer_neurons[i + 1], 1))
      return W,b
  
  def forwardPropagate(self,X, W, b):
    num_layers=len(self.layers)
    H={}
    A = {}

    H[0] = X
    A[0] = X
    for k in range(0, num_layers - 2):
      A[k + 1] = np.dot(W[k + 1],H[k])+ b[k + 1]
      H[k + 1] = self.activation(A[k + 1])

    #op layer
    A[num_layers - 1] = np.dot(W[num_layers-1],H[num_layers - 2])+ b[num_layers - 1]
    y_pred = softmax(A[num_layers - 1])
    H[num_layers - 1] = y_pred
    return y_pred,H,A
  
  
  
  
  def backPropagate(self,Ypred,H,A,Y_train,lamda=0):
    
    num_layers=len(self.layers)
    grad_A={}
    grad_H={}

    grad_W={}
    grad_b={}
    

    # Gradient of op layer(AL).
    grad_A[num_layers - 1] = -(Y_train - Ypred)

    for k in range(num_layers - 2, -1, -1):
      grad_W[k + 1] = np.outer(grad_A[k + 1],H[k])+(lamda/2 )* self.W[k + 1]

        
      grad_b[k + 1]=grad_A[k + 1]
      
      if k == 0:
        grad_H[k] = np.dot(self.W[k + 1].T,grad_A[k + 1])
        grad_A[k] = grad_H[k]*A[k]
      else:
        grad_H[k] = np.dot(self.W[k + 1].T,grad_A[k + 1])
          
        grad_A[k] = grad_H[k]*self.grad_activation(A[k])


          
    return grad_W,grad_b
    


  def sgd(self,epochs,num_examples,batch_size,learning_rate, lamda):
    loss_train = []
    acc_train = []
    acc_val = []
    num_layers=len(self.layers)
    for epoch in range(epochs):
      loss_per_point=0
      
      for i in range(X_train.shape[1]):
        Y,H,A = self.forwardPropagate(self.X_train[:,i].reshape(784,1), self.W, self.b) 
        grad_w, grad_b = self.backPropagate(Y,H,A,self.Y_train[:,i].reshape(10,1))
      
        loss_per_point= loss_per_point+ self.cross_entropy_loss(Y_train[:, i].reshape(10, 1), Y)+self.L2_loss(lamda)

        for k in range(num_layers-1):
          self.W[k+1]=self.W[k+1] - learning_rate * grad_w[k+1]
          self.b[k+1]=self.b[k+1] - learning_rate * grad_b[k+1]
            
      
         #loss    
      loss_train.append(loss_per_point/X_train.shape[1])

      #accuracy 
      Y_pred_final = self.predict(self.X_train,self.num_examples_train)
      acc_train.append(self.accuracy(Y_train, Y_pred_final, num_examples))
      acc_val.append(self.accuracy(self.Y_val, self.predict(self.X_val, self.num_examples_val), self.num_examples_val))

      print("epoch:",epoch,"training loss:",loss_train[epoch],"training accuracy",acc_train[epoch],"validation accuracy",acc_val[epoch])

      wandb.log({'epoch':epoch,'training_loss':loss_train[epoch],'training_accuracy':acc_train[epoch], 'validation_accuracy':acc_val[epoch] })


    return Y_pred_final,acc_val



  def mgd(self,epochs,num_examples,batch_size,learning_rate, lamda):
    #gamma = min(1 - 2 ** (-1 - np.log((epoch / 250.0) + 1, 2)), gamma)
    gamma=0.9
    eta=learning_rate
    loss_train = []
    acc_train = []
    acc_val = []
    del_w={}
    del_b={}
    prev_update_w={}
    prev_update_b={}
    update_w={}
    update_b={}
    
    X_train = self.X_train[:, :num_examples]
    Y_train = self.Y_train[:, :num_examples] 
    num_layers=len(self.layers)

    for k in range(num_layers-1):
      prev_update_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
      prev_update_b[k+1] =np.zeros((self.layers[k + 1], 1))

    for k in range(num_layers-1):
      update_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
      update_b[k+1] =np.zeros((self.layers[k + 1], 1))
    num_points_seen = 0
    for epoch in range(epochs):
      
      loss_per_point=0
      #initializing del_w and del_b
      
      for k in range(num_layers-1):
        del_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
        del_b[k+1] =np.zeros((self.layers[k + 1], 1))
      
      

      for i in range(X_train.shape[1]):
        Y,H,A = self.forwardPropagate(self.X_train[:,i].reshape(784,1), self.W, self.b) 
        grad_w, grad_b = self.backPropagate(Y,H,A,self.Y_train[:,i].reshape(10,1))
        
        
        #add grad of weights and biases to get del_w and del_b
        for k in range(num_layers-1):
          del_w[k+1]=del_w[k+1] + grad_w[k+1]
          del_b[k+1]=del_b[k+1] + grad_b[k+1]

        loss_per_point= loss_per_point+ self.cross_entropy_loss(Y_train[:, i].reshape(10, 1), Y )+self.L2_loss(lamda)
      
      
        num_points_seen +=1
      
        if num_points_seen % batch_size == 0:
          #updating history in batch
          for k in range(num_layers-1):
            update_w[k+1]=gamma*prev_update_w[k+1]+eta*del_w[k+1] 
            update_b[k+1]=gamma*prev_update_b[k+1]+eta*del_b[k+1]

          for k in range(num_layers-1):
            self.W[k+1]=self.W[k+1] - learning_rate * update_w[k+1]
            self.b[k+1]=self.b[k+1] - learning_rate * update_b[k+1]

          

          prev_update_w = update_w
          prev_update_b = update_b
          
          for k in range(num_layers-1):
            del_w[k+1] = np.zeros((self.layers[k + 1],self.layers[k]))
            del_b[k+1] =np.zeros((self.layers[k + 1], 1))
      
      #loss    
      loss_train.append(loss_per_point/X_train.shape[1])

      #accuracy 
      Y_pred_final = self.predict(self.X_train,self.num_examples_train)
      acc_train.append(self.accuracy(Y_train, Y_pred_final, num_examples))
      acc_val.append(self.accuracy(self.Y_val, self.predict(self.X_val, self.num_examples_val), self.num_examples_val))

      print("epoch:",epoch,"training loss:",loss_train[epoch],"training accuracy",acc_train[epoch],"validation accuracy",acc_val[epoch])

      wandb.log({'epoch':epoch,'training_loss':loss_train[epoch],'training_accuracy':acc_train[epoch], 'validation_accuracy':acc_val[epoch] })


    return Y_pred_final,acc_val



  def nag(self,epochs,num_examples,batch_size,learning_rate,lamda):
    
    #gamma = min(1 - 2 ** (-1 - np.log((calls / 250.0) + 1, 2)), gamma)
    
    gamma=0.9
    eta=learning_rate
    loss_train = []
    acc_train = []
    acc_val = []
    del_w={}
    del_b={}
    prev_update_w={}
    prev_update_b={}
    update_w={}
    update_b={}
    num_layers=len(self.layers)
    
    X_train = self.X_train[:, :num_examples]
    Y_train = self.Y_train[:, :num_examples]        

    #initalizing prev and latest update
    for k in range(num_layers-1):
      prev_update_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
      prev_update_b[k+1] =np.zeros((self.layers[k + 1], 1))
    
    for k in range(num_layers-1):
      update_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
      update_b[k+1] =np.zeros((self.layers[k + 1], 1))
    num_points_seen = 0
    
    #begin
    for epoch in range(epochs):
      
      loss_per_point=0
      #initializing del_w and del_b
      
      for k in range(num_layers-1):
        del_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
        del_b[k+1] =np.zeros((self.layers[k + 1], 1))
      
      #doing lookahead
      for k in range(num_layers-1):
        update_w[k+1] = gamma*prev_update_w[k+1]
        update_b[k+1] =gamma*prev_update_b[k+1]

      for i in range(X_train.shape[1]):
        
        #look ahead w,b
        for k in range(num_layers-1):
          self.W[k+1]=self.W[k+1]-update_w[k+1]
          self.b[k+1]=self.b[k+1]-update_b[k+1]
          
        Y,H,A = self.forwardPropagate(self.X_train[:,i].reshape(784,1), self.W, self.b) 
        grad_w, grad_b = self.backPropagate(Y,H,A,self.Y_train[:,i].reshape(10,1))
        
        #add grad of weights and biases to get del_w and del_b
        for k in range(num_layers-1):
          del_w[k+1]=del_w[k+1] + grad_w[k+1]
          del_b[k+1]=del_b[k+1] + grad_b[k+1]
    
        loss_per_point= loss_per_point+ self.cross_entropy_loss(Y_train[:, i].reshape(10, 1), Y )+self.L2_loss(lamda)
      
      
        num_points_seen +=1
      
        if num_points_seen % batch_size == 0:
          
          #updating history in batch
          for k in range(num_layers-1):
            update_w[k+1]=gamma*prev_update_w[k+1]+eta*del_w[k+1] 
            update_b[k+1]=gamma*prev_update_b[k+1]+eta*del_b[k+1]

          for k in range(num_layers-1):
            self.W[k+1]=self.W[k+1] - eta * update_w[k+1]
            self.b[k+1]=self.b[k+1] - eta * update_b[k+1]

          

          prev_update_w = update_w
          prev_update_b = update_b
          for k in range(num_layers-1):
            del_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
            del_b[k+1] =np.zeros((self.layers[k + 1], 1))
      
          
      #loss    
      loss_train.append(loss_per_point/X_train.shape[1])

      #accuracy 
      Y_pred_final = self.predict(self.X_train,self.num_examples_train)
      acc_train.append(self.accuracy(Y_train, Y_pred_final, num_examples))
      acc_val.append(self.accuracy(self.Y_val, self.predict(self.X_val, self.num_examples_val), self.num_examples_val))

      print("epoch:",epoch,"training loss:",loss_train[epoch],"training accuracy",acc_train[epoch],"validation accuracy",acc_val[epoch])

      wandb.log({'epoch':epoch,'training_loss':loss_train[epoch],'training_accuracy':acc_train[epoch], 'validation_accuracy':acc_val[epoch] })


    return Y_pred_final,acc_val

  def rmsprop(self,epochs,num_examples, batch_size, learning_rate, lamda):
    
    eta=learning_rate
    loss_train = []
    acc_train = []
    acc_val = []
    del_w={}
    del_b={}
    
    update_w={}
    update_b={}
    eps=10**(-8)
    beta=0.9

    num_layers=len(self.layers)
    X_train = self.X_train[:, :num_examples]
    Y_train = self.Y_train[:, :num_examples]


    #initialize update
    for k in range(num_layers-1):
      update_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
      update_b[k+1] =np.zeros((self.layers[k + 1], 1))
    
    num_points_seen = 0
    
    for epoch in range(epochs):
      
      loss_per_point=0
      #initializing del_w and del_b
      
      for k in range(num_layers-1):
        del_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
        del_b[k+1] =np.zeros((self.layers[k + 1], 1))
      
      

      for i in range(X_train.shape[1]):
        Y,H,A = self.forwardPropagate(self.X_train[:,i].reshape(784,1), self.W, self.b) 
        grad_w, grad_b = self.backPropagate(Y,H,A,self.Y_train[:,i].reshape(10,1))
        
        #add grad of weights and biases to get del_w and del_b
        for k in range(num_layers-1):
          del_w[k+1]=del_w[k+1] + grad_w[k+1]
          del_b[k+1]=del_b[k+1] + grad_b[k+1]
    
        loss_per_point= loss_per_point+ self.cross_entropy_loss(Y_train[:, i].reshape(10, 1), Y )+self.L2_loss(lamda)
      
      
        num_points_seen +=1
      
        if num_points_seen % batch_size == 0:
          #updating history in batch
          for k in range(num_layers-1):
            update_w[k+1]=beta*update_w[k+1]+ (1-beta)*((del_w[k+1])**2) 
            update_b[k+1]=beta*update_b[k+1]+ (1-beta)*((del_b[k+1])**2)

          for k in range(num_layers-1):
            self.W[k+1]=self.W[k+1] - ((eta)/(np.sqrt(update_w[k+1]+eps)))*del_w[k+1]
            self.b[k+1]=self.b[k+1] - ((eta)/(np.sqrt(update_b[k+1]+eps)))*del_b[k+1]

        
          for k in range(num_layers-1):
            del_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
            del_b[k+1] =np.zeros((self.layers[k + 1], 1))
      
          
              
      #loss    
      loss_train.append(loss_per_point/X_train.shape[1])

      #accuracy 
      Y_pred_final = self.predict(self.X_train,self.num_examples_train)
      acc_train.append(self.accuracy(Y_train, Y_pred_final, num_examples))
      acc_val.append(self.accuracy(self.Y_val, self.predict(self.X_val, self.num_examples_val), self.num_examples_val))

      print("epoch:",epoch,"training loss:",loss_train[epoch],"training accuracy",acc_train[epoch],"validation accuracy",acc_val[epoch])

      wandb.log({'epoch':epoch,'training_loss':loss_train[epoch],'training_accuracy':acc_train[epoch], 'validation_accuracy':acc_val[epoch] })

    return Y_pred_final,acc_val

  
  
  def adam(self, epochs,num_examples, batch_size, learning_rate, lamda):
        
    
    X_train = self.X_train[:, :num_examples]
    Y_train = self.Y_train[:, :num_examples]        
    num_layers=len(self.layers)
    

    eta=learning_rate
    loss_train = []
    acc_train = []
    acc_val = []
    del_w={}
    del_b={}
    
    m_update_w={}
    m_update_b={}

    m_bias_corr_update_w={}
    m_bias_corr_update_b={}
    
    v_update_w={}
    v_update_b={}

    v_bias_corr_update_w={}
    v_bias_corr_update_b={}
    
    eps=10**(-8)
    beta1=0.9
    beta2=0.95

    
    #initialize update
    for k in range(num_layers-1):
      m_update_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
      m_update_b[k+1] =np.zeros((self.layers[k + 1], 1))

      m_bias_corr_update_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
      m_bias_corr_update_b[k+1] =np.zeros((self.layers[k + 1], 1))

      v_update_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
      v_update_b[k+1] =np.zeros((self.layers[k + 1], 1))

      v_bias_corr_update_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
      v_bias_corr_update_b[k+1] =np.zeros((self.layers[k + 1], 1))

    num_points_seen = 0
    
    for epoch in range(epochs):
      
      loss_per_point=0
      #initializing del_w and del_b
      
      for k in range(num_layers-1):
        del_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
        del_b[k+1] =np.zeros((self.layers[k + 1], 1))
      
      

      for i in range(X_train.shape[1]):
        Y,H,A = self.forwardPropagate(self.X_train[:,i].reshape(784,1), self.W, self.b) 
        grad_w, grad_b = self.backPropagate(Y,H,A,self.Y_train[:,i].reshape(10,1))
        
        #add grad of weights and biases to get del_w and del_b
        for k in range(num_layers-1):
          del_w[k+1]=del_w[k+1] + grad_w[k+1]
          del_b[k+1]=del_b[k+1] + grad_b[k+1]
    
        loss_per_point= loss_per_point+ self.cross_entropy_loss(Y_train[:, i].reshape(10, 1), Y )+self.L2_loss(lamda)
      
      
        num_points_seen +=1
      
        if num_points_seen % batch_size == 0:
          #updating history in batch
          for k in range(num_layers-1):
            
            m_update_w[k+1]=beta1*m_update_w[k+1]+ (1-beta1)*(del_w[k+1]) 
            m_update_b[k+1]=beta1*v_update_b[k+1]+ (1-beta1)*(del_b[k+1])
            
            v_update_w[k+1]=beta2*v_update_w[k+1]+ (1-beta2)*((del_w[k+1])**2) 
            v_update_b[k+1]=beta2*v_update_b[k+1]+ (1-beta2)*((del_b[k+1])**2)

            #beta**epoch+1 as epoch=0 it will be inf
            m_bias_corr_update_w[k+1]=m_update_w[k+1]/(1-((beta1)**(epoch+1)))
            m_bias_corr_update_b[k+1]=m_update_b[k+1]/(1-((beta1)**(epoch+1)))

            v_bias_corr_update_w[k+1]=v_update_w[k+1]/(1-((beta2)**(epoch+1)))
            v_bias_corr_update_b[k+1]=v_update_b[k+1]/(1-((beta2)**(epoch+1)))

          for k in range(num_layers-1):
            self.W[k+1]=self.W[k+1] - ((m_bias_corr_update_w[k+1])*(eta))/(np.sqrt(v_bias_corr_update_w[k+1]+eps))
            self.b[k+1]=self.b[k+1] - ((m_bias_corr_update_b[k+1])*(eta))/(np.sqrt(v_bias_corr_update_b[k+1]+eps))

        
          for k in range(num_layers-1):
            del_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
            del_b[k+1] =np.zeros((self.layers[k + 1], 1))
      

      #loss    
      loss_train.append(loss_per_point/X_train.shape[1])

      #accuracy 
      Y_pred_final = self.predict(self.X_train,self.num_examples_train)
      acc_train.append(self.accuracy(Y_train, Y_pred_final, num_examples))
      acc_val.append(self.accuracy(self.Y_val, self.predict(self.X_val, self.num_examples_val), self.num_examples_val))

      print("epoch:",epoch,"training loss:",loss_train[epoch],"training accuracy",acc_train[epoch],"validation accuracy",acc_val[epoch])

      wandb.log({'epoch':epoch,'training_loss':loss_train[epoch],'training_accuracy':acc_train[epoch], 'validation_accuracy':acc_val[epoch] })



    return Y_pred_final,acc_val


  def nadam(self, epochs,num_examples, batch_size, learning_rate, lamda ):
        
    
    X_train = self.X_train[:, :num_examples]
    Y_train = self.Y_train[:, :num_examples]        
    num_layers=len(self.layers)
    

    eta=learning_rate
    loss_train = []
    acc_train = []
    acc_val = []
    del_w={}
    del_b={}
    
    m_update_w={}
    m_update_b={}

    m_bias_corr_update_w={}
    m_bias_corr_update_b={}
    
    v_update_w={}
    v_update_b={}

    v_bias_corr_update_w={}
    v_bias_corr_update_b={}
    
    eps=10**(-8)
    beta1=0.9
    beta2=0.95

    
    #initialize update
    for k in range(num_layers-1):
      m_update_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
      m_update_b[k+1] =np.zeros((self.layers[k + 1], 1))

      m_bias_corr_update_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
      m_bias_corr_update_b[k+1] =np.zeros((self.layers[k + 1], 1))

      v_update_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
      v_update_b[k+1] =np.zeros((self.layers[k + 1], 1))

      v_bias_corr_update_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
      v_bias_corr_update_b[k+1] =np.zeros((self.layers[k + 1], 1))

    num_points_seen = 0
    
    for epoch in range(epochs):
      
      loss_per_point=0
      #initializing del_w and del_b
      
      for k in range(num_layers-1):
        del_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
        del_b[k+1] =np.zeros((self.layers[k + 1], 1))
      
      

      for i in range(X_train.shape[1]):
        Y,H,A = self.forwardPropagate(self.X_train[:,i].reshape(784,1), self.W, self.b) 
        grad_w, grad_b = self.backPropagate(Y,H,A,self.Y_train[:,i].reshape(10,1))
        
        #add grad of weights and biases to get del_w and del_b
        for k in range(num_layers-1):
          del_w[k+1]=del_w[k+1] + grad_w[k+1]
          del_b[k+1]=del_b[k+1] + grad_b[k+1]
    
        loss_per_point= loss_per_point+ self.cross_entropy_loss(Y_train[:, i].reshape(10, 1), Y )+self.L2_loss(lamda)
      
      
        num_points_seen +=1
      
        if num_points_seen % batch_size == 0:
          #updating history in batch
          for k in range(num_layers-1):
            
            m_update_w[k+1]=beta1*m_update_w[k+1]+ (1-beta1)*(del_w[k+1]) 
            m_update_b[k+1]=beta1*v_update_b[k+1]+ (1-beta1)*(del_b[k+1])
            
            v_update_w[k+1]=beta2*v_update_w[k+1]+ (1-beta2)*((del_w[k+1])**2) 
            v_update_b[k+1]=beta2*v_update_b[k+1]+ (1-beta2)*((del_b[k+1])**2)

            #beta**epoch+1 as epoch=0 it will be inf
            m_bias_corr_update_w[k+1]=m_update_w[k+1]/(1-((beta1)**(epoch+1)))
            m_bias_corr_update_b[k+1]=m_update_b[k+1]/(1-((beta1)**(epoch+1)))

            v_bias_corr_update_w[k+1]=v_update_w[k+1]/(1-((beta2)**(epoch+1)))
            v_bias_corr_update_b[k+1]=v_update_b[k+1]/(1-((beta2)**(epoch+1)))

            
          for k in range(num_layers-1):
            self.W[k+1]=self.W[k+1] - ((beta1*m_bias_corr_update_w[k+1]+((1-beta1)/(1-(beta1)**(epoch+1)))*del_w[k+1])*(eta))/((np.sqrt(v_bias_corr_update_w[k+1]+eps)))
            self.b[k+1]=self.b[k+1] - ((beta1*m_bias_corr_update_b[k+1]+((1-beta1)/(1-(beta1)**(epoch+1)))*del_b[k+1])*(eta)/(np.sqrt(v_bias_corr_update_b[k+1]+eps)))

        
          for k in range(num_layers-1):
            del_w[k+1] = np.zeros((self.layers[k + 1], self.layers[k]))
            del_b[k+1] =np.zeros((self.layers[k + 1], 1))
      

      #loss    
      loss_train.append(loss_per_point/X_train.shape[1])

      #accuracy 
      Y_pred_final = self.predict(self.X_train,self.num_examples_train)
      acc_train.append(self.accuracy(Y_train, Y_pred_final, num_examples))
      acc_val.append(self.accuracy(self.Y_val, self.predict(self.X_val, self.num_examples_val), self.num_examples_val))

      print("epoch:",epoch,"training loss:",loss_train[epoch],"training accuracy",acc_train[epoch],"validation accuracy",acc_val[epoch])

      wandb.log({'epoch':epoch,'training_loss':loss_train[epoch],'training_accuracy':acc_train[epoch], 'validation_accuracy':acc_val[epoch] })



    return Y_pred_final,acc_val





In [14]:
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [15]:
sweep_config = {"name":"Bayesian Sweep",
    'method':'bayes'
    }
metric_dict={"name":"validation_accuracy","goal":"maximize"}
sweep_config['metric'] = metric_dict

parameters_dict = {
    'optimizer': {
        'values': ['sgd','mgd','nag','rmsprop','adam','nadam']
        },
    'batch_size': {
        'values': [16, 32, 64]
        },
    'max_epochs': {
          'values': [5, 10]
        },
    'initializer': {
            'values': ["random", "xavier"]
        },

    'num_hidden_layers': {
            'values': [3, 4, 5]
        },
    'num_hidden_neurons':{'values':[32,64,128]
                          },

    'learning_rate':{'values':[0.001,0.0001]
                     },

    'lamda':{'values':[0,0.5,0.0005]
             },
    
    'activation': {
            'values': ['tanh','relu','sigmoid']
        }
        

    }
sweep_config['parameters'] = parameters_dict

In [16]:
sweep_id = wandb.sweep(sweep_config,project='CS6910_Deeplearning_Assignment1')

Create sweep with ID: q02io49m
Sweep URL: https://wandb.ai/dl22/CS6910_Deeplearning_Assignment1/sweeps/q02io49m


In [17]:
def train_nw():
  config_dict ={"max_epochs":10,
            "num_hidden_layers":3,
            "num_hidden_neurons":32,
            "lamda":0.0005,
            "learning_rate":0.001,
            "optimizer":"rmsprop",
            "batch_size":32,
            "activation":"tanh",
            "initializer":"random",
            "loss":"cross",
           }


  wandb.init(config = config_dict)

  wandb.run.name="hl_"+str(wandb.config.num_hidden_layers)+"_hn_"+str(wandb.config.num_hidden_neurons)+"_lr_"+str(wandb.config.learning_rate)+ "_opt_"+str(wandb.config.optimizer)+"_act_"+str(wandb.config.activation)+"_in_"+str(wandb.config.initializer)+"_bs_"+str(wandb.config.batch_size)
  
  nn =neural_network2(num_hidden_layers=wandb.config.num_hidden_layers,
        num_hidden_neurons=wandb.config.num_hidden_neurons,
        X_train=X_train,
        Y_train=Y_train,
        num_examples_train = num_examples_train,
        X_val = X_val,
        Y_val = Y_val,
        num_examples_val = num_examples_val,
        optimizer = wandb.config.optimizer,
        batch_size = wandb.config.batch_size,
        lamda = wandb.config.lamda,
        learning_rate = wandb.config.learning_rate,
        max_epochs = wandb.config.max_epochs,
        activation = wandb.config.activation,
        initializer = wandb.config.initializer,
        loss = wandb.config.loss
        )

  Y_pred_train,validation_accuracy = nn.optimizer(nn.max_epochs, nn.num_examples_train, nn.batch_size, nn.learning_rate,nn.lamda)

In [None]:
wandb.agent(sweep_id,train_nw, count = 100)

[34m[1mwandb[0m: Agent Starting Run: cmxqti96 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	num_hidden_neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd


epoch: 0 training loss: 2.260094207209976 training accuracy 0.4602962962962963 validation accuracy 0.4558333333333333
epoch: 1 training loss: 1.6643436481218974 training accuracy 0.5626851851851852 validation accuracy 0.5585
epoch: 2 training loss: 1.1027806530685766 training accuracy 0.6308333333333334 validation accuracy 0.6265
epoch: 3 training loss: 0.9340494372328215 training accuracy 0.6634074074074074 validation accuracy 0.6608333333333334
epoch: 4 training loss: 0.8430440742166352 training accuracy 0.6923518518518519 validation accuracy 0.6903333333333334


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▅▂▁▁
validation_accuracy,▁▄▆▇█

0,1
epoch,4.0
training_accuracy,0.69235
training_loss,0.84304
validation_accuracy,0.69033


[34m[1mwandb[0m: Agent Starting Run: 9ygtfg5s with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: random
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	num_hidden_neurons: 32
[34m[1mwandb[0m: 	optimizer: nag




VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: [32m[41mERROR[0m Run 9ygtfg5s errored: ValueError('The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()')
[34m[1mwandb[0m: Agent Starting Run: vfmv74yi with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: random
[34m[1mwandb[0m: 	lamda: 0
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	optimizer: mgd


epoch: 0 training loss: 13.429527957605757 training accuracy 0.1565740740740741 validation accuracy 0.15633333333333332
epoch: 1 training loss: 10.994187143859417 training accuracy 0.20287037037037037 validation accuracy 0.194
epoch: 2 training loss: 9.438957879708495 training accuracy 0.23427777777777778 validation accuracy 0.23216666666666666
epoch: 3 training loss: 8.528041841700183 training accuracy 0.2697222222222222 validation accuracy 0.26516666666666666
epoch: 4 training loss: 7.760953925163277 training accuracy 0.3073518518518519 validation accuracy 0.30233333333333334


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▅▆█
training_accuracy,▁▃▅▆█
training_loss,█▅▃▂▁
validation_accuracy,▁▃▅▆█

0,1
epoch,4.0
training_accuracy,0.30735
training_loss,7.76095
validation_accuracy,0.30233


[34m[1mwandb[0m: Agent Starting Run: b8vimoxv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: random
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	optimizer: nag


epoch: 0 training loss: 47.428174027064834 training accuracy 0.10011111111111111 validation accuracy 0.099
epoch: 1 training loss: 48.88786797707026 training accuracy 0.10048148148148148 validation accuracy 0.09566666666666666
epoch: 2 training loss: 48.33901436855499 training accuracy 0.09966666666666667 validation accuracy 0.103
epoch: 3 training loss: 48.75957008206072 training accuracy 0.10031481481481481 validation accuracy 0.09716666666666667
epoch: 4 training loss: 47.79214959213632 training accuracy 0.09972222222222223 validation accuracy 0.1025
epoch: 5 training loss: 48.63119739352201 training accuracy 0.10031481481481481 validation accuracy 0.09716666666666667
epoch: 6 training loss: 48.56082962819626 training accuracy 0.09966666666666667 validation accuracy 0.103
epoch: 7 training loss: 48.45423133449506 training accuracy 0.10025925925925926 validation accuracy 0.09766666666666667
epoch: 8 training loss: 49.13227722525593 training accuracy 0.10068518518518518 validation acc

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▄▇▁▅▁▅▁▅█▄
training_loss,▁▇▅▆▂▆▆▅█▂
validation_accuracy,▅▂█▄█▄█▄▁▅

0,1
epoch,9.0
training_accuracy,0.10011
training_loss,47.58665
validation_accuracy,0.099


[34m[1mwandb[0m: Agent Starting Run: p7qwjebu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: random
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	num_hidden_neurons: 64
[34m[1mwandb[0m: 	optimizer: mgd


epoch: 0 training loss: 12.412919588397274 training accuracy 0.1005 validation accuracy 0.096
epoch: 1 training loss: 12.360460918187753 training accuracy 0.10124074074074074 validation accuracy 0.09616666666666666
epoch: 2 training loss: 12.308492321645932 training accuracy 0.10174074074074074 validation accuracy 0.097
epoch: 3 training loss: 12.257005900659859 training accuracy 0.10237037037037038 validation accuracy 0.09766666666666667
epoch: 4 training loss: 12.205801246358458 training accuracy 0.10303703703703704 validation accuracy 0.09783333333333333


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▅▆█
training_accuracy,▁▃▄▆█
training_loss,█▆▄▃▁
validation_accuracy,▁▂▅▇█

0,1
epoch,4.0
training_accuracy,0.10304
training_loss,12.2058
validation_accuracy,0.09783


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7hq8bmq5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	optimizer: nag


epoch: 0 training loss: 3.834692315731251 training accuracy 0.7898333333333334 validation accuracy 0.781
epoch: 1 training loss: 4.994801018676699 training accuracy 0.7978703703703703 validation accuracy 0.788
epoch: 2 training loss: 6.167682575234458 training accuracy 0.825962962962963 validation accuracy 0.8136666666666666
epoch: 3 training loss: 7.344406170217484 training accuracy 0.8184814814814815 validation accuracy 0.8086666666666666
epoch: 4 training loss: 8.610480978922354 training accuracy 0.8312222222222222 validation accuracy 0.8163333333333334


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▅▆█
training_accuracy,▁▂▇▆█
training_loss,▁▃▄▆█
validation_accuracy,▁▂▇▆█

0,1
epoch,4.0
training_accuracy,0.83122
training_loss,8.61048
validation_accuracy,0.81633


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: azf1nah2 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	num_hidden_neurons: 32
[34m[1mwandb[0m: 	optimizer: sgd


epoch: 0 training loss: 2.3274751199926578 training accuracy 0.10011111111111111 validation accuracy 0.099
epoch: 1 training loss: 2.304025474904974 training accuracy 0.09540740740740741 validation accuracy 0.094
epoch: 2 training loss: 2.30374418384655 training accuracy 0.0965 validation accuracy 0.095
epoch: 3 training loss: 2.3034497321407525 training accuracy 0.09746296296296296 validation accuracy 0.09633333333333334
epoch: 4 training loss: 2.3031397936408844 training accuracy 0.09824074074074074 validation accuracy 0.09683333333333333


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▅▆█
training_accuracy,█▁▃▄▅
training_loss,█▁▁▁▁
validation_accuracy,█▁▂▄▅

0,1
epoch,4.0
training_accuracy,0.09824
training_loss,2.30314
validation_accuracy,0.09683


[34m[1mwandb[0m: Agent Starting Run: aaasnvqn with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: random
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	optimizer: mgd


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: [32m[41mERROR[0m Run aaasnvqn errored: ValueError('The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()')
[34m[1mwandb[0m: Agent Starting Run: hhyp08m1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: random
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	optimizer: mgd


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: [32m[41mERROR[0m Run hhyp08m1 errored: ValueError('The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()')
[34m[1mwandb[0m: Agent Starting Run: u1osde77 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	num_hidden_neurons: 32
[34m[1mwandb[0m: 	optimizer: rmsprop


epoch: 0 training loss: 3.327396598922249 training accuracy 0.8346296296296296 validation accuracy 0.8246666666666667
epoch: 1 training loss: 4.137447794076206 training accuracy 0.8616481481481482 validation accuracy 0.8468333333333333
epoch: 2 training loss: 4.951396125830348 training accuracy 0.8601296296296296 validation accuracy 0.8458333333333333
epoch: 3 training loss: 5.823508833064329 training accuracy 0.8628703703703704 validation accuracy 0.8483333333333334
epoch: 4 training loss: 6.727168926721321 training accuracy 0.8712592592592593 validation accuracy 0.8566666666666667


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▅▆█
training_accuracy,▁▆▆▆█
training_loss,▁▃▄▆█
validation_accuracy,▁▆▆▆█

0,1
epoch,4.0
training_accuracy,0.87126
training_loss,6.72717
validation_accuracy,0.85667


[34m[1mwandb[0m: Agent Starting Run: gzz6fn6e with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	optimizer: adam


epoch: 0 training loss: 2.8778353058661823 training accuracy 0.8511481481481481 validation accuracy 0.8403333333333334
epoch: 1 training loss: 2.880735407109476 training accuracy 0.8728148148148148 validation accuracy 0.8606666666666667
epoch: 2 training loss: 2.942931649917265 training accuracy 0.8833333333333333 validation accuracy 0.8703333333333333
epoch: 3 training loss: 3.0088212222744906 training accuracy 0.8903518518518518 validation accuracy 0.8765
epoch: 4 training loss: 3.075629522847597 training accuracy 0.8961111111111111 validation accuracy 0.881


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,▁▁▃▆█
validation_accuracy,▁▅▆▇█

0,1
epoch,4.0
training_accuracy,0.89611
training_loss,3.07563
validation_accuracy,0.881


[34m[1mwandb[0m: Agent Starting Run: m30qfqea with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam


epoch: 0 training loss: 5.0394507219464595 training accuracy 0.8395 validation accuracy 0.8256666666666667
epoch: 1 training loss: 8.408696279013578 training accuracy 0.851462962962963 validation accuracy 0.8358333333333333
epoch: 2 training loss: 11.299874732733501 training accuracy 0.8588703703703704 validation accuracy 0.8455
epoch: 3 training loss: 14.136299611567043 training accuracy 0.8632777777777778 validation accuracy 0.8496666666666667
epoch: 4 training loss: 17.013531800161985 training accuracy 0.8644074074074074 validation accuracy 0.8481666666666666


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▅▆█
training_accuracy,▁▄▆██
training_loss,▁▃▅▆█
validation_accuracy,▁▄▇██

0,1
epoch,4.0
training_accuracy,0.86441
training_loss,17.01353
validation_accuracy,0.84817


[34m[1mwandb[0m: Agent Starting Run: 6lr41cib with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 32
[34m[1mwandb[0m: 	optimizer: adam


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: [32m[41mERROR[0m Run 6lr41cib errored: ValueError('The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()')
[34m[1mwandb[0m: Agent Starting Run: 3da9oqci with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam


epoch: 0 training loss: 0.5428527540054479 training accuracy 0.8327777777777777 validation accuracy 0.8203333333333334
epoch: 1 training loss: 0.43421089770505467 training accuracy 0.8488888888888889 validation accuracy 0.8313333333333334
epoch: 2 training loss: 0.4105840635904732 training accuracy 0.8572037037037037 validation accuracy 0.8455
epoch: 3 training loss: 0.40096643618020594 training accuracy 0.8601481481481481 validation accuracy 0.8391666666666666
epoch: 4 training loss: 0.3985041241227611 training accuracy 0.8687777777777778 validation accuracy 0.85
epoch: 5 training loss: 0.39496733606377615 training accuracy 0.8679629629629629 validation accuracy 0.8506666666666667
epoch: 6 training loss: 0.39379810937173554 training accuracy 0.8657592592592592 validation accuracy 0.8456666666666667
epoch: 7 training loss: 0.3946307310876789 training accuracy 0.8712407407407408 validation accuracy 0.8538333333333333
epoch: 8 training loss: 0.3984876293280705 training accuracy 0.8749814

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▄▅▆▇▇▆▇█▇
training_loss,█▃▂▁▁▁▁▁▁▁
validation_accuracy,▁▃▆▅▇▇▆███

0,1
epoch,9.0
training_accuracy,0.87167
training_loss,0.39957
validation_accuracy,0.85433


[34m[1mwandb[0m: Agent Starting Run: oj195y3d with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam


epoch: 0 training loss: 0.49081176461881587 training accuracy 0.8552407407407407 validation accuracy 0.8438333333333333
epoch: 1 training loss: 0.37183083255862226 training accuracy 0.8760925925925926 validation accuracy 0.8636666666666667
epoch: 2 training loss: 0.34014523314563755 training accuracy 0.8854814814814815 validation accuracy 0.8728333333333333
epoch: 3 training loss: 0.3195569977658575 training accuracy 0.8919444444444444 validation accuracy 0.8771666666666667
epoch: 4 training loss: 0.3041774548047438 training accuracy 0.8966666666666666 validation accuracy 0.8801666666666667
epoch: 5 training loss: 0.2917797812545741 training accuracy 0.9003333333333333 validation accuracy 0.8813333333333333
epoch: 6 training loss: 0.281352070863022 training accuracy 0.9036481481481482 validation accuracy 0.8838333333333334
epoch: 7 training loss: 0.2723405050075621 training accuracy 0.9068703703703703 validation accuracy 0.8848333333333334
epoch: 8 training loss: 0.264409120285022 trai

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▄▅▆▆▇▇▇██
training_loss,█▄▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▆▆▇▇▇▇██

0,1
epoch,9.0
training_accuracy,0.91119
training_loss,0.25732
validation_accuracy,0.88867


[34m[1mwandb[0m: Agent Starting Run: 1s3gz56u with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	num_hidden_neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam


epoch: 0 training loss: 0.5262792931400506 training accuracy 0.8313148148148148 validation accuracy 0.8238333333333333
epoch: 1 training loss: 0.42962222885787743 training accuracy 0.8497407407407407 validation accuracy 0.844
epoch: 2 training loss: 0.40850787357194956 training accuracy 0.8638703703703704 validation accuracy 0.85
epoch: 3 training loss: 0.39969139217780664 training accuracy 0.8583333333333333 validation accuracy 0.8493333333333334
epoch: 4 training loss: 0.3933145541735382 training accuracy 0.860537037037037 validation accuracy 0.8413333333333334
epoch: 5 training loss: 0.3875646550057923 training accuracy 0.8653888888888889 validation accuracy 0.8491666666666666
epoch: 6 training loss: 0.38778292220864086 training accuracy 0.8689259259259259 validation accuracy 0.851
epoch: 7 training loss: 0.38993391734149446 training accuracy 0.8692592592592593 validation accuracy 0.8526666666666667
epoch: 8 training loss: 0.3910971519303885 training accuracy 0.8728333333333333 vali

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▄▆▆▆▇▇▇██
training_loss,█▃▂▂▁▁▁▁▁▁
validation_accuracy,▁▆▇▇▅▇████

0,1
epoch,9.0
training_accuracy,0.8722
training_loss,0.39261
validation_accuracy,0.85183


[34m[1mwandb[0m: Agent Starting Run: ofultrkb with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: [32m[41mERROR[0m Run ofultrkb errored: ValueError('The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()')
[34m[1mwandb[0m: Agent Starting Run: ql4dj79x with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam


epoch: 0 training loss: 0.5365475331208066 training accuracy 0.8380555555555556 validation accuracy 0.8283333333333334
epoch: 1 training loss: 0.4372193460115419 training accuracy 0.8548703703703704 validation accuracy 0.8438333333333333
epoch: 2 training loss: 0.41269612524072763 training accuracy 0.8533518518518518 validation accuracy 0.8436666666666667
epoch: 3 training loss: 0.4048515654839397 training accuracy 0.8688703703703704 validation accuracy 0.854
epoch: 4 training loss: 0.40032230928847296 training accuracy 0.8644444444444445 validation accuracy 0.8516666666666667


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▅▆█
training_accuracy,▁▅▄█▇
training_loss,█▃▂▁▁
validation_accuracy,▁▅▅█▇

0,1
epoch,4.0
training_accuracy,0.86444
training_loss,0.40032
validation_accuracy,0.85167


[34m[1mwandb[0m: Agent Starting Run: u54bnqqx with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam


epoch: 0 training loss: 2.9015034946616454 training accuracy 0.8514444444444444 validation accuracy 0.8373333333333334
epoch: 1 training loss: 2.905289141560185 training accuracy 0.8707407407407407 validation accuracy 0.8591666666666666
epoch: 2 training loss: 2.966211558604085 training accuracy 0.8812962962962962 validation accuracy 0.8685
epoch: 3 training loss: 3.0320850033794704 training accuracy 0.8889074074074074 validation accuracy 0.8735
epoch: 4 training loss: 3.0998330589780263 training accuracy 0.8943703703703704 validation accuracy 0.876
epoch: 5 training loss: 3.168535979537187 training accuracy 0.899537037037037 validation accuracy 0.8781666666666667
epoch: 6 training loss: 3.238097140351863 training accuracy 0.9034814814814814 validation accuracy 0.8808333333333334
epoch: 7 training loss: 3.3085788705326715 training accuracy 0.9060370370370371 validation accuracy 0.8835
epoch: 8 training loss: 3.3799920561223358 training accuracy 0.9081481481481481 validation accuracy 0.

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▅▅▆▇▇███
training_loss,▁▁▂▃▄▄▅▆▇█
validation_accuracy,▁▄▆▆▇▇▇███

0,1
epoch,9.0
training_accuracy,0.90976
training_loss,3.45237
validation_accuracy,0.88467


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: n9g87ib5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam


epoch: 0 training loss: 0.5452926663504182 training accuracy 0.8296296296296296 validation accuracy 0.8216666666666667
epoch: 1 training loss: 0.4381069925548327 training accuracy 0.8504444444444444 validation accuracy 0.8408333333333333
epoch: 2 training loss: 0.41354179922833045 training accuracy 0.8640555555555556 validation accuracy 0.8508333333333333
epoch: 3 training loss: 0.4020221438142 training accuracy 0.8571111111111112 validation accuracy 0.8403333333333334
epoch: 4 training loss: 0.39740244640243594 training accuracy 0.8599629629629629 validation accuracy 0.8445
epoch: 5 training loss: 0.3950625935191104 training accuracy 0.8680925925925926 validation accuracy 0.8476666666666667
epoch: 6 training loss: 0.39535929355108784 training accuracy 0.8659814814814815 validation accuracy 0.8491666666666666
epoch: 7 training loss: 0.39692518394190207 training accuracy 0.8637037037037038 validation accuracy 0.8426666666666667
epoch: 8 training loss: 0.3985461116565933 training accurac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▄▇▅▆▇▇▆▇█
training_loss,█▃▂▁▁▁▁▁▁▁
validation_accuracy,▁▅█▅▆▇▇▆▇█

0,1
epoch,9.0
training_accuracy,0.873
training_loss,0.40484
validation_accuracy,0.8525


[34m[1mwandb[0m: Agent Starting Run: andpl1z0 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	num_hidden_neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam


epoch: 0 training loss: 0.48966462200192895 training accuracy 0.8391851851851851 validation accuracy 0.8246666666666667
epoch: 1 training loss: 0.38467428385678787 training accuracy 0.8676481481481482 validation accuracy 0.8561666666666666
epoch: 2 training loss: 0.34758123180272926 training accuracy 0.8796296296296297 validation accuracy 0.8601666666666666
epoch: 3 training loss: 0.324703293058221 training accuracy 0.8761111111111111 validation accuracy 0.8566666666666667
epoch: 4 training loss: 0.3128231419638018 training accuracy 0.8906111111111111 validation accuracy 0.8673333333333333
epoch: 5 training loss: 0.3019869220781781 training accuracy 0.8911111111111111 validation accuracy 0.8688333333333333
epoch: 6 training loss: 0.2922385320183129 training accuracy 0.896 validation accuracy 0.8666666666666667
epoch: 7 training loss: 0.2848981410477563 training accuracy 0.8973703703703704 validation accuracy 0.8683333333333333
epoch: 8 training loss: 0.2789417609788422 training accurac

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▄▆▅▇▇████
training_loss,█▅▃▃▂▂▂▁▁▁
validation_accuracy,▁▆▇▆█████▇

0,1
epoch,9.0
training_accuracy,0.89431
training_loss,0.27319
validation_accuracy,0.866


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: yecoxwn6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 32
[34m[1mwandb[0m: 	optimizer: adam


epoch: 0 training loss: 4.482745721401763 training accuracy 0.8522222222222222 validation accuracy 0.8391666666666666
epoch: 1 training loss: 6.919280833883999 training accuracy 0.863462962962963 validation accuracy 0.8525
epoch: 2 training loss: 9.23782176222265 training accuracy 0.8630185185185185 validation accuracy 0.8493333333333334
epoch: 3 training loss: 11.690615626960097 training accuracy 0.863462962962963 validation accuracy 0.8485
epoch: 4 training loss: 14.360023255591285 training accuracy 0.8639814814814815 validation accuracy 0.8473333333333334
epoch: 5 training loss: 17.212397057439368 training accuracy 0.8624814814814815 validation accuracy 0.8446666666666667
epoch: 6 training loss: 20.17993952399157 training accuracy 0.8669074074074075 validation accuracy 0.8473333333333334
epoch: 7 training loss: 23.208520019353397 training accuracy 0.8662407407407408 validation accuracy 0.8418333333333333
epoch: 8 training loss: 26.363581702203557 training accuracy 0.8690740740740741

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▆▅▆▆▅▇▇██
training_loss,▁▂▂▃▄▅▅▆▇█
validation_accuracy,▁█▆▆▅▄▅▂▄▃

0,1
epoch,9.0
training_accuracy,0.86822
training_loss,29.65475
validation_accuracy,0.84367


[34m[1mwandb[0m: Agent Starting Run: 5urgtpbs with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	num_hidden_neurons: 32
[34m[1mwandb[0m: 	optimizer: nadam


epoch: 0 training loss: 0.5161698502224676 training accuracy 0.8451296296296297 validation accuracy 0.8315
epoch: 1 training loss: 0.40630462257125566 training accuracy 0.8652962962962963 validation accuracy 0.8503333333333334
epoch: 2 training loss: 0.38416768786683003 training accuracy 0.8701851851851852 validation accuracy 0.8541666666666666
epoch: 3 training loss: 0.376429874720569 training accuracy 0.8735925925925926 validation accuracy 0.8513333333333334
epoch: 4 training loss: 0.37240855670005474 training accuracy 0.8752037037037037 validation accuracy 0.8565


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▅▆█
training_accuracy,▁▆▇██
training_loss,█▃▂▁▁
validation_accuracy,▁▆▇▇█

0,1
epoch,4.0
training_accuracy,0.8752
training_loss,0.37241
validation_accuracy,0.8565


[34m[1mwandb[0m: Agent Starting Run: u8r6khk0 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 128
[34m[1mwandb[0m: 	optimizer: nadam


epoch: 0 training loss: 0.5740547741840172 training accuracy 0.8291481481481482 validation accuracy 0.8221666666666667
epoch: 1 training loss: 0.4573694895192401 training accuracy 0.8426851851851852 validation accuracy 0.8375
epoch: 2 training loss: 0.42623157291568753 training accuracy 0.8581111111111112 validation accuracy 0.8458333333333333
epoch: 3 training loss: 0.40363650921348104 training accuracy 0.8734259259259259 validation accuracy 0.8608333333333333
epoch: 4 training loss: 0.38750725604647596 training accuracy 0.8717407407407407 validation accuracy 0.8518333333333333


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▅▆█
training_accuracy,▁▃▆██
training_loss,█▄▂▂▁
validation_accuracy,▁▄▅█▆

0,1
epoch,4.0
training_accuracy,0.87174
training_loss,0.38751
validation_accuracy,0.85183


[34m[1mwandb[0m: Agent Starting Run: ny9j7uwu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.0005
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	num_hidden_neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam


epoch: 0 training loss: 0.49020861544563815 training accuracy 0.8433333333333334 validation accuracy 0.8325
epoch: 1 training loss: 0.3852292007340384 training accuracy 0.8653333333333333 validation accuracy 0.8521666666666666
epoch: 2 training loss: 0.3519745960059022 training accuracy 0.8755925925925926 validation accuracy 0.858
epoch: 3 training loss: 0.3316336132760061 training accuracy 0.8852407407407408 validation accuracy 0.869
epoch: 4 training loss: 0.31688761399735593 training accuracy 0.8892962962962963 validation accuracy 0.8695
epoch: 5 training loss: 0.3064342431858479 training accuracy 0.8942037037037037 validation accuracy 0.8708333333333333
epoch: 6 training loss: 0.29604121903314634 training accuracy 0.8949259259259259 validation accuracy 0.8723333333333333
epoch: 7 training loss: 0.2892641877290281 training accuracy 0.9002777777777777 validation accuracy 0.8706666666666667
epoch: 8 training loss: 0.28074580878682465 training accuracy 0.8999259259259259 validation acc

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▄▅▆▆▇▇███
training_loss,█▅▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▇▇█████

0,1
epoch,9.0
training_accuracy,0.90224
training_loss,0.27615
validation_accuracy,0.87283


[34m[1mwandb[0m: Agent Starting Run: zzcn62lm with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	initializer: xavier
[34m[1mwandb[0m: 	lamda: 0.5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	num_hidden_neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam


epoch: 0 training loss: 3.1090084170411605 training accuracy 0.8519629629629629 validation accuracy 0.8408333333333333
epoch: 1 training loss: 3.101736670998243 training accuracy 0.8663888888888889 validation accuracy 0.857
