In [None]:
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from keras.datasets import fashion_mnist, mnist
import numpy as np
import math
import wandb

In [None]:
# Log in to Weights & Biases before any sweep or run is created.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mcs22m059[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
def process(x) :
  """Flatten each sample to a row vector and rescale the values by 1/255.

  x : array whose first axis indexes samples.
  Returns a float64 array of shape (len(x), -1) with every entry divided by 255.
  """
  n_samples = len(x)
  flat = x.reshape(n_samples, -1)
  return flat.astype('float64') / 255.0

In [None]:
def load_data(dataset = "fashion_mnist"):
  """Load a Keras image dataset and return flattened, scaled, one-hot encoded splits.

  dataset : "fashion_mnist" (default) or "mnist".

  The first 90% of the official training set is kept for training and the
  remaining 10% becomes the validation set (no shuffling is applied). Images
  are passed through `process`; labels are one-hot encoded over 10 classes.

  Returns (x_train, y_train, x_valid, y_valid, x_test, y_test).
  Raises ValueError if `dataset` is not one of the supported names.
  """
  # Validate first so an unsupported name fails with a clear message instead of
  # an UnboundLocalError further down.
  if dataset == "fashion_mnist" :
      source = fashion_mnist
  elif dataset == "mnist" :
      source = mnist
  else :
      raise ValueError(f"unknown dataset {dataset!r}; expected 'fashion_mnist' or 'mnist'")

  (x_train, y_train), (x_test, y_test) = source.load_data()

  # Index of the first validation sample: 90% train / 10% validation.
  n_train = int(len(x_train) * 0.9)
  x_train, x_valid = x_train[:n_train], x_train[n_train:]
  y_train, y_valid = y_train[:n_train], y_train[n_train:]

  x_train = process(x_train)
  x_valid = process(x_valid)
  x_test = process(x_test)

  k = 10
  one_hot = np.eye(k)  # row i is the one-hot vector of class i
  y_train = one_hot[y_train]
  y_valid = one_hot[y_valid]
  y_test = one_hot[y_test]

  return x_train, y_train, x_valid, y_valid, x_test, y_test

In [None]:
def sigmoid(x) :
  """Element-wise logistic function 1 / (1 + exp(-x)), evaluated without overflow.

  x : scalar or array.
  Returns an array with the same shape as `x`.
  """
  x = np.asarray(x)
  # exp(-|x|) always lies in (0, 1], so it cannot overflow for any input.
  z = np.exp(-np.abs(x))
  # For x >= 0: 1 / (1 + e^-x).  For x < 0 the same value written as e^x / (1 + e^x).
  return np.where(x >= 0, 1. / (1. + z), z / (1. + z))

def tanh(x) :
  """Element-wise hyperbolic tangent.

  Delegates to np.tanh: the hand-written form 2 / (1 + exp(-2x)) - 1 overflows
  inside exp() for large negative inputs.
  """
  return np.tanh(x)

def relu(x) :
  """Element-wise rectifier: keeps entries that are >= 0 and replaces the rest with 0.

  Original author's note: do not use relu with random (weight initialisation).
  """
  keep = x >= 0
  return np.where(keep, x, 0.)

def softmax(x) :
  """Softmax taken along axis 0 (each column of a 2-D input is normalised separately).

  The maximum along axis 0 is subtracted before exponentiating so the largest
  exponent is exp(0) = 1.
  """
  shifted = x - np.max(x, axis=0)
  exps = np.exp(shifted)
  totals = exps.sum(axis=0)
  return exps / totals

In [None]:
class my_nn :
  """Fully connected feed-forward classifier trained with mini-batch gradient descent.

  Layers are numbered 1..L with L = nhl + 1: layers 1..L-1 are hidden layers of
  width `sz`, layer L is the softmax output layer. Every per-layer list has
  L + 1 slots; slot 0 is unused except for `h[0]`, which holds the input batch.
  Batches are stored column-wise, i.e. arrays have shape (units, batch).

  Constructor arguments
    n_feature, n_class : input dimension and number of classes.
    nhl, sz            : number of hidden layers and their width.
    weight_init        : "random" or "Xavier" (ignored when act_fun == "ReLU").
    act_fun            : "sigmoid", "tanh" or "ReLU".
    loss               : "cross_entropy" or "mean_squared_error".
    epochs, b_sz       : number of passes over the data and mini-batch size.
    optimizer          : "sgd", "momentum", "nag", "rmsprop", "adam" or "nadam".
    lr                 : learning rate.
    mom                : momentum coefficient (momentum / nag).
    beta               : decay rate of the squared-gradient average (rmsprop).
    beta1, beta2       : decay rates of the first / second moments (adam / nadam).
    epsilon            : stabiliser added under the square root.
    w_d                : L2 weight-decay coefficient added to every weight gradient.

  Each epoch of training prints the metrics and sends them to `wandb.log`.
  """

  # Probabilities are floored at this value before log() so that a softmax
  # output that underflowed to exactly 0 yields a large finite loss instead of
  # a math domain error.
  _MIN_PROB = np.finfo(np.float64).tiny

  def __init__(self, n_feature = 784, n_class = 10, nhl = 1, sz = 4, weight_init = "random", act_fun = "sigmoid", loss = "cross_entropy", 
               epochs = 1, b_sz = 4, optimizer = "sgd", lr = 0.1, mom = 0.9, beta = 0.9, beta1 = 0.9, beta2 = 0.999, epsilon = 0.000001, w_d = 0.005) :
    self.n_feature = n_feature
    self.n_class = n_class
    self.nhl = nhl
    self.L = nhl + 1
    self.sz = sz
    self.weight_init = weight_init
    self.act_fun = act_fun
    self.loss = loss
    self.epochs = epochs
    self.b_sz = b_sz
    self.optimizer = optimizer
    self.lr = lr
    self.mom = mom
    self.beta = beta
    self.beta1 = beta1
    self.beta2 = beta2
    self.epsilon = epsilon
    self.w_d = w_d

    n_slots = self.L + 1

    # Parameters.
    self.W = [0] * n_slots
    self.b = [0] * n_slots

    # Gradients w.r.t. pre-activations, biases and weights.
    self.d_a = [0] * n_slots
    self.d_b = [0] * n_slots
    self.d_W = [0] * n_slots

    # Pre-activations and activations of the most recent forward pass.
    self.a = [0] * n_slots
    self.h = [0] * n_slots

    # Momentum / NAG velocity.
    self.u_W = [0] * n_slots
    self.u_b = [0] * n_slots

    # NAG look-ahead parameters.
    self.W_look = [0] * n_slots
    self.b_look = [0] * n_slots

    # Running average of squared gradients (rmsprop / adam / nadam).
    self.v_W = [0] * n_slots
    self.v_b = [0] * n_slots

    # Running average of gradients (adam / nadam).
    self.m_W = [0] * n_slots
    self.m_b = [0] * n_slots

    self.initialization()

  ######################################################

  def initialization(self) :
    """Draw the initial weights and set every bias to zero.

    With act_fun == "ReLU" (regardless of weight_init) or weight_init == "Xavier"
    the standard-normal draws are scaled by sqrt(2 / fan_in); with
    weight_init == "random" they are left unscaled.
    NOTE(review): sqrt(2 / fan_in) is the scaling usually called He
    initialisation; the option name "Xavier" is kept for compatibility.

    Raises ValueError for an unsupported weight_init.
    """
    if self.act_fun == "ReLU" or self.weight_init == "Xavier" :
      scale_by_fan_in = True
    elif self.weight_init == "random" :
      scale_by_fan_in = False
    else :
      raise ValueError(f"unknown weight_init {self.weight_init!r}; expected 'random' or 'Xavier'")

    for i in range(1, self.L + 1, 1) :
      # Deriving both dimensions from the layer index also gives the right
      # (n_class, n_feature) shape when there are no hidden layers (L == 1).
      fan_in = self.n_feature if i == 1 else self.sz
      fan_out = self.n_class if i == self.L else self.sz

      self.W[i] = np.random.randn(fan_out, fan_in)
      if scale_by_fan_in :
        self.W[i] = self.W[i] * math.sqrt(2.0 / fan_in)
      self.b[i] = np.zeros((fan_out, 1))

  #########################################################
  # Shared helpers for the forward / backward passes.

  def _activate(self, a) :
    """Apply the configured hidden-layer activation to pre-activations `a`."""
    if self.act_fun == "sigmoid" :
      return sigmoid(a)
    if self.act_fun == "tanh" :
      return tanh(a)
    if self.act_fun == "ReLU" :
      return relu(a)
    raise ValueError(f"unknown act_fun {self.act_fun!r}; expected 'sigmoid', 'tanh' or 'ReLU'")

  def _activation_grad(self, h) :
    """Derivative of the hidden activation, expressed through its output `h`."""
    if self.act_fun == "sigmoid" :
      return h * (1. - h)
    if self.act_fun == "tanh" :
      return 1. - h**2
    if self.act_fun == "ReLU" :
      return np.where(h > 0., 1., 0.)
    raise ValueError(f"unknown act_fun {self.act_fun!r}; expected 'sigmoid', 'tanh' or 'ReLU'")

  def _forward_pass(self, x, W, b, a, h) :
    """Run the network on `x` (features, batch) with parameters W, b.

    Pre-activations and activations are written into the lists `a` and `h`;
    h[L] (the softmax output, shape (n_class, batch)) is also returned.
    """
    h[0] = x
    for i in range(1, self.L, 1) :
      a[i] = b[i] + np.dot(W[i], h[i-1])
      h[i] = self._activate(a[i])

    a[self.L] = b[self.L] + np.dot(W[self.L], h[self.L-1])
    h[self.L] = softmax(a[self.L]) # h[L] = y_hat
    return h[self.L]

  def _backward_pass(self, y, W) :
    """Fill d_a, d_b, d_W from the stored activations, using weights `W`.

    y : one-hot targets of shape (n_class, batch).
    Gradients are summed (not averaged) over the batch, and w_d * W[i] is added
    to each weight gradient.
    """
    L = self.L
    y_hat = self.h[L]
    err = y_hat - y

    if self.loss == "cross_entropy" :
      self.d_a[L] = err
    elif self.loss == "mean_squared_error" :
      # Softmax Jacobian: dh_j/da_i = h_j * (delta_ij - h_i), hence
      # dLoss/da_i = h_i * (err_i - sum_j err_j * h_j). The constant factor 2 of
      # the squared error is left out, as it was before.
      self.d_a[L] = y_hat * (err - np.sum(err * y_hat, axis=0, keepdims=True))
    else :
      raise ValueError(f"unknown loss {self.loss!r}; expected 'cross_entropy' or 'mean_squared_error'")

    self._layer_grads(L, W)

    for i in range(L-1, 0, -1) :
      d_h_i = np.dot(W[i+1].T, self.d_a[i+1])
      self.d_a[i] = d_h_i * self._activation_grad(self.h[i])
      self._layer_grads(i, W)

  def _layer_grads(self, i, W) :
    """Turn d_a[i] into bias / weight gradients of layer i (with weight decay)."""
    self.d_b[i] = np.sum(self.d_a[i], axis=1, keepdims=True)
    self.d_W[i] = np.dot(self.d_a[i], self.h[i-1].T) + self.w_d * W[i]

  #########################################################

  def forward_propagation(self, x) :
    """Forward pass with the current parameters; results are stored in self.a / self.h."""
    self._forward_pass(x, self.W, self.b, self.a, self.h)

  #########################################################

  def back_propagation(self, y) :
    """Backward pass with the current parameters; results are stored in d_a / d_b / d_W."""
    self._backward_pass(y, self.W)

  ############################################################

  def nag_forward_propagation(self, x) :
    """Forward pass evaluated at the NAG look-ahead parameters (W_look, b_look)."""
    self._forward_pass(x, self.W_look, self.b_look, self.a, self.h)

  #########################################################

  def nag_back_propagation(self, y) :
    """Backward pass evaluated at the NAG look-ahead weights W_look."""
    self._backward_pass(y, self.W_look)

  ############################################################

  def predict_prob(self, x) :
    """Return class probabilities for `x` (features, batch) as a (batch, n_class) array.

    Uses scratch buffers, so the activations cached in self.a / self.h are untouched.
    """
    a_temp = [0] * (self.L + 1)
    h_temp = [0] * (self.L + 1)
    return self._forward_pass(x, self.W, self.b, a_temp, h_temp).T
  
  #############################################################

  def loss_val(self, y_hat, y) :
    """Mean loss over the N rows of `y_hat` / `y` (both of shape (N, n_class)).

    cross_entropy      : mean of -log(probability assigned to the true class).
    mean_squared_error : sum of squared differences divided by N.
    Returns a Python float.
    """
    N = y.shape[0]

    if self.loss == "cross_entropy" :
      true_class = y.argmax(axis=1)
      p_true = y_hat[np.arange(N), true_class]
      # Floor the probabilities so an underflowed softmax output cannot crash log().
      return float(-np.sum(np.log(np.clip(p_true, self._MIN_PROB, None))) / N)

    if self.loss == "mean_squared_error" :
      return float(np.sum((y - y_hat)**2) / N)

    raise ValueError(f"unknown loss {self.loss!r}; expected 'cross_entropy' or 'mean_squared_error'")

  ##############################################################

  def accuracy(self, y_hat, y) :
    """Percentage of rows whose arg-max prediction equals the arg-max of the target."""
    N = y.shape[0]
    n_correct = int(np.sum(y.argmax(axis=1) == y_hat.argmax(axis=1)))
    return 100 * n_correct / N

  ###############################################################
  # Shared training scaffolding.

  def _reset_state(self, *names) :
    """Zero the named per-layer optimizer buffers so every training call starts fresh."""
    for name in names :
      setattr(self, name, [0] * (self.L + 1))

  def _report_epoch(self, t, X, y, X_valid, y_valid) :
    """Evaluate on the full train / validation sets, then print and log the metrics."""
    y_hat = self.predict_prob(X.T)
    tr_loss = self.loss_val(y_hat, y)
    tr_acc = self.accuracy(y_hat, y)

    y_val_hat = self.predict_prob(X_valid.T)
    val_loss = self.loss_val(y_val_hat, y_valid)
    val_acc = self.accuracy(y_val_hat, y_valid)

    print(f"epoch {t + 1} : train_loss = {tr_loss:.2f} valid_loss = {val_loss:.2f}, train accuracy = {tr_acc:.2f} valid_accuracy = {val_acc:.2f}")
    wandb.log({'tr_loss' : tr_loss, 'tr_accuracy' : tr_acc, 'val_loss' : val_loss, 'val_accuracy' : val_acc})

  def _fit(self, X, y, X_valid, y_valid, step) :
    """Run `epochs` passes over X in consecutive mini-batches of size b_sz.

    step(x_batch, y_batch, n_step) performs one parameter update; the batches
    are passed column-wise and n_step counts updates starting from 1.
    """
    N = X.shape[0]
    n_step = 0

    for t in range(self.epochs) :
      for j in range(0, N, self.b_sz) :
        n_step += 1
        # Slicing clamps at N, so the last batch may be shorter than b_sz.
        r_idx = j + self.b_sz
        step(X[j:r_idx].T, y[j:r_idx].T, n_step)

      self._report_epoch(t, X, y, X_valid, y_valid)

  def _momentum_update(self) :
    """u <- mom * u + lr * grad, then subtract u from the parameters (all layers)."""
    for idx in range(1, self.L+1, 1) :
      self.u_W[idx] = (self.mom * self.u_W[idx]) + (self.lr * self.d_W[idx])
      self.u_b[idx] = (self.mom * self.u_b[idx]) + (self.lr * self.d_b[idx])

      self.W[idx] = self.W[idx] - self.u_W[idx]
      self.b[idx] = self.b[idx] - self.u_b[idx]

  def _update_moments(self, idx) :
    """Update the first (m) and second (v) moment averages of layer idx (adam / nadam)."""
    self.m_W[idx] = (self.beta1 * self.m_W[idx]) + ((1. - self.beta1) * self.d_W[idx])
    self.m_b[idx] = (self.beta1 * self.m_b[idx]) + ((1. - self.beta1) * self.d_b[idx])

    self.v_W[idx] = (self.beta2 * self.v_W[idx]) + ((1. - self.beta2) * (self.d_W[idx]**2))
    self.v_b[idx] = (self.beta2 * self.v_b[idx]) + ((1. - self.beta2) * (self.d_b[idx]**2))

  ###############################################################

  def sgd(self, X, y, X_valid, y_valid) :
    """Plain mini-batch gradient descent."""
    def step(x_batch, y_batch, n_step) :
      self.forward_propagation(x_batch)
      self.back_propagation(y_batch)

      for idx in range(1, self.L+1, 1) :
        self.W[idx] = self.W[idx] - (self.lr * self.d_W[idx])
        self.b[idx] = self.b[idx] - (self.lr * self.d_b[idx])

    self._fit(X, y, X_valid, y_valid, step)

  #################################################################

  def mgd(self, X, y, X_valid, y_valid) :
    """Momentum-based gradient descent."""
    self._reset_state("u_W", "u_b")

    def step(x_batch, y_batch, n_step) :
      self.forward_propagation(x_batch)
      self.back_propagation(y_batch)
      self._momentum_update()

    self._fit(X, y, X_valid, y_valid, step)

  ##################################################################

  def nagd(self, X, y, X_valid, y_valid) :
    """Nesterov accelerated gradient descent.

    The gradient is evaluated at the look-ahead point W - mom * u. On the very
    first step u is zero, so the look-ahead point coincides with W.
    """
    self._reset_state("u_W", "u_b")

    def step(x_batch, y_batch, n_step) :
      for idx in range(1, self.L+1, 1) :
        self.W_look[idx] = self.W[idx] - (self.mom * self.u_W[idx])
        self.b_look[idx] = self.b[idx] - (self.mom * self.u_b[idx])

      self.nag_forward_propagation(x_batch)
      self.nag_back_propagation(y_batch)
      self._momentum_update()

    self._fit(X, y, X_valid, y_valid, step)

  ##############################################################

  def rmsprop(self, X, y, X_valid, y_valid) :
    """RMSProp: scale each step by the root of a running average of squared gradients."""
    self._reset_state("v_W", "v_b")

    def step(x_batch, y_batch, n_step) :
      self.forward_propagation(x_batch)
      self.back_propagation(y_batch)

      for idx in range(1, self.L+1, 1) :
        self.v_W[idx] = (self.beta * self.v_W[idx]) + ((1. - self.beta) * (self.d_W[idx]**2))
        self.v_b[idx] = (self.beta * self.v_b[idx]) + ((1. - self.beta) * (self.d_b[idx]**2))

        self.W[idx] = self.W[idx] - (self.lr / (np.sqrt(self.v_W[idx] + self.epsilon))) * self.d_W[idx]
        self.b[idx] = self.b[idx] - (self.lr / (np.sqrt(self.v_b[idx] + self.epsilon))) * self.d_b[idx]

    self._fit(X, y, X_valid, y_valid, step)
  
  ##############################################################

  def adam(self, X, y, X_valid, y_valid) :
    """Adam with bias-corrected first and second moments."""
    self._reset_state("m_W", "m_b", "v_W", "v_b")

    def step(x_batch, y_batch, n_step) :
      self.forward_propagation(x_batch)
      self.back_propagation(y_batch)

      # Bias-correction denominators for this update.
      m_corr = 1. - self.beta1**n_step
      v_corr = 1. - self.beta2**n_step

      for idx in range(1, self.L+1, 1) :
        self._update_moments(idx)

        self.W[idx] = self.W[idx] - (self.lr / (np.sqrt(self.v_W[idx] / v_corr + self.epsilon))) * (self.m_W[idx] / m_corr)
        self.b[idx] = self.b[idx] - (self.lr / (np.sqrt(self.v_b[idx] / v_corr + self.epsilon))) * (self.m_b[idx] / m_corr)

    self._fit(X, y, X_valid, y_valid, step)

  ##############################################################

  def nadam(self, X, y, X_valid, y_valid) :
    """Nadam: Adam whose step mixes the corrected first moment with the current gradient."""
    self._reset_state("m_W", "m_b", "v_W", "v_b")

    def step(x_batch, y_batch, n_step) :
      self.forward_propagation(x_batch)
      self.back_propagation(y_batch)

      # Bias-correction denominators for this update.
      m_corr = 1. - self.beta1**n_step
      v_corr = 1. - self.beta2**n_step

      for idx in range(1, self.L+1, 1) :
        self._update_moments(idx)

        W_term = (self.beta1 / m_corr) * self.m_W[idx]  + ((1. - self.beta1) / m_corr) * self.d_W[idx]
        b_term = (self.beta1 / m_corr) * self.m_b[idx]  + ((1. - self.beta1) / m_corr) * self.d_b[idx]

        self.W[idx] = self.W[idx] - (self.lr / (np.sqrt(self.v_W[idx] / v_corr + self.epsilon))) * W_term
        self.b[idx] = self.b[idx] - (self.lr / (np.sqrt(self.v_b[idx] / v_corr + self.epsilon))) * b_term

    self._fit(X, y, X_valid, y_valid, step)

  ##############################################################

  def train(self, X_train, y_train, X_valid, y_valid) :
    """Train with the optimizer selected at construction time.

    X_train / X_valid : arrays of shape (N, n_feature).
    y_train / y_valid : one-hot arrays of shape (N, n_class).
    Raises ValueError if self.optimizer is not a supported name (previously an
    unknown name silently skipped training).
    """
    optimizers = {
      "sgd" : self.sgd,
      "momentum" : self.mgd,
      "nag" : self.nagd,
      "rmsprop" : self.rmsprop,
      "adam" : self.adam,
      "nadam" : self.nadam,
    }
    if self.optimizer not in optimizers :
      raise ValueError(f"unknown optimizer {self.optimizer!r}; expected one of {sorted(optimizers)}")

    optimizers[self.optimizer](X_train, y_train, X_valid, y_valid)
  

In [None]:
# Load the default dataset (fashion_mnist) once; `main` reads these module-level arrays.
x_train, y_train, x_valid, y_valid, x_test, y_test = load_data()

In [None]:
# Bayesian hyper-parameter sweep over the cross-entropy model.
sweep_config = {
    'method': 'bayes',
    'name' : 'Bayesian_sweep_cross_entropy',
    'metric': {
      # Must be exactly the key passed to wandb.log during training
      # ('val_accuracy'); otherwise the sweep has no metric to optimise.
      'name': 'val_accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'epochs': {
            'values': [5, 10]
        },
        # Number of hidden layers (passed to my_nn as `nhl`).
        'num_layers': {
            'values': [3, 4, 5]
        },
        'hidden_size': {
            'values': [32, 64, 128]
        },
        'weight_decay': {
            'values': [0, 0.0005, 0.5]
        },
        'learning_rate': {
            'values': [0.001, 0.0001]
        },
        'optimizer': {
            'values': ['sgd', 'momentum', 'nag', 'rmsprop', 'adam', 'nadam']
        },
        'batch_size': {
            'values': [16, 32, 64]
        },
        'weight_init': {
            'values': ['random', 'Xavier']
        },
        'activation': {
            'values': ['sigmoid', 'tanh', 'ReLU']
        },
    }
}

sweep_id = wandb.sweep(sweep = sweep_config, project = 'cs6910_dl_assgn_1_q_4')

Create sweep with ID: nxcy3r4z
Sweep URL: https://wandb.ai/cs22m059/cs6910_dl_assgn_1_q_4/sweeps/nxcy3r4z


In [None]:
def main():
  """Run a single sweep trial.

  Starts a wandb run, names it after the sampled hyper-parameters, builds a
  `my_nn` from the sweep configuration and trains it on the module-level
  x_train / y_train / x_valid / y_valid arrays.
  """
  with wandb.init() as run:
    cfg = wandb.config

    # Human-readable run name built from the key hyper-parameters.
    wandb.run.name = (
      f"opt_{cfg.optimizer}_ac_{cfg.activation}_bs_{cfg.batch_size}"
      f"_hl_{cfg.num_layers}_lr_{cfg.learning_rate}"
    )

    # Map sweep parameter names onto the my_nn constructor arguments.
    hyper_params = dict(
      epochs = cfg.epochs,
      nhl = cfg.num_layers,
      sz = cfg.hidden_size,
      w_d = cfg.weight_decay,
      lr = cfg.learning_rate,
      optimizer = cfg.optimizer,
      b_sz = cfg.batch_size,
      weight_init = cfg.weight_init,
      act_fun = cfg.activation,
    )

    nn_model = my_nn(**hyper_params)
    nn_model.train(x_train, y_train, x_valid, y_valid)
    
# Start a sweep agent that calls `main` once per trial, for at most 100 trials of `sweep_id`.
wandb.agent(sweep_id, function = main, count = 100)
# Finish the wandb run once the agent has returned.
wandb.finish()

[34m[1mwandb[0m: Agent Starting Run: nom4wg5m with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 1.92 valid_loss = 1.92, train accuracy = 23.69 valid_accuracy = 23.47
epoch 2 : train_loss = 1.92 valid_loss = 1.93, train accuracy = 22.78 valid_accuracy = 22.67
epoch 3 : train_loss = 1.92 valid_loss = 1.92, train accuracy = 22.84 valid_accuracy = 22.72
epoch 4 : train_loss = 1.92 valid_loss = 1.92, train accuracy = 22.86 valid_accuracy = 22.72
epoch 5 : train_loss = 1.92 valid_loss = 1.92, train accuracy = 22.87 valid_accuracy = 22.73


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,█▁▁▂▂
tr_loss,▃█▅▂▁
val_accuracy,█▁▁▁▂
val_loss,▃█▅▂▁

0,1
tr_accuracy,22.87407
tr_loss,1.91698
val_accuracy,22.73333
val_loss,1.92077


[34m[1mwandb[0m: Agent Starting Run: iw64jrc0 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.56 valid_loss = 0.56, train accuracy = 80.62 valid_accuracy = 79.98
epoch 2 : train_loss = 0.54 valid_loss = 0.55, train accuracy = 81.03 valid_accuracy = 80.45
epoch 3 : train_loss = 0.53 valid_loss = 0.54, train accuracy = 81.40 valid_accuracy = 80.85
epoch 4 : train_loss = 0.53 valid_loss = 0.53, train accuracy = 81.46 valid_accuracy = 81.28
epoch 5 : train_loss = 0.52 valid_loss = 0.53, train accuracy = 81.55 valid_accuracy = 81.40


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▇▇█
tr_loss,█▅▃▂▁
val_accuracy,▁▃▅▇█
val_loss,█▅▃▂▁

0,1
tr_accuracy,81.55185
tr_loss,0.52443
val_accuracy,81.4
val_loss,0.52814


[34m[1mwandb[0m: Agent Starting Run: gj6ii35v with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.81 valid_loss = 0.83, train accuracy = 70.41 valid_accuracy = 70.17
epoch 2 : train_loss = 0.70 valid_loss = 0.73, train accuracy = 74.26 valid_accuracy = 74.13
epoch 3 : train_loss = 0.65 valid_loss = 0.68, train accuracy = 76.41 valid_accuracy = 76.08
epoch 4 : train_loss = 0.62 valid_loss = 0.65, train accuracy = 77.61 valid_accuracy = 77.08
epoch 5 : train_loss = 0.59 valid_loss = 0.63, train accuracy = 78.36 valid_accuracy = 77.40


0,1
tr_accuracy,▁▄▆▇█
tr_loss,█▅▃▂▁
val_accuracy,▁▅▇██
val_loss,█▅▃▂▁

0,1
tr_accuracy,78.36296
tr_loss,0.59086
val_accuracy,77.4
val_loss,0.63362


[34m[1mwandb[0m: Agent Starting Run: mytm4v5k with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.42 valid_loss = 0.44, train accuracy = 84.94 valid_accuracy = 84.33
epoch 2 : train_loss = 0.38 valid_loss = 0.42, train accuracy = 85.67 valid_accuracy = 84.47
epoch 3 : train_loss = 0.34 valid_loss = 0.39, train accuracy = 87.40 valid_accuracy = 85.95
epoch 4 : train_loss = 0.32 valid_loss = 0.38, train accuracy = 88.09 valid_accuracy = 86.57
epoch 5 : train_loss = 0.32 valid_loss = 0.38, train accuracy = 88.38 valid_accuracy = 86.52


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▂▆▇█
tr_loss,█▆▃▁▁
val_accuracy,▁▁▆██
val_loss,█▆▃▁▁

0,1
tr_accuracy,88.37593
tr_loss,0.31689
val_accuracy,86.51667
val_loss,0.37784


[34m[1mwandb[0m: Agent Starting Run: zlr4fdqn with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.60 valid_loss = 0.60, train accuracy = 79.95 valid_accuracy = 79.83
epoch 2 : train_loss = 0.50 valid_loss = 0.50, train accuracy = 83.16 valid_accuracy = 82.60
epoch 3 : train_loss = 0.46 valid_loss = 0.46, train accuracy = 84.33 valid_accuracy = 83.87
epoch 4 : train_loss = 0.43 valid_loss = 0.44, train accuracy = 85.07 valid_accuracy = 84.33
epoch 5 : train_loss = 0.42 valid_loss = 0.42, train accuracy = 85.57 valid_accuracy = 84.93


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.102211…

0,1
tr_accuracy,▁▅▆▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
tr_accuracy,85.57037
tr_loss,0.41588
val_accuracy,84.93333
val_loss,0.42386


[34m[1mwandb[0m: Agent Starting Run: iwxcmgas with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 1.00 valid_loss = 1.00, train accuracy = 61.32 valid_accuracy = 61.30
epoch 2 : train_loss = 0.63 valid_loss = 0.64, train accuracy = 76.86 valid_accuracy = 77.37
epoch 3 : train_loss = 0.56 valid_loss = 0.58, train accuracy = 77.96 valid_accuracy = 77.60
epoch 4 : train_loss = 0.48 valid_loss = 0.51, train accuracy = 83.28 valid_accuracy = 82.17
epoch 5 : train_loss = 0.44 valid_loss = 0.47, train accuracy = 85.14 valid_accuracy = 84.22
epoch 6 : train_loss = 0.42 valid_loss = 0.46, train accuracy = 85.68 valid_accuracy = 84.90
epoch 7 : train_loss = 0.41 valid_loss = 0.45, train accuracy = 86.11 valid_accuracy = 85.08
epoch 8 : train_loss = 0.38 valid_loss = 0.43, train accuracy = 87.09 valid_accuracy = 85.50
epoch 9 : train_loss = 0.38 valid_loss = 0.43, train accuracy = 87.30 valid_accuracy = 85.83
epoch 10 : train_loss = 0.36 valid_loss = 0.42, train accuracy = 87.78 valid_accuracy = 85.82


0,1
tr_accuracy,▁▅▅▇▇▇████
tr_loss,█▄▃▂▂▂▂▁▁▁
val_accuracy,▁▆▆▇██████
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
tr_accuracy,87.77778
tr_loss,0.36233
val_accuracy,85.81667
val_loss,0.42317


[34m[1mwandb[0m: Agent Starting Run: vnqwbf9q with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.70 valid_loss = 0.69, train accuracy = 74.82 valid_accuracy = 74.67
epoch 2 : train_loss = 0.70 valid_loss = 0.70, train accuracy = 73.79 valid_accuracy = 73.72
epoch 3 : train_loss = 0.72 valid_loss = 0.72, train accuracy = 73.12 valid_accuracy = 73.08
epoch 4 : train_loss = 0.72 valid_loss = 0.72, train accuracy = 73.31 valid_accuracy = 73.43
epoch 5 : train_loss = 0.72 valid_loss = 0.72, train accuracy = 73.32 valid_accuracy = 73.38


0,1
tr_accuracy,█▄▁▂▂
tr_loss,▁▄█▇█
val_accuracy,█▄▁▃▂
val_loss,▁▄███

0,1
tr_accuracy,73.31852
tr_loss,0.71789
val_accuracy,73.38333
val_loss,0.7167


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0axg98b2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 4.23 valid_loss = 4.24, train accuracy = 23.78 valid_accuracy = 23.83
epoch 2 : train_loss = 2.54 valid_loss = 2.50, train accuracy = 39.73 valid_accuracy = 40.47
epoch 3 : train_loss = 1.86 valid_loss = 1.84, train accuracy = 46.54 valid_accuracy = 47.12
epoch 4 : train_loss = 1.55 valid_loss = 1.55, train accuracy = 51.59 valid_accuracy = 51.85
epoch 5 : train_loss = 1.37 valid_loss = 1.38, train accuracy = 55.77 valid_accuracy = 55.70
epoch 6 : train_loss = 1.26 valid_loss = 1.27, train accuracy = 58.50 valid_accuracy = 58.88
epoch 7 : train_loss = 1.19 valid_loss = 1.19, train accuracy = 60.16 valid_accuracy = 60.52
epoch 8 : train_loss = 1.12 valid_loss = 1.13, train accuracy = 61.59 valid_accuracy = 62.15
epoch 9 : train_loss = 1.07 valid_loss = 1.07, train accuracy = 63.10 valid_accuracy = 63.95
epoch 10 : train_loss = 1.03 valid_loss = 1.04, train accuracy = 64.34 valid_accuracy = 65.13


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▅▆▇▇▇███
tr_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
tr_accuracy,64.33704
tr_loss,1.02963
val_accuracy,65.13333
val_loss,1.0383


[34m[1mwandb[0m: Agent Starting Run: yqo8x7yz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.78 valid_loss = 0.78, train accuracy = 74.35 valid_accuracy = 74.00
epoch 2 : train_loss = 0.74 valid_loss = 0.74, train accuracy = 77.84 valid_accuracy = 77.50
epoch 3 : train_loss = 0.81 valid_loss = 0.81, train accuracy = 77.09 valid_accuracy = 76.90
epoch 4 : train_loss = 0.85 valid_loss = 0.85, train accuracy = 76.44 valid_accuracy = 76.45
epoch 5 : train_loss = 0.87 valid_loss = 0.87, train accuracy = 75.56 valid_accuracy = 75.72


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁█▇▅▃
tr_loss,▃▁▅▇█
val_accuracy,▁█▇▆▄
val_loss,▃▁▅▇█

0,1
tr_accuracy,75.55556
tr_loss,0.87022
val_accuracy,75.71667
val_loss,0.86765


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xsjsqr3q with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 1.12 valid_loss = 1.15, train accuracy = 64.16 valid_accuracy = 64.23
epoch 2 : train_loss = 0.89 valid_loss = 0.91, train accuracy = 68.81 valid_accuracy = 68.23
epoch 3 : train_loss = 0.81 valid_loss = 0.84, train accuracy = 70.68 valid_accuracy = 70.00
epoch 4 : train_loss = 0.75 valid_loss = 0.79, train accuracy = 72.37 valid_accuracy = 71.25
epoch 5 : train_loss = 0.72 valid_loss = 0.75, train accuracy = 73.42 valid_accuracy = 72.53
epoch 6 : train_loss = 0.68 valid_loss = 0.72, train accuracy = 74.71 valid_accuracy = 73.73
epoch 7 : train_loss = 0.67 valid_loss = 0.71, train accuracy = 75.22 valid_accuracy = 73.98
epoch 8 : train_loss = 0.65 valid_loss = 0.70, train accuracy = 76.12 valid_accuracy = 74.48
epoch 9 : train_loss = 0.62 valid_loss = 0.69, train accuracy = 76.84 valid_accuracy = 74.87
epoch 10 : train_loss = 0.61 valid_loss = 0.67, train accuracy = 77.80 valid_accuracy = 76.37


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▃▄▅▆▆▇▇██
tr_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▃▄▅▆▆▇▇▇█
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
tr_accuracy,77.80185
tr_loss,0.60532
val_accuracy,76.36667
val_loss,0.67455


[34m[1mwandb[0m: Agent Starting Run: luqvcro5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.45 valid_loss = 0.46, train accuracy = 83.52 valid_accuracy = 83.10
epoch 2 : train_loss = 0.42 valid_loss = 0.45, train accuracy = 84.10 valid_accuracy = 83.10
epoch 3 : train_loss = 0.40 valid_loss = 0.44, train accuracy = 84.95 valid_accuracy = 84.08
epoch 4 : train_loss = 0.40 valid_loss = 0.45, train accuracy = 85.03 valid_accuracy = 83.68
epoch 5 : train_loss = 0.37 valid_loss = 0.43, train accuracy = 86.02 valid_accuracy = 84.63


0,1
tr_accuracy,▁▃▅▅█
tr_loss,█▆▄▃▁
val_accuracy,▁▁▅▄█
val_loss,█▆▃▄▁

0,1
tr_accuracy,86.02222
tr_loss,0.36924
val_accuracy,84.63333
val_loss,0.42974


[34m[1mwandb[0m: Agent Starting Run: 6acyvutj with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 1.40 valid_loss = 1.40, train accuracy = 48.99 valid_accuracy = 48.62
epoch 2 : train_loss = 1.16 valid_loss = 1.17, train accuracy = 57.31 valid_accuracy = 56.77
epoch 3 : train_loss = 1.04 valid_loss = 1.05, train accuracy = 62.29 valid_accuracy = 61.63
epoch 4 : train_loss = 0.96 valid_loss = 0.97, train accuracy = 65.49 valid_accuracy = 65.02
epoch 5 : train_loss = 0.91 valid_loss = 0.92, train accuracy = 67.59 valid_accuracy = 67.50


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▆▇█
tr_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
tr_accuracy,67.58519
tr_loss,0.91133
val_accuracy,67.5
val_loss,0.91884


[34m[1mwandb[0m: Agent Starting Run: 5ebi4ceb with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 9.96 valid_accuracy = 10.32
epoch 2 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32
epoch 3 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 9.96 valid_accuracy = 10.32
epoch 4 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 9.96 valid_accuracy = 10.32
epoch 5 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 9.96 valid_accuracy = 10.32
epoch 6 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 9.96 valid_accuracy = 10.32
epoch 7 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 9.96 valid_accuracy = 10.32
epoch 8 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 9.96 valid_accuracy = 10.32
epoch 9 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 9.96 valid_accuracy = 10.32
epoch 10 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 9.96 valid_accuracy = 10.32


VBox(children=(Label(value='0.001 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.521309…

0,1
tr_accuracy,▁▁▁▁▁▁▁▁▁▁
tr_loss,▇█▇▆▅▄▃▂▂▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,██▇▆▅▄▃▂▂▁

0,1
tr_accuracy,9.96481
tr_loss,2.30355
val_accuracy,10.31667
val_loss,2.30419


[34m[1mwandb[0m: Agent Starting Run: 1zpcxl08 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.46 valid_loss = 0.47, train accuracy = 83.73 valid_accuracy = 82.85
epoch 2 : train_loss = 0.39 valid_loss = 0.41, train accuracy = 86.31 valid_accuracy = 85.35
epoch 3 : train_loss = 0.36 valid_loss = 0.38, train accuracy = 87.11 valid_accuracy = 86.03
epoch 4 : train_loss = 0.35 valid_loss = 0.37, train accuracy = 87.72 valid_accuracy = 86.72
epoch 5 : train_loss = 0.33 valid_loss = 0.37, train accuracy = 88.14 valid_accuracy = 86.92


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▅▆▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▅▆██
val_loss,█▄▂▁▁

0,1
tr_accuracy,88.14444
tr_loss,0.33129
val_accuracy,86.91667
val_loss,0.36657


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7il2lhzv with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 1.17 valid_loss = 1.17, train accuracy = 59.64 valid_accuracy = 60.12
epoch 2 : train_loss = 0.90 valid_loss = 0.90, train accuracy = 68.04 valid_accuracy = 67.97
epoch 3 : train_loss = 0.80 valid_loss = 0.80, train accuracy = 71.07 valid_accuracy = 71.47
epoch 4 : train_loss = 0.74 valid_loss = 0.74, train accuracy = 73.13 valid_accuracy = 73.40
epoch 5 : train_loss = 0.70 valid_loss = 0.70, train accuracy = 74.62 valid_accuracy = 74.58
epoch 6 : train_loss = 0.67 valid_loss = 0.67, train accuracy = 75.80 valid_accuracy = 75.72
epoch 7 : train_loss = 0.65 valid_loss = 0.65, train accuracy = 76.77 valid_accuracy = 76.50
epoch 8 : train_loss = 0.63 valid_loss = 0.63, train accuracy = 77.50 valid_accuracy = 77.22
epoch 9 : train_loss = 0.61 valid_loss = 0.61, train accuracy = 78.09 valid_accuracy = 77.93
epoch 10 : train_loss = 0.60 valid_loss = 0.60, train accuracy = 78.60 valid_accuracy = 78.43


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▅▆▇▇▇███
tr_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
tr_accuracy,78.5963
tr_loss,0.59609
val_accuracy,78.43333
val_loss,0.59953


[34m[1mwandb[0m: Agent Starting Run: 0qyf50w2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.55 valid_loss = 0.55, train accuracy = 82.67 valid_accuracy = 82.23
epoch 2 : train_loss = 0.54 valid_loss = 0.54, train accuracy = 83.29 valid_accuracy = 82.40
epoch 3 : train_loss = 0.53 valid_loss = 0.53, train accuracy = 83.34 valid_accuracy = 82.53
epoch 4 : train_loss = 0.53 valid_loss = 0.53, train accuracy = 83.36 valid_accuracy = 82.57
epoch 5 : train_loss = 0.52 valid_loss = 0.53, train accuracy = 83.30 valid_accuracy = 82.57
epoch 6 : train_loss = 0.52 valid_loss = 0.53, train accuracy = 83.31 valid_accuracy = 82.47
epoch 7 : train_loss = 0.52 valid_loss = 0.53, train accuracy = 83.29 valid_accuracy = 82.48
epoch 8 : train_loss = 0.52 valid_loss = 0.52, train accuracy = 83.32 valid_accuracy = 82.47
epoch 9 : train_loss = 0.52 valid_loss = 0.52, train accuracy = 83.31 valid_accuracy = 82.47
epoch 10 : train_loss = 0.52 valid_loss = 0.52, train accuracy = 83.32 valid_accuracy = 82.53


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.105634…

0,1
tr_accuracy,▁▇██▇▇▇█▇█
tr_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▅▇██▆▆▆▆▇
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
tr_accuracy,83.32037
tr_loss,0.51677
val_accuracy,82.53333
val_loss,0.52391


[34m[1mwandb[0m: Agent Starting Run: jntl8537 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.90 valid_loss = 0.90, train accuracy = 68.61 valid_accuracy = 68.73
epoch 2 : train_loss = 0.76 valid_loss = 0.77, train accuracy = 72.86 valid_accuracy = 72.87
epoch 3 : train_loss = 0.70 valid_loss = 0.71, train accuracy = 74.98 valid_accuracy = 74.78
epoch 4 : train_loss = 0.66 valid_loss = 0.67, train accuracy = 76.43 valid_accuracy = 76.02
epoch 5 : train_loss = 0.63 valid_loss = 0.65, train accuracy = 77.43 valid_accuracy = 76.78


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▆▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
tr_accuracy,77.42963
tr_loss,0.62923
val_accuracy,76.78333
val_loss,0.64724


[34m[1mwandb[0m: Agent Starting Run: ef20wrsf with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 2.13 valid_loss = 2.13, train accuracy = 23.83 valid_accuracy = 24.43
epoch 2 : train_loss = 1.32 valid_loss = 1.31, train accuracy = 47.93 valid_accuracy = 48.63
epoch 3 : train_loss = 0.97 valid_loss = 0.97, train accuracy = 62.72 valid_accuracy = 63.20
epoch 4 : train_loss = 0.81 valid_loss = 0.81, train accuracy = 68.28 valid_accuracy = 68.33
epoch 5 : train_loss = 0.73 valid_loss = 0.72, train accuracy = 72.63 valid_accuracy = 72.40
epoch 6 : train_loss = 0.67 valid_loss = 0.67, train accuracy = 75.26 valid_accuracy = 75.23
epoch 7 : train_loss = 0.62 valid_loss = 0.63, train accuracy = 77.29 valid_accuracy = 77.20
epoch 8 : train_loss = 0.58 valid_loss = 0.59, train accuracy = 79.01 valid_accuracy = 78.98
epoch 9 : train_loss = 0.55 valid_loss = 0.56, train accuracy = 80.73 valid_accuracy = 80.08
epoch 10 : train_loss = 0.52 valid_loss = 0.53, train accuracy = 82.07 valid_accuracy = 81.13


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▆▆▇▇▇███
tr_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▄▆▆▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
tr_accuracy,82.06667
tr_loss,0.51788
val_accuracy,81.13333
val_loss,0.5272


[34m[1mwandb[0m: Agent Starting Run: v6p63ec9 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.44 valid_loss = 0.46, train accuracy = 84.15 valid_accuracy = 83.62
epoch 2 : train_loss = 0.39 valid_loss = 0.41, train accuracy = 85.85 valid_accuracy = 85.02
epoch 3 : train_loss = 0.34 valid_loss = 0.38, train accuracy = 87.44 valid_accuracy = 86.50
epoch 4 : train_loss = 0.33 valid_loss = 0.38, train accuracy = 87.96 valid_accuracy = 86.93
epoch 5 : train_loss = 0.34 valid_loss = 0.39, train accuracy = 87.98 valid_accuracy = 86.87
epoch 6 : train_loss = 0.31 valid_loss = 0.38, train accuracy = 88.65 valid_accuracy = 87.17
epoch 7 : train_loss = 0.30 valid_loss = 0.37, train accuracy = 89.26 valid_accuracy = 87.58
epoch 8 : train_loss = 0.30 valid_loss = 0.38, train accuracy = 89.32 valid_accuracy = 87.60
epoch 9 : train_loss = 0.29 valid_loss = 0.38, train accuracy = 89.56 valid_accuracy = 87.15
epoch 10 : train_loss = 0.30 valid_loss = 0.38, train accuracy = 89.23 valid_accuracy = 87.12


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▃▅▆▆▇████
tr_loss,█▅▃▃▃▂▁▁▁▁
val_accuracy,▁▃▆▇▇▇██▇▇
val_loss,█▄▂▂▃▁▁▁▂▂

0,1
tr_accuracy,89.23148
tr_loss,0.29645
val_accuracy,87.11667
val_loss,0.38187


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ssm01omo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.51 valid_loss = 0.51, train accuracy = 82.37 valid_accuracy = 82.15
epoch 2 : train_loss = 0.50 valid_loss = 0.50, train accuracy = 82.70 valid_accuracy = 82.50
epoch 3 : train_loss = 0.50 valid_loss = 0.50, train accuracy = 82.54 valid_accuracy = 82.32
epoch 4 : train_loss = 0.50 valid_loss = 0.50, train accuracy = 82.60 valid_accuracy = 82.20
epoch 5 : train_loss = 0.50 valid_loss = 0.50, train accuracy = 82.43 valid_accuracy = 82.00


0,1
tr_accuracy,▁█▅▆▂
tr_loss,█▃▁▁▂
val_accuracy,▃█▅▄▁
val_loss,█▄▂▁▁

0,1
tr_accuracy,82.43148
tr_loss,0.49911
val_accuracy,82.0
val_loss,0.5007


[34m[1mwandb[0m: Agent Starting Run: 5167ijrj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 1.80 valid_loss = 1.80, train accuracy = 43.20 valid_accuracy = 43.12
epoch 2 : train_loss = 1.34 valid_loss = 1.34, train accuracy = 53.36 valid_accuracy = 53.70
epoch 3 : train_loss = 1.15 valid_loss = 1.15, train accuracy = 58.71 valid_accuracy = 59.17
epoch 4 : train_loss = 1.03 valid_loss = 1.03, train accuracy = 62.99 valid_accuracy = 63.18
epoch 5 : train_loss = 0.96 valid_loss = 0.96, train accuracy = 66.01 valid_accuracy = 66.53


0,1
tr_accuracy,▁▄▆▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
tr_accuracy,66.00741
tr_loss,0.96053
val_accuracy,66.53333
val_loss,0.95603


[34m[1mwandb[0m: Agent Starting Run: khcn72ve with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 1.25 valid_loss = 1.25, train accuracy = 57.88 valid_accuracy = 58.07
epoch 2 : train_loss = 0.84 valid_loss = 0.83, train accuracy = 68.12 valid_accuracy = 68.82
epoch 3 : train_loss = 0.68 valid_loss = 0.67, train accuracy = 74.22 valid_accuracy = 74.85
epoch 4 : train_loss = 0.61 valid_loss = 0.60, train accuracy = 77.26 valid_accuracy = 77.57
epoch 5 : train_loss = 0.56 valid_loss = 0.56, train accuracy = 79.91 valid_accuracy = 79.67
epoch 6 : train_loss = 0.53 valid_loss = 0.53, train accuracy = 81.42 valid_accuracy = 81.23
epoch 7 : train_loss = 0.50 valid_loss = 0.51, train accuracy = 82.39 valid_accuracy = 82.32
epoch 8 : train_loss = 0.48 valid_loss = 0.49, train accuracy = 83.12 valid_accuracy = 82.67
epoch 9 : train_loss = 0.46 valid_loss = 0.47, train accuracy = 83.77 valid_accuracy = 83.00
epoch 10 : train_loss = 0.45 valid_loss = 0.46, train accuracy = 84.24 valid_accuracy = 83.42


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▅▆▇▇████
tr_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▄▆▆▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
tr_accuracy,84.23519
tr_loss,0.44615
val_accuracy,83.41667
val_loss,0.45919


[34m[1mwandb[0m: Agent Starting Run: rfs593uv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.53 valid_loss = 0.53, train accuracy = 81.20 valid_accuracy = 80.83
epoch 2 : train_loss = 0.51 valid_loss = 0.52, train accuracy = 82.68 valid_accuracy = 82.08
epoch 3 : train_loss = 0.51 valid_loss = 0.52, train accuracy = 82.54 valid_accuracy = 82.12
epoch 4 : train_loss = 0.51 valid_loss = 0.52, train accuracy = 82.55 valid_accuracy = 82.08
epoch 5 : train_loss = 0.51 valid_loss = 0.51, train accuracy = 82.71 valid_accuracy = 82.38


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁█▇▇█
tr_loss,█▃▂▂▁
val_accuracy,▁▇▇▇█
val_loss,█▃▂▃▁

0,1
tr_accuracy,82.70741
tr_loss,0.50592
val_accuracy,82.38333
val_loss,0.51261


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: thk4irwv with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.89 valid_loss = 0.88, train accuracy = 68.88 valid_accuracy = 69.17
epoch 2 : train_loss = 0.69 valid_loss = 0.70, train accuracy = 74.99 valid_accuracy = 74.47
epoch 3 : train_loss = 0.61 valid_loss = 0.63, train accuracy = 77.71 valid_accuracy = 77.00
epoch 4 : train_loss = 0.57 valid_loss = 0.59, train accuracy = 79.51 valid_accuracy = 78.43
epoch 5 : train_loss = 0.53 valid_loss = 0.57, train accuracy = 80.64 valid_accuracy = 79.15
epoch 6 : train_loss = 0.51 valid_loss = 0.55, train accuracy = 81.54 valid_accuracy = 79.77
epoch 7 : train_loss = 0.49 valid_loss = 0.53, train accuracy = 82.35 valid_accuracy = 80.47
epoch 8 : train_loss = 0.47 valid_loss = 0.52, train accuracy = 82.92 valid_accuracy = 81.22
epoch 9 : train_loss = 0.45 valid_loss = 0.51, train accuracy = 83.54 valid_accuracy = 81.63
epoch 10 : train_loss = 0.44 valid_loss = 0.50, train accuracy = 83.99 valid_accuracy = 81.92


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▅▆▆▇▇███
tr_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▆▇▇███
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
tr_accuracy,83.99074
tr_loss,0.43865
val_accuracy,81.91667
val_loss,0.49752


[34m[1mwandb[0m: Agent Starting Run: 4gr34mhc with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.40 valid_loss = 0.42, train accuracy = 85.51 valid_accuracy = 84.97
epoch 2 : train_loss = 0.36 valid_loss = 0.39, train accuracy = 86.78 valid_accuracy = 85.82
epoch 3 : train_loss = 0.35 valid_loss = 0.39, train accuracy = 87.17 valid_accuracy = 86.18
epoch 4 : train_loss = 0.33 valid_loss = 0.38, train accuracy = 87.74 valid_accuracy = 86.45
epoch 5 : train_loss = 0.32 valid_loss = 0.38, train accuracy = 87.94 valid_accuracy = 86.45
epoch 6 : train_loss = 0.31 valid_loss = 0.37, train accuracy = 88.34 valid_accuracy = 86.98
epoch 7 : train_loss = 0.32 valid_loss = 0.39, train accuracy = 88.20 valid_accuracy = 86.73
epoch 8 : train_loss = 0.30 valid_loss = 0.38, train accuracy = 88.78 valid_accuracy = 86.93
epoch 9 : train_loss = 0.30 valid_loss = 0.38, train accuracy = 88.79 valid_accuracy = 87.40
epoch 10 : train_loss = 0.29 valid_loss = 0.37, train accuracy = 89.34 valid_accuracy = 87.60


0,1
tr_accuracy,▁▃▄▅▅▆▆▇▇█
tr_loss,█▅▅▄▃▃▃▂▂▁
val_accuracy,▁▃▄▅▅▆▆▆▇█
val_loss,█▃▃▁▂▁▃▂▂▁

0,1
tr_accuracy,89.34444
tr_loss,0.28548
val_accuracy,87.6
val_loss,0.37345


[34m[1mwandb[0m: Agent Starting Run: lw061f6c with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.71 valid_loss = 0.71, train accuracy = 71.78 valid_accuracy = 71.78
epoch 2 : train_loss = 0.54 valid_loss = 0.55, train accuracy = 80.00 valid_accuracy = 79.63
epoch 3 : train_loss = 0.44 valid_loss = 0.45, train accuracy = 84.74 valid_accuracy = 84.12
epoch 4 : train_loss = 0.41 valid_loss = 0.43, train accuracy = 85.61 valid_accuracy = 84.92
epoch 5 : train_loss = 0.39 valid_loss = 0.41, train accuracy = 86.32 valid_accuracy = 85.52
epoch 6 : train_loss = 0.38 valid_loss = 0.41, train accuracy = 86.59 valid_accuracy = 85.70
epoch 7 : train_loss = 0.37 valid_loss = 0.41, train accuracy = 87.00 valid_accuracy = 85.70
epoch 8 : train_loss = 0.36 valid_loss = 0.40, train accuracy = 87.27 valid_accuracy = 85.80
epoch 9 : train_loss = 0.35 valid_loss = 0.40, train accuracy = 87.46 valid_accuracy = 85.82
epoch 10 : train_loss = 0.34 valid_loss = 0.40, train accuracy = 87.67 valid_accuracy = 85.88


VBox(children=(Label(value='0.001 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.518614…

0,1
tr_accuracy,▁▅▇▇▇█████
tr_loss,█▅▃▂▂▂▁▁▁▁
val_accuracy,▁▅▇███████
val_loss,█▄▂▂▁▁▁▁▁▁

0,1
tr_accuracy,87.66852
tr_loss,0.34427
val_accuracy,85.88333
val_loss,0.39597


[34m[1mwandb[0m: Agent Starting Run: m6cli6x5 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.48 valid_loss = 0.49, train accuracy = 83.34 valid_accuracy = 82.37
epoch 2 : train_loss = 0.42 valid_loss = 0.44, train accuracy = 85.29 valid_accuracy = 84.15
epoch 3 : train_loss = 0.39 valid_loss = 0.41, train accuracy = 86.33 valid_accuracy = 84.93
epoch 4 : train_loss = 0.37 valid_loss = 0.39, train accuracy = 87.06 valid_accuracy = 85.68
epoch 5 : train_loss = 0.35 valid_loss = 0.38, train accuracy = 87.56 valid_accuracy = 86.28


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.111049…

0,1
tr_accuracy,▁▄▆▇█
tr_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
tr_accuracy,87.55556
tr_loss,0.34851
val_accuracy,86.28333
val_loss,0.37624


[34m[1mwandb[0m: Agent Starting Run: pziyc5qc with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.72 valid_loss = 0.72, train accuracy = 74.09 valid_accuracy = 73.18
epoch 2 : train_loss = 0.48 valid_loss = 0.50, train accuracy = 83.93 valid_accuracy = 83.20
epoch 3 : train_loss = 0.41 valid_loss = 0.44, train accuracy = 85.77 valid_accuracy = 84.63
epoch 4 : train_loss = 0.38 valid_loss = 0.41, train accuracy = 86.95 valid_accuracy = 85.78
epoch 5 : train_loss = 0.35 valid_loss = 0.40, train accuracy = 87.55 valid_accuracy = 86.17


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▆▇██
tr_loss,█▃▂▁▁
val_accuracy,▁▆▇██
val_loss,█▃▂▁▁

0,1
tr_accuracy,87.54815
tr_loss,0.35435
val_accuracy,86.16667
val_loss,0.40104


[34m[1mwandb[0m: Agent Starting Run: nebd2g1p with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.92 valid_loss = 0.94, train accuracy = 65.34 valid_accuracy = 65.48
epoch 2 : train_loss = 0.75 valid_loss = 0.76, train accuracy = 73.15 valid_accuracy = 73.43
epoch 3 : train_loss = 0.64 valid_loss = 0.64, train accuracy = 77.98 valid_accuracy = 77.57
epoch 4 : train_loss = 0.55 valid_loss = 0.56, train accuracy = 81.49 valid_accuracy = 80.83
epoch 5 : train_loss = 0.50 valid_loss = 0.51, train accuracy = 83.45 valid_accuracy = 82.52


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▆▇█
tr_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
tr_accuracy,83.44815
tr_loss,0.49907
val_accuracy,82.51667
val_loss,0.51061


[34m[1mwandb[0m: Agent Starting Run: 607jvxiq with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.46 valid_loss = 0.47, train accuracy = 83.82 valid_accuracy = 83.02
epoch 2 : train_loss = 0.40 valid_loss = 0.42, train accuracy = 85.85 valid_accuracy = 84.88
epoch 3 : train_loss = 0.37 valid_loss = 0.39, train accuracy = 87.03 valid_accuracy = 86.37
epoch 4 : train_loss = 0.34 valid_loss = 0.37, train accuracy = 87.79 valid_accuracy = 86.82
epoch 5 : train_loss = 0.32 valid_loss = 0.36, train accuracy = 88.36 valid_accuracy = 87.15
epoch 6 : train_loss = 0.31 valid_loss = 0.35, train accuracy = 88.86 valid_accuracy = 87.40
epoch 7 : train_loss = 0.30 valid_loss = 0.34, train accuracy = 89.25 valid_accuracy = 87.80
epoch 8 : train_loss = 0.29 valid_loss = 0.34, train accuracy = 89.56 valid_accuracy = 87.97
epoch 9 : train_loss = 0.28 valid_loss = 0.34, train accuracy = 89.91 valid_accuracy = 88.07
epoch 10 : train_loss = 0.27 valid_loss = 0.33, train accuracy = 90.17 valid_accuracy = 88.18


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▃▅▅▆▇▇▇██
tr_loss,█▆▄▄▃▂▂▂▁▁
val_accuracy,▁▄▆▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
tr_accuracy,90.16852
tr_loss,0.27086
val_accuracy,88.18333
val_loss,0.33322


[34m[1mwandb[0m: Agent Starting Run: qp6053kn with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 2.29 valid_loss = 2.29, train accuracy = 16.20 valid_accuracy = 16.12
epoch 2 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 10.00 valid_accuracy = 10.03
epoch 3 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 10.00 valid_accuracy = 10.03
epoch 4 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 10.00 valid_accuracy = 10.03
epoch 5 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 10.00 valid_accuracy = 10.03


0,1
tr_accuracy,█▁▁▁▁
tr_loss,▁▄███
val_accuracy,█▁▁▁▁
val_loss,▁▄███

0,1
tr_accuracy,9.9963
tr_loss,2.30284
val_accuracy,10.03333
val_loss,2.30312


[34m[1mwandb[0m: Agent Starting Run: db6e6ol9 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.43 valid_loss = 0.44, train accuracy = 84.60 valid_accuracy = 83.73
epoch 2 : train_loss = 0.37 valid_loss = 0.40, train accuracy = 86.53 valid_accuracy = 85.60
epoch 3 : train_loss = 0.33 valid_loss = 0.37, train accuracy = 87.88 valid_accuracy = 86.92
epoch 4 : train_loss = 0.31 valid_loss = 0.36, train accuracy = 88.45 valid_accuracy = 87.20
epoch 5 : train_loss = 0.30 valid_loss = 0.35, train accuracy = 89.06 valid_accuracy = 87.50
epoch 6 : train_loss = 0.28 valid_loss = 0.34, train accuracy = 89.63 valid_accuracy = 87.87
epoch 7 : train_loss = 0.27 valid_loss = 0.34, train accuracy = 89.83 valid_accuracy = 87.77
epoch 8 : train_loss = 0.27 valid_loss = 0.34, train accuracy = 90.16 valid_accuracy = 87.92
epoch 9 : train_loss = 0.25 valid_loss = 0.33, train accuracy = 90.60 valid_accuracy = 87.97
epoch 10 : train_loss = 0.24 valid_loss = 0.33, train accuracy = 90.96 valid_accuracy = 88.32


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▃▅▅▆▇▇▇██
tr_loss,█▆▄▄▃▂▂▂▁▁
val_accuracy,▁▄▆▆▇▇▇▇▇█
val_loss,█▅▄▃▂▂▂▂▁▁

0,1
tr_accuracy,90.96296
tr_loss,0.24485
val_accuracy,88.31667
val_loss,0.32663


[34m[1mwandb[0m: Agent Starting Run: dfanjqhs with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.49 valid_loss = 0.50, train accuracy = 82.40 valid_accuracy = 82.28
epoch 2 : train_loss = 0.46 valid_loss = 0.47, train accuracy = 83.77 valid_accuracy = 83.17
epoch 3 : train_loss = 0.45 valid_loss = 0.46, train accuracy = 84.10 valid_accuracy = 83.32
epoch 4 : train_loss = 0.44 valid_loss = 0.46, train accuracy = 84.18 valid_accuracy = 83.43
epoch 5 : train_loss = 0.44 valid_loss = 0.45, train accuracy = 84.42 valid_accuracy = 83.60
epoch 6 : train_loss = 0.45 valid_loss = 0.47, train accuracy = 83.88 valid_accuracy = 82.87
epoch 7 : train_loss = 0.44 valid_loss = 0.46, train accuracy = 84.21 valid_accuracy = 83.25
epoch 8 : train_loss = 0.44 valid_loss = 0.46, train accuracy = 84.31 valid_accuracy = 83.53
epoch 9 : train_loss = 0.43 valid_loss = 0.45, train accuracy = 84.93 valid_accuracy = 83.90
epoch 10 : train_loss = 0.42 valid_loss = 0.44, train accuracy = 85.22 valid_accuracy = 84.00


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▅▅▆▅▅▆▇█
tr_loss,█▅▃▃▂▄▃▃▁▁
val_accuracy,▁▅▅▆▆▃▅▆██
val_loss,█▄▃▃▂▄▃▃▂▁

0,1
tr_accuracy,85.22037
tr_loss,0.42423
val_accuracy,84.0
val_loss,0.4411


[34m[1mwandb[0m: Agent Starting Run: vmpx2sma with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 5.62 valid_loss = 5.61, train accuracy = 33.98 valid_accuracy = 33.27
epoch 2 : train_loss = 2.65 valid_loss = 2.64, train accuracy = 46.64 valid_accuracy = 46.40
epoch 3 : train_loss = 2.12 valid_loss = 2.08, train accuracy = 48.18 valid_accuracy = 47.98
epoch 4 : train_loss = 1.75 valid_loss = 1.77, train accuracy = 52.52 valid_accuracy = 51.80
epoch 5 : train_loss = 1.60 valid_loss = 1.58, train accuracy = 54.26 valid_accuracy = 54.00


0,1
tr_accuracy,▁▅▆▇█
tr_loss,█▃▂▁▁
val_accuracy,▁▅▆▇█
val_loss,█▃▂▁▁

0,1
tr_accuracy,54.26481
tr_loss,1.597
val_accuracy,54.0
val_loss,1.58461


[34m[1mwandb[0m: Agent Starting Run: gs8nrszs with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.70 valid_loss = 0.70, train accuracy = 79.16 valid_accuracy = 79.30
epoch 2 : train_loss = 0.54 valid_loss = 0.55, train accuracy = 82.57 valid_accuracy = 81.97
epoch 3 : train_loss = 0.47 valid_loss = 0.49, train accuracy = 84.14 valid_accuracy = 83.50
epoch 4 : train_loss = 0.44 valid_loss = 0.45, train accuracy = 85.17 valid_accuracy = 84.42
epoch 5 : train_loss = 0.41 valid_loss = 0.43, train accuracy = 85.89 valid_accuracy = 85.02


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.111160…

0,1
tr_accuracy,▁▅▆▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▂▂▁

0,1
tr_accuracy,85.88519
tr_loss,0.41043
val_accuracy,85.01667
val_loss,0.42859


[34m[1mwandb[0m: Agent Starting Run: waibf9io with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.60 valid_loss = 0.60, train accuracy = 79.33 valid_accuracy = 78.55
epoch 2 : train_loss = 0.60 valid_loss = 0.60, train accuracy = 78.70 valid_accuracy = 78.07
epoch 3 : train_loss = 0.56 valid_loss = 0.57, train accuracy = 80.78 valid_accuracy = 80.05
epoch 4 : train_loss = 0.56 valid_loss = 0.56, train accuracy = 81.35 valid_accuracy = 80.80
epoch 5 : train_loss = 0.55 valid_loss = 0.56, train accuracy = 81.52 valid_accuracy = 80.53
epoch 6 : train_loss = 0.54 valid_loss = 0.55, train accuracy = 81.76 valid_accuracy = 80.73
epoch 7 : train_loss = 0.53 valid_loss = 0.54, train accuracy = 82.01 valid_accuracy = 81.00
epoch 8 : train_loss = 0.53 valid_loss = 0.53, train accuracy = 82.31 valid_accuracy = 81.28
epoch 9 : train_loss = 0.52 valid_loss = 0.53, train accuracy = 82.43 valid_accuracy = 81.35
epoch 10 : train_loss = 0.52 valid_loss = 0.53, train accuracy = 82.43 valid_accuracy = 81.37


0,1
tr_accuracy,▂▁▅▆▆▇▇███
tr_loss,██▅▅▄▃▃▂▁▁
val_accuracy,▂▁▅▇▆▇▇███
val_loss,██▅▄▄▃▂▂▁▁

0,1
tr_accuracy,82.42963
tr_loss,0.51551
val_accuracy,81.36667
val_loss,0.52554


[34m[1mwandb[0m: Agent Starting Run: zyrfbuuv with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 1.68 valid_loss = 1.68, train accuracy = 39.86 valid_accuracy = 39.92
epoch 2 : train_loss = 1.11 valid_loss = 1.10, train accuracy = 58.48 valid_accuracy = 59.02
epoch 3 : train_loss = 0.90 valid_loss = 0.89, train accuracy = 65.69 valid_accuracy = 65.98
epoch 4 : train_loss = 0.78 valid_loss = 0.78, train accuracy = 70.18 valid_accuracy = 70.38
epoch 5 : train_loss = 0.69 valid_loss = 0.69, train accuracy = 74.03 valid_accuracy = 74.22
epoch 6 : train_loss = 0.62 valid_loss = 0.63, train accuracy = 77.06 valid_accuracy = 76.98
epoch 7 : train_loss = 0.57 valid_loss = 0.58, train accuracy = 79.80 valid_accuracy = 79.40
epoch 8 : train_loss = 0.53 valid_loss = 0.54, train accuracy = 81.72 valid_accuracy = 81.08
epoch 9 : train_loss = 0.50 valid_loss = 0.51, train accuracy = 82.84 valid_accuracy = 82.08
epoch 10 : train_loss = 0.48 valid_loss = 0.49, train accuracy = 83.52 valid_accuracy = 82.80


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.097836…

0,1
tr_accuracy,▁▄▅▆▆▇▇███
tr_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
tr_accuracy,83.51667
tr_loss,0.47538
val_accuracy,82.8
val_loss,0.48953


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7vrc0he1 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.55 valid_loss = 0.55, train accuracy = 81.74 valid_accuracy = 82.00
epoch 2 : train_loss = 0.47 valid_loss = 0.47, train accuracy = 84.19 valid_accuracy = 83.60
epoch 3 : train_loss = 0.43 valid_loss = 0.44, train accuracy = 85.14 valid_accuracy = 84.45
epoch 4 : train_loss = 0.41 valid_loss = 0.42, train accuracy = 85.84 valid_accuracy = 85.07
epoch 5 : train_loss = 0.39 valid_loss = 0.41, train accuracy = 86.41 valid_accuracy = 85.47


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▅▆▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
tr_accuracy,86.40741
tr_loss,0.38996
val_accuracy,85.46667
val_loss,0.40864


[34m[1mwandb[0m: Agent Starting Run: q5owcjci with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.58 valid_loss = 0.60, train accuracy = 79.15 valid_accuracy = 78.33
epoch 2 : train_loss = 0.51 valid_loss = 0.54, train accuracy = 81.62 valid_accuracy = 80.45
epoch 3 : train_loss = 0.47 valid_loss = 0.52, train accuracy = 82.89 valid_accuracy = 81.23
epoch 4 : train_loss = 0.45 valid_loss = 0.51, train accuracy = 83.65 valid_accuracy = 81.82
epoch 5 : train_loss = 0.44 valid_loss = 0.49, train accuracy = 84.15 valid_accuracy = 82.27


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▆▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
tr_accuracy,84.1537
tr_loss,0.43685
val_accuracy,82.26667
val_loss,0.49343


[34m[1mwandb[0m: Agent Starting Run: bzyv2vcq with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.46 valid_loss = 0.48, train accuracy = 83.76 valid_accuracy = 83.10
epoch 2 : train_loss = 0.42 valid_loss = 0.44, train accuracy = 85.01 valid_accuracy = 84.32
epoch 3 : train_loss = 0.39 valid_loss = 0.42, train accuracy = 86.34 valid_accuracy = 85.15
epoch 4 : train_loss = 0.40 valid_loss = 0.43, train accuracy = 86.16 valid_accuracy = 85.20
epoch 5 : train_loss = 0.38 valid_loss = 0.42, train accuracy = 86.54 valid_accuracy = 85.35
epoch 6 : train_loss = 0.36 valid_loss = 0.41, train accuracy = 87.12 valid_accuracy = 85.87
epoch 7 : train_loss = 0.38 valid_loss = 0.42, train accuracy = 86.81 valid_accuracy = 85.12
epoch 8 : train_loss = 0.38 valid_loss = 0.43, train accuracy = 87.00 valid_accuracy = 85.93
epoch 9 : train_loss = 0.44 valid_loss = 0.50, train accuracy = 84.27 valid_accuracy = 83.35
epoch 10 : train_loss = 0.34 valid_loss = 0.41, train accuracy = 87.69 valid_accuracy = 86.12


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▃▆▅▆▇▆▇▂█
tr_loss,█▅▄▄▃▂▃▃▇▁
val_accuracy,▁▄▆▆▆▇▆█▂█
val_loss,▇▃▂▂▂▁▂▃█▁

0,1
tr_accuracy,87.69444
tr_loss,0.34391
val_accuracy,86.11667
val_loss,0.41138


[34m[1mwandb[0m: Agent Starting Run: 91o74gpe with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.62 valid_loss = 0.63, train accuracy = 77.74 valid_accuracy = 77.77
epoch 2 : train_loss = 0.49 valid_loss = 0.50, train accuracy = 82.56 valid_accuracy = 82.02
epoch 3 : train_loss = 0.41 valid_loss = 0.43, train accuracy = 85.54 valid_accuracy = 84.83
epoch 4 : train_loss = 0.38 valid_loss = 0.40, train accuracy = 86.64 valid_accuracy = 85.85
epoch 5 : train_loss = 0.35 valid_loss = 0.38, train accuracy = 87.35 valid_accuracy = 86.60


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.102469…

0,1
tr_accuracy,▁▅▇▇█
tr_loss,█▅▃▂▁
val_accuracy,▁▄▇▇█
val_loss,█▄▂▂▁

0,1
tr_accuracy,87.3537
tr_loss,0.35376
val_accuracy,86.6
val_loss,0.38419


[34m[1mwandb[0m: Agent Starting Run: o07z1euo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 9.02 valid_loss = 9.09, train accuracy = 14.71 valid_accuracy = 14.17
epoch 2 : train_loss = 7.06 valid_loss = 7.19, train accuracy = 19.41 valid_accuracy = 18.72
epoch 3 : train_loss = 5.83 valid_loss = 5.91, train accuracy = 24.04 valid_accuracy = 23.58
epoch 4 : train_loss = 4.99 valid_loss = 5.06, train accuracy = 27.72 valid_accuracy = 27.98
epoch 5 : train_loss = 4.38 valid_loss = 4.49, train accuracy = 30.59 valid_accuracy = 30.30
epoch 6 : train_loss = 3.90 valid_loss = 4.01, train accuracy = 33.36 valid_accuracy = 33.00
epoch 7 : train_loss = 3.53 valid_loss = 3.67, train accuracy = 35.51 valid_accuracy = 34.43
epoch 8 : train_loss = 3.26 valid_loss = 3.43, train accuracy = 37.14 valid_accuracy = 35.53
epoch 9 : train_loss = 3.01 valid_loss = 3.16, train accuracy = 38.87 valid_accuracy = 38.78
epoch 10 : train_loss = 2.80 valid_loss = 2.92, train accuracy = 40.38 valid_accuracy = 39.25


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.097827…

0,1
tr_accuracy,▁▂▄▅▅▆▇▇██
tr_loss,█▆▄▃▃▂▂▂▁▁
val_accuracy,▁▂▄▅▆▆▇▇██
val_loss,█▆▄▃▃▂▂▂▁▁

0,1
tr_accuracy,40.37593
tr_loss,2.7966
val_accuracy,39.25
val_loss,2.9161


[34m[1mwandb[0m: Agent Starting Run: piwlz4yh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.50 valid_loss = 0.50, train accuracy = 82.24 valid_accuracy = 81.88
epoch 2 : train_loss = 0.42 valid_loss = 0.42, train accuracy = 85.07 valid_accuracy = 84.70
epoch 3 : train_loss = 0.38 valid_loss = 0.39, train accuracy = 86.42 valid_accuracy = 85.85
epoch 4 : train_loss = 0.36 valid_loss = 0.37, train accuracy = 87.09 valid_accuracy = 86.08
epoch 5 : train_loss = 0.34 valid_loss = 0.37, train accuracy = 87.57 valid_accuracy = 86.27
epoch 6 : train_loss = 0.33 valid_loss = 0.36, train accuracy = 87.98 valid_accuracy = 86.40
epoch 7 : train_loss = 0.32 valid_loss = 0.35, train accuracy = 88.24 valid_accuracy = 86.57
epoch 8 : train_loss = 0.31 valid_loss = 0.35, train accuracy = 88.49 valid_accuracy = 86.75
epoch 9 : train_loss = 0.30 valid_loss = 0.35, train accuracy = 88.74 valid_accuracy = 86.83
epoch 10 : train_loss = 0.29 valid_loss = 0.35, train accuracy = 89.00 valid_accuracy = 87.03


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.097587…

0,1
tr_accuracy,▁▄▅▆▇▇▇▇██
tr_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▅▆▇▇▇▇███
val_loss,█▅▃▂▂▂▁▁▁▁

0,1
tr_accuracy,88.9963
tr_loss,0.29457
val_accuracy,87.03333
val_loss,0.34639


[34m[1mwandb[0m: Agent Starting Run: lpw8aba5 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.60 valid_loss = 0.60, train accuracy = 81.33 valid_accuracy = 80.85
epoch 2 : train_loss = 0.59 valid_loss = 0.58, train accuracy = 81.72 valid_accuracy = 81.17
epoch 3 : train_loss = 0.59 valid_loss = 0.59, train accuracy = 81.54 valid_accuracy = 81.17
epoch 4 : train_loss = 0.59 valid_loss = 0.59, train accuracy = 81.30 valid_accuracy = 81.22
epoch 5 : train_loss = 0.59 valid_loss = 0.59, train accuracy = 81.17 valid_accuracy = 80.92
epoch 6 : train_loss = 0.59 valid_loss = 0.59, train accuracy = 81.04 valid_accuracy = 80.68
epoch 7 : train_loss = 0.59 valid_loss = 0.59, train accuracy = 80.98 valid_accuracy = 80.73
epoch 8 : train_loss = 0.59 valid_loss = 0.59, train accuracy = 80.89 valid_accuracy = 80.67
epoch 9 : train_loss = 0.60 valid_loss = 0.59, train accuracy = 80.85 valid_accuracy = 80.45
epoch 10 : train_loss = 0.60 valid_loss = 0.59, train accuracy = 80.82 valid_accuracy = 80.35


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▅█▇▅▄▃▂▂▁▁
tr_loss,█▁▂▃▄▄▅▅▅▆
val_accuracy,▅███▆▄▄▄▂▁
val_loss,█▁▂▃▄▅▆▆▆▇

0,1
tr_accuracy,80.81667
tr_loss,0.59564
val_accuracy,80.35
val_loss,0.59487


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vgfkyrmy with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.47 valid_loss = 0.49, train accuracy = 82.79 valid_accuracy = 82.05
epoch 2 : train_loss = 0.45 valid_loss = 0.47, train accuracy = 83.29 valid_accuracy = 82.65
epoch 3 : train_loss = 0.40 valid_loss = 0.43, train accuracy = 85.07 valid_accuracy = 83.52
epoch 4 : train_loss = 0.39 valid_loss = 0.43, train accuracy = 84.95 valid_accuracy = 83.88
epoch 5 : train_loss = 0.39 valid_loss = 0.43, train accuracy = 85.20 valid_accuracy = 83.82
epoch 6 : train_loss = 0.37 valid_loss = 0.41, train accuracy = 86.46 valid_accuracy = 85.25
epoch 7 : train_loss = 0.35 valid_loss = 0.40, train accuracy = 87.15 valid_accuracy = 86.17
epoch 8 : train_loss = 0.39 valid_loss = 0.44, train accuracy = 85.61 valid_accuracy = 84.20
epoch 9 : train_loss = 0.37 valid_loss = 0.43, train accuracy = 86.42 valid_accuracy = 84.78
epoch 10 : train_loss = 0.36 valid_loss = 0.41, train accuracy = 86.93 valid_accuracy = 85.48


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▂▅▄▅▇█▆▇█
tr_loss,█▇▄▃▃▂▁▃▂▁
val_accuracy,▁▂▃▄▄▆█▅▆▇
val_loss,█▇▄▃▃▂▁▄▃▂

0,1
tr_accuracy,86.93333
tr_loss,0.35551
val_accuracy,85.48333
val_loss,0.41081


[34m[1mwandb[0m: Agent Starting Run: e0eza2zr with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.68 valid_loss = 0.68, train accuracy = 79.03 valid_accuracy = 79.13
epoch 2 : train_loss = 0.69 valid_loss = 0.69, train accuracy = 76.01 valid_accuracy = 76.07
epoch 3 : train_loss = 0.69 valid_loss = 0.69, train accuracy = 76.06 valid_accuracy = 75.65
epoch 4 : train_loss = 0.69 valid_loss = 0.69, train accuracy = 76.20 valid_accuracy = 75.77
epoch 5 : train_loss = 0.69 valid_loss = 0.69, train accuracy = 76.17 valid_accuracy = 75.58
epoch 6 : train_loss = 0.69 valid_loss = 0.69, train accuracy = 76.11 valid_accuracy = 75.45
epoch 7 : train_loss = 0.69 valid_loss = 0.69, train accuracy = 76.09 valid_accuracy = 75.42
epoch 8 : train_loss = 0.69 valid_loss = 0.69, train accuracy = 76.23 valid_accuracy = 75.70
epoch 9 : train_loss = 0.69 valid_loss = 0.69, train accuracy = 76.31 valid_accuracy = 75.73
epoch 10 : train_loss = 0.69 valid_loss = 0.69, train accuracy = 76.33 valid_accuracy = 75.73


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.108411…

0,1
tr_accuracy,█▁▁▁▁▁▁▁▂▂
tr_loss,▁█▆▆▇▇█▇▇▇
val_accuracy,█▂▁▂▁▁▁▂▂▂
val_loss,▁▇▆▇▇██▇▇█

0,1
tr_accuracy,76.33148
tr_loss,0.69092
val_accuracy,75.73333
val_loss,0.69268


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1v4ivsah with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.43 valid_loss = 0.45, train accuracy = 84.32 valid_accuracy = 83.27
epoch 2 : train_loss = 0.38 valid_loss = 0.41, train accuracy = 86.13 valid_accuracy = 85.10
epoch 3 : train_loss = 0.35 valid_loss = 0.39, train accuracy = 87.11 valid_accuracy = 85.63
epoch 4 : train_loss = 0.33 valid_loss = 0.38, train accuracy = 87.77 valid_accuracy = 86.10
epoch 5 : train_loss = 0.32 valid_loss = 0.37, train accuracy = 88.30 valid_accuracy = 86.60


0,1
tr_accuracy,▁▄▆▇█
tr_loss,█▅▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
tr_accuracy,88.3037
tr_loss,0.31802
val_accuracy,86.6
val_loss,0.36694


[34m[1mwandb[0m: Agent Starting Run: rl2mjmeb with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 10.00 valid_accuracy = 10.03
epoch 2 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 10.00 valid_accuracy = 10.03
epoch 3 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 10.00 valid_accuracy = 10.03
epoch 4 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 10.00 valid_accuracy = 10.03
epoch 5 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 10.00 valid_accuracy = 10.03


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.110975…

0,1
tr_accuracy,▁▁▁▁▁
tr_loss,█▆▆▃▁
val_accuracy,▁▁▁▁▁
val_loss,▂█▇▄▁

0,1
tr_accuracy,9.9963
tr_loss,2.30271
val_accuracy,10.03333
val_loss,2.30304


[34m[1mwandb[0m: Agent Starting Run: f7q7qpre with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.69 valid_loss = 0.69, train accuracy = 80.05 valid_accuracy = 79.48
epoch 2 : train_loss = 0.67 valid_loss = 0.68, train accuracy = 80.33 valid_accuracy = 79.50
epoch 3 : train_loss = 0.67 valid_loss = 0.67, train accuracy = 80.39 valid_accuracy = 79.83
epoch 4 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 80.49 valid_accuracy = 80.20
epoch 5 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 80.57 valid_accuracy = 80.35


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▅▆▇█
tr_loss,█▅▄▂▁
val_accuracy,▁▁▄▇█
val_loss,█▅▄▂▁

0,1
tr_accuracy,80.57407
tr_loss,0.65823
val_accuracy,80.35
val_loss,0.65917


[34m[1mwandb[0m: Agent Starting Run: 9u75pk81 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.40 valid_loss = 0.42, train accuracy = 85.45 valid_accuracy = 84.65
epoch 2 : train_loss = 0.35 valid_loss = 0.38, train accuracy = 87.28 valid_accuracy = 86.52
epoch 3 : train_loss = 0.32 valid_loss = 0.36, train accuracy = 88.25 valid_accuracy = 87.03
epoch 4 : train_loss = 0.31 valid_loss = 0.35, train accuracy = 88.62 valid_accuracy = 87.33
epoch 5 : train_loss = 0.30 valid_loss = 0.36, train accuracy = 88.85 valid_accuracy = 87.05
epoch 6 : train_loss = 0.29 valid_loss = 0.35, train accuracy = 89.34 valid_accuracy = 87.40
epoch 7 : train_loss = 0.29 valid_loss = 0.36, train accuracy = 89.41 valid_accuracy = 87.13
epoch 8 : train_loss = 0.26 valid_loss = 0.35, train accuracy = 90.13 valid_accuracy = 87.33
epoch 9 : train_loss = 0.25 valid_loss = 0.34, train accuracy = 90.77 valid_accuracy = 87.57
epoch 10 : train_loss = 0.24 valid_loss = 0.34, train accuracy = 90.94 valid_accuracy = 87.93


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▃▅▅▅▆▆▇██
tr_loss,█▆▄▄▄▃▃▂▁▁
val_accuracy,▁▅▆▇▆▇▆▇▇█
val_loss,█▄▃▂▂▂▃▂▁▁

0,1
tr_accuracy,90.93889
tr_loss,0.24317
val_accuracy,87.93333
val_loss,0.3437


[34m[1mwandb[0m: Agent Starting Run: 1uk8d1jk with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.70 valid_loss = 0.69, train accuracy = 75.87 valid_accuracy = 75.65
epoch 2 : train_loss = 0.58 valid_loss = 0.58, train accuracy = 80.53 valid_accuracy = 80.08
epoch 3 : train_loss = 0.53 valid_loss = 0.54, train accuracy = 81.89 valid_accuracy = 81.42
epoch 4 : train_loss = 0.50 valid_loss = 0.51, train accuracy = 82.65 valid_accuracy = 82.02
epoch 5 : train_loss = 0.48 valid_loss = 0.49, train accuracy = 83.32 valid_accuracy = 82.62


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▅▇▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
tr_accuracy,83.31667
tr_loss,0.48293
val_accuracy,82.61667
val_loss,0.49001


[34m[1mwandb[0m: Agent Starting Run: 8azyb2b4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 1.15 valid_loss = 1.15, train accuracy = 59.74 valid_accuracy = 59.93
epoch 2 : train_loss = 0.90 valid_loss = 0.90, train accuracy = 67.46 valid_accuracy = 67.43
epoch 3 : train_loss = 0.81 valid_loss = 0.81, train accuracy = 71.08 valid_accuracy = 71.28
epoch 4 : train_loss = 0.75 valid_loss = 0.75, train accuracy = 73.18 valid_accuracy = 73.35
epoch 5 : train_loss = 0.72 valid_loss = 0.71, train accuracy = 74.55 valid_accuracy = 74.97


0,1
tr_accuracy,▁▅▆▇█
tr_loss,█▄▂▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▂▂▁

0,1
tr_accuracy,74.55
tr_loss,0.71521
val_accuracy,74.96667
val_loss,0.71493


[34m[1mwandb[0m: Agent Starting Run: ptsh4jez with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.91 valid_loss = 0.92, train accuracy = 64.94 valid_accuracy = 64.42
epoch 2 : train_loss = 0.69 valid_loss = 0.71, train accuracy = 74.52 valid_accuracy = 74.05
epoch 3 : train_loss = 0.57 valid_loss = 0.58, train accuracy = 79.74 valid_accuracy = 79.28
epoch 4 : train_loss = 0.50 valid_loss = 0.51, train accuracy = 82.72 valid_accuracy = 82.07
epoch 5 : train_loss = 0.45 valid_loss = 0.46, train accuracy = 84.47 valid_accuracy = 83.75
epoch 6 : train_loss = 0.42 valid_loss = 0.44, train accuracy = 85.35 valid_accuracy = 84.55
epoch 7 : train_loss = 0.42 valid_loss = 0.43, train accuracy = 85.74 valid_accuracy = 84.92
epoch 8 : train_loss = 0.41 valid_loss = 0.43, train accuracy = 85.83 valid_accuracy = 85.02
epoch 9 : train_loss = 0.41 valid_loss = 0.42, train accuracy = 85.90 valid_accuracy = 84.97
epoch 10 : train_loss = 0.41 valid_loss = 0.42, train accuracy = 85.96 valid_accuracy = 85.05


0,1
tr_accuracy,▁▄▆▇██████
tr_loss,█▅▃▂▂▁▁▁▁▁
val_accuracy,▁▄▆▇██████
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
tr_accuracy,85.95926
tr_loss,0.40981
val_accuracy,85.05
val_loss,0.42149


[34m[1mwandb[0m: Agent Starting Run: rk9ql7xs with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.47 valid_loss = 0.47, train accuracy = 83.70 valid_accuracy = 83.40
epoch 2 : train_loss = 0.40 valid_loss = 0.41, train accuracy = 85.68 valid_accuracy = 84.80
epoch 3 : train_loss = 0.37 valid_loss = 0.38, train accuracy = 86.83 valid_accuracy = 85.88
epoch 4 : train_loss = 0.35 valid_loss = 0.37, train accuracy = 87.48 valid_accuracy = 86.48
epoch 5 : train_loss = 0.34 valid_loss = 0.36, train accuracy = 88.01 valid_accuracy = 86.68


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▆▇█
tr_loss,█▅▃▂▁
val_accuracy,▁▄▆██
val_loss,█▄▃▂▁

0,1
tr_accuracy,88.01481
tr_loss,0.33639
val_accuracy,86.68333
val_loss,0.35679


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pi2d6xda with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.98 valid_loss = 0.98, train accuracy = 65.67 valid_accuracy = 66.07
epoch 2 : train_loss = 0.77 valid_loss = 0.78, train accuracy = 71.79 valid_accuracy = 71.82
epoch 3 : train_loss = 0.69 valid_loss = 0.70, train accuracy = 74.82 valid_accuracy = 74.07
epoch 4 : train_loss = 0.63 valid_loss = 0.65, train accuracy = 76.80 valid_accuracy = 75.97
epoch 5 : train_loss = 0.59 valid_loss = 0.62, train accuracy = 78.23 valid_accuracy = 77.15


0,1
tr_accuracy,▁▄▆▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
tr_accuracy,78.22593
tr_loss,0.59309
val_accuracy,77.15
val_loss,0.61916


[34m[1mwandb[0m: Agent Starting Run: of7kt99c with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.66 valid_loss = 0.65, train accuracy = 76.41 valid_accuracy = 76.53
epoch 2 : train_loss = 0.65 valid_loss = 0.65, train accuracy = 77.34 valid_accuracy = 77.43
epoch 3 : train_loss = 0.64 valid_loss = 0.64, train accuracy = 78.14 valid_accuracy = 77.73
epoch 4 : train_loss = 0.63 valid_loss = 0.63, train accuracy = 78.29 valid_accuracy = 77.85
epoch 5 : train_loss = 0.62 valid_loss = 0.62, train accuracy = 78.52 valid_accuracy = 78.05
epoch 6 : train_loss = 0.62 valid_loss = 0.62, train accuracy = 78.47 valid_accuracy = 77.93
epoch 7 : train_loss = 0.62 valid_loss = 0.62, train accuracy = 78.44 valid_accuracy = 77.95
epoch 8 : train_loss = 0.62 valid_loss = 0.62, train accuracy = 78.46 valid_accuracy = 78.08
epoch 9 : train_loss = 0.62 valid_loss = 0.62, train accuracy = 78.39 valid_accuracy = 77.98
epoch 10 : train_loss = 0.62 valid_loss = 0.62, train accuracy = 78.40 valid_accuracy = 78.08


0,1
tr_accuracy,▁▄▇▇██████
tr_loss,█▇▅▃▂▂▂▁▁▁
val_accuracy,▁▅▆▇█▇▇███
val_loss,█▇▅▄▃▂▂▁▁▁

0,1
tr_accuracy,78.4
tr_loss,0.61674
val_accuracy,78.08333
val_loss,0.61564


[34m[1mwandb[0m: Agent Starting Run: pktgiuwv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.68 valid_loss = 0.68, train accuracy = 79.87 valid_accuracy = 79.33
epoch 2 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 80.07 valid_accuracy = 79.33
epoch 3 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 80.00 valid_accuracy = 79.22
epoch 4 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 79.92 valid_accuracy = 79.20
epoch 5 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 79.87 valid_accuracy = 79.23
epoch 6 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 79.84 valid_accuracy = 79.10
epoch 7 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 79.81 valid_accuracy = 79.00
epoch 8 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 79.76 valid_accuracy = 78.88
epoch 9 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 79.69 valid_accuracy = 78.85
epoch 10 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 79.58 valid_accuracy = 78.70


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.097865…

0,1
tr_accuracy,▅█▇▆▅▅▄▃▃▁
tr_loss,█▃▂▁▁▁▁▁▁▂
val_accuracy,██▇▇▇▅▄▃▃▁
val_loss,█▃▂▁▁▁▁▁▁▂

0,1
tr_accuracy,79.58148
tr_loss,0.65939
val_accuracy,78.7
val_loss,0.65984


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7rul6p3l with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.92 valid_loss = 0.92, train accuracy = 64.53 valid_accuracy = 64.37
epoch 2 : train_loss = 0.75 valid_loss = 0.76, train accuracy = 70.44 valid_accuracy = 70.70
epoch 3 : train_loss = 0.65 valid_loss = 0.67, train accuracy = 74.56 valid_accuracy = 73.55
epoch 4 : train_loss = 0.57 valid_loss = 0.58, train accuracy = 78.99 valid_accuracy = 78.52
epoch 5 : train_loss = 0.48 valid_loss = 0.49, train accuracy = 83.45 valid_accuracy = 82.70
epoch 6 : train_loss = 0.44 valid_loss = 0.46, train accuracy = 84.98 valid_accuracy = 84.27
epoch 7 : train_loss = 0.43 valid_loss = 0.44, train accuracy = 85.73 valid_accuracy = 84.88
epoch 8 : train_loss = 0.42 valid_loss = 0.43, train accuracy = 86.04 valid_accuracy = 85.00
epoch 9 : train_loss = 0.41 valid_loss = 0.43, train accuracy = 86.25 valid_accuracy = 85.25
epoch 10 : train_loss = 0.40 valid_loss = 0.42, train accuracy = 86.40 valid_accuracy = 85.28


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.105568…

0,1
tr_accuracy,▁▃▄▆▇█████
tr_loss,█▆▄▃▂▂▁▁▁▁
val_accuracy,▁▃▄▆▇█████
val_loss,█▆▄▃▂▁▁▁▁▁

0,1
tr_accuracy,86.4
tr_loss,0.40438
val_accuracy,85.28333
val_loss,0.4211


[34m[1mwandb[0m: Agent Starting Run: 09esdvxh with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.38 valid_loss = 0.41, train accuracy = 85.96 valid_accuracy = 84.72
epoch 2 : train_loss = 0.34 valid_loss = 0.38, train accuracy = 87.37 valid_accuracy = 86.00
epoch 3 : train_loss = 0.32 valid_loss = 0.38, train accuracy = 88.26 valid_accuracy = 86.80
epoch 4 : train_loss = 0.32 valid_loss = 0.39, train accuracy = 88.31 valid_accuracy = 86.35
epoch 5 : train_loss = 0.29 valid_loss = 0.36, train accuracy = 89.38 valid_accuracy = 87.52


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▆▆█
tr_loss,█▅▃▃▁
val_accuracy,▁▄▆▅█
val_loss,█▃▄▅▁

0,1
tr_accuracy,89.37963
tr_loss,0.2887
val_accuracy,87.51667
val_loss,0.36426


[34m[1mwandb[0m: Agent Starting Run: fiu6e8jd with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 3.22 valid_loss = 3.23, train accuracy = 39.71 valid_accuracy = 38.48
epoch 2 : train_loss = 2.00 valid_loss = 2.11, train accuracy = 50.01 valid_accuracy = 48.60
epoch 3 : train_loss = 1.62 valid_loss = 1.74, train accuracy = 55.40 valid_accuracy = 54.77
epoch 4 : train_loss = 1.42 valid_loss = 1.51, train accuracy = 58.43 valid_accuracy = 57.25
epoch 5 : train_loss = 1.29 valid_loss = 1.40, train accuracy = 60.71 valid_accuracy = 59.48
epoch 6 : train_loss = 1.20 valid_loss = 1.30, train accuracy = 62.16 valid_accuracy = 61.00
epoch 7 : train_loss = 1.13 valid_loss = 1.25, train accuracy = 63.51 valid_accuracy = 61.90
epoch 8 : train_loss = 1.08 valid_loss = 1.20, train accuracy = 64.68 valid_accuracy = 62.70
epoch 9 : train_loss = 1.04 valid_loss = 1.17, train accuracy = 65.67 valid_accuracy = 63.85
epoch 10 : train_loss = 1.01 valid_loss = 1.15, train accuracy = 66.31 valid_accuracy = 63.95


0,1
tr_accuracy,▁▄▅▆▇▇▇███
tr_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
tr_accuracy,66.30926
tr_loss,1.00705
val_accuracy,63.95
val_loss,1.15456


[34m[1mwandb[0m: Agent Starting Run: r5mu87qv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 2.16 valid_loss = 2.13, train accuracy = 37.54 valid_accuracy = 38.55
epoch 2 : train_loss = 1.65 valid_loss = 1.64, train accuracy = 46.75 valid_accuracy = 47.72
epoch 3 : train_loss = 1.42 valid_loss = 1.41, train accuracy = 52.04 valid_accuracy = 52.45
epoch 4 : train_loss = 1.29 valid_loss = 1.30, train accuracy = 55.31 valid_accuracy = 55.40
epoch 5 : train_loss = 1.22 valid_loss = 1.23, train accuracy = 57.08 valid_accuracy = 56.83
epoch 6 : train_loss = 1.16 valid_loss = 1.17, train accuracy = 57.80 valid_accuracy = 57.92
epoch 7 : train_loss = 1.11 valid_loss = 1.12, train accuracy = 59.12 valid_accuracy = 59.02
epoch 8 : train_loss = 1.08 valid_loss = 1.09, train accuracy = 60.02 valid_accuracy = 60.40
epoch 9 : train_loss = 1.05 valid_loss = 1.06, train accuracy = 60.83 valid_accuracy = 61.28
epoch 10 : train_loss = 1.02 valid_loss = 1.04, train accuracy = 61.63 valid_accuracy = 61.65


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.105840…

0,1
tr_accuracy,▁▄▅▆▇▇▇███
tr_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
tr_accuracy,61.62778
tr_loss,1.02341
val_accuracy,61.65
val_loss,1.04317


[34m[1mwandb[0m: Agent Starting Run: jw3f2yqo with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.43 valid_loss = 0.44, train accuracy = 84.56 valid_accuracy = 83.93
epoch 2 : train_loss = 0.40 valid_loss = 0.42, train accuracy = 85.33 valid_accuracy = 84.23
epoch 3 : train_loss = 0.36 valid_loss = 0.39, train accuracy = 86.98 valid_accuracy = 85.88
epoch 4 : train_loss = 0.35 valid_loss = 0.38, train accuracy = 87.43 valid_accuracy = 85.93
epoch 5 : train_loss = 0.32 valid_loss = 0.37, train accuracy = 88.05 valid_accuracy = 86.82


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▃▆▇█
tr_loss,█▆▃▂▁
val_accuracy,▁▂▆▆█
val_loss,█▆▃▂▁

0,1
tr_accuracy,88.04815
tr_loss,0.3245
val_accuracy,86.81667
val_loss,0.36607


[34m[1mwandb[0m: Agent Starting Run: n5ewqaj1 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.37 valid_loss = 0.39, train accuracy = 86.80 valid_accuracy = 85.35
epoch 2 : train_loss = 0.33 valid_loss = 0.37, train accuracy = 87.90 valid_accuracy = 86.60
epoch 3 : train_loss = 0.30 valid_loss = 0.34, train accuracy = 88.88 valid_accuracy = 87.32
epoch 4 : train_loss = 0.29 valid_loss = 0.34, train accuracy = 89.32 valid_accuracy = 87.63
epoch 5 : train_loss = 0.28 valid_loss = 0.34, train accuracy = 89.84 valid_accuracy = 87.88
epoch 6 : train_loss = 0.27 valid_loss = 0.35, train accuracy = 89.89 valid_accuracy = 87.88
epoch 7 : train_loss = 0.26 valid_loss = 0.34, train accuracy = 90.29 valid_accuracy = 87.78
epoch 8 : train_loss = 0.26 valid_loss = 0.35, train accuracy = 90.44 valid_accuracy = 87.92
epoch 9 : train_loss = 0.25 valid_loss = 0.35, train accuracy = 90.81 valid_accuracy = 88.02
epoch 10 : train_loss = 0.25 valid_loss = 0.35, train accuracy = 90.62 valid_accuracy = 87.55


0,1
tr_accuracy,▁▃▅▅▆▆▇▇██
tr_loss,█▆▄▃▃▃▂▂▁▁
val_accuracy,▁▄▆▇██▇██▇
val_loss,█▅▂▁▁▂▂▂▂▃

0,1
tr_accuracy,90.62037
tr_loss,0.24698
val_accuracy,87.55
val_loss,0.35442


[34m[1mwandb[0m: Agent Starting Run: rsi19dd5 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.67 valid_loss = 0.66, train accuracy = 77.24 valid_accuracy = 76.67
epoch 2 : train_loss = 0.57 valid_loss = 0.57, train accuracy = 80.93 valid_accuracy = 80.45
epoch 3 : train_loss = 0.52 valid_loss = 0.52, train accuracy = 82.28 valid_accuracy = 81.85
epoch 4 : train_loss = 0.49 valid_loss = 0.50, train accuracy = 83.05 valid_accuracy = 82.63
epoch 5 : train_loss = 0.48 valid_loss = 0.48, train accuracy = 83.49 valid_accuracy = 83.20


0,1
tr_accuracy,▁▅▇██
tr_loss,█▄▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▃▂▁

0,1
tr_accuracy,83.49074
tr_loss,0.47795
val_accuracy,83.2
val_loss,0.4829


[34m[1mwandb[0m: Agent Starting Run: s2ttcgip with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 1.55 valid_loss = 1.55, train accuracy = 43.37 valid_accuracy = 44.52
epoch 2 : train_loss = 1.73 valid_loss = 1.73, train accuracy = 37.16 valid_accuracy = 38.02
epoch 3 : train_loss = 1.80 valid_loss = 1.80, train accuracy = 23.91 valid_accuracy = 23.77
epoch 4 : train_loss = 1.80 valid_loss = 1.80, train accuracy = 23.97 valid_accuracy = 23.83
epoch 5 : train_loss = 1.79 valid_loss = 1.80, train accuracy = 23.93 valid_accuracy = 23.58
epoch 6 : train_loss = 1.79 valid_loss = 1.80, train accuracy = 23.93 valid_accuracy = 23.58
epoch 7 : train_loss = 1.79 valid_loss = 1.80, train accuracy = 23.96 valid_accuracy = 23.63
epoch 8 : train_loss = 1.79 valid_loss = 1.80, train accuracy = 23.98 valid_accuracy = 23.65
epoch 9 : train_loss = 1.79 valid_loss = 1.80, train accuracy = 24.00 valid_accuracy = 23.65
epoch 10 : train_loss = 1.79 valid_loss = 1.79, train accuracy = 24.03 valid_accuracy = 23.65


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,█▆▁▁▁▁▁▁▁▁
tr_loss,▁▆████████
val_accuracy,█▆▁▁▁▁▁▁▁▁
val_loss,▁▆████████

0,1
tr_accuracy,24.02963
tr_loss,1.7909
val_accuracy,23.65
val_loss,1.79487


[34m[1mwandb[0m: Agent Starting Run: of8mf08i with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.64 valid_loss = 0.64, train accuracy = 76.77 valid_accuracy = 76.77
epoch 2 : train_loss = 0.63 valid_loss = 0.62, train accuracy = 77.93 valid_accuracy = 77.93
epoch 3 : train_loss = 0.62 valid_loss = 0.62, train accuracy = 78.05 valid_accuracy = 77.93
epoch 4 : train_loss = 0.62 valid_loss = 0.62, train accuracy = 78.09 valid_accuracy = 78.10
epoch 5 : train_loss = 0.62 valid_loss = 0.62, train accuracy = 78.14 valid_accuracy = 78.07


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▇███
tr_loss,█▃▂▁▁
val_accuracy,▁▇▇██
val_loss,█▃▂▁▁

0,1
tr_accuracy,78.14259
tr_loss,0.62061
val_accuracy,78.06667
val_loss,0.61786


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kunh6gnu with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.56 valid_loss = 0.57, train accuracy = 80.25 valid_accuracy = 79.03
epoch 2 : train_loss = 0.48 valid_loss = 0.50, train accuracy = 82.61 valid_accuracy = 81.80
epoch 3 : train_loss = 0.44 valid_loss = 0.46, train accuracy = 83.98 valid_accuracy = 83.30
epoch 4 : train_loss = 0.41 valid_loss = 0.44, train accuracy = 85.02 valid_accuracy = 84.27
epoch 5 : train_loss = 0.39 valid_loss = 0.42, train accuracy = 85.77 valid_accuracy = 85.03
epoch 6 : train_loss = 0.38 valid_loss = 0.41, train accuracy = 86.35 valid_accuracy = 85.38
epoch 7 : train_loss = 0.37 valid_loss = 0.40, train accuracy = 86.83 valid_accuracy = 85.72
epoch 8 : train_loss = 0.36 valid_loss = 0.39, train accuracy = 87.13 valid_accuracy = 85.93
epoch 9 : train_loss = 0.35 valid_loss = 0.38, train accuracy = 87.46 valid_accuracy = 85.78
epoch 10 : train_loss = 0.34 valid_loss = 0.38, train accuracy = 87.69 valid_accuracy = 86.10


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▃▅▅▆▇▇▇██
tr_loss,█▆▄▃▃▂▂▂▁▁
val_accuracy,▁▄▅▆▇▇████
val_loss,█▆▄▃▃▂▂▁▁▁

0,1
tr_accuracy,87.69444
tr_loss,0.33946
val_accuracy,86.1
val_loss,0.3756


[34m[1mwandb[0m: Agent Starting Run: 2reao6mz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.90 valid_loss = 0.90, train accuracy = 68.09 valid_accuracy = 67.87
epoch 2 : train_loss = 0.77 valid_loss = 0.77, train accuracy = 72.71 valid_accuracy = 71.93
epoch 3 : train_loss = 0.70 valid_loss = 0.71, train accuracy = 75.05 valid_accuracy = 73.88
epoch 4 : train_loss = 0.66 valid_loss = 0.68, train accuracy = 76.36 valid_accuracy = 74.72
epoch 5 : train_loss = 0.63 valid_loss = 0.65, train accuracy = 77.29 valid_accuracy = 75.72
epoch 6 : train_loss = 0.61 valid_loss = 0.63, train accuracy = 78.10 valid_accuracy = 76.43
epoch 7 : train_loss = 0.59 valid_loss = 0.62, train accuracy = 78.66 valid_accuracy = 77.28
epoch 8 : train_loss = 0.57 valid_loss = 0.60, train accuracy = 79.26 valid_accuracy = 77.95
epoch 9 : train_loss = 0.56 valid_loss = 0.59, train accuracy = 79.71 valid_accuracy = 78.35
epoch 10 : train_loss = 0.55 valid_loss = 0.58, train accuracy = 80.18 valid_accuracy = 78.93


0,1
tr_accuracy,▁▄▅▆▆▇▇▇██
tr_loss,█▅▄▃▃▂▂▁▁▁
val_accuracy,▁▄▅▅▆▆▇▇██
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
tr_accuracy,80.17593
tr_loss,0.54588
val_accuracy,78.93333
val_loss,0.5785


[34m[1mwandb[0m: Agent Starting Run: k3w4a15j with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.52 valid_loss = 0.53, train accuracy = 82.08 valid_accuracy = 81.65
epoch 2 : train_loss = 0.51 valid_loss = 0.52, train accuracy = 82.41 valid_accuracy = 82.08
epoch 3 : train_loss = 0.51 valid_loss = 0.51, train accuracy = 82.45 valid_accuracy = 82.05
epoch 4 : train_loss = 0.50 valid_loss = 0.51, train accuracy = 82.85 valid_accuracy = 82.22
epoch 5 : train_loss = 0.49 valid_loss = 0.50, train accuracy = 83.05 valid_accuracy = 82.60


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.102522…

0,1
tr_accuracy,▁▃▄▇█
tr_loss,█▅▄▂▁
val_accuracy,▁▄▄▅█
val_loss,█▅▄▂▁

0,1
tr_accuracy,83.04815
tr_loss,0.49282
val_accuracy,82.6
val_loss,0.50261


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3g2jad7v with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.79 valid_loss = 0.78, train accuracy = 74.72 valid_accuracy = 75.43
epoch 2 : train_loss = 0.71 valid_loss = 0.71, train accuracy = 78.17 valid_accuracy = 77.87
epoch 3 : train_loss = 0.69 valid_loss = 0.68, train accuracy = 79.13 valid_accuracy = 78.50
epoch 4 : train_loss = 0.68 valid_loss = 0.67, train accuracy = 79.29 valid_accuracy = 78.75
epoch 5 : train_loss = 0.67 valid_loss = 0.67, train accuracy = 79.35 valid_accuracy = 78.82


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▆███
tr_loss,█▃▂▁▁
val_accuracy,▁▆▇██
val_loss,█▃▂▁▁

0,1
tr_accuracy,79.3537
tr_loss,0.67134
val_accuracy,78.81667
val_loss,0.66996


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3w3ji79e with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 1.06 valid_loss = 1.06, train accuracy = 60.58 valid_accuracy = 60.77
epoch 2 : train_loss = 0.74 valid_loss = 0.73, train accuracy = 74.87 valid_accuracy = 74.85
epoch 3 : train_loss = 0.60 valid_loss = 0.60, train accuracy = 79.45 valid_accuracy = 79.55
epoch 4 : train_loss = 0.52 valid_loss = 0.52, train accuracy = 82.21 valid_accuracy = 81.75
epoch 5 : train_loss = 0.47 valid_loss = 0.48, train accuracy = 83.70 valid_accuracy = 82.92
epoch 6 : train_loss = 0.44 valid_loss = 0.45, train accuracy = 84.52 valid_accuracy = 83.62
epoch 7 : train_loss = 0.42 valid_loss = 0.43, train accuracy = 85.12 valid_accuracy = 84.02
epoch 8 : train_loss = 0.41 valid_loss = 0.42, train accuracy = 85.64 valid_accuracy = 84.23
epoch 9 : train_loss = 0.39 valid_loss = 0.41, train accuracy = 86.08 valid_accuracy = 84.80
epoch 10 : train_loss = 0.38 valid_loss = 0.39, train accuracy = 86.46 valid_accuracy = 85.08


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.108411…

0,1
tr_accuracy,▁▅▆▇▇▇████
tr_loss,█▅▃▂▂▂▁▁▁▁
val_accuracy,▁▅▆▇▇█████
val_loss,█▅▃▂▂▂▁▁▁▁

0,1
tr_accuracy,86.46296
tr_loss,0.38056
val_accuracy,85.08333
val_loss,0.39498


[34m[1mwandb[0m: Agent Starting Run: 805rjslx with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.48 valid_loss = 0.49, train accuracy = 83.05 valid_accuracy = 82.17
epoch 2 : train_loss = 0.43 valid_loss = 0.46, train accuracy = 84.31 valid_accuracy = 83.13
epoch 3 : train_loss = 0.37 valid_loss = 0.41, train accuracy = 86.50 valid_accuracy = 85.47
epoch 4 : train_loss = 0.38 valid_loss = 0.42, train accuracy = 86.52 valid_accuracy = 85.27
epoch 5 : train_loss = 0.37 valid_loss = 0.43, train accuracy = 86.36 valid_accuracy = 85.17


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.102263…

0,1
tr_accuracy,▁▄███
tr_loss,█▄▁▂▁
val_accuracy,▁▃██▇
val_loss,█▅▁▁▂

0,1
tr_accuracy,86.36111
tr_loss,0.37438
val_accuracy,85.16667
val_loss,0.43125


[34m[1mwandb[0m: Agent Starting Run: 7c6qsvv2 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32
epoch 2 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32
epoch 3 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32
epoch 4 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32
epoch 5 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32
epoch 6 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32
epoch 7 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32
epoch 8 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32
epoch 9 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32
epoch 10 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32


0,1
tr_accuracy,▁▁▁▁▁▁▁▁▁▁
tr_loss,██▇▆▅▄▃▂▂▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,███▇▆▅▄▃▂▁

0,1
tr_accuracy,9.96481
tr_loss,2.30417
val_accuracy,10.31667
val_loss,2.30509


[34m[1mwandb[0m: Agent Starting Run: 1wa2hlki with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 1.24 valid_loss = 1.24, train accuracy = 56.99 valid_accuracy = 57.42
epoch 2 : train_loss = 0.88 valid_loss = 0.87, train accuracy = 66.98 valid_accuracy = 67.55
epoch 3 : train_loss = 0.71 valid_loss = 0.71, train accuracy = 73.67 valid_accuracy = 73.82
epoch 4 : train_loss = 0.63 valid_loss = 0.63, train accuracy = 76.77 valid_accuracy = 77.10
epoch 5 : train_loss = 0.57 valid_loss = 0.57, train accuracy = 79.42 valid_accuracy = 79.20
epoch 6 : train_loss = 0.53 valid_loss = 0.54, train accuracy = 81.18 valid_accuracy = 80.97
epoch 7 : train_loss = 0.50 valid_loss = 0.51, train accuracy = 82.30 valid_accuracy = 81.83
epoch 8 : train_loss = 0.48 valid_loss = 0.49, train accuracy = 83.09 valid_accuracy = 82.38
epoch 9 : train_loss = 0.46 valid_loss = 0.47, train accuracy = 83.66 valid_accuracy = 82.83
epoch 10 : train_loss = 0.45 valid_loss = 0.46, train accuracy = 84.17 valid_accuracy = 83.48


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▅▆▇▇████
tr_loss,█▅▃▃▂▂▁▁▁▁
val_accuracy,▁▄▅▆▇▇████
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
tr_accuracy,84.16667
tr_loss,0.44784
val_accuracy,83.48333
val_loss,0.45781


[34m[1mwandb[0m: Agent Starting Run: hiyt458m with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.91 valid_loss = 0.91, train accuracy = 66.78 valid_accuracy = 66.97
epoch 2 : train_loss = 0.76 valid_loss = 0.77, train accuracy = 72.10 valid_accuracy = 72.08
epoch 3 : train_loss = 0.70 valid_loss = 0.70, train accuracy = 74.90 valid_accuracy = 74.95
epoch 4 : train_loss = 0.65 valid_loss = 0.66, train accuracy = 76.47 valid_accuracy = 76.58
epoch 5 : train_loss = 0.62 valid_loss = 0.63, train accuracy = 77.60 valid_accuracy = 77.77


0,1
tr_accuracy,▁▄▆▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
tr_accuracy,77.5963
tr_loss,0.62068
val_accuracy,77.76667
val_loss,0.63219


[34m[1mwandb[0m: Agent Starting Run: ab2rwfjx with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.95 valid_loss = 0.94, train accuracy = 62.16 valid_accuracy = 62.32
epoch 2 : train_loss = 0.88 valid_loss = 0.87, train accuracy = 65.81 valid_accuracy = 66.57
epoch 3 : train_loss = 0.88 valid_loss = 0.87, train accuracy = 66.21 valid_accuracy = 66.88
epoch 4 : train_loss = 0.87 valid_loss = 0.86, train accuracy = 66.13 valid_accuracy = 66.28
epoch 5 : train_loss = 0.88 valid_loss = 0.87, train accuracy = 65.70 valid_accuracy = 65.95
epoch 6 : train_loss = 0.88 valid_loss = 0.88, train accuracy = 65.56 valid_accuracy = 65.88
epoch 7 : train_loss = 0.88 valid_loss = 0.88, train accuracy = 65.49 valid_accuracy = 65.75
epoch 8 : train_loss = 0.89 valid_loss = 0.88, train accuracy = 65.41 valid_accuracy = 65.72
epoch 9 : train_loss = 0.89 valid_loss = 0.88, train accuracy = 65.38 valid_accuracy = 65.60
epoch 10 : train_loss = 0.89 valid_loss = 0.88, train accuracy = 65.36 valid_accuracy = 65.55


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▇██▇▇▇▇▇▇
tr_loss,█▂▁▁▂▂▂▂▂▂
val_accuracy,▁██▇▇▆▆▆▆▆
val_loss,█▂▁▁▂▂▂▂▃▃

0,1
tr_accuracy,65.35926
tr_loss,0.88744
val_accuracy,65.55
val_loss,0.88299


[34m[1mwandb[0m: Agent Starting Run: wxmygkin with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.48 valid_loss = 0.49, train accuracy = 83.64 valid_accuracy = 83.15
epoch 2 : train_loss = 0.43 valid_loss = 0.44, train accuracy = 85.20 valid_accuracy = 84.73
epoch 3 : train_loss = 0.41 valid_loss = 0.42, train accuracy = 85.93 valid_accuracy = 85.40
epoch 4 : train_loss = 0.40 valid_loss = 0.41, train accuracy = 86.30 valid_accuracy = 85.65
epoch 5 : train_loss = 0.39 valid_loss = 0.41, train accuracy = 86.57 valid_accuracy = 85.90
epoch 6 : train_loss = 0.39 valid_loss = 0.40, train accuracy = 86.82 valid_accuracy = 86.15
epoch 7 : train_loss = 0.39 valid_loss = 0.40, train accuracy = 86.95 valid_accuracy = 86.20
epoch 8 : train_loss = 0.38 valid_loss = 0.40, train accuracy = 87.01 valid_accuracy = 86.20
epoch 9 : train_loss = 0.38 valid_loss = 0.40, train accuracy = 87.12 valid_accuracy = 86.28
epoch 10 : train_loss = 0.38 valid_loss = 0.39, train accuracy = 87.23 valid_accuracy = 86.30


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▄▅▆▇▇▇███
tr_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▅▆▇▇█████
val_loss,█▅▃▂▂▂▁▁▁▁

0,1
tr_accuracy,87.22963
tr_loss,0.37819
val_accuracy,86.3
val_loss,0.3942


[34m[1mwandb[0m: Agent Starting Run: 4oc2ljsl with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.42 valid_loss = 0.43, train accuracy = 84.96 valid_accuracy = 84.45
epoch 2 : train_loss = 0.37 valid_loss = 0.39, train accuracy = 86.70 valid_accuracy = 85.72
epoch 3 : train_loss = 0.34 valid_loss = 0.36, train accuracy = 87.75 valid_accuracy = 86.62
epoch 4 : train_loss = 0.32 valid_loss = 0.35, train accuracy = 88.39 valid_accuracy = 87.15
epoch 5 : train_loss = 0.30 valid_loss = 0.34, train accuracy = 88.90 valid_accuracy = 87.45


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.102459…

0,1
tr_accuracy,▁▄▆▇█
tr_loss,█▅▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▅▃▂▁

0,1
tr_accuracy,88.9037
tr_loss,0.29976
val_accuracy,87.45
val_loss,0.34481


[34m[1mwandb[0m: Agent Starting Run: z72nmdz2 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 1.11 valid_loss = 1.09, train accuracy = 62.35 valid_accuracy = 64.08
epoch 2 : train_loss = 0.84 valid_loss = 0.83, train accuracy = 70.46 valid_accuracy = 71.35
epoch 3 : train_loss = 0.74 valid_loss = 0.73, train accuracy = 73.47 valid_accuracy = 74.18
epoch 4 : train_loss = 0.69 valid_loss = 0.68, train accuracy = 75.20 valid_accuracy = 75.68
epoch 5 : train_loss = 0.65 valid_loss = 0.64, train accuracy = 76.46 valid_accuracy = 76.73
epoch 6 : train_loss = 0.62 valid_loss = 0.61, train accuracy = 77.51 valid_accuracy = 77.38
epoch 7 : train_loss = 0.60 valid_loss = 0.59, train accuracy = 78.36 valid_accuracy = 78.17
epoch 8 : train_loss = 0.58 valid_loss = 0.58, train accuracy = 78.95 valid_accuracy = 78.68
epoch 9 : train_loss = 0.56 valid_loss = 0.57, train accuracy = 79.59 valid_accuracy = 79.17
epoch 10 : train_loss = 0.55 valid_loss = 0.55, train accuracy = 80.11 valid_accuracy = 79.62


0,1
tr_accuracy,▁▄▅▆▇▇▇███
tr_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▄▆▆▇▇▇███
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
tr_accuracy,80.10741
tr_loss,0.55154
val_accuracy,79.61667
val_loss,0.55459


[34m[1mwandb[0m: Agent Starting Run: iidg0hi4 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 10.06 valid_accuracy = 9.42
epoch 2 : train_loss = 2.30 valid_loss = 2.30, train accuracy = 13.99 valid_accuracy = 13.00
epoch 3 : train_loss = 2.28 valid_loss = 2.28, train accuracy = 19.34 valid_accuracy = 19.05
epoch 4 : train_loss = 1.67 valid_loss = 1.67, train accuracy = 36.38 valid_accuracy = 35.72
epoch 5 : train_loss = 1.08 valid_loss = 1.08, train accuracy = 55.06 valid_accuracy = 55.82
epoch 6 : train_loss = 0.92 valid_loss = 0.91, train accuracy = 65.69 valid_accuracy = 66.32
epoch 7 : train_loss = 0.81 valid_loss = 0.81, train accuracy = 70.06 valid_accuracy = 70.05
epoch 8 : train_loss = 0.74 valid_loss = 0.74, train accuracy = 73.18 valid_accuracy = 73.02
epoch 9 : train_loss = 0.69 valid_loss = 0.69, train accuracy = 75.56 valid_accuracy = 75.55
epoch 10 : train_loss = 0.64 valid_loss = 0.65, train accuracy = 77.31 valid_accuracy = 77.17


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.105634…

0,1
tr_accuracy,▁▁▂▄▆▇▇███
tr_loss,███▅▃▂▂▁▁▁
val_accuracy,▁▁▂▄▆▇▇███
val_loss,███▅▃▂▂▁▁▁

0,1
tr_accuracy,77.30556
tr_loss,0.64376
val_accuracy,77.16667
val_loss,0.65377


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: t1a4llab with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 1.22 valid_loss = 1.21, train accuracy = 64.78 valid_accuracy = 65.48
epoch 2 : train_loss = 0.98 valid_loss = 0.97, train accuracy = 71.32 valid_accuracy = 71.63
epoch 3 : train_loss = 0.96 valid_loss = 0.95, train accuracy = 72.74 valid_accuracy = 73.52
epoch 4 : train_loss = 0.96 valid_loss = 0.96, train accuracy = 72.88 valid_accuracy = 73.92
epoch 5 : train_loss = 0.98 valid_loss = 0.97, train accuracy = 72.13 valid_accuracy = 72.90


0,1
tr_accuracy,▁▇██▇
tr_loss,█▂▁▁▁
val_accuracy,▁▆██▇
val_loss,█▂▁▁▁

0,1
tr_accuracy,72.12593
tr_loss,0.97685
val_accuracy,72.9
val_loss,0.96889


[34m[1mwandb[0m: Agent Starting Run: u61h3d0l with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 2.28 valid_loss = 2.29, train accuracy = 10.00 valid_accuracy = 10.03
epoch 2 : train_loss = 2.13 valid_loss = 2.13, train accuracy = 36.97 valid_accuracy = 36.12
epoch 3 : train_loss = 1.79 valid_loss = 1.79, train accuracy = 28.88 valid_accuracy = 29.12
epoch 4 : train_loss = 1.57 valid_loss = 1.57, train accuracy = 29.56 valid_accuracy = 29.73
epoch 5 : train_loss = 1.45 valid_loss = 1.45, train accuracy = 39.14 valid_accuracy = 39.22


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▇▆▆█
tr_loss,█▇▄▂▁
val_accuracy,▁▇▆▆█
val_loss,█▇▄▂▁

0,1
tr_accuracy,39.14444
tr_loss,1.44702
val_accuracy,39.21667
val_loss,1.44756


[34m[1mwandb[0m: Agent Starting Run: 830lcuer with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.68 valid_loss = 0.68, train accuracy = 80.02 valid_accuracy = 79.93
epoch 2 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 80.39 valid_accuracy = 79.78
epoch 3 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 80.36 valid_accuracy = 79.82
epoch 4 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 80.32 valid_accuracy = 79.68
epoch 5 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 80.24 valid_accuracy = 79.72
epoch 6 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 80.20 valid_accuracy = 79.75
epoch 7 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 80.18 valid_accuracy = 79.70
epoch 8 : train_loss = 0.67 valid_loss = 0.67, train accuracy = 79.78 valid_accuracy = 79.50
epoch 9 : train_loss = 0.67 valid_loss = 0.67, train accuracy = 79.38 valid_accuracy = 78.98
epoch 10 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 79.33 valid_accuracy = 78.88


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▆███▇▇▇▄▁▁
tr_loss,█▃▂▁▁▁▂▄▄▃
val_accuracy,█▇▇▆▇▇▆▅▂▁
val_loss,█▃▂▁▁▁▂▄▄▄

0,1
tr_accuracy,79.32593
tr_loss,0.66373
val_accuracy,78.88333
val_loss,0.66367


[34m[1mwandb[0m: Agent Starting Run: vbw6v8ua with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.51 valid_loss = 0.53, train accuracy = 81.47 valid_accuracy = 80.42
epoch 2 : train_loss = 0.46 valid_loss = 0.48, train accuracy = 84.11 valid_accuracy = 82.88
epoch 3 : train_loss = 0.42 valid_loss = 0.46, train accuracy = 85.12 valid_accuracy = 83.67
epoch 4 : train_loss = 0.42 valid_loss = 0.47, train accuracy = 85.39 valid_accuracy = 83.98
epoch 5 : train_loss = 0.42 valid_loss = 0.47, train accuracy = 85.46 valid_accuracy = 84.08
epoch 6 : train_loss = 0.43 valid_loss = 0.47, train accuracy = 85.15 valid_accuracy = 83.58
epoch 7 : train_loss = 0.41 valid_loss = 0.47, train accuracy = 85.82 valid_accuracy = 84.12
epoch 8 : train_loss = 0.42 valid_loss = 0.47, train accuracy = 85.69 valid_accuracy = 84.47
epoch 9 : train_loss = 0.41 valid_loss = 0.48, train accuracy = 85.75 valid_accuracy = 84.18
epoch 10 : train_loss = 0.41 valid_loss = 0.46, train accuracy = 85.46 valid_accuracy = 83.95


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.097836…

0,1
tr_accuracy,▁▅▇▇▇▇███▇
tr_loss,█▄▂▂▁▂▁▂▁▁
val_accuracy,▁▅▇▇▇▆▇██▇
val_loss,█▃▁▂▂▂▂▂▂▁

0,1
tr_accuracy,85.46296
tr_loss,0.41272
val_accuracy,83.95
val_loss,0.46124


[34m[1mwandb[0m: Agent Starting Run: bkgsbuyo with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.77 valid_loss = 0.78, train accuracy = 72.41 valid_accuracy = 72.05
epoch 2 : train_loss = 0.67 valid_loss = 0.69, train accuracy = 75.80 valid_accuracy = 75.48
epoch 3 : train_loss = 0.62 valid_loss = 0.64, train accuracy = 77.62 valid_accuracy = 77.08
epoch 4 : train_loss = 0.58 valid_loss = 0.61, train accuracy = 78.69 valid_accuracy = 77.85
epoch 5 : train_loss = 0.56 valid_loss = 0.59, train accuracy = 79.50 valid_accuracy = 78.50


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.102490…

0,1
tr_accuracy,▁▄▆▇█
tr_loss,█▅▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
tr_accuracy,79.5037
tr_loss,0.55838
val_accuracy,78.5
val_loss,0.59321


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vnqq55qj with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.62 valid_loss = 0.61, train accuracy = 79.31 valid_accuracy = 79.48
epoch 2 : train_loss = 0.53 valid_loss = 0.53, train accuracy = 81.77 valid_accuracy = 81.20
epoch 3 : train_loss = 0.49 valid_loss = 0.50, train accuracy = 82.77 valid_accuracy = 81.93
epoch 4 : train_loss = 0.46 valid_loss = 0.48, train accuracy = 83.47 valid_accuracy = 82.70
epoch 5 : train_loss = 0.45 valid_loss = 0.46, train accuracy = 84.04 valid_accuracy = 83.05


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▅▆▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▄▆▇█
val_loss,█▄▃▂▁

0,1
tr_accuracy,84.04074
tr_loss,0.44575
val_accuracy,83.05
val_loss,0.46417


[34m[1mwandb[0m: Agent Starting Run: 6galy8yd with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016670714550006474, max=1.0…

epoch 1 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32
epoch 2 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32
epoch 3 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32
epoch 4 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32
epoch 5 : train_loss = 2.30 valid_loss = 2.31, train accuracy = 9.96 valid_accuracy = 10.32


0,1
tr_accuracy,▁▁▁▁▁
tr_loss,▁▂▆██
val_accuracy,▁▁▁▁▁
val_loss,▁▄██▇

0,1
tr_accuracy,9.96481
tr_loss,2.30416
val_accuracy,10.31667
val_loss,2.3051


[34m[1mwandb[0m: Agent Starting Run: mvacjlgf with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.46 valid_loss = 0.47, train accuracy = 83.99 valid_accuracy = 83.25
epoch 2 : train_loss = 0.44 valid_loss = 0.46, train accuracy = 84.56 valid_accuracy = 83.62
epoch 3 : train_loss = 0.44 valid_loss = 0.45, train accuracy = 84.67 valid_accuracy = 83.87
epoch 4 : train_loss = 0.44 valid_loss = 0.45, train accuracy = 84.71 valid_accuracy = 83.95
epoch 5 : train_loss = 0.43 valid_loss = 0.44, train accuracy = 84.89 valid_accuracy = 84.25


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▅▆▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▄▅▆█
val_loss,█▄▃▂▁

0,1
tr_accuracy,84.89259
tr_loss,0.42828
val_accuracy,84.25
val_loss,0.44462


[34m[1mwandb[0m: Agent Starting Run: kz53kwpu with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.55 valid_loss = 0.56, train accuracy = 81.39 valid_accuracy = 80.78
epoch 2 : train_loss = 0.54 valid_loss = 0.54, train accuracy = 82.01 valid_accuracy = 81.17
epoch 3 : train_loss = 0.54 valid_loss = 0.55, train accuracy = 82.00 valid_accuracy = 81.10
epoch 4 : train_loss = 0.54 valid_loss = 0.55, train accuracy = 81.83 valid_accuracy = 81.08
epoch 5 : train_loss = 0.54 valid_loss = 0.55, train accuracy = 81.80 valid_accuracy = 81.10
epoch 6 : train_loss = 0.54 valid_loss = 0.55, train accuracy = 81.91 valid_accuracy = 81.17
epoch 7 : train_loss = 0.54 valid_loss = 0.55, train accuracy = 82.06 valid_accuracy = 81.18
epoch 8 : train_loss = 0.54 valid_loss = 0.55, train accuracy = 82.19 valid_accuracy = 81.50
epoch 9 : train_loss = 0.54 valid_loss = 0.54, train accuracy = 82.30 valid_accuracy = 81.52
epoch 10 : train_loss = 0.54 valid_loss = 0.54, train accuracy = 82.33 valid_accuracy = 81.63


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▆▆▄▄▅▆▇██
tr_loss,█▁▃▃▄▄▃▂▁▁
val_accuracy,▁▄▄▃▄▄▄▇▇█
val_loss,█▁▄▄▅▅▅▃▂▂

0,1
tr_accuracy,82.33148
tr_loss,0.53558
val_accuracy,81.63333
val_loss,0.54217


[34m[1mwandb[0m: Agent Starting Run: cou8ukgr with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.59 valid_loss = 0.59, train accuracy = 79.39 valid_accuracy = 78.98
epoch 2 : train_loss = 0.51 valid_loss = 0.52, train accuracy = 81.98 valid_accuracy = 81.35
epoch 3 : train_loss = 0.47 valid_loss = 0.48, train accuracy = 83.55 valid_accuracy = 82.60
epoch 4 : train_loss = 0.44 valid_loss = 0.45, train accuracy = 84.35 valid_accuracy = 83.50
epoch 5 : train_loss = 0.42 valid_loss = 0.44, train accuracy = 85.00 valid_accuracy = 84.08
epoch 6 : train_loss = 0.40 valid_loss = 0.42, train accuracy = 85.61 valid_accuracy = 84.58
epoch 7 : train_loss = 0.39 valid_loss = 0.41, train accuracy = 85.98 valid_accuracy = 85.05
epoch 8 : train_loss = 0.38 valid_loss = 0.41, train accuracy = 86.34 valid_accuracy = 85.33
epoch 9 : train_loss = 0.37 valid_loss = 0.40, train accuracy = 86.64 valid_accuracy = 85.35
epoch 10 : train_loss = 0.37 valid_loss = 0.39, train accuracy = 86.94 valid_accuracy = 85.63


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁▃▅▆▆▇▇▇██
tr_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▃▅▆▆▇▇███
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
tr_accuracy,86.94259
tr_loss,0.36553
val_accuracy,85.63333
val_loss,0.39326


[34m[1mwandb[0m: Agent Starting Run: kduby741 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.76 valid_loss = 0.76, train accuracy = 73.90 valid_accuracy = 74.05
epoch 2 : train_loss = 0.75 valid_loss = 0.75, train accuracy = 74.49 valid_accuracy = 74.67
epoch 3 : train_loss = 0.75 valid_loss = 0.75, train accuracy = 74.69 valid_accuracy = 74.93
epoch 4 : train_loss = 0.75 valid_loss = 0.75, train accuracy = 74.71 valid_accuracy = 74.97
epoch 5 : train_loss = 0.75 valid_loss = 0.75, train accuracy = 74.74 valid_accuracy = 75.15
epoch 6 : train_loss = 0.75 valid_loss = 0.75, train accuracy = 74.79 valid_accuracy = 75.18
epoch 7 : train_loss = 0.75 valid_loss = 0.75, train accuracy = 74.87 valid_accuracy = 75.13
epoch 8 : train_loss = 0.75 valid_loss = 0.75, train accuracy = 74.91 valid_accuracy = 75.20
epoch 9 : train_loss = 0.75 valid_loss = 0.74, train accuracy = 74.94 valid_accuracy = 75.33
epoch 10 : train_loss = 0.75 valid_loss = 0.74, train accuracy = 74.89 valid_accuracy = 75.27


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.097760…

0,1
tr_accuracy,▁▅▆▆▇▇████
tr_loss,█▃▂▂▂▂▁▁▁▁
val_accuracy,▁▄▆▆▇▇▇▇██
val_loss,█▃▂▂▂▂▁▁▁▁

0,1
tr_accuracy,74.89259
tr_loss,0.74696
val_accuracy,75.26667
val_loss,0.74473


[34m[1mwandb[0m: Agent Starting Run: vfyta4oo with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 9.40 valid_loss = 9.60, train accuracy = 25.09 valid_accuracy = 24.10
epoch 2 : train_loss = 6.93 valid_loss = 7.34, train accuracy = 35.85 valid_accuracy = 33.45
epoch 3 : train_loss = 5.65 valid_loss = 5.89, train accuracy = 41.54 valid_accuracy = 40.75
epoch 4 : train_loss = 4.79 valid_loss = 5.16, train accuracy = 46.55 valid_accuracy = 44.37
epoch 5 : train_loss = 4.20 valid_loss = 4.50, train accuracy = 49.34 valid_accuracy = 47.85
epoch 6 : train_loss = 3.74 valid_loss = 4.02, train accuracy = 51.54 valid_accuracy = 50.43
epoch 7 : train_loss = 3.39 valid_loss = 3.69, train accuracy = 53.29 valid_accuracy = 51.43
epoch 8 : train_loss = 3.12 valid_loss = 3.42, train accuracy = 54.56 valid_accuracy = 52.10
epoch 9 : train_loss = 2.83 valid_loss = 3.17, train accuracy = 55.46 valid_accuracy = 52.95
epoch 10 : train_loss = 2.62 valid_loss = 2.87, train accuracy = 56.70 valid_accuracy = 54.78


0,1
tr_accuracy,▁▃▅▆▆▇▇███
tr_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▃▅▆▆▇▇▇██
val_loss,█▆▄▃▃▂▂▂▁▁

0,1
tr_accuracy,56.7
tr_loss,2.61809
val_accuracy,54.78333
val_loss,2.86797


[34m[1mwandb[0m: Agent Starting Run: xhsyalhk with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.69 valid_loss = 0.70, train accuracy = 76.41 valid_accuracy = 76.47
epoch 2 : train_loss = 0.67 valid_loss = 0.67, train accuracy = 79.82 valid_accuracy = 79.67
epoch 3 : train_loss = 0.65 valid_loss = 0.65, train accuracy = 79.71 valid_accuracy = 79.25
epoch 4 : train_loss = 0.64 valid_loss = 0.64, train accuracy = 79.65 valid_accuracy = 79.17
epoch 5 : train_loss = 0.64 valid_loss = 0.64, train accuracy = 79.57 valid_accuracy = 79.02


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁███▇
tr_loss,█▅▃▁▁
val_accuracy,▁█▇▇▇
val_loss,█▄▃▁▁

0,1
tr_accuracy,79.57407
tr_loss,0.64122
val_accuracy,79.01667
val_loss,0.64047


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: a2eyvpo1 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 1.00 valid_loss = 1.03, train accuracy = 63.44 valid_accuracy = 62.57
epoch 2 : train_loss = 0.67 valid_loss = 0.68, train accuracy = 77.60 valid_accuracy = 76.93
epoch 3 : train_loss = 0.72 valid_loss = 0.71, train accuracy = 73.85 valid_accuracy = 74.10
epoch 4 : train_loss = 0.73 valid_loss = 0.72, train accuracy = 72.28 valid_accuracy = 72.52
epoch 5 : train_loss = 0.71 valid_loss = 0.70, train accuracy = 73.33 valid_accuracy = 73.92
epoch 6 : train_loss = 0.69 valid_loss = 0.69, train accuracy = 75.78 valid_accuracy = 76.52
epoch 7 : train_loss = 0.69 valid_loss = 0.68, train accuracy = 76.00 valid_accuracy = 76.88
epoch 8 : train_loss = 0.70 valid_loss = 0.70, train accuracy = 73.99 valid_accuracy = 74.62
epoch 9 : train_loss = 0.70 valid_loss = 0.69, train accuracy = 74.29 valid_accuracy = 74.93
epoch 10 : train_loss = 0.69 valid_loss = 0.68, train accuracy = 76.00 valid_accuracy = 76.87


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
tr_accuracy,▁█▆▅▆▇▇▆▆▇
tr_loss,█▁▂▂▂▁▁▂▂▁
val_accuracy,▁█▇▆▇██▇▇█
val_loss,█▁▂▂▂▁▁▁▁▁

0,1
tr_accuracy,75.9963
tr_loss,0.68523
val_accuracy,76.86667
val_loss,0.67554


[34m[1mwandb[0m: Agent Starting Run: zs7y8gyx with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.67 valid_loss = 0.67, train accuracy = 75.94 valid_accuracy = 75.87
epoch 2 : train_loss = 0.67 valid_loss = 0.67, train accuracy = 75.61 valid_accuracy = 75.28
epoch 3 : train_loss = 0.66 valid_loss = 0.66, train accuracy = 76.88 valid_accuracy = 76.48
epoch 4 : train_loss = 0.65 valid_loss = 0.65, train accuracy = 77.28 valid_accuracy = 76.93
epoch 5 : train_loss = 0.65 valid_loss = 0.65, train accuracy = 77.61 valid_accuracy = 77.12
epoch 6 : train_loss = 0.64 valid_loss = 0.64, train accuracy = 77.82 valid_accuracy = 77.33
epoch 7 : train_loss = 0.64 valid_loss = 0.64, train accuracy = 77.78 valid_accuracy = 77.12
epoch 8 : train_loss = 0.64 valid_loss = 0.64, train accuracy = 78.17 valid_accuracy = 77.65
epoch 9 : train_loss = 0.64 valid_loss = 0.64, train accuracy = 78.33 valid_accuracy = 77.78
epoch 10 : train_loss = 0.63 valid_loss = 0.63, train accuracy = 78.33 valid_accuracy = 77.70


VBox(children=(Label(value='0.001 MB of 0.010 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.097836…

0,1
tr_accuracy,▂▁▄▅▆▇▇███
tr_loss,██▅▄▃▃▂▂▁▁
val_accuracy,▃▁▄▆▆▇▆███
val_loss,██▆▄▃▃▂▂▁▁

0,1
tr_accuracy,78.33148
tr_loss,0.63443
val_accuracy,77.7
val_loss,0.63409


[34m[1mwandb[0m: Agent Starting Run: x40mi6iu with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.61 valid_loss = 0.62, train accuracy = 78.61 valid_accuracy = 78.02
epoch 2 : train_loss = 0.52 valid_loss = 0.52, train accuracy = 81.69 valid_accuracy = 81.27
epoch 3 : train_loss = 0.47 valid_loss = 0.48, train accuracy = 83.07 valid_accuracy = 82.48
epoch 4 : train_loss = 0.44 valid_loss = 0.46, train accuracy = 84.00 valid_accuracy = 83.42
epoch 5 : train_loss = 0.42 valid_loss = 0.44, train accuracy = 84.71 valid_accuracy = 84.17


0,1
tr_accuracy,▁▅▆▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
tr_accuracy,84.71296
tr_loss,0.42456
val_accuracy,84.16667
val_loss,0.44068


[34m[1mwandb[0m: Agent Starting Run: b933v0fa with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.95 valid_loss = 0.97, train accuracy = 66.13 valid_accuracy = 66.00
epoch 2 : train_loss = 0.76 valid_loss = 0.78, train accuracy = 72.76 valid_accuracy = 72.50
epoch 3 : train_loss = 0.68 valid_loss = 0.70, train accuracy = 75.75 valid_accuracy = 75.20
epoch 4 : train_loss = 0.63 valid_loss = 0.66, train accuracy = 77.54 valid_accuracy = 76.48
epoch 5 : train_loss = 0.60 valid_loss = 0.63, train accuracy = 78.70 valid_accuracy = 77.48


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.111197…

0,1
tr_accuracy,▁▅▆▇█
tr_loss,█▄▃▂▁
val_accuracy,▁▅▇▇█
val_loss,█▄▂▂▁

0,1
tr_accuracy,78.6963
tr_loss,0.59587
val_accuracy,77.48333
val_loss,0.63241


[34m[1mwandb[0m: Agent Starting Run: huvpe1jp with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: Xavier


epoch 1 : train_loss = 0.49 valid_loss = 0.51, train accuracy = 82.61 valid_accuracy = 81.68
epoch 2 : train_loss = 0.42 valid_loss = 0.45, train accuracy = 84.57 valid_accuracy = 83.72
epoch 3 : train_loss = 0.40 valid_loss = 0.42, train accuracy = 85.66 valid_accuracy = 84.83
epoch 4 : train_loss = 0.37 valid_loss = 0.40, train accuracy = 86.31 valid_accuracy = 85.15
epoch 5 : train_loss = 0.36 valid_loss = 0.39, train accuracy = 86.91 valid_accuracy = 85.55
epoch 6 : train_loss = 0.34 valid_loss = 0.38, train accuracy = 87.32 valid_accuracy = 85.75
epoch 7 : train_loss = 0.33 valid_loss = 0.37, train accuracy = 87.77 valid_accuracy = 86.25
epoch 8 : train_loss = 0.33 valid_loss = 0.38, train accuracy = 87.88 valid_accuracy = 86.15
epoch 9 : train_loss = 0.31 valid_loss = 0.37, train accuracy = 88.28 valid_accuracy = 86.45
epoch 10 : train_loss = 0.30 valid_loss = 0.36, train accuracy = 88.71 valid_accuracy = 86.73


0,1
tr_accuracy,▁▃▄▅▆▆▇▇██
tr_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
tr_accuracy,88.70556
tr_loss,0.30251
val_accuracy,86.73333
val_loss,0.35892


[34m[1mwandb[0m: Agent Starting Run: g2aaezcs with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


  return (2. / (1. + np.exp(-2.*x))) - 1.


epoch 1 : train_loss = 2.56 valid_loss = 2.55, train accuracy = 10.00 valid_accuracy = 10.03
epoch 2 : train_loss = 2.56 valid_loss = 2.55, train accuracy = 10.00 valid_accuracy = 10.03
epoch 3 : train_loss = 2.56 valid_loss = 2.55, train accuracy = 10.00 valid_accuracy = 10.03
epoch 4 : train_loss = 2.56 valid_loss = 2.55, train accuracy = 10.00 valid_accuracy = 10.03
epoch 5 : train_loss = 2.56 valid_loss = 2.55, train accuracy = 10.00 valid_accuracy = 10.03
epoch 6 : train_loss = 2.56 valid_loss = 2.55, train accuracy = 10.00 valid_accuracy = 10.03
epoch 7 : train_loss = 2.56 valid_loss = 2.55, train accuracy = 10.00 valid_accuracy = 10.03
epoch 8 : train_loss = 2.56 valid_loss = 2.55, train accuracy = 10.00 valid_accuracy = 10.03
epoch 9 : train_loss = 2.56 valid_loss = 2.55, train accuracy = 10.00 valid_accuracy = 10.03
epoch 10 : train_loss = 2.56 valid_loss = 2.55, train accuracy = 10.00 valid_accuracy = 10.03


0,1
tr_accuracy,▁▁▁▁▁▁▁▁▁▁
tr_loss,▇██▆▁▄▄▃▃▂
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,███▆▁▄▄▃▃▂

0,1
tr_accuracy,9.9963
tr_loss,2.55518
val_accuracy,10.03333
val_loss,2.54646


[34m[1mwandb[0m: Agent Starting Run: 75xb3o0j with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


epoch 1 : train_loss = 0.41 valid_loss = 0.42, train accuracy = 85.04 valid_accuracy = 84.72
epoch 2 : train_loss = 0.36 valid_loss = 0.38, train accuracy = 86.95 valid_accuracy = 86.23
epoch 3 : train_loss = 0.33 valid_loss = 0.36, train accuracy = 87.84 valid_accuracy = 86.87
epoch 4 : train_loss = 0.31 valid_loss = 0.35, train accuracy = 88.58 valid_accuracy = 87.20
epoch 5 : train_loss = 0.30 valid_loss = 0.34, train accuracy = 89.15 valid_accuracy = 87.53
epoch 6 : train_loss = 0.28 valid_loss = 0.34, train accuracy = 89.54 valid_accuracy = 87.57
epoch 7 : train_loss = 0.27 valid_loss = 0.34, train accuracy = 89.93 valid_accuracy = 87.67
epoch 8 : train_loss = 0.27 valid_loss = 0.34, train accuracy = 90.09 valid_accuracy = 87.90
epoch 9 : train_loss = 0.26 valid_loss = 0.34, train accuracy = 90.25 valid_accuracy = 87.83
epoch 10 : train_loss = 0.26 valid_loss = 0.34, train accuracy = 90.39 valid_accuracy = 87.90


0,1
tr_accuracy,▁▃▅▆▆▇▇███
tr_loss,█▆▄▃▃▂▂▁▁▁
val_accuracy,▁▄▆▆▇▇▇███
val_loss,█▅▃▂▁▁▁▁▁▁

0,1
tr_accuracy,90.39259
tr_loss,0.25569
val_accuracy,87.9
val_loss,0.33938
