In [17]:
import keras
import numpy as np
from keras.datasets import fashion_mnist
from tqdm.auto import tqdm
import tensorflow as tf
import wandb
import pprint

# Question 1

In [3]:
def load_fashion_mnist(return_images=False):

  (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

  train_shuffler = np.random.shuffle(np.arange(50000))
  x_train, y_train = x_train[train_shuffler][0], y_train[train_shuffler][0]

  test_shuffler = np.random.shuffle(np.arange(10000))
  x_test, y_test = x_test[test_shuffler][0], y_test[test_shuffler][0]

  x_train = np.array(x_train/255).astype('float32')
  x_test = np.array(x_test/255).astype('float32')

  x_train, x_val = x_train[:50000], x_train[50000:]
  y_train, y_val = y_train[:50000], y_train[50000:]


  if (return_images==False):
    return {
        'train': {
            'X': x_train.reshape([50000, 784]),
            'Y': y_train.reshape([50000])
        },
        'val': {
            'X': x_val.reshape([10000, 784]),
            'Y': y_val.reshape([10000])
        },
        'test': {
            'X': x_test.reshape([10000, 784]),
            'Y': y_test.reshape([10000])
        }
  }

  else :
    return {
      'train': {
          	'X': x_train,
          	'Y': y_train
      },
      'val': {
            'X': x_val,
            'Y': y_val
      },
      'test': {
            'X': x_test,
            'Y': y_test
      }
    }


data = load_fashion_mnist()

# Question 2

In [4]:
class neural_network:

  # constructor function - initializes weights
  def __init__(self, dict_layers, initializer):

    self.weights_list = []
    self.biases_list = []
    self.dict_layers= dict_layers

    self.weights_list, self.biases_list = wandb_initializer(dict_layers, self.weights_list, self.biases_list, initializer)

  # function to compute forward propogation
  def forward_prop(self, W, b, X, Y, activation_func):

    A = []
    H = []
    
    H_pre = X
    
    L = self.dict_layers['num_hidden_layers']

    for i in range(L) :
      A.append(W[i] @ H_pre + b[i])
      H_pre = getattr(activation, activation_func)(A[i])
      H.append(H_pre)
    
    A.append(W[L] @ H_pre + b[L])
    
    Y_hat = activation.softmax(A[L])
    
    return {
        'A' : A,
        'H' : H,
        'Y_hat' : Y_hat
    }

  def self_forward_prop(self, X, Y, activation_func) :

    temp = self.forward_prop(self.weights_list,self.biases_list, X, Y, activation_func)
    return temp

  def back_prop(self, W, b, A, H, Y_hat, X, Y,activation_func):

    batch_size = len(Y)
    
    del_w = []
    del_b = []
    L = self.dict_layers['num_hidden_layers']
    
    E = np.zeros(Y_hat.shape)
    E[Y,np.arange(batch_size)] = 1
    # for j in range(len(Y)):
    #     E[int(Y[j])][j] = 1
    # what shape do you need y_hat and e to be in? Column or row vector?
    grad_A = -(E - Y_hat)
    #print('grad_a', grad_a.shape)

    for i in range(L,-1,-1) :

      temp1 = grad_A.reshape(-1,batch_size)
      
      if i==0 :
        temp2 = X.T
      else :
        temp2 = H[i-1].reshape((batch_size ,-1))

      del_w.append(temp1 @ temp2)
      del_b.append(grad_A)

      if(i!=0) :
        grad_H = W[i].T @ grad_A      
        grad_A = grad_H * getattr(activation,activation_func+'_der')(H[i-1])

    return {
        'dw' : del_w,
        'db' : del_b
    }

  def self_back_prop(self, A, H, Y_hat, X, Y,activation_func) :
    temp = self.back_prop(self.weights_list,self.biases_list, A, H, Y_hat, X, Y, activation_func)
    return temp

  def grad_wandb(self, W, b, X, Y,activation_func):

    X = X.T.reshape((784,-1))
    
    temp = self.forward_prop(W, b, X, Y, activation_func)
    temp2 = self.back_prop(W, b, temp['A'], temp['H'], temp['Y_hat'], X, Y, activation_func)

    return {
        'dw' : temp2['dw'],
        'db' : temp2['db']
    }

  def self_grad_wandb(self, X, Y, activation_func) :
    temp = self.grad_wandb(self.weights_list, self.biases_list, X, Y,activation_func)
    return temp

  def predict(self, X, activation_func):
    X = X.T.reshape((784,-1))
    temp = self.forward_prop(self.weights_list,self.biases_list, X, 0, activation_func)
    return {
      'y' : np.argmax(temp['Y_hat'],axis=0),
      'y_hat' : temp['Y_hat']
    }

  def update_vals(self,dw,db) :
    L = len(self.weights_list)
    for i in range(L) :
      # print('dw['+str(L-i-1)+']',dw[L-i-1])
      self.weights_list[i] =self.weights_list[i] - dw[L-i-1].reshape(self.weights_list[i].shape)

    for i in range(len(self.biases_list)) :
      # print('db['+str(L-i-1)+']',db[L-i-1])
      self.biases_list[i] =self.biases_list[i] - db[L-i-1].reshape(self.biases_list[i].shape)
##################################################################################
class activation:
  
  @staticmethod
  def sigmoid(z):
    return 1 / (1 + np.exp(-z))
  
  @staticmethod
  def relu(z):
    return (z>0) * z

  @staticmethod
  def tanh(z):
    return np.tanh(z)

  @staticmethod
  def sigmoid_der(z) :
    return z * (1-z)
  
  @staticmethod
  def relu_der(z) :
    return (z>0)

  @staticmethod
  def tanh_der(z):
    return 1 - z*z

  @staticmethod
  def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / np.sum(e_x,axis=0)
    #return np.array(tf.nn.softmax(np.array(x)))

def set_nn_shape(verbose=True, num_hidden_layers=-1, hidden_layer_size=-1):

  input_layer_size = 784
  hidden_layer_size = hidden_layer_size
  num_hidden_layers = num_hidden_layers
  output_layer_size = 10
  
  # input_layer_size = 3
  # hidden_layer_size = hidden_layer_size
  # num_hidden_layers = num_hidden_layers
  # output_layer_size = 2
  if (verbose):
    print("\nNumber Of Hidden Layers:")
    num_hidden_layers = int(input())

    print("\nSize Of Each Hidden Layer:")
    hidden_layer_size = int(input())

    print(f"\nThe Neural Network Has {num_hidden_layers+2} Layers In Total!")
  
  return {"input_layer_size": input_layer_size, "hidden_layer_size": hidden_layer_size, "output_layer_size": output_layer_size, "num_hidden_layers": num_hidden_layers}


In [5]:
def wandb_initializer(nn_shape, weights_list, biases_list, type='random', mu = 0, sigma = 1):
  
  # random initialization
  if (type=='random'):
    initializer = tf.keras.initializers.TruncatedNormal(mean=mu, stddev=sigma)
  # xavier initialization
  elif (type=='xavier'):
    initializer = tf.keras.initializers.GlorotNormal()

  weights_list.append(initializer(shape=(nn_shape['hidden_layer_size'], nn_shape['input_layer_size'])).numpy())
  biases_list.append(initializer(shape=(nn_shape['hidden_layer_size'], 1)).numpy())
  for i in range(nn_shape['num_hidden_layers'] - 1):
    weights_list.append(initializer(shape=(nn_shape['hidden_layer_size'], nn_shape['hidden_layer_size'])).numpy())
    biases_list.append(initializer(shape=(nn_shape['hidden_layer_size'], 1)).numpy())

  weights_list.append(initializer(shape=(nn_shape['output_layer_size'], nn_shape['hidden_layer_size'])).numpy())
  biases_list.append(initializer(shape=(nn_shape['output_layer_size'], 1)).numpy())

  return weights_list, biases_list

In [6]:
class optimizer:

  @staticmethod
  def sgd_old(network, data, config):

    # num_hidden_layers, hidden_layers_size = config['num_hidden_layers'], config['hidden_layer_size']
    num_epochs, batch_size = config['num_epochs'], config['batch_size']
    eta, lambda_ = config['lr'], config['weight_decay']
    initializer, activation_func = config['weights_initializer'], config['activation']
 
    ### google the getattr function - eg: getattr(activation, 'relu')(junk) is same as activation.relu(junk)
    X_train, Y_train = data['train']['X'], data['train']['Y']
    # print(num_hidden_layers, hidden_layers_size)

    for i in range(num_epochs):
      dw = []
      db = []
      check = 0

      for k in tqdm(range(len(X_train))) :
        x = X_train[k]
        y = Y_train[k]
        temp = network.self_grad_wandb(x,y,activation_func)

        if check==0 :
          dw = temp['dw']
          db = temp['db']
          check=1

        else :
          for j in range(len(dw)) :
            dw[j] =dw[j] + temp['dw'][j]
            db[j] =db[j] + temp['db'][j]

        if (k+1)%batch_size == 0 :
          for dd in dw :
            dd *=eta
          for dd in db :
            dd *=eta
          network.update_vals(dw, db)
          check=0

  @staticmethod
  def sgd(network, data, config):

    # num_hidden_layers, hidden_layers_size = config['num_hidden_layers'], config['hidden_layer_size']
    num_epochs, batch_size = config['num_epochs'], config['batch_size']
    eta, lambda_ = config['lr'], config['weight_decay']
    initializer, activation_func = config['weights_initializer'], config['activation']
 
    ### google the getattr function - eg: getattr(activation, 'relu')(junk) is same as activation.relu(junk)
    X_train, Y_train = data['train']['X'], data['train']['Y']
    # print(num_hidden_layers, hidden_layers_size)

    for i in range(num_epochs):
      for k in tqdm(range(0, len(X_train), batch_size)) :
        X = X_train[k: k+batch_size]
        Y = Y_train[k: k+batch_size]
        temp = network.self_grad_wandb(X, Y, activation_func)
        for j in range(len(temp['db'])) :
          temp['db'][j] = np.sum(temp['db'][j],axis=1)
          
        dw = temp['dw']
        db = temp['db']
        for dd in dw :
          dd*= eta
        for dd in db :
          dd*=eta

        network.update_vals(dw, db)


  @staticmethod
  def momentum(X,Y,max_epochs,eta,gamma,batch_size) :
    test = get_nn_shape()
    network = neural_network(test,test)
    dw = []
    db = []
    pred1 = []
    check = 0
    for i in range(max_epochs) :
      for k in tqdm(range(len(X))) :
        x = X[k]
        y = Y[k]
        temp = network.self_grad_wandb(x,y)
        if check==0 :
          dw = temp['dw']
          db = temp['db']
          for dd in db :
            dd*= eta
          for dd in dw :
            dd*=eta
          check = 1
        else :
          for j in range(len(dw)) :
            dw[j] += eta*temp['dw'][j]
            db[j] += eta*temp['db'][j]
        if (k+1) % batch_size == 0 or k == len(X)-1:
          network.update_vals(dw,db)
          for dd in db :
            dd *=gamma
          for dd in dw :
            dd*=gamma

      pred = 0
      for x,y in zip(X,Y) :
        pred = pred + (network.predict(x)!=y)

      print('error',pred)
    
    return pred1
  
  @staticmethod
  def NAG(X,Y,max_epochs,eta,gamma,batch_size) :
    test = get_nn_shape()
    network = neural_network(test,test)
    dw = []
    db = []
    check = 0
    for i in range(max_epochs) :
      for k in tqdm(range(len(X))) :
        x = X[k]
        y = Y[k]
        temp = network.self_grad_wandb(x,y)
        if check==0 :
          dw = temp['dw']
          db = temp['db']
          for dd in db :
            dd*= eta
          for dd in dw :
            dd*=eta
          check = 1
        else :
          for j in range(len(dw)) :
            dw[j] += eta*temp['dw'][j]
            db[j] += eta*temp['db'][j]

        if (k+1) % batch_size == 0 or k == len(X)-1:
          network.update_vals(dw,db)
          for dd in db :
            dd*=gamma
          for dd in dw :
            dd*=gamma
          network.update_vals(dw,db)

      pred = 0
      for x,y in zip(X,Y) :
        pred += (network.predict(x)!=y)

      print('error',pred)

  @staticmethod
  def RMSprop(X,Y,max_epochs,eta,beta,batch_size,epsilon) :
    test = get_nn_shape()
    network = neural_network(test,test)
    v_dw = []
    v_db = []
    check1 = 0
    for i in range(max_epochs) :
      dw = []
      db = []
      check = 0
      for k in tqdm(range(len(X))) :
        x = X[k]
        y = Y[k]
        temp = network.self_grad_wandb(x,y)
        if check==0 :
          dw = temp['dw']
          db = temp['db']
          check = 1
        else :
          for j in range(len(dw)) :
            dw[j] += temp['dw'][j]
            db[j] += temp['db'][j]

        if (k+1) % batch_size == 0 or k == len(X)-1:
          if check1==0 :
            for j in range(len(dw)) :
              v_dw.append( (1-beta)*(dw[j]**2) )
              dw[j] *= eta/np.sqrt(v_dw[j]+epsilon)

            for j in range(len(db)) :
              v_db.append( (1-beta)*(db[j]**2) )
              db[j] *= eta/np.sqrt(v_db[j]+epsilon)

            check1 = 1

          else :
            for j in range(len(dw)) :
              v_dw[j] *= beta
              v_dw[j] += (1-beta)*(dw[j]**2) 
              dw[j] *= eta/np.sqrt(v_dw[j]+epsilon)

            for j in range(len(db)) :
              v_db[j] *= beta
              v_db[j] += (1-beta)*(db[j]**2) 
              db[j] *= eta/np.sqrt(v_db[j]+epsilon)

          network.update_vals(dw,db)
          check = 1

      pred = 0
      for x,y in zip(X,Y) :
        pred += (network.predict(x)!=y)

      print('error',pred)


  @staticmethod
  def adam(X,Y,max_epochs,eta,beta1,beta2,batch_size) :
    test = get_nn_shape()
    network = neural_network(test,test)
    v_dw = []
    v_db = []
    m_dw = []
    m_db = []
    check1 = 0
    num_updates = 0
    for i in range(max_epochs) :
      dw = []
      db = []
      check = 0
      for k in tqdm(range(len(X))) :
        x = X[k]
        y = Y[k]
        temp = network.self_grad_wandb(x,y)
        if check==0 :
          dw = temp['dw']
          db = temp['db']
          check = 1
        else :
          for j in range(len(dw)) :
            dw[j] += temp['dw'][j]
            db[j] += temp['db'][j]

        if (k+1) % batch_size == 0 or k == len(X)-1:
          if check1==0 :
            for j in range(len(dw)) :
              v_dw.append( (1-beta1)*(dw[j]**2) )
              dw[j] *= eta/np.sqrt(v_dw[j]+epsilon)

            for j in range(len(db)) :
              v_db.append( (1-beta1)*(db[j]**2) )
              db[j] *= eta/np.sqrt(v_db[j]+epsilon)

            check1 = 1

          else :
            for j in range(len(dw)) :
              v_dw[j] *= beta1
              v_dw[j] += (1-beta1)*(dw[j]**2) 
              dw[j] *= eta/np.sqrt(v_dw[j]+epsilon)

            for j in range(len(db)) :
              v_db[j] *= beta1
              v_db[j] += (1-beta1)*(db[j]**2) 
              db[j] *= eta/np.sqrt(v_db[j]+epsilon)

          network.update_vals(dw,db)
          check = 1

      pred = 0
      for x,y in zip(X,Y) :
        pred += (network.predict(x)!=y)

      print('error',pred)

In [8]:
# X = np.array([[1,1,2],[-1,2,3],[10,-67,43],[-5,45,-67]])
# Y = np.array([1,0,1,0])
# temp = solver.sgd(X,Y,100,1e-3)
X=data['train']['X']
# print(len(X))
# print(X[0])
Y = data['train']['Y']
temp = solver.RMSprop(X,Y,100,0.001,0.9,32,1e-2)
print(temp)

NameError: name 'solver' is not defined

In [11]:
def run_callback(network,data,config) :
    
    activation_func = config['activation']
    
    X_train = data['train']['X']
    Y_train = data['train']['Y']

    X_val = data['val']['X']
    Y_val = data['val']['Y']

    X_test = data['test']['X']
    Y_test = data['test']['Y']

    train_loss = 0
    train_count = 0
    train_sq_error = 0
    
    temp = network.predict(X_train,activation_func)
    train_count = np.sum(temp['y'].reshape(Y_train.shape)==Y_train)
    
    Y_pred = np.array(temp['y_hat'].T)
    train_loss = np.sum(-np.log(Y_pred[np.arange(len(X_train)),Y_train]))
    #temp['y_hat'][y] = 1 - temp['y_hat'][y]
    #train_sq_error += np.sum(np.dot(temp['y_hat'],temp['y_hat']))

    val_loss = 0
    val_count = 0
    val_sq_error = 0
    # for x,y in zip(X_val,Y_val) :
    #     temp = network.predict(x,activation_func)
    #     if temp['y'] == y :
    #         val_count += 1
    #     val_loss -= np.log(temp['y_hat'][y]) 
    #     temp['y_hat'][y] = 1 - temp['y_hat'][y]
    #     val_sq_error += np.sum(np.dot(temp['y_hat'],temp['y_hat'])) 
    '''
    test_loss = 0
    test_count = 0
    test_sq_error = 0
    for x,y in zip(X_test,Y_test) :
        temp = network.predict(x,activation_func)
        if temp['y'] == y :
            test_count += 1
        test_loss -= np.log(temp['y_hat'][y]) 
        temp['y_hat'][y] = 1 - temp['y_hat'][y]
        test_sq_error += np.sum(np.dot(temp['y_hat'],temp['y_hat'])) 
    '''
    return  {
        'loss': {
            'train' : np.array([train_sq_error,train_loss,train_count])/len(X_train),
            'val' : np.array([val_sq_error,val_loss,val_count])/len(X_val)
        },
        'accuracy': {
            'train': train_count / len(X_train),
            'val': val_count / len(X_val)
        }
        #'test' : np.array([test_sq_error,test_loss,test_count])/len(X_test)
    }

    


In [12]:
nn = set_nn_shape()
network = neural_network(nn, 'random')
#data1 = {'train' : {'X': np.array([[1,1,2],[-1,2,3],[10,-67,43],[-5,45,-67],[5,6,7]]), 'Y' : np.array([1,0,1,0,1]) },'val' : {'X': np.array([[1,1,1],[1,1,1],[1,1,1],[1,1,1],[1,1,1]]), 'Y' : np.array([0,1,3,2,1]) }}
# need to change this bit later to accomodate other optimization functions

config1 = {'num_epochs' : 5,'lr' : 1e-2,'optimizer': 'sgd', 'batch_size' : 32 , 'weights_initializer' : 'random' , 'weight_decay' : 0.001, 'activation' : 'sigmoid' }
optimizer.sgd(network, data,config1 )

# generating reports for the run
report = run_callback(network, data, config1) 

print(report)


Number Of Hidden Layers:
5

Size Of Each Hidden Layer:
32

The Neural Network Has 7 Layers In Total!


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))


{'loss': {'train': array([0.        , 0.62162527, 0.78382   ]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.78382, 'val': 0.0}}


In [10]:
# generating reports for the run
report = run_callback(network, data, config1) 
print(report)

NameError: name 'run_callback' is not defined

In [None]:
activation.softmax(np.array([[0.7,0.5],[3,7]]))

In [13]:
sweep_config = {
    'method': 'random',

    'parameters': {
        'num_epochs': {
            'values': [5, 10]
        },
        'num_hidden_layers': {
            'values': [3, 4, 5]
        },
        'hidden_layer_size': {
            'values': [32, 64, 128]
        },
        'weight_decay': {
            'values': [0, 0.0005, 0.5]
        },
        'lr': {
            'values': [1e-3, 1e-4, 1e-5, 1e-6]
        },
        'optimizer': {
            'values': ['sgd', 'momentum', 'nesterov', 'rmsprop', 'adam', 'nadam']
        },
        'batch_size': {
            'values': [16, 32, 64]
        },
        'weights_initializer': {
            'values': ['random', 'xavier']
        },
        'activation': {
            'values': ['sigmoid', 'tanh', 'relu']
        }        
    }
}

In [18]:
sweep_id = wandb.sweep(sweep_config, project='test4')

Create sweep with ID: n84t5yf1
Sweep URL: https://wandb.ai/ramkamal/test4/sweeps/n84t5yf1


In [19]:
pprint.pprint(sweep_config)

{'method': 'random',
 'parameters': {'activation': {'values': ['sigmoid', 'tanh', 'relu']},
                'batch_size': {'values': [16, 32, 64]},
                'hidden_layer_size': {'values': [32, 64, 128]},
                'lr': {'values': [0.001, 0.0001, 1e-05, 1e-06]},
                'num_epochs': {'values': [5, 10]},
                'num_hidden_layers': {'values': [3, 4, 5]},
                'optimizer': {'values': ['sgd',
                                         'momentum',
                                         'nesterov',
                                         'rmsprop',
                                         'adam',
                                         'nadam']},
                'weight_decay': {'values': [0, 0.0005, 0.5]},
                'weights_initializer': {'values': ['random', 'xavier']}}}


In [20]:
class sweep_module:
  @staticmethod
  def train(config=None):

    with wandb.init(config):

      config = wandb.config
      nn_shape = set_nn_shape(False, config['num_hidden_layers'] , config['hidden_layer_size'])
      network = neural_network(nn_shape, config['weights_initializer'])
      
      # need to change this bit later to accomodate other optimization functions
      optimizer.sgd(network, data, config)
      
      print('checkpoint 1')
      # generating reports for the run
      report = run_callback(network, data, config) 
      print(report)
      '''
      # keeping track of the best model
      if (report['accuracy']['val'] > val_acc_best):
        val_acc_best, network_best = report['accuracy']['val'], network
      '''
      wandb.log({
        'batch_size': config.batch_size, 
        'val_loss' : report['loss']['val'], 
        'train_loss': report['loss']['train'],
        'train_acc': report['accuracy']['train'],
        'val_acc': report['accuracy']['val']  
      })    

In [22]:
# for logging the best model
network_best = None
val_acc_best = -1

# performing the sweep
wandb.agent(sweep_id, sweep_module.train, count=15)

[34m[1mwandb[0m: Agent Starting Run: w9bodgk2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weights_initializer: random


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.        , 0.90808694, 0.69502   ]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.69502, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,32.0
train_acc,0.69502
val_acc,0.0
_runtime,11.0
_timestamp,1615634360.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: 6ld0nv8n with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weights_initializer: xavier


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.        , 2.09095901, 0.48134   ]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.48134, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,16.0
train_acc,0.48134
val_acc,0.0
_runtime,23.0
_timestamp,1615634397.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: 7frgm955 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	lr: 1e-06
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weights_initializer: xavier


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.        , 2.34401521, 0.10008   ]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.10008, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,64.0
train_acc,0.10008
val_acc,0.0
_runtime,11.0
_timestamp,1615634412.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: yxyejo7i with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weights_initializer: random


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.        , 0.81131727, 0.723     ]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.723, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,32.0
train_acc,0.723
val_acc,0.0
_runtime,11.0
_timestamp,1615634428.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: rrxoiypq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	lr: 1e-05
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weights_initializer: xavier


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.        , 1.22826231, 0.68988   ]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.68988, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,64.0
train_acc,0.68988
val_acc,0.0
_runtime,12.0
_timestamp,1615634444.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: 9zod8zum with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	lr: 1e-05
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weights_initializer: xavier


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.        , 2.30269043, 0.10334   ]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.10334, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,64.0
train_acc,0.10334
val_acc,0.0
_runtime,10.0
_timestamp,1615634459.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: dcsr9a9r with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	lr: 1e-06
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weights_initializer: xavier


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.        , 2.23328441, 0.14718   ]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.14718, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,32.0
train_acc,0.14718
val_acc,0.0
_runtime,10.0
_timestamp,1615634474.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: 824sxpdb with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	lr: 1e-05
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weights_initializer: random


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.        , 1.63779217, 0.42766   ]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.42766, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,32.0
train_acc,0.42766
val_acc,0.0
_runtime,31.0
_timestamp,1615634510.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: kv8jyqy9 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	lr: 1e-05
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weights_initializer: random


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.     ,     nan, 0.09954]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.09954, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,64.0
train_acc,0.09954
val_acc,0.0
_runtime,7.0
_timestamp,1615634521.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: di1mxgx6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weights_initializer: xavier


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.        , 0.56729005, 0.80518   ]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.80518, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,16.0
train_acc,0.80518
val_acc,0.0
_runtime,24.0
_timestamp,1615634549.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: iach0rvo with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	lr: 1e-06
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weights_initializer: random


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.        , 4.34277504, 0.13226   ]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.13226, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,16.0
train_acc,0.13226
val_acc,0.0
_runtime,13.0
_timestamp,1615634568.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: vvk7itnm with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	lr: 1e-06
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weights_initializer: random


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.     ,     nan, 0.09954]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.09954, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,32.0
train_acc,0.09954
val_acc,0.0
_runtime,6.0
_timestamp,1615634579.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fxbwrf0f with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weights_initializer: random


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3125.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.     ,     nan, 0.09954]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.09954, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,16.0
train_acc,0.09954
val_acc,0.0
_runtime,11.0
_timestamp,1615634605.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: wmomsbdf with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weights_initializer: xavier


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=782.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.        , 0.68656522, 0.79056   ]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.79056, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,64.0
train_acc,0.79056
val_acc,0.0
_runtime,6.0
_timestamp,1615634616.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: jzm8tx8g with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weights_initializer: xavier


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1563.0), HTML(value='')))


checkpoint 1
{'loss': {'train': array([0.        , 0.46584918, 0.85386   ]), 'val': array([0., 0., 0.])}, 'accuracy': {'train': 0.85386, 'val': 0.0}}


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch_size,32.0
train_acc,0.85386
val_acc,0.0
_runtime,8.0
_timestamp,1615634630.0
_step,0.0


0,1
batch_size,▁
train_acc,▁
val_acc,▁
_runtime,▁
_timestamp,▁
_step,▁
