<a href="https://colab.research.google.com/github/sid-betalol/CS6910-FODL-Assignment1/blob/main/cs6910_assignment1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### To add
1. Break the notebook down into scripts, following the code implementation instructions.


### **wandb setup**

In [None]:
# !pip install wandb

In [None]:
import wandb

### **Importing the required libraries**


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## **Template Class for Activation Functions**

In [None]:
class daddyActivation:
    """
    Template (base) class for activation functions.

    Subclasses override ``forward``, ``calc_grads`` and ``backward``;
    the base implementations do nothing and return None.
    """

    def __init__(self, *args, **kwargs):
        """
        Start with empty containers for the cached forward values
        and for the local gradients.
        """
        self.backprop_cache = {}
        self.grads = {}

    def __call__(self, *args, **kwargs):
        """
        Run the forward pass, then store the local gradients returned by
        ``calc_grads`` in ``self.grads``. Returns the forward output.
        """
        output = self.forward(*args, **kwargs)
        self.grads = self.calc_grads(*args, **kwargs)
        return output

    def forward(self, *args, **kwargs):
        """
        Forward pass of the activation function (to be overridden).
        """
        return None

    def calc_grads(self, *args, **kwargs):
        """
        Gradient of the activation with respect to its input
        (to be overridden).
        """
        return None

    def backward(self, *args, **kwargs):
        """
        Gradient of the loss with respect to the activation's input,
        built from the values stored by ``calc_grads`` (to be overridden).
        """
        return None

### **Activation Functions**

In [None]:
class Sigmoid(daddyActivation):
    """Logistic sigmoid activation, 1 / (1 + exp(-x)), applied element-wise."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Apply the sigmoid to ``x`` and cache the activated output."""
        activated = 1/(1+np.exp(-x))
        self.backprop_cache = activated
        return activated

    def calc_grads(self, x):
        """Local derivative s * (1 - s), computed from the cached output s."""
        s = self.backprop_cache
        return {"x": s*(1-s)}

    def backward(self, y_hat):
        """Scale the upstream gradient ``y_hat`` by the stored local derivative."""
        local_grad = self.grads['x']
        return local_grad*y_hat


class Tanh(daddyActivation):
    """Hyperbolic-tangent activation applied element-wise."""

    def __init__(self):

        super().__init__()

    def forward(self, x):
        """
        Compute tanh(x) and cache it for the gradient computation.

        np.tanh is used rather than the explicit
        (exp(x) - exp(-x)) / (exp(-x) + exp(x)) ratio: for large |x| the
        exponentials overflow to inf and the ratio evaluates to inf/inf = nan,
        whereas np.tanh saturates at -1 / +1.
        """
        self.backprop_cache = np.tanh(x)
        return self.backprop_cache

    def calc_grads(self, x):
        """Local derivative 1 - tanh(x)**2, computed from the cached output."""
        y = self.backprop_cache
        return {"x": 1 - y**2}

    def backward(self, y_hat):
        """Scale the upstream gradient ``y_hat`` by the stored local derivative."""
        return self.grads['x']*y_hat


class ReLU(daddyActivation):
    """Rectified linear unit, max(x, 0), applied element-wise."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Clamp negative entries of ``x`` to zero and cache the result."""
        rectified = np.maximum(x, 0.0)
        self.backprop_cache = rectified
        return rectified

    def calc_grads(self, x):
        """Local derivative: 1.0 where the cached output is positive, else 0.0."""
        is_active = self.backprop_cache > 0
        return {"x": is_active.astype("float")}

    def backward(self, y_hat):
        """Scale the upstream gradient ``y_hat`` by the stored local derivative."""
        local_grad = self.grads['x']
        return local_grad*y_hat


class LeakyReLU(daddyActivation):
    """Leaky ReLU, max(x, alpha * x), applied element-wise."""

    def __init__(self, alpha=0.1):
        """alpha : multiplier applied to the input in the ``alpha * x`` branch."""
        super().__init__()
        self.alpha = alpha

    def forward(self, x):
        """Return the element-wise maximum of ``x`` and ``alpha * x``; cache it."""
        leaked = np.maximum(x, self.alpha*x)
        self.backprop_cache = leaked
        return leaked

    def calc_grads(self, x):
        """Local derivative: 1 where the cached output is positive, else alpha."""
        cached_output = self.backprop_cache
        slope = np.where(cached_output > 0, 1, self.alpha)
        return {"x": slope}

    def backward(self, y_hat):
        """Scale the upstream gradient ``y_hat`` by the stored local derivative."""
        local_grad = self.grads['x']
        return local_grad*y_hat

## **Template Class for Loss Functions**

In [None]:
class daddyLoss:
    """
    Template (base) class for loss functions.

    Subclasses override ``forward`` (loss value) and ``calc_grads``
    (a dict whose ``'x'`` entry is what ``backward`` returns).
    """

    def __init__(self, *args, **kwargs):
        """Start with empty gradient and cache containers."""
        self.backprop_cache = {}
        self.grads = {}

    def __call__(self, y_pred, y_true, *args, **kwargs):
        """
        Evaluate the loss, then store the result of ``calc_grads`` in
        ``self.grads``. Returns the value produced by ``forward``.
        """
        loss_value = self.forward(y_pred, y_true, *args, **kwargs)
        self.grads = self.calc_grads(y_pred, y_true, *args, **kwargs)
        return loss_value

    def forward(self, y_pred, y_true, *args, **kwargs):
        """Loss value for the given predictions and targets (to be overridden)."""
        return None

    def calc_grads(self, y_pred, y_true, *args, **kwargs):
        """Gradient dict for the given predictions and targets (to be overridden)."""
        return None

    def backward(self, *args, **kwargs):
        """Return the ``'x'`` gradient stored by the most recent call."""
        stored = self.grads
        return stored['x']

### **Helper Functions for Loss (Output Function: Softmax)**

In [None]:
# softmax will be used for the output layer
def softmax(x):
    """
    Softmax along the last axis.

    The maximum along that axis is subtracted before exponentiating, which
    leaves the result unchanged but keeps the exponentials from overflowing.
    """
    axis_max = np.max(x, axis= -1, keepdims=True)
    shifted_exp = np.exp(x - axis_max)
    normaliser = np.sum(shifted_exp, axis = -1, keepdims=True)
    return shifted_exp/ normaliser

### **Loss Functions**

In [None]:
class MSE(daddyLoss):
    """
    Squared-error loss between softmax(y_pred) and the one-hot encoding of
    y_true, summed over classes and averaged over the batch.
    """

    def __init__(self):

        super().__init__()

    def forward(self, y_pred, y_true):
        """
        y_pred : 2-D array of raw scores, one row per sample (softmax is
                 applied here).
        y_true : integer class labels, one per row of y_pred.

        Caches the probabilities and the one-hot targets for ``calc_grads``
        and returns the scalar loss.
        """
        num_classes = y_pred.shape[-1]
        probabs = softmax(y_pred)
        y_true_encoding = np.eye(num_classes, dtype = int)[np.array(y_true).astype(int)]
        self.backprop_cache['y_true'] = y_true_encoding
        loss = np.mean(np.sum((probabs - y_true_encoding)**2, axis=1))
        self.backprop_cache['probabs'] = probabs
        return loss

    def calc_grads(self, y_pred, y_true):
        """
        Gradient of the loss with respect to the raw scores y_pred.

        With p = softmax(z) and d = p - y, back-propagating
        L = sum_k d_k**2 through the softmax Jacobian gives
            dL/dz_j = 2 * p_j * (d_j - sum_k d_k * p_k).
        The factor 2 comes from differentiating the square; the loss in
        ``forward`` carries no 1/2, so it must appear here for the gradient
        to match the reported loss.
        """
        batch_size = y_pred.shape[0]
        probabs = self.backprop_cache["probabs"]
        sub_term = probabs - self.backprop_cache["y_true"]
        grad = 2.0*(sub_term - (sub_term*probabs).sum(axis = 1, keepdims = True))*probabs
        # forward() averages over the batch, so the gradient does too.
        grad = grad/batch_size
        return {'x': grad}
        
class LogLoss(daddyLoss):
    """Cross-entropy loss on softmax(y_pred) against integer class labels."""

    def __init__(self):

        super().__init__()

    def forward(self, y_pred, y_true):
        """
        y_pred : 2-D array of raw scores, one row per sample (softmax is
                 applied here).
        y_true : class labels, one per row of y_pred.

        Caches the probabilities and the one-hot targets for ``calc_grads``
        and returns the mean negative log-probability of the true class.
        """
        num_classes = y_pred.shape[-1]
        probabs = softmax(y_pred)
        # Cast once and reuse: the same integer labels build the one-hot
        # targets and index the true-class probabilities, so labels stored
        # with a float dtype no longer fail at the fancy-indexing step.
        true_idx = np.array(y_true).astype(int)
        y_true_encoding = np.eye(num_classes, dtype = int)[true_idx]
        self.backprop_cache['y_true'] = y_true_encoding
        # 1e-16 keeps log() finite when a probability underflows to 0.
        loss = np.mean(- np.log(probabs[np.arange(y_pred.shape[0]), true_idx] + 1e-16))
        self.backprop_cache['probabs'] = probabs
        return loss

    def calc_grads(self, y_pred, y_true):
        """
        Gradient with respect to the raw scores: (softmax - one_hot),
        divided by the batch size to match the mean taken in ``forward``.
        """
        batch_size = y_pred.shape[0]
        grad = self.backprop_cache["probabs"] - self.backprop_cache["y_true"]
        grad = grad/batch_size
        return {'x':grad}

## **Template Class for Optimizers**

In [None]:
class daddyOptimizer:
    """
    Template (base) class for optimizers.

    Subclasses override ``calc_update`` to return, for a given layer, a dict
    holding one additive update per key of ``layer.weights``.
    """

    def __init__(self, *args, **kwargs):
        """Create the (initially empty) per-parameter state store."""
        self.history = {}

    def update_weights(self, layer, *args, **kwargs):
        """
        Add the update from ``calc_update`` to every entry of
        ``layer.weights``. Each entry is rebound to a new value rather than
        modified in place.
        """
        deltas = self.calc_update(layer)
        for name in list(layer.weights):
            layer.weights[name] = layer.weights[name] + deltas[name]

    def calc_update(self, layer, *args, **kwargs):
        """Optimizer-specific update rule; must be overridden by subclasses."""
        return None

### **Optimizers**
SGD update rule: https://iitm-pod.slides.com/arunprakash_ai/cs6910-lecture-5#/0/56/7

Momentum Update Rule: https://iitm-pod.slides.com/arunprakash_ai/cs6910-lecture-5#/0/40/7

NAG Update Rule: https://iitm-pod.slides.com/arunprakash_ai/cs6910-lecture-5#/0/50/7

Simplified NAG Update Rule: 
1. [ADVANCES IN OPTIMIZING RECURRENT NETWORKS, Bengio et al.](https://arxiv.org/pdf/1212.0901.pdf); 
2. [Sections 7.1 and 7.2 of Ilya Sutskever's PhD. Thesis: TRAINING RECURRENT NEURAL NETWORKS](https://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf); 
3. Brief Mathematical Intuition: [Neural Network Nesterov Momentum by James D. McCaffrey](https://jamesmccaffrey.wordpress.com/2017/07/24/neural-network-nesterov-momentum/);
4. [Various Formulations of NAG](https://jlmelville.github.io/mize/nesterov.html)

RMSProp Update Rule: https://iitm-pod.slides.com/arunprakash_ai/cs6910-lecture-52#/0/18/5

Adam Update Rule: https://iitm-pod.slides.com/arunprakash_ai/cs6910-lecture-52#/0/40/10

NAdam Update Rule: https://iitm-pod.slides.com/arunprakash_ai/cs6910-lecture-52#/0/68

**TODO:** add a comment explaining the bias correction used in Adam/NAdam.

Summary of optimizers: https://blog.paperspace.com/intro-to-optimization-momentum-rmsprop-adam/

In [None]:
class SGD(daddyOptimizer):
    """Plain gradient descent: update = -learning_rate * gradient."""

    def __init__(self, learning_rate = 0.001):
        super().__init__()
        self.learning_rate = learning_rate

    def calc_update(self, layer):
        """
        Return one update per key of ``layer.weights``, computed from the
        matching gradient in ``layer.del_theta``.
        """
        return {
            name: -self.learning_rate*layer.del_theta[name]
            for name in layer.weights
        }

class Momentum(daddyOptimizer):
    """
    Momentum-based gradient descent.

    Keeps a velocity ``u`` per parameter in ``self.history`` (key:
    ``layer.__str__() + name``): u <- beta * u + learning_rate * gradient,
    and the update applied to the parameter is -u.
    """

    def __init__(self, learning_rate = 0.001, beta = 0.9):
        super().__init__()
        self.learning_rate = learning_rate
        self.beta = beta

    def calc_update(self, layer):
        """Advance each parameter's velocity and return the negated velocities."""
        update = {}

        for name in layer.weights:
            slot = layer.__str__() + name
            if slot in self.history:
                velocity = self.beta*self.history[slot]['u'] + self.learning_rate*layer.del_theta[name]
                self.history[slot]['u'] = velocity
            else:
                # First time this parameter is seen: no previous velocity.
                velocity = self.learning_rate*layer.del_theta[name]
                self.history[slot] = {'u': velocity}

            update[name] = -velocity

        return update

class Nesterov(daddyOptimizer):
    """
    Nesterov accelerated gradient in the simplified (Bengio et al.) form.

    With velocity u kept per parameter in ``self.history``:
        update = beta^2 * u - (1 + beta) * learning_rate * gradient
        u      <- beta * u - learning_rate * gradient
    The first update of a parameter is a plain -learning_rate * gradient step.
    """
    # References:
    # [ADVANCES IN OPTIMIZING RECURRENT NETWORKS, Bengio et al.](https://arxiv.org/pdf/1212.0901.pdf)
    # [Sections 7.1 and 7.2 of Ilya Sutskever's PhD. Thesis: TRAINING RECURRENT NEURAL NETWORKS](https://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf)
    # Brief Mathematical Intuition: [Neural Network Nesterov Momentum by James D. McCaffrey](https://jamesmccaffrey.wordpress.com/2017/07/24/neural-network-nesterov-momentum/)
    # [Various Formulations of NAG](https://jlmelville.github.io/mize/nesterov.html)

    def __init__(self, learning_rate = 0.001, beta = 0.9):
        super().__init__()
        self.learning_rate = learning_rate
        self.beta = beta

    def calc_update(self, layer):
        """Return the NAG update per parameter and advance the stored velocities."""
        update = {}
        for name in layer.weights:
            slot = layer.__str__() + name
            state = self.history.get(slot)
            if state is None:
                # First sight of this parameter: start from zero velocity
                # and take a plain gradient step.
                state = self.history[slot] = {'u': 0}
                update[name] = -self.learning_rate*layer.del_theta[name]
            else:
                update[name] = self.beta*self.beta*state['u'] - (1+self.beta)*self.learning_rate*layer.del_theta[name]
            # The velocity is advanced after the update has been computed
            # from its previous value.
            state['u'] = self.beta*state['u'] - self.learning_rate*layer.del_theta[name]

        return update

class RMSProp(daddyOptimizer):
    """
    RMSProp: scales each gradient by the root of an exponential moving
    average ``u`` of its square, kept per parameter in ``self.history``.
    """

    def __init__(self, learning_rate = 0.001, beta = 0.9, epsilon = 1e-7):
        super().__init__()
        self.learning_rate = learning_rate
        self.beta = beta
        self.epsilon = epsilon

    def calc_update(self, layer):
        """Update the squared-gradient averages and return the scaled steps."""
        update = {}

        for name in layer.weights:
            slot = layer.__str__() + name
            grad = layer.del_theta[name]
            if slot in self.history:
                self.history[slot]['u'] = self.beta*self.history[slot]['u'] + (1 - self.beta)*(grad**2)
            else:
                self.history[slot] = {'u': (1-self.beta)*(grad**2)}

            # epsilon sits inside the square root here.
            sqrt_term = np.sqrt(self.history[slot]['u'] + self.epsilon)
            update[name] = -(self.learning_rate*grad/sqrt_term)

        return update

class Adam(daddyOptimizer):
    """
    Adam: bias-corrected exponential moving averages of the gradient (``m``)
    and of the squared gradient (``u``), kept per parameter in
    ``self.history`` under the key ``layer.__str__() + name``.
    """

    def __init__(self, learning_rate = 0.001, epsilon = 1e-7, beta1 = 0.9, beta2 = 0.999):

        super().__init__()
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.beta1 = beta1
        self.beta2 = beta2
        # 1-based index of the next step: largest per-parameter step count + 1.
        self.steps = 1

    def calc_update(self, layer):
        """
        Return the Adam update for every key of ``layer.weights``.

        The bias-correction exponent is the number of updates applied to
        that particular parameter (stored as ``'t'`` next to its moments).
        A single counter incremented on every call would advance once per
        layer whenever one optimizer instance serves several layers, which
        inflates the exponent and weakens the correction.
        """
        update = {}

        for k in layer.weights:
            key = layer.__str__() + k
            grad = layer.del_theta[k]
            state = self.history.get(key)

            if state is None:
                # First update of this parameter: moments start from zero.
                state = self.history[key] = {
                    'm': (1 - self.beta1)*grad,
                    'u': (1 - self.beta2)*(grad**2),
                    't': 1,
                }
            else:
                state['m'] = self.beta1*state['m'] + (1 - self.beta1)*grad
                state['u'] = self.beta2*state['u'] + (1 - self.beta2)*(grad**2)
                state['t'] += 1

            t = state['t']
            corrected_avg = state['m']/(1-(self.beta1)**t)
            corrected_squared_avg = state['u']/(1-(self.beta2)**t)

            sqrt_term = np.sqrt(corrected_squared_avg) + self.epsilon
            update[k] = -(self.learning_rate*corrected_avg/sqrt_term)

            # Keep the public counter meaningful for code that reads it.
            self.steps = max(self.steps, t + 1)

        return update

class NAdam(daddyOptimizer):
    """
    NAdam (Adam with a Nesterov-style look-ahead): the step combines the
    bias-corrected first moment with the current gradient, scaled by the
    root of the bias-corrected second moment. Per-parameter state lives in
    ``self.history`` under the key ``layer.__str__() + name``.
    """

    def __init__(self, learning_rate = 0.001, epsilon = 1e-7, beta1 = 0.9, beta2 = 0.999):

        super().__init__()
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.beta1 = beta1
        self.beta2 = beta2
        # 1-based index of the next step: largest per-parameter step count + 1.
        self.steps = 1

    def calc_update(self, layer):
        """
        Return the NAdam update for every key of ``layer.weights``.

        The bias-correction exponent is the number of updates applied to
        that particular parameter (stored as ``'t'`` next to its moments).
        A single counter incremented on every call would advance once per
        layer whenever one optimizer instance serves several layers, which
        inflates the exponent and weakens the correction.
        """
        update = {}

        for k in layer.weights:
            key = layer.__str__() + k
            grad = layer.del_theta[k]
            state = self.history.get(key)

            if state is None:
                # First update of this parameter: moments start from zero.
                state = self.history[key] = {
                    'm': (1 - self.beta1)*grad,
                    'u': (1 - self.beta2)*(grad**2),
                    't': 1,
                }
            else:
                state['m'] = self.beta1*state['m'] + (1 - self.beta1)*grad
                state['u'] = self.beta2*state['u'] + (1 - self.beta2)*(grad**2)
                state['t'] += 1

            t = state['t']
            corrected_avg = state['m']/(1-(self.beta1)**t)
            corrected_squared_avg = state['u']/(1-(self.beta2)**t)

            sqrt_term = np.sqrt(corrected_squared_avg) + self.epsilon
            # Weight of the current gradient in the look-ahead term.
            delta_coeff = (1-self.beta1)/(1-self.beta1**t)

            update[k] = -((self.learning_rate/sqrt_term)*(self.beta1*corrected_avg + delta_coeff*grad))

            # Keep the public counter meaningful for code that reads it.
            self.steps = max(self.steps, t + 1)

        return update

## **Template Class for Layers**

In [None]:
class daddyLayer:
    """
    Template (base) class for trainable layers.

    Subclasses override ``init_weights``, ``forward``, ``calc_grads`` and
    ``backward``; weight updates are delegated to ``self.optimizer``.
    """

    def __init__(self, *args, **kwargs):
        """Create empty gradient, weight and cache dicts; no optimizer yet."""
        self.optimizer = None
        self.backprop_cache = {}
        self.weights = {}
        self.grads = {}

    def __call__(self, *args, **kwargs):
        """
        Run the forward pass, then store the result of ``calc_grads`` in
        ``self.grads``. Returns the forward output.
        """
        output = self.forward(*args, **kwargs)
        self.grads = self.calc_grads(*args, **kwargs)
        return output

    def init_weights(self, *args, **kwargs):
        """Create the layer's parameters (to be overridden)."""
        return None

    def forward(self, *args, **kwargs):
        """Forward pass (to be overridden)."""
        return None

    def calc_grads(self, *args, **kwargs):
        """Local gradients needed by ``backward`` (to be overridden)."""
        return None

    def backward(self, *args, **kwargs):
        """Backward pass (to be overridden)."""
        return None

    def update_weights(self, *args, **kwargs):
        """Let the attached optimizer update this layer's weights."""
        optimizer = self.optimizer
        optimizer.update_weights(self)

#### **Helper Functions (weight initialization)**

In [None]:
def weight_initialization(init_method, in_dim, out_dim):
    """
    Draw initial weights and biases for a dense layer.

    init_method : 'random' -> standard-normal samples scaled by sqrt(1 / in_dim)
                  'xavier' -> uniform samples in [-limit, limit) with
                              limit = sqrt(6 / (in_dim + out_dim))
    in_dim      : number of inputs of the layer
    out_dim     : number of outputs of the layer

    Returns (weights, biases) with shapes (in_dim, out_dim) and (1, out_dim).
    Raises ValueError for any other ``init_method`` (previously this fell
    through to an UnboundLocalError at the return statement).
    """
    if init_method == 'random':
        scale = np.sqrt(1.0/in_dim)
        weights = np.random.randn(in_dim, out_dim)*scale
        biases = np.random.randn(1, out_dim)*scale
    elif init_method == 'xavier':
        # Named `limit` so the builtins max/min are not shadowed.
        limit = np.sqrt(6 / (in_dim + out_dim))
        weights = np.random.uniform(low = -limit, high = limit, size = (in_dim, out_dim))
        biases = np.random.uniform(low = -limit, high = limit, size = (1, out_dim))
    else:
        raise ValueError(f"Unknown init_method {init_method!r}; expected 'random' or 'xavier'")

    return weights, biases

### **Layers of a Feedforward neural network**
Reference used for Xavier initialization: https://towardsdatascience.com/weight-initialization-in-neural-networks-a-journey-from-the-basics-to-kaiming-954fb9b47c79

In [None]:
class FNNLayer(daddyLayer):
    """
    Fully connected layer computing ``x @ w + b``, with an optional
    weight-decay term added to the parameter gradients.
    """

    def __init__(self, in_dim, out_dim, weight_decay = None, init_method = 'random'):
        """
        in_dim / out_dim : input and output widths of the layer
        weight_decay     : decay coefficient; falsy values (None, 0) disable it
        init_method      : name passed on to ``weight_initialization``
        """
        super().__init__()
        self.in_dim, self.out_dim = in_dim, out_dim
        self.weight_decay = weight_decay
        self.init_method = init_method
        self.init_weights()

    def init_weights(self):
        """Fill ``self.weights['w']`` and ``self.weights['b']``."""
        w, b = weight_initialization(self.init_method, self.in_dim, self.out_dim)
        self.weights['w'] = w
        self.weights['b'] = b

    def forward(self, x):
        """Cache the input and return the affine map ``x @ w + b``."""
        self.backprop_cache['x'] = x
        return np.einsum('ij,jk->ik', x, self.weights["w"]) + self.weights["b"]

    def calc_grads(self, x):
        """
        Local Jacobian factors used by ``backward``: the transposed cached
        input (for the weights) and the transposed weights (for the input).
        """
        return {
            'w': np.einsum('ij -> ji', self.backprop_cache['x']),
            'x': np.einsum('ij -> ji', self.weights['w']),
        }

    def backward(self, y_hat):
        """
        Given the upstream gradient ``y_hat``, store the parameter gradients
        in ``self.del_theta`` and return the gradient w.r.t. the layer input.
        """
        grad_input = np.einsum('ij,jk->ik', y_hat, self.grads["x"])
        grad_w = np.einsum('ij,jk->ik', self.grads["w"], y_hat)
        grad_b = np.sum(y_hat, axis=0, keepdims=True)

        decay = self.weight_decay
        if decay:
            # Adds 2 * weight_decay * parameter to both gradients.
            grad_w = grad_w + 2 * decay * self.weights["w"]
            grad_b = grad_b + 2 * decay * self.weights["b"]

        self.del_theta = {'w': grad_w, 'b': grad_b}
        return grad_input

    def update_weights(self):
        """Let the attached optimizer update this layer's weights."""
        optimizer = self.optimizer
        optimizer.update_weights(self)

## **Neural Network**

#### **Helper Functions**

In [None]:
def acc_score(true_labels, pred_labels):
    """Fraction of positions at which the two label arrays agree."""
    num_matches = np.sum(true_labels == pred_labels)
    return num_matches/len(true_labels)

def deepcopy(arr):
    """
    Rebuild a NumPy array element by element, recursing into sub-arrays.

    Anything that is not an ``np.ndarray`` is returned as-is, i.e. the very
    same object and not a copy.
    NOTE(review): ``NN.compile`` passes an optimizer object here, so under
    this rule the optimizer is handed back uncopied; confirm that is intended.
    """
    if not isinstance(arr, np.ndarray):
        return arr
    return np.array(list(map(deepcopy, arr)))

def make_data_batches(X, Y, batch_size=32):
    """
    Split X and Y into consecutive mini-batches.

    X, Y       : sliceable sequences; the number of samples is taken from len(Y)
    batch_size : positive number of samples per batch

    Returns a list of ``[X_batch, Y_batch]`` pairs. When len(Y) is not a
    multiple of ``batch_size`` the last pair holds the remaining samples.
    This now also covers len(Y) < batch_size, which used to fail with an
    UnboundLocalError because the tail start was only set inside the loop.
    Raises ValueError if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    num_full_batches = len(Y) // batch_size
    batches = [
        [X[batch_size * i: batch_size * (i + 1)], Y[batch_size * i: batch_size * (i + 1)]]
        for i in range(num_full_batches)
    ]

    # Take care of the last batch, which may be smaller than batch_size.
    # Its start is computed directly so it is defined even when there is
    # no full batch at all.
    tail_start = batch_size * num_full_batches
    if len(Y) % batch_size != 0:
        batches.append([X[tail_start:], Y[tail_start:]])

    return batches

In [None]:
class NN():
    """
    Sequential feed-forward network: an ordered list of layer and activation
    objects trained with mini-batch gradient updates.

    Every element of ``layers`` is called for the forward pass and must
    provide ``backward(grad)``. Elements that are ``daddyLayer`` /
    ``FNNLayer`` instances additionally receive an optimizer in ``compile``
    and are updated in ``update_weights``.
    """

    def __init__(self, layers, log_wandb = True, *args, **kwargs):
        """
        layers    : ordered list of layer / activation objects
        log_wandb : if True, per-epoch metrics are sent to ``wandb.log``;
                    otherwise they are printed
        """
        self.layers = layers
        self.history = []          # one metrics dict per completed epoch
        self.log_wandb = log_wandb

    def __call__(self, *args, **kwargs):
        """Shorthand for ``forward``."""
        return self.forward(*args, **kwargs)

    def compile(self, loss, optimizer, *args, **kwargs):
        """
        Attach the loss, and give every trainable layer that has no optimizer
        yet the result of ``deepcopy(optimizer)``.
        """
        self.loss = loss
        for layer in self.layers:
            # `is None` is an identity test and cannot be affected by a
            # custom __eq__ on the optimizer.
            if isinstance(layer, (daddyLayer, FNNLayer)) and layer.optimizer is None:
                layer.optimizer = deepcopy(optimizer)

    def forward(self, x, *args, **kwargs):
        """Feed ``x`` through all layers in order and return the final output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def backward(self, *args, **kwargs):
        """
        Propagate the loss gradient through the layers in reverse order.
        Uses the gradient stored by the most recent ``self.loss(...)`` call.
        """
        grad = self.loss.backward()
        for layer in reversed(self.layers):
            grad = layer.backward(grad)

        return grad

    def update_weights(self, *args, **kwargs):
        """Ask every trainable layer to apply its optimizer update."""
        for layer in reversed(self.layers):
            if isinstance(layer, (daddyLayer, FNNLayer)):
                layer.update_weights()

    @staticmethod
    def _mean_over_batches(total, num_batches):
        """Average a per-batch sum; NaN (not ZeroDivisionError) when there were no batches."""
        return total / num_batches if num_batches else float('nan')

    def fit(self, X_train, Y_train, X_val, Y_val, batch_size = 32, epochs = 10):
        """
        Train for ``epochs`` passes over the training batches.

        After each epoch the mean per-batch loss and accuracy on the training
        and validation batches are appended to ``self.history`` and either
        printed or sent to ``wandb.log`` depending on ``self.log_wandb``.
        A split that yields no batches reports NaN metrics.
        """
        self.loss.num_classes = len(np.unique(Y_train))
        tr_batch = make_data_batches(X_train, Y_train, batch_size=batch_size)
        val_batch = make_data_batches(X_val, Y_val, batch_size=batch_size)
        tr_batch_size = len(tr_batch)
        val_batch_size = len(val_batch)

        for epoch in range(1, epochs+1):

            tr_loss = 0
            tr_acc = 0

            for X_t, Y_t in tr_batch:
                preds = self(X_t)
                tr_loss += self.loss(preds, Y_t)
                tr_acc += acc_score(np.argmax(preds, axis = 1), Y_t)
                # The loss call above stored the gradient that backward() uses.
                self.backward()
                self.update_weights()

            mean_tr_loss = self._mean_over_batches(tr_loss, tr_batch_size)
            mean_tr_acc = self._mean_over_batches(tr_acc, tr_batch_size)
            val_loss = 0
            val_acc = 0

            # Validation: forward and loss only, no backward / weight update.
            for X_v, Y_v in val_batch:
                val_preds = self(X_v)
                val_loss += self.loss(val_preds, Y_v)
                val_acc += acc_score(np.argmax(val_preds, axis = 1), Y_v)

            mean_val_loss = self._mean_over_batches(val_loss, val_batch_size)
            mean_val_acc = self._mean_over_batches(val_acc, val_batch_size)

            if not self.log_wandb:
                print(f"Epoch: {epoch} Train Loss: {mean_tr_loss} Train Accuracy: {mean_tr_acc} Validation Loss: {mean_val_loss} Validation Accuracy: {mean_val_acc}")

            self.history.append({"Epoch" : epoch, "Train Loss": mean_tr_loss,"Train Accuracy": mean_tr_acc,"Val Loss": mean_val_loss,"Val Accuracy": mean_val_acc})

            if self.log_wandb:
                wandb.log(self.history[-1])

        print('Model trained!')

    def evaluate(self, X_test, Y_test):
        """
        Compute loss and accuracy on the given data in a single forward pass.

        Prints both values and also returns them as ``(test_loss, accuracy)``
        so callers can use the metrics programmatically.
        """
        preds = self(X_test)
        test_loss = self.loss(preds, Y_test)
        accuracy = acc_score(np.argmax(preds, axis = 1), Y_test)

        print(f"Test loss: {test_loss} Test accuracy: {accuracy}")
        return test_loss, accuracy

### **Fashion MNIST**

#### **Importing the data**

In [None]:
from keras.datasets import fashion_mnist
from sklearn.model_selection import train_test_split
# Load Fashion-MNIST through Keras as (train images, train labels), (test images, test labels).
# These module-level arrays also serve as the default arguments of show_samples below.
(X_train, Y_train), (X_test, Y_test) = fashion_mnist.load_data()

# print('Train Data:')
# print('X:', X_train.shape)
# print('Y:', Y_train.shape)
# print()
# print('Test Data:')
# print('X:', X_test.shape)
# print('Y:', Y_test.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


#### **Classwise Sample from Data**

In [None]:
!wandb login

[34m[1mwandb[0m: Currently logged in as: [33msidbetala[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# W&B entity and project passed to wandb.init() in show_samples.
entity_name = 'sidbetala'
project_name = 'cs6910-assignment1'

In [None]:
# Y_train is used instead of Y_test, as the training data is expected
# to have samples from every class, unlike the test data.
num_labels = np.unique(Y_train).shape[0]
# print(num_labels)  # prints 10, in accordance with the Keras dataset
# Display names for the classes: position i is used as the name of label i
# (show_samples indexes this list as classes[i]).
labels = [
    'T-shirt/top', 
    'Trouser', 
    'Pullover', 
    'Dress', 
    'Coat', 
    'Sandal', 
    'Shirt', 
    'Sneaker', 
    'Bag', 
    'Ankle boot',
    ]
def show_samples(n, X=X_train, Y=Y_train, num_classes=num_labels, classes=labels, num_images_per_class=5, log_wandb = False):
    """
    Plot up to ``n`` randomly chosen images per class, one row per class.

    n                    : maximum number of images drawn per class in the figure
    X, Y                 : images and their integer labels; rows of X where
                           Y == i are treated as class i
    num_classes          : number of classes (labels 0 .. num_classes - 1)
    classes              : display name for each label
    num_images_per_class : images per class logged to W&B when log_wandb is True
    log_wandb            : if True, also log sample images to a W&B run named
                           "classwise_images"

    Images are reshaped to 28 x 28 before being shown.
    """
    # Use the X / Y arguments (not the module-level training arrays) so the
    # function really shows the data it is given.
    labelled_data = {i: X[Y==i] for i in range(num_classes)}
    max_samples = {i: len(labelled_data[i]) for i in range(num_classes)}
    shuffled_indices = {i: np.random.permutation(max_samples[i]) for i in range(num_classes)}
    num_samples = {i: min(max_samples[i], n) for i in range(num_classes)}

    if log_wandb:
        # create a new WandB run
        wandb.init(entity=entity_name,project=project_name, name="classwise_images")

        # add images to WandB logs
        for i in range(num_classes):
            images = []
            for j in range(num_images_per_class):
                img = labelled_data[i][shuffled_indices[i][j]].astype(np.uint8).reshape(28, 28)
                images.append(wandb.Image(img, caption = classes[i]))
            wandb.log({f"{classes[i]} ({num_images_per_class} samples)": images})
        # finish WandB run
        wandb.finish()

    num_cols = max(num_samples.values())
    # One row per class (no longer hard-coded to 10); the height scales so
    # that 10 classes give the original 15 inches. squeeze=False keeps `axs`
    # two-dimensional even with a single row or column, so axs[i, j] is
    # always valid.
    fig, axs = plt.subplots(nrows=num_classes, ncols=num_cols, figsize=(1.2*num_cols, 1.5*num_classes), squeeze=False)
    for i in range(num_classes):
        for j in range(num_samples[i]):
            img = labelled_data[i][shuffled_indices[i][j]].astype(np.uint8).reshape(28, 28)
            axs[i, j].imshow(img, cmap="gray")
            axs[i, j].axis("off")
            axs[i, j].set_title(classes[i])
    plt.show()

In [None]:
# show_samples(15)

#### **Helper Function for flattening the images**

In [None]:
def flatten(arr):
    """Keep the first axis and collapse every remaining axis into one."""
    num_rows = arr.shape[0]
    return arr.reshape((num_rows, -1))

#### **Preprocessing the data**

In [None]:
# (X_train, Y_train), (X_test, Y_test) = fashion_mnist.load_data()
# X_train = X_train/255.0
# Y_train = Y_train
# X_train = flatten(X_train)
# X_test = flatten(X_test)/255.0
# X_tr, X_val, Y_tr, Y_val = train_test_split(X_train, Y_train, test_size=0.1, shuffle=True)

#### **Training and Testing the Neural Network Model**

In [None]:
# model = NN([FNNLayer(784, 32, weight_decay = 0.0005, init_method = 'random'), Tanh(), 
            
#             FNNLayer(32, 32, weight_decay = 0.0005, init_method = 'random'), Tanh(),
#             FNNLayer(32, 32, weight_decay = 0.0005, init_method = 'random'), Tanh(),
#             FNNLayer(32, 32, weight_decay = 0.0005, init_method = 'random'), Tanh(),
#             FNNLayer(32, 32, weight_decay = 0.0005, init_method = 'random'), Tanh(),
#             FNNLayer(32, 32, weight_decay = 0.0005, init_method = 'random'), Tanh(),

#             FNNLayer(32, 10, weight_decay = 0.0005, init_method = 'random')], log_wandb = False)
# optimizer = Momentum(learning_rate = 0.001)
# loss = LogLoss()
# model.compile(loss, optimizer)
# model.fit(X_tr, Y_tr, X_val, Y_val, batch_size=32, epochs=5)
# model.evaluate(X_test, Y_test)

### **Sweep Setup**

In [None]:
# Activation name -> activation class; make_network looks the name up here and instantiates the class.
activation_funcs = {'sigmoid': Sigmoid, 'tanh': Tanh, 'relu': ReLU, 'leakyrelu': LeakyReLU}

# Loss name -> loss class; sweep() looks the configured name up here and instantiates the class.
loss_funcs = {'mean_squared_error': MSE, 'cross_entropy': LogLoss}

def optimizer_func(opt, lr):
    """
    Build an optimizer from its name.

    opt : one of "sgd", "momentum", "nag", "rmsprop", "adam", "nadam"
    lr  : learning rate passed to the optimizer's constructor

    Raises ValueError for any other name.
    """
    registry = {"sgd": SGD,"momentum":Momentum, "nag": Nesterov, "rmsprop":RMSProp, "adam": Adam, "nadam":NAdam}
    optimizer_cls = registry.get(opt)
    if optimizer_cls is None:
        raise ValueError("Invalid optimizer name")
    return optimizer_cls(learning_rate = lr)

def make_network(num_layers, size_layer, act, reg, init_method, in_dim = 784, out_dim = 10):
    """
    Build the layer list for a fully connected classifier.

    num_layers  : number of size_layer -> size_layer blocks placed after the
                  input block
    size_layer  : width of every hidden layer
    act         : key into ``activation_funcs``
    reg         : weight-decay coefficient handed to every FNNLayer
    init_method : weight-initialisation name handed to every FNNLayer
    in_dim      : input width (default 784, the value previously hard-coded)
    out_dim     : output width (default 10, the value previously hard-coded)

    Returns [FNNLayer, activation, ..., FNNLayer]; no activation follows the
    final layer.

    NOTE(review): the input block plus ``num_layers`` further blocks gives
    num_layers + 1 hidden layers in total. Confirm this matches what
    "num_layers" is meant to express in the sweep configuration.
    """
    activation_cls = activation_funcs[act]

    layers = [FNNLayer(in_dim, size_layer, reg, init_method), activation_cls()]
    for _ in range(num_layers):
        layers.extend([FNNLayer(size_layer, size_layer, reg, init_method), activation_cls()])
    layers.append(FNNLayer(size_layer, out_dim, reg, init_method))
    return layers

def preprocess(train, test):
    """Flatten both image arrays with ``flatten`` and divide them by 255.0."""
    scaled_train = flatten(train)/255.0
    scaled_test = flatten(test)/255.0
    return scaled_train, scaled_test


def sweep():
    """
    Train one model for a single W&B run; used as the run function of wandb.agent.

    Hyperparameters are read from the run's config, which is seeded with
    `defaults` below. The function loads Fashion-MNIST, preprocesses it with
    preprocess(), holds out a shuffled 10% of the training images for
    validation, builds the network with make_network(), and fits it.
    The test split is loaded and preprocessed but not evaluated here.

    The run is opened as a context manager so it is always finished, including
    when data loading or training raises.
    """
    defaults = {
        'epochs': 5,
        'num_layers': 3,
        'size_layer': 32,
        'weight_decay': 0,
        'learning_rate': 0.001,
        'init_method': 'xavier',
        'optimizer': 'sgd',
        'batch_size': 16,
        'activation': 'relu',
        'loss': 'cross_entropy'
        }
    with wandb.init(config = defaults, magic = True) as run:
        config = run.config

        # Name the run from its hyperparameters before training starts, so
        # in-progress and failed runs are identifiable too.
        name1 = f'ep_{config.epochs}_hl_{config.num_layers}_hlsize_{config.size_layer}_'
        name2 = f'wd_{config.weight_decay}_lr_{config.learning_rate}_winit_{config.init_method}_'
        name3 = f'opt_{config.optimizer}_bs_{config.batch_size}_ac_{config.activation}_loss_{config.loss}'
        run.name = name1 + name2 + name3
        run.save()

        (tr_imgs, tr_labels), (test_imgs, _) = fashion_mnist.load_data()
        tr_imgs, test_imgs = preprocess(tr_imgs, test_imgs)
        X_t, X_v, Y_t, Y_v = train_test_split(tr_imgs, tr_labels, test_size = 0.1, shuffle = True)

        model = NN(make_network(config.num_layers, config.size_layer, config.activation,
                                config.weight_decay, config.init_method))
        model.compile(loss = loss_funcs[config.loss](),
                      optimizer = optimizer_func(config.optimizer, config.learning_rate))
        model.fit(X_t, Y_t, X_v, Y_v, batch_size = config.batch_size, epochs = config.epochs)

#### **Hyperparameter tuning for Cross-entropy loss**

In [None]:
# hyperparameters = {
#       "epochs": {
#           'values': [5, 10]
#       },
#       "num_layers": {
#           'values' : [3, 4, 5]
#       },
#       "size_layer": {
#           'values' : [32, 64, 128]
#       },
#       "weight_decay": {
#           'values': [0, 0.0005, 0.005, 0.05]
#       },
#       "learning_rate":{
#         'values': [0.01, 0.001, 0.0001]
#       },
#       "init_method": {
#           'values' : ['random', 'xavier']
#       },
#       "optimizer": {
#           'values': ['sgd', 'momentum', 'nag', 'rmsprop', 'adam', 'nadam']
#       },
#       "batch_size": {
#           'values': [32, 64, 128, 256]
#       },
#       "activation": {
#           'values': ['sigmoid','tanh','relu','leakyrelu']
#       },
#       "loss": {
#           'values': ['cross_entropy']
#       }
#   }

In [None]:
# sweep_configuration = {
#     'method': 'bayes',
#     'name': 'hyperparam_tuning_for_logloss',
#     'metric': {
#         'goal': 'maximize', 
#         'name': "Val Accuracy"
#         },
#     'parameters': hyperparameters
# }

In [None]:
# sweep_id = wandb.sweep(sweep_configuration, entity=entity_name, project=project_name)

In [None]:
# sweep_id = '2z8zot2g'

In [None]:
# wandb.agent(sweep_id, sweep, project = project_name, count = 100)

#### **Hyperparameter tuning for Mean-squared error**

In [None]:
# Search space for the mean-squared-error sweep. Every hyperparameter maps to
# {'values': [...]}, i.e. a discrete list of candidate settings.
hyperparameters = {
    param: {'values': candidates}
    for param, candidates in [
        ("epochs", [5, 10]),
        ("num_layers", [3, 4, 5]),
        ("size_layer", [32, 64, 128]),
        ("weight_decay", [0, 0.0005, 0.005, 0.05]),
        ("learning_rate", [0.01, 0.001, 0.0001]),
        ("init_method", ['random', 'xavier']),
        ("optimizer", ['sgd', 'momentum', 'nag', 'rmsprop', 'adam', 'nadam']),
        ("batch_size", [32, 64, 128, 256]),
        ("activation", ['sigmoid', 'tanh', 'relu', 'leakyrelu']),
        # Loss is fixed for this sweep.
        ("loss", ['mean_squared_error']),
    ]
}

In [None]:
# Sweep definition: Bayesian search over `hyperparameters`, maximizing the
# "Val Accuracy" metric (the same key that appears in each run's summary).
sweep_configuration = dict(
    method='bayes',
    name='hyperparam_tuning_for_mse',
    metric=dict(goal='maximize', name="Val Accuracy"),
    parameters=hyperparameters,
)

In [None]:
# Register the sweep with W&B and keep the ID it returns.
sweep_id = wandb.sweep(
    sweep_configuration,
    project=project_name,
    entity=entity_name,
)

Create sweep with ID: 5e5c3jt6
Sweep URL: https://wandb.ai/sidbetala/cs6910-assignment1/sweeps/5e5c3jt6


In [None]:
# Hard-coded ID of an existing sweep (it matches the "Create sweep with ID"
# output recorded in this notebook).
# NOTE(review): this overwrites whatever wandb.sweep() returned; presumably it
# is here so the agent can be re-attached without creating a new sweep -- confirm.
sweep_id = "5e5c3jt6"

In [None]:
# Launch an agent that calls sweep() for up to 80 runs of this sweep.
# The entity is passed explicitly, matching the wandb.sweep() call, so the
# sweep is looked up under the same entity it was created in rather than
# under whatever default entity the current login has.
wandb.agent(sweep_id, sweep, entity = entity_name, project = project_name, count = 80)

[34m[1mwandb[0m: Agent Starting Run: dn9g27wv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: Currently logged in as: [33msidbetala[0m. Use [1m`wandb login --relogin`[0m to force relogin




Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇█████
Train Loss,█▃▂▂▂▁▁▁▁▁
Val Accuracy,▁▄▆▆▇▇▇███
Val Loss,█▅▃▃▂▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.86544
Train Loss,0.1993
Val Accuracy,0.84863
Val Loss,0.21856


[34m[1mwandb[0m: Agent Starting Run: c8c8ml1n with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▇███
Train Loss,▁█▅▄▄
Val Accuracy,█▁▁▁▁
Val Loss,█▄▂▂▁

0,1
Epoch,5.0
Train Accuracy,0.09995
Train Loss,0.90006
Val Accuracy,0.10045
Val Loss,0.90003


[34m[1mwandb[0m: Agent Starting Run: 7altphva with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.05


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▁▂▃▄▅▅▆▇█
Train Loss,█▇▆▅▄▃▃▂▂▁
Val Accuracy,▁▂▂▃▃▄▅▆▇█
Val Loss,█▇▆▅▄▃▃▂▂▁

0,1
Epoch,10.0
Train Accuracy,0.06899
Train Loss,0.90176
Val Accuracy,0.06666
Val Loss,0.90189


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ll540ikz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▂▅▇█
Train Loss,█▇▆▄▁
Val Accuracy,▁▃▅▆█
Val Loss,█▇▆▄▁

0,1
Epoch,5.0
Train Accuracy,0.3082
Train Loss,0.86816
Val Accuracy,0.35831
Val Loss,0.85806


[34m[1mwandb[0m: Agent Starting Run: iknlmut2 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.005


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,█▃▃▃▄▂▂▂▁▁
Train Loss,▁█▇▇▆▆▆▆▆▆
Val Accuracy,████▁▁▁▁▁▁
Val Loss,█▆▄▃▂▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.09954
Train Loss,0.90008
Val Accuracy,0.0988
Val Loss,0.89999


[34m[1mwandb[0m: Agent Starting Run: 9btosuar with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▇███
Train Loss,█▃▂▁▁
Val Accuracy,▁█▇▆▇
Val Loss,█▁▂▂▁

0,1
Epoch,5.0
Train Accuracy,0.64666
Train Loss,0.46326
Val Accuracy,0.64129
Val Loss,0.45233


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: a6qgux5l with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆█▇▇▇▇▇▇▇
Train Loss,▁█▄▃▃▃▃▃▃▃
Val Accuracy,▁▁▁▁▁▁▁▁▁▁
Val Loss,▁▆█████▆█▆

0,1
Epoch,10.0
Train Accuracy,0.10145
Train Loss,0.90001
Val Accuracy,0.10106
Val Loss,0.9


[34m[1mwandb[0m: Agent Starting Run: ouvkai4p with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,█▃▃▁▁▁▁▁▁▁
Train Loss,▁▇▆▇██████
Val Accuracy,█▄▄▁▁▁▁▁▁▁
Val Loss,▁▄▄▅▆▆▆▆▆█

0,1
Epoch,10.0
Train Accuracy,0.10182
Train Loss,0.96032
Val Accuracy,0.10738
Val Loss,0.99394


[34m[1mwandb[0m: Agent Starting Run: v04e24el with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▆▇▇▇███
Train Loss,█▄▃▃▂▂▂▁▁▁
Val Accuracy,▁▄▃▄▅▆▆▆▆█
Val Loss,█▅▅▅▅▃▃▄▃▁

0,1
Epoch,10.0
Train Accuracy,0.90776
Train Loss,0.13858
Val Accuracy,0.88072
Val Loss,0.17879


[34m[1mwandb[0m: Agent Starting Run: yd1v6t3r with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▇████████
Train Loss,█▅▃▂▁▁▁▁▁▁
Val Accuracy,▁█████████
Val Loss,█▄▃▂▁▁▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.09991
Train Loss,0.89966
Val Accuracy,0.10073
Val Loss,0.89959


[34m[1mwandb[0m: Agent Starting Run: ohrilyc0 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇██████
Train Loss,█▄▃▂▂▂▁▁▁▁
Val Accuracy,▁▅▇▇▇▇▇▇▇█
Val Loss,█▄▃▃▂▂▂▂▂▁

0,1
Epoch,10.0
Train Accuracy,0.859
Train Loss,0.21385
Val Accuracy,0.86633
Val Loss,0.20886


[34m[1mwandb[0m: Agent Starting Run: fgjvw8tn with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


Model trained!


0,1
Epoch,▁▃▅▆█
Train Accuracy,▅▆█▃▁
Train Loss,█▁▁▁▁
Val Accuracy,█▁▆▆▆
Val Loss,█▁▁▁▁

0,1
Epoch,5.0
Train Accuracy,0.0982
Train Loss,0.9
Val Accuracy,0.10167
Val Loss,0.9


[34m[1mwandb[0m: Agent Starting Run: uhe9r85k with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.005


Model trained!


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▅▇██
Train Loss,█▆▅▃▁
Val Accuracy,▁▆▇██
Val Loss,█▆▅▃▁

0,1
Epoch,5.0
Train Accuracy,0.31531
Train Loss,0.85722
Val Accuracy,0.30417
Val Loss,0.85168


[34m[1mwandb[0m: Agent Starting Run: sxvyyfi4 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▆▇▇▇████
Train Loss,█▃▂▂▂▂▁▁▁▁
Val Accuracy,▁▅▅▆▇▇▇███
Val Loss,█▅▃▃▂▂▂▂▁▁

0,1
Epoch,10.0
Train Accuracy,0.89231
Train Loss,0.15983
Val Accuracy,0.86209
Val Loss,0.19896


[34m[1mwandb[0m: Agent Starting Run: xpft4d9a with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,█▇▁▅▄▄▂▄▄▄
Train Loss,█▂▂▁▁▁▁▁▁▁
Val Accuracy,██▁▁▁▁▁▁▁▁
Val Loss,▁█▆▅▅▅▅▄▄▄

0,1
Epoch,10.0
Train Accuracy,0.10006
Train Loss,1.01085
Val Accuracy,0.09529
Val Loss,1.01513


[34m[1mwandb[0m: Agent Starting Run: 84hv6p8p with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▁▁▂▅▆▇▇██
Train Loss,█▇▆▆▅▄▄▃▂▁
Val Accuracy,▁▁▁▃▅▆▇▇██
Val Loss,█▇▆▆▅▄▄▃▂▁

0,1
Epoch,10.0
Train Accuracy,0.21808
Train Loss,0.85978
Val Accuracy,0.22606
Val Loss,0.85585


[34m[1mwandb[0m: Agent Starting Run: yy719g33 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.05


Model trained!


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▁▁▁▁
Train Loss,█▄▂▂▁
Val Accuracy,▁▁▁▁▁
Val Loss,█▅▃▂▁

0,1
Epoch,5.0
Train Accuracy,0.09975
Train Loss,0.90762
Val Accuracy,0.10103
Val Loss,0.90574


[34m[1mwandb[0m: Agent Starting Run: bnhkgv8h with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▁▁▁▁
Train Loss,█▃▂▁▁
Val Accuracy,▁▁▁▁▁
Val Loss,█▄▂▁▁

0,1
Epoch,5.0
Train Accuracy,0.10036
Train Loss,0.90148
Val Accuracy,0.0967
Val Loss,0.90112


[34m[1mwandb[0m: Agent Starting Run: b4opdzik with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▂▄▅▆▇▇▇██
Train Loss,█▇▇▆▅▄▄▃▂▁
Val Accuracy,▁▂▄▅▆▇▇▇██
Val Loss,█▇▇▆▅▄▄▃▂▁

0,1
Epoch,10.0
Train Accuracy,0.64552
Train Loss,0.56466
Val Accuracy,0.66092
Val Loss,0.54661


[34m[1mwandb[0m: Agent Starting Run: iuhf9uzp with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.05


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▄█▄▄
Train Loss,█▁▁▁▁
Val Accuracy,█▁▁▁▁
Val Loss,█▁▁▁▁

0,1
Epoch,5.0
Train Accuracy,0.10053
Train Loss,0.90005
Val Accuracy,0.0989
Val Loss,0.90009


[34m[1mwandb[0m: Agent Starting Run: 1pefn256 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▇▇▇████
Train Loss,█▄▃▂▂▂▁▁▁▁
Val Accuracy,▁▁▄▅▆▇▇▇▇█
Val Loss,██▅▄▂▂▂▂▂▁

0,1
Epoch,10.0
Train Accuracy,0.8771
Train Loss,0.18321
Val Accuracy,0.86591
Val Loss,0.19427


[34m[1mwandb[0m: Agent Starting Run: f7navd5s with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇█████
Train Loss,█▃▂▂▂▁▁▁▁▁
Val Accuracy,▁▄▅▆▆▇▇███
Val Loss,█▅▃▃▂▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.87234
Train Loss,0.19267
Val Accuracy,0.8664
Val Loss,0.19955


[34m[1mwandb[0m: Agent Starting Run: t4rkbh1g with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▃▆▁▆▅▇████
Train Loss,█▂▁▁▁▁▁▁▁▁
Val Accuracy,▁█████████
Val Loss,█▄▃▂▂▁▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.09791
Train Loss,0.90022
Val Accuracy,0.09781
Val Loss,0.90019


[34m[1mwandb[0m: Agent Starting Run: lhoskr3d with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.005


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▂▃▄▅▅▆▆▇█
Train Loss,█▆▅▄▄▃▂▂▁▁
Val Accuracy,▁▂▃▄▄▅▅▆▇█
Val Loss,█▇▆▅▄▃▂▂▁▁

0,1
Epoch,10.0
Train Accuracy,0.17951
Train Loss,0.89277
Val Accuracy,0.18899
Val Loss,0.89233


[34m[1mwandb[0m: Agent Starting Run: g49utrew with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▆▇▇▇████
Train Loss,█▃▃▂▂▂▁▁▁▁
Val Accuracy,▁▄▄▆▆▇▇▇██
Val Loss,█▆▅▃▂▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.88604
Train Loss,0.17124
Val Accuracy,0.87665
Val Loss,0.17883


[34m[1mwandb[0m: Agent Starting Run: o0igpqx8 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁██▄▃▂▂▂▁▁
Train Loss,█▂▂▂▁▁▁▁▁▁
Val Accuracy,▁▁▁▁▁▁▁▁▁▁
Val Loss,█▄▂▂▁▁▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.10012
Train Loss,0.90028
Val Accuracy,0.10086
Val Loss,0.90024


[34m[1mwandb[0m: Agent Starting Run: 75fzin51 with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇█████
Train Loss,█▃▂▂▂▂▁▁▁▁
Val Accuracy,▁▄▅▆▇▇████
Val Loss,█▅▄▃▃▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.87243
Train Loss,0.19084
Val Accuracy,0.8579
Val Loss,0.20702


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: lz0eylbz with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▁▁▁▁▁▁▁▁▁
Train Loss,█▇▆▅▄▄▃▂▂▁
Val Accuracy,▁▁▁▁▁▁▁▁▁▁
Val Loss,█▇▆▅▄▄▃▂▂▁

0,1
Epoch,10.0
Train Accuracy,0.09962
Train Loss,0.91418
Val Accuracy,0.103
Val Loss,0.91343


[34m[1mwandb[0m: Agent Starting Run: vsxw1u86 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▆▇▇▇███
Train Loss,█▄▃▃▂▂▂▁▁▁
Val Accuracy,▁▂▅▇▆▆█▇██
Val Loss,█▇▄▂▃▂▁▂▂▁

0,1
Epoch,10.0
Train Accuracy,0.90934
Train Loss,0.13631
Val Accuracy,0.88566
Val Loss,0.16305


[34m[1mwandb[0m: Agent Starting Run: xy0rc3vs with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▆▇▇▇████
Train Loss,█▄▃▂▂▂▁▁▁▁
Val Accuracy,▁▃▃▅▇▆▇▇██
Val Loss,█▆▆▄▂▃▂▂▁▁

0,1
Epoch,10.0
Train Accuracy,0.87075
Train Loss,0.19061
Val Accuracy,0.85605
Val Loss,0.20353


[34m[1mwandb[0m: Agent Starting Run: rujbs9o9 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▇▇▇▇███
Train Loss,█▄▃▂▂▂▂▁▁▁
Val Accuracy,▁▄▅▇▇█████
Val Loss,█▅▃▂▂▁▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.91629
Train Loss,0.1266
Val Accuracy,0.88942
Val Loss,0.16301


[34m[1mwandb[0m: Agent Starting Run: kqtei09t with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇██████
Train Loss,█▃▂▂▁▁▁▁▁▁
Val Accuracy,▁▆▇▇▇█████
Val Loss,█▄▃▂▂▂▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.86653
Train Loss,0.19507
Val Accuracy,0.86714
Val Loss,0.19241


[34m[1mwandb[0m: Agent Starting Run: nsdjwcua with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.005


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁█████████
Train Loss,█▁▁▁▁▁▁▁▁▁
Val Accuracy,▁▁▁▁▁▁▁▁▁▁
Val Loss,▁▁▁▁▁▁▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.09903
Train Loss,0.90006
Val Accuracy,0.10189
Val Loss,0.9


[34m[1mwandb[0m: Agent Starting Run: d305ympx with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇██████
Train Loss,█▃▂▂▁▁▁▁▁▁
Val Accuracy,▁▇▇███████
Val Loss,█▃▂▂▁▁▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.86129
Train Loss,0.20555
Val Accuracy,0.85798
Val Loss,0.20552


[34m[1mwandb[0m: Agent Starting Run: tcxa8oie with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇██████
Train Loss,█▃▂▂▁▁▁▁▁▁
Val Accuracy,▁▆▇▇██████
Val Loss,█▄▂▂▂▁▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.87216
Train Loss,0.18904
Val Accuracy,0.86671
Val Loss,0.18995


[34m[1mwandb[0m: Agent Starting Run: xwpqxijp with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▂▃▃▄▅▆▇██
Train Loss,███▇▆▅▃▂▁▁
Val Accuracy,▁▂▃▃▄▆▇▇██
Val Loss,██▇▇▆▄▃▂▁▁

0,1
Epoch,10.0
Train Accuracy,0.75922
Train Loss,0.36093
Val Accuracy,0.75591
Val Loss,0.35964


[34m[1mwandb[0m: Agent Starting Run: 3nilg94u with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▁▂▃▄▅▅▆▇█
Train Loss,█▇▆▅▅▄▃▃▂▁
Val Accuracy,▁▂▃▄▄▅▅▆▇█
Val Loss,█▇▆▆▅▄▄▃▂▁

0,1
Epoch,10.0
Train Accuracy,0.34004
Train Loss,0.88171
Val Accuracy,0.36298
Val Loss,0.87914


[34m[1mwandb[0m: Agent Starting Run: thq2p7uf with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▇▇▇████
Train Loss,█▄▃▂▂▂▂▁▁▁
Val Accuracy,▁▅▆▆▆█▇███
Val Loss,█▅▃▄▃▁▂▂▁▂

0,1
Epoch,10.0
Train Accuracy,0.89215
Train Loss,0.15925
Val Accuracy,0.86427
Val Loss,0.19866


[34m[1mwandb[0m: Agent Starting Run: 9lcww6aw with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇█████
Train Loss,█▃▂▂▂▂▁▁▁▁
Val Accuracy,▁▃▄▅▆▆▇▇██
Val Loss,█▅▄▃▃▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.89018
Train Loss,0.16328
Val Accuracy,0.87237
Val Loss,0.18301


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: v2wefjs1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▂▃▄▆▇▇███
Train Loss,███▇▄▃▂▁▁▁
Val Accuracy,▁▃▃▅▇▇████
Val Loss,██▇▆▃▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.79518
Train Loss,0.28641
Val Accuracy,0.78801
Val Loss,0.29256


[34m[1mwandb[0m: Agent Starting Run: f2dcejnh with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇█████
Train Loss,█▃▂▂▂▁▁▁▁▁
Val Accuracy,▁▃▆▆▇█▇███
Val Loss,█▇▃▄▂▁▂▁▂▁

0,1
Epoch,10.0
Train Accuracy,0.86555
Train Loss,0.19812
Val Accuracy,0.87116
Val Loss,0.19022


[34m[1mwandb[0m: Agent Starting Run: oof114q8 with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇▇████
Train Loss,█▃▃▂▂▂▁▁▁▁
Val Accuracy,▁▄▄▆▇▇▇███
Val Loss,█▆▅▃▂▂▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.87278
Train Loss,0.18642
Val Accuracy,0.8693
Val Loss,0.18816


[34m[1mwandb[0m: Agent Starting Run: u9r01cci with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇▇████
Train Loss,█▃▂▂▂▂▁▁▁▁
Val Accuracy,▁▄▄▇█▇████
Val Loss,█▅▄▃▂▂▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.88857
Train Loss,0.16385
Val Accuracy,0.87556
Val Loss,0.17982


[34m[1mwandb[0m: Agent Starting Run: u0jgkp0g with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇█████
Train Loss,█▃▂▂▂▁▁▁▁▁
Val Accuracy,▁▄▅▆▇▇▇███
Val Loss,█▅▃▃▂▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.88463
Train Loss,0.17197
Val Accuracy,0.86428
Val Loss,0.19381


[34m[1mwandb[0m: Agent Starting Run: xrpfd0sy with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇█████
Train Loss,█▃▂▂▂▁▁▁▁▁
Val Accuracy,▁▅▅▆▆▇█▇██
Val Loss,█▅▄▃▂▂▁▂▁▁

0,1
Epoch,10.0
Train Accuracy,0.88983
Train Loss,0.16375
Val Accuracy,0.87118
Val Loss,0.18745


[34m[1mwandb[0m: Agent Starting Run: nudlxy0z with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▇▇▇████
Train Loss,█▄▃▂▂▂▁▁▁▁
Val Accuracy,▁▄▃▆▄▆▇█▆▇
Val Loss,█▅▆▃▄▃▁▂▃▃

0,1
Epoch,10.0
Train Accuracy,0.90833
Train Loss,0.13758
Val Accuracy,0.87837
Val Loss,0.18316


[34m[1mwandb[0m: Agent Starting Run: 6keewyqo with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▂▃▄▄▄▅▆▇█
Train Loss,████▇▆▆▄▃▁
Val Accuracy,▁▂▃▃▃▄▅▆▇█
Val Loss,███▇▇▆▅▄▂▁

0,1
Epoch,10.0
Train Accuracy,0.62241
Train Loss,0.50475
Val Accuracy,0.65369
Val Loss,0.47617


[34m[1mwandb[0m: Agent Starting Run: uxpbv1jl with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇▇████
Train Loss,█▃▂▂▂▂▁▁▁▁
Val Accuracy,▁▄▅▆▇▇▇███
Val Loss,█▆▄▃▂▂▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.89051
Train Loss,0.16258
Val Accuracy,0.87927
Val Loss,0.1743


[34m[1mwandb[0m: Agent Starting Run: 4uud0wwi with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▇▇▇████
Train Loss,█▄▃▂▂▂▁▁▁▁
Val Accuracy,▁▄▅▆▇▇▇███
Val Loss,█▅▄▃▃▂▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.90961
Train Loss,0.13684
Val Accuracy,0.88874
Val Loss,0.16283


[34m[1mwandb[0m: Agent Starting Run: c4w07z3o with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▁▃▄▄▄▄▅▆█
Train Loss,██▇▇▆▆▅▄▃▁
Val Accuracy,▁▂▃▄▃▄▄▅▆█
Val Loss,██▇▇▆▆▅▄▃▁

0,1
Epoch,10.0
Train Accuracy,0.39503
Train Loss,0.78067
Val Accuracy,0.47547
Val Loss,0.75732


[34m[1mwandb[0m: Agent Starting Run: oifv4tjq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇█████
Train Loss,█▃▂▂▂▁▁▁▁▁
Val Accuracy,▁▃▅▅▆▆▇▇▇█
Val Loss,█▅▄▃▃▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.87643
Train Loss,0.18198
Val Accuracy,0.87044
Val Loss,0.19048


[34m[1mwandb[0m: Agent Starting Run: hxcac7sy with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▇▇▇▇███
Train Loss,█▄▃▃▂▂▁▁▁▁
Val Accuracy,▁▃▅▅█▆▇▇▇█
Val Loss,██▅▄▂▃▂▂▂▁

0,1
Epoch,10.0
Train Accuracy,0.90266
Train Loss,0.14326
Val Accuracy,0.87234
Val Loss,0.1887


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: oa0i3hg8 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▆▇▇█▇█▇
Train Loss,█▄▃▃▂▂▁▂▁▂
Val Accuracy,▁▃▇▆▆▇██▇█
Val Loss,█▆▃▅▃▃▁▂▂▁

0,1
Epoch,10.0
Train Accuracy,0.78596
Train Loss,0.30684
Val Accuracy,0.79688
Val Loss,0.29621


[34m[1mwandb[0m: Agent Starting Run: h6b5tlvs with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▇▇▇██████
Train Loss,█▃▂▂▁▁▁▁▁▁
Val Accuracy,▁▆▇▇██████
Val Loss,█▃▂▂▁▁▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.86242
Train Loss,0.20129
Val Accuracy,0.84396
Val Loss,0.223


[34m[1mwandb[0m: Agent Starting Run: eucbnoc7 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▆▆▁▅██████
Train Loss,█▆▅▄▃▂▂▁▁▁
Val Accuracy,██▁▁▁▁▁▁▁▁
Val Loss,█▆▅▄▃▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.10016
Train Loss,0.9005
Val Accuracy,0.09928
Val Loss,0.9004


[34m[1mwandb[0m: Agent Starting Run: qtzjp30m with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇█████
Train Loss,█▃▂▂▂▁▁▁▁▁
Val Accuracy,▁▃▅▆▇▇▇███
Val Loss,█▅▄▃▂▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.85986
Train Loss,0.20832
Val Accuracy,0.8446
Val Loss,0.22507


[34m[1mwandb[0m: Agent Starting Run: hpuqcgk9 with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▆▇▇▇████
Train Loss,█▄▃▂▂▂▁▁▁▁
Val Accuracy,▁▃▅▆▇▇▇▇██
Val Loss,█▆▄▃▂▁▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.87978
Train Loss,0.17502
Val Accuracy,0.86742
Val Loss,0.19195


[34m[1mwandb[0m: Agent Starting Run: ojnwev7w with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,█▄▁▁▁▁▁▁▁▁
Train Loss,▁▅████████
Val Accuracy,█▁▁▁▁▁▁▁▁▁
Val Loss,▁█████████

0,1
Epoch,10.0
Train Accuracy,0.09999
Train Loss,1.80002
Val Accuracy,0.1004
Val Loss,1.7992


[34m[1mwandb[0m: Agent Starting Run: 53jvpy7w with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▄▇▇██▇▇██
Train Loss,█▆▂▂▂▁▂▂▁▁
Val Accuracy,▁▅▇▅▆██▅█▇
Val Loss,█▄▂▄▃▁▁▅▁▃

0,1
Epoch,10.0
Train Accuracy,0.86047
Train Loss,0.20911
Val Accuracy,0.84793
Val Loss,0.22635


[34m[1mwandb[0m: Agent Starting Run: 452qdllu with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▆▇▇▇████
Train Loss,█▄▃▂▂▂▁▁▁▁
Val Accuracy,▁▃▄▄▆▆▇███
Val Loss,█▅▄▄▃▃▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.87346
Train Loss,0.18501
Val Accuracy,0.86031
Val Loss,0.20084


[34m[1mwandb[0m: Agent Starting Run: mw2ljkkm with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▇▇▇██████
Train Loss,█▃▂▂▁▁▁▁▁▁
Val Accuracy,▁▆▇▇▇█████
Val Loss,█▃▃▂▂▂▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.87915
Train Loss,0.17958
Val Accuracy,0.8702
Val Loss,0.18715


[34m[1mwandb[0m: Agent Starting Run: 8hwgps1q with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▇▇▇████
Train Loss,█▄▃▂▂▂▁▁▁▁
Val Accuracy,▁▆▇▇█▇████
Val Loss,█▄▂▂▁▂▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.90698
Train Loss,0.13896
Val Accuracy,0.88444
Val Loss,0.16901


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tdtqsrqi with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇█████
Train Loss,█▃▂▂▂▁▁▁▁▁
Val Accuracy,▁▅▅▆▇▇▇███
Val Loss,█▅▄▃▂▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.8869
Train Loss,0.17022
Val Accuracy,0.86444
Val Loss,0.19016


[34m[1mwandb[0m: Agent Starting Run: 1ot8z6m1 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇██████
Train Loss,█▃▂▂▁▁▁▁▁▁
Val Accuracy,▁▅▆▇▇▇▇██▇
Val Loss,█▄▃▂▂▂▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.8661
Train Loss,0.20045
Val Accuracy,0.85847
Val Loss,0.20887


[34m[1mwandb[0m: Agent Starting Run: 6n3up4es with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇▇████
Train Loss,█▃▂▂▂▂▁▁▁▁
Val Accuracy,▁▄▅▆▇▇▇███
Val Loss,█▅▄▃▂▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.87401
Train Loss,0.18383
Val Accuracy,0.86339
Val Loss,0.19517


[34m[1mwandb[0m: Agent Starting Run: pglc5ry0 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇██████
Train Loss,█▃▂▂▂▁▁▁▁▁
Val Accuracy,▁▅▆▇▇▇▇█▇█
Val Loss,█▃▂▂▂▂▂▁▃▁

0,1
Epoch,10.0
Train Accuracy,0.88096
Train Loss,0.17751
Val Accuracy,0.8653
Val Loss,0.20122


[34m[1mwandb[0m: Agent Starting Run: j5q3utk7 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▇▇▇▇███
Train Loss,█▄▃▂▂▂▂▁▁▁
Val Accuracy,▁▄▄▄▅▅▆▆▇█
Val Loss,█▅▅▆▅▄▃▄▁▁

0,1
Epoch,10.0
Train Accuracy,0.90253
Train Loss,0.14616
Val Accuracy,0.88338
Val Loss,0.1709


[34m[1mwandb[0m: Agent Starting Run: iy1jhcdh with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▇▇▇██████
Train Loss,█▃▂▂▁▁▁▁▁▁
Val Accuracy,▁▄▅▆▇▇████
Val Loss,█▅▄▃▂▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.86058
Train Loss,0.20605
Val Accuracy,0.84824
Val Loss,0.22284


[34m[1mwandb[0m: Agent Starting Run: 3t8o9x2h with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▆▇▇▇▇███
Train Loss,█▄▃▂▂▂▂▁▁▁
Val Accuracy,▁▄▅▆▇▇█▆▇█
Val Loss,█▅▄▃▂▁▁▂▂▁

0,1
Epoch,10.0
Train Accuracy,0.88036
Train Loss,0.17745
Val Accuracy,0.86196
Val Loss,0.20264


[34m[1mwandb[0m: Agent Starting Run: cm9s1r0e with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▆▇▇▇████
Train Loss,█▃▃▂▂▂▁▁▁▁
Val Accuracy,▁▅▆▆▇▇▇███
Val Loss,█▄▃▂▂▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.89905
Train Loss,0.15006
Val Accuracy,0.88693
Val Loss,0.1645


[34m[1mwandb[0m: Agent Starting Run: 4ixc2whj with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇█████
Train Loss,█▃▂▂▂▂▁▁▁▁
Val Accuracy,▁▆▇▇▇▇██▇▇
Val Loss,█▄▂▃▂▂▁▁▂▂

0,1
Epoch,10.0
Train Accuracy,0.84962
Train Loss,0.22461
Val Accuracy,0.82984
Val Loss,0.24045


[34m[1mwandb[0m: Agent Starting Run: ylcw7uow with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▇▇▇████
Train Loss,█▄▃▂▂▂▁▁▁▁
Val Accuracy,▁▅▅▄▅▇▆▇██
Val Loss,█▄▃▄▃▂▂▂▁▁

0,1
Epoch,10.0
Train Accuracy,0.89504
Train Loss,0.15467
Val Accuracy,0.87963
Val Loss,0.17805


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 59r8ul2t with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▇▇▇▇███
Train Loss,█▄▃▂▂▂▂▁▁▁
Val Accuracy,▁▄▄▅▅▆▇▆▆█
Val Loss,█▄▃▃▃▃▂▂▂▁

0,1
Epoch,10.0
Train Accuracy,0.91037
Train Loss,0.13629
Val Accuracy,0.87863
Val Loss,0.17685


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7kq31l54 with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▇▇▇█████
Train Loss,█▃▃▂▂▂▁▁▁▁
Val Accuracy,▁▅▇▇██████
Val Loss,█▄▃▂▂▁▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.87256
Train Loss,0.18699
Val Accuracy,0.85884
Val Loss,0.2026


[34m[1mwandb[0m: Agent Starting Run: pn3mmkyx with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▆▇██
Train Loss,█▃▂▁▁
Val Accuracy,▁▅▇▇█
Val Loss,█▄▂▂▁

0,1
Epoch,5.0
Train Accuracy,0.89139
Train Loss,0.16029
Val Accuracy,0.87236
Val Loss,0.18343


[34m[1mwandb[0m: Agent Starting Run: 55e6j3y9 with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▆▆▇▇▇████
Train Loss,█▃▃▂▂▂▁▁▁▁
Val Accuracy,▁▄▅▆▇▇▇█▇▇
Val Loss,█▅▄▃▂▁▁▁▁▂

0,1
Epoch,10.0
Train Accuracy,0.90699
Train Loss,0.13994
Val Accuracy,0.87665
Val Loss,0.18154


[34m[1mwandb[0m: Agent Starting Run: ssq812qn with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▆▇▇▇███
Train Loss,█▄▃▃▂▂▂▁▁▁
Val Accuracy,▁▅▅▆▇▇███▇
Val Loss,█▅▄▃▂▂▂▁▁▂

0,1
Epoch,10.0
Train Accuracy,0.91935
Train Loss,0.12361
Val Accuracy,0.8846
Val Loss,0.17347


[34m[1mwandb[0m: Agent Starting Run: re9fyhlq with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_method: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▅▆▇▇▇▇███
Train Loss,█▄▃▂▂▂▂▁▁▁
Val Accuracy,▁▅▅▅▅▆▇▇██
Val Loss,█▄▄▄▃▂▂▁▁▁

0,1
Epoch,10.0
Train Accuracy,0.91034
Train Loss,0.13647
Val Accuracy,0.88735
Val Loss,0.16675


[34m[1mwandb[0m: Agent Starting Run: bzizrba4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▇██▅
Train Loss,█▂▁▁▇
Val Accuracy,▁▇█▂▇
Val Loss,▅▂▁█▂

0,1
Epoch,5.0
Train Accuracy,0.69962
Train Loss,0.46025
Val Accuracy,0.76961
Val Loss,0.32867


[34m[1mwandb[0m: Agent Starting Run: co40rk0w with config:
[34m[1mwandb[0m: 	activation: leakyrelu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	init_method: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss: mean_squared_error
[34m[1mwandb[0m: 	num_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0


Model trained!


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▇▇██
Train Loss,█▃▂▁▁
Val Accuracy,▁▆▆▇█
Val Loss,█▃▃▂▁

0,1
Epoch,5.0
Train Accuracy,0.87343
Train Loss,0.18597
Val Accuracy,0.86228
Val Loss,0.20237
