<a href="https://colab.research.google.com/github/pratik-kadlak/Weights-Biases/blob/main/dl_assignment1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
!pip install wandb



In [9]:
from keras.datasets import fashion_mnist
import wandb
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

In [10]:
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
k = len(class_names)

## Question 1

In [11]:
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mkadlakpratik[0m ([33mspace_monkeys[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [12]:
def plot_sample_image_of_each_class():
    """
        plots 1 image of each class in the training data
    """

    (X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

    wandb.init(
        project="DL_Assignment_1"
    )

    labels = []
    images = []

    for i in range(len(X_train)):
        if class_names[y_train[i]] not in labels:
            labels.append(class_names[y_train[i]])
            images.append(np.array(X_train[i]))
        if(len(labels) == k):
            break

    wandb.log({"Sample image for each class ": [wandb.Image(img, caption=caption) for img, caption in zip(images, labels)]})
    wandb.finish()

    # num_rows = 2
    # num_cols = 5

    # fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 5))

    # for i in range(len(images)):

    #     #plotting 1 image from each class in wandb
    #     wandb.log({"Sample image for each class ": [wandb.Image(img, caption=caption) for img, caption in zip(images, labels)]})

    #     row_idx = i // num_cols
    #     col_idx = i % num_cols

    #     axes[row_idx, col_idx].axis("off")
    #     axes[row_idx, col_idx].imshow(images[i], cmap="gray")
    #     axes[row_idx, col_idx].set_title(labels[i])

    # plt.show()


# plot_sample_image_of_each_class()

##Question 2 & Question 3

In [13]:
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
k = len(class_names)

# loading the data
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

#flattening the images, originally images is of size 28x28, converting it to 784x1
X_train  = X_train.reshape(X_train.shape[0], X_train.shape[1] * X_train.shape[2])
X_train = np.array(X_train)/255.0

X_test = X_test.reshape(X_test.shape[0], X_test.shape[1] * X_test.shape[2])
X_test = np.array(X_test)/255.0

y_train = np.array(y_train)
y_test = np.array(y_test)

In [14]:
def initialize_Wandb(neurons_per_layer, method):
    """
        initializes weights and bias by the given method of initialization
    """

    W = []
    b = []
    np.random.seed(42)

    for l in range(len(neurons_per_layer)-1):
        if method == "random_uniform":
            W.append(np.random.uniform(-0.7, 0.7, (neurons_per_layer[l+1], neurons_per_layer[l])))
            b.append(np.random.uniform(-0.7, 0.7, (neurons_per_layer[l+1],1)))
        elif method == "xavier":
            W.append(np.random.randn(neurons_per_layer[l+1],neurons_per_layer[l])*np.sqrt(6/(neurons_per_layer[l+1]+neurons_per_layer[l])))
            b.append(np.zeros((neurons_per_layer[l+1], 1)))
        else:
            W.append(np.random.randn(neurons_per_layer[l+1], neurons_per_layer[l]) * 0.001)
            b.append((np.random.randn(neurons_per_layer[l+1],1)) * 0.001)

    return W, b

In [15]:
# activation func and their derivatives

def sigmoid(x):
    return 1.0/(1.+np.exp(-x))


def sigmoid_derivative(x):
    return sigmoid(x)*(1-sigmoid(x))


def relu(x):
    return np.maximum(0, x)


def relu_derivative(x):
    return np.where(x > 0, 1, 0)


def tanh(x):
    return np.tanh(x)


def tanh_derivative(x):
    return 1 - np.tanh(x)**2

In [16]:
# loss functions

def cross_entropy(y, y_hat, W, weight_decay):
    loss = 0
    for i in range(len(y)):
        for j in range(len(y[i])):
            loss += -1.0 * y[i][j] * np.log(y_hat[i][j])

    reg = 0
    for i in range(len(W)):
        reg += np.sum(np.square(W[i]))

    regularized_loss = loss + weight_decay * reg
    return regularized_loss


def mean_square_error(y, y_hat, W, weight_decay):
    loss = 0.5 * np.sum(np.square(y-y_hat))
    reg = 0
    for i in range(len(W)):
        reg += np.sum(np.square(W[i]))

    regularized_loss = loss + weight_decay * reg
    return regularized_loss


In [17]:
# output functions

def softmax(a):
    return np.exp(a)/np.sum(np.exp(a), axis=0)


# for back prop if loss func is mse
def softmax_derivative(a):
    return softmax(a)*(1-softmax(a))

In [18]:
def evaluate_model(W, b, X, y, num_hidden_layers, activation_func, weight_decay, loss_func):
    """
        calculates loss and accuracy of the model
    """
    y_hat, activation, preactivation = forward_propogation(W, b, X, num_hidden_layers, activation_func)
    y_pred = []
    for i in range(len(y_hat[0])):
        y_pred.append(np.argmax(y_hat[:,i]))

    acc = 0
    for i in range(len(y)):
        if y[i] == y_pred[i]:
            acc += 1

    acc = (acc * 100) / len(y)

    y_one_hot = generate_one_hot_matrix(len(y), y)

    if loss_func == "cross_entropy":
        loss = cross_entropy(y_one_hot, y_hat, W, weight_decay)
    else:
        loss = cross_entropy(y_one_hot, y_hat, W, weight_decay)

    return acc, loss/len(y)

In [19]:
def forward_propogation(W, b, X, num_hidden_layers, activation_func):
    """
        does one forward pass of the data with the current weights and biases
    """

    preactivation = []
    activation = []

    preactivation.append(X.T)
    if activation_func == "sigmoid": activation.append(sigmoid(X.T))
    elif activation_func == "relu": activation.append(relu(X.T))
    else: activation.append(tanh(X.T))


    for i in range(1, num_hidden_layers+1):
        preactivation.append(np.matmul(W[i-1], activation[(i-1)]) + b[i-1])
        if activation_func == "sigmoid":
            activation.append(sigmoid(preactivation[i]))
        elif activation_func == "relu":
            activation.append(relu(preactivation[i]))
        elif activation_func == "tanh":
            activation.append(tanh(preactivation[i]))

    preactivation.append(np.dot(W[-1], activation[-1]) + b[-1])
    activation.append(softmax(preactivation[-1]))
    y_hat = activation[-1]
    return y_hat, activation, preactivation

In [20]:
def generate_one_hot_matrix(batch_size, y):
    '''
        generates one hot matrix, where the ith col gives the one hot vector for the ith image
        and in that vector only the row number of true class will be 1 and rest will be zero
    '''
    y_one_hot = np.zeros((10,batch_size))
    for i in range(batch_size):
        y_one_hot[y[i]][i] = 1
    return y_one_hot

In [21]:
# update rules for various types of gradient descent

def update_parameters(W, grad_W, b, grad_b, eta):
    """
    normal gradient descent
    """
    for i in range(0, len(W)):
        W[i] = W[i] - eta * grad_W[i]
        b[i] = b[i] - eta * grad_b[i]

    return W, b


def update_parameters_mgd(W, grad_W, b, grad_b, eta, beta, W_history, b_history):
    """
    momentum based gradient descent
    """
    for t in range(len(W)):
        W_history[t] = beta * W_history[t] + grad_W[t]
        b_history[t] = beta * b_history[t] + grad_b[t]
    for i in range(len(W)):
        W[i] = W[i] - eta * W_history[i]
        b[i] = b[i] - eta * b_history[i]

    return W, b, W_history, b_history


def update_parameters_nag(W, W_history, b, b_history, eta, beta):
    """
        nesterov accelerated gradient descent
    """
    for i in range(len(W)):
        W_history[i] = beta * W_history[i]
        b_history[i] = beta * b_history[i]

    for i in range(len(W)):
        W[i] = W[i] - eta * W_history[i]
        b[i] = b[i] - eta * b_history[i]

    return W, b, W_history, b_history


def update_parameters_rmsprop(W, grad_W, b, grad_b, vt_W, vt_b, eta, beta, epsilon):
    """
        rmsprop gradient descent
    """
    for i in range(len(grad_W)):
        vt_W[i] = beta * vt_W[i] + (1 - beta) * np.square(grad_W[i])
        vt_b[i] = beta * vt_b[i] + (1 - beta) * np.square(grad_b[i])

    for i in range(len(W)):
        W[i] = W[i] - (eta/np.sqrt(vt_W[i]+epsilon)) * grad_W[i]
        b[i] = b[i] - (eta/np.sqrt(vt_b[i]+epsilon)) * grad_b[i]

    return W, b, vt_W, vt_b


def update_parameters_adam(W, grad_W, vt_W, mt_W, b, grad_b, vt_b, mt_b, t, eta, beta1, beta2, epsilon):
    for i in range(len(W)):
        curr_mt_W = beta1 * mt_W[i] + (1 - beta1) * grad_W[i]
        curr_mt_b = beta1 * mt_b[i] + (1 - beta1) * grad_b[i]

        curr_vt_W = beta2 * vt_W[i] + (1 - beta2) * np.square(grad_W[i])
        curr_vt_b = beta2 * vt_b[i] + (1 - beta2) * np.square(grad_b[i])

        mt_W_hat = curr_mt_W / (1.0 - beta1**t)
        mt_b_hat = curr_mt_b / (1.0 - beta1**t)

        vt_W_hat = curr_vt_W / (1.0 - beta2**t)
        vt_b_hat = curr_vt_b / (1.0 - beta2**t)

        # saving for the next iteration
        mt_W[i] = curr_mt_W
        mt_b[i] = curr_mt_b
        vt_W[i] = curr_vt_W
        vt_b[i] = curr_vt_b

        # updating the parameters
        W[i] = W[i] - (eta/(np.sqrt(vt_W_hat) + epsilon)) * mt_W_hat
        b[i] = b[i] - (eta/(np.sqrt(vt_b_hat) + epsilon)) * mt_b_hat

    return W, b, vt_W, vt_b, mt_W, mt_b


def update_parameters_nadam(W, grad_W, vt_W, mt_W, b, grad_b, vt_b, mt_b, t, eta, beta1, beta2, epsilon):
    for i in range(len(W)):
        curr_mt_W = beta1 * mt_W[i] + (1 - beta1) * grad_W[i]
        curr_mt_b = beta1 * mt_b[i] + (1 - beta1) * grad_b[i]

        curr_vt_W = beta2 * vt_W[i] + (1 - beta2) * np.square(grad_W[i])
        curr_vt_b = beta2 * vt_b[i] + (1 - beta2) * np.square(grad_b[i])

        mt_W_hat = curr_mt_W / (1.0 - beta1**t)
        mt_b_hat = curr_mt_b / (1.0 - beta1**t)

        vt_W_hat = curr_vt_W / (1.0 - beta2**t)
        vt_b_hat = curr_vt_b / (1.0 - beta2**t)

        # saving for the next iteration
        mt_W[i] = curr_mt_W
        mt_b[i] = curr_mt_b
        vt_W[i] = curr_vt_W
        vt_b[i] = curr_vt_b

        # updating the parameters
        W[i] = W[i] - (eta/(np.sqrt(vt_W_hat) + epsilon)) * ((1-beta1)*grad_W[i]/(1-beta1**t))
        b[i] = b[i] - (eta/(np.sqrt(vt_b_hat) + epsilon)) * ((1-beta1)*grad_b[i]/(1-beta1**t))

    return W, b, vt_W, vt_b, mt_W, mt_b

In [22]:
def backward_propogation(W, b, y_one_hot, activation, preactivation, L, activation_func, loss_func):
    """
        back_propogation algorithm for updating the parameters
    """

    grad_preactivation = []

    # grad with respect to output units
    if loss_func == 'cross_entropy':
        grad_preactivation.append(activation[L]-y_one_hot)
    else:
        grad_preactivation.append(activation[L]-y_one_hot)

    grad_W = []
    grad_b = []

    for i in range(L, 0, -1):
        # grad with respect to weights and biases
        grad_W.append(np.matmul(grad_preactivation[-1], activation[i-1].T))
        grad_b.append(np.sum(grad_preactivation[-1], axis=1, keepdims=True))

        if i == 1:
            break

        # grad with respect to hidden units
        grad_hi = np.matmul(W[i-1].T, grad_preactivation[-1])

        if activation_func == "sigmoid":
            grad_preactivation.append(np.multiply(grad_hi, sigmoid_derivative(preactivation[i-1])))
        elif activation_func == "relu":
            grad_preactivation.append(np.multiply(grad_hi, relu_derivative(preactivation[i-1])))
        elif activation_func == "tanh":
            grad_preactivation.append(np.multiply(grad_hi, tanh_derivative(preactivation[i-1])))

    return grad_W[::-1], grad_b[::-1]

In [23]:
def split_train_val_data(X, y, validation_percent=0.1, random_seed=None):
    """
    Split the data into training and validation sets.
    """
    if random_seed is not None:
        np.random.seed(random_seed)

    num_samples = len(X)
    num_val_samples = int(validation_percent * num_samples)

    # Randomly shuffle indices
    indices = np.arange(num_samples)
    np.random.shuffle(indices)

    # Split data based on shuffled indices
    val_indices = indices[:num_val_samples]
    train_indices = indices[num_val_samples:]

    X_train, X_val = X[train_indices], X[val_indices]
    y_train, y_val = y[train_indices], y[val_indices]

    return X_train, X_val, y_train, y_val

In [24]:
def gradient_descent(X_train, X_val, y_train, y_val, config):
    # # setting hyper parameters
    num_hidden_layers = config.num_hidden_layers
    size_of_hidden_layer = config.size_of_hidden_layer
    activation_func = config.activation_func
    loss_func = "cross_entropy"
    method = config.method
    num_images = len(X_train)
    batch_size = config.batch_size
    epoch = config.epochs
    eta = config.eta
    weight_decay = config.weight_decay
    optimizer = config.optimizer

    # used for momentum
    beta = 0.5
    W_history = [0] * (num_hidden_layers+1)
    b_history = [0] * (num_hidden_layers+1)

    # used for rmsprop
    vt_W = [0] * (num_hidden_layers+1)
    vt_b = [0] * (num_hidden_layers+1)
    epsilon = 1e-8
    beta = 0.5

    # usef for adam and nadam
    vt_W = [0] * (num_hidden_layers+1)
    vt_b = [0] * (num_hidden_layers+1)
    mt_W = [0] * (num_hidden_layers+1)
    mt_b = [0] * (num_hidden_layers+1)
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-8
    t = 1

    run_name = f"opt_{optimizer}_act_{activation_func}_ep_{epoch}_eta_{eta}_L_{num_hidden_layers}_hs_{size_of_hidden_layer}_bs_{batch_size}_mthd_{method}_wd_{weight_decay}"
    y_pred = []

    # making the list of structure of neural networks
    neurons_per_layer = [X_train.shape[1]]
    for i in range(num_hidden_layers):
        neurons_per_layer.append(size_of_hidden_layer)
    neurons_per_layer.append(k)


    # Initialize W, b
    W, b = initialize_Wandb(neurons_per_layer, method)
    # y_one_hot = generate_one_hot(num_images, y_train)

    y_hat = []
    for iteration in tqdm(range(epoch)):
        for i in range(0, num_images, batch_size):
            if i + batch_size > num_images:
                X_batch = X_train[i:]
                y_batch = y_train[i:]
                batch_size = len(X_batch)
            else:
                X_batch = X_train[i:i+batch_size]
                y_batch = y_train[i:i+batch_size]

            if optimizer == "momentum":
                hL, activation, preactivation = forward_propogation(W, b, X_batch, num_hidden_layers, activation_func)
                y_one_hot = generate_one_hot_matrix(batch_size, y_batch)
                grad_W, grad_b = backward_propogation(W, b, y_one_hot, activation, preactivation, num_hidden_layers+1, activation_func, "cross_entropy")
                W, b, W_history, b_history = update_parameters_mgd(W, grad_W, b, grad_b, eta, beta, W_history, b_history)
            elif optimizer == "nag":
                W_look_ahead, b_look_ahead, W_history, b_history = update_parameters_nag(W, W_history, b, b_history, eta, beta) # updating by history
                hL, activation, preactivation = forward_propogation(W_look_ahead, b_look_ahead, X_batch, num_hidden_layers, activation_func)
                y_one_hot = generate_one_hot_matrix(batch_size, y_batch)
                grad_W, grad_b = backward_propogation(W, b, y_one_hot, activation, preactivation, num_hidden_layers+1, activation_func, loss_func)
                W, b = update_parameters(W, grad_W, b, grad_b, eta) # updating by the grad of lookahead point
            elif optimizer == "rmsprop":
                hL, activation, preactivation = forward_propogation(W, b, X_batch, num_hidden_layers, activation_func)
                y_one_hot = generate_one_hot_matrix(batch_size, y_batch)
                grad_W, grad_b = backward_propogation(W, b, y_one_hot, activation, preactivation, num_hidden_layers+1, activation_func, loss_func)
                W, b, vt_W, vt_b = update_parameters_rmsprop(W, grad_W, b, grad_b, vt_W, vt_b, eta, beta, epsilon)
            elif optimizer == "adam":
                hL, activation, preactivation = forward_propogation(W, b, X_batch, num_hidden_layers, activation_func)
                y_one_hot = generate_one_hot_matrix(batch_size, y_batch)
                grad_W, grad_b = backward_propogation(W, b, y_one_hot, activation, preactivation, num_hidden_layers+1, activation_func, loss_func)
                W, b, vt_W, vt_b, mt_W, mt_b = update_parameters_adam(W, grad_W, vt_W, mt_W, b, grad_b, vt_b, mt_b, t, eta, beta1, beta2, epsilon)
                t += 1
            elif optimizer == "nadam":
                W_look_ahead, b_look_ahead, vt_W, vt_b = update_parameters_nag(W, W_history, b, b_history, eta, beta) # updating by history
                hL, activation, preactivation = forward_propogation(W_look_ahead, b_look_ahead, X_batch, num_hidden_layers, activation_func)
                y_one_hot = generate_one_hot_matrix(batch_size, y_batch)
                grad_W, grad_b = backward_propogation(W, b, y_one_hot, activation, preactivation, num_hidden_layers+1, activation_func, loss_func)
                W, b, vt_W, vt_b, mt_W, mt_b = update_parameters_nadam(W, grad_W, vt_W, mt_W, b, grad_b, vt_b, mt_b, t, eta, beta1, beta2, epsilon)
                t += 1
            else:
                hL, activation, preactivation = forward_propogation(W, b, X_batch, num_hidden_layers, activation_func)
                y_one_hot = generate_one_hot_matrix(batch_size, y_batch)
                grad_W, grad_b = backward_propogation(W, b, y_one_hot, activation, preactivation, num_hidden_layers+1, activation_func, loss_func)
                W, b = update_parameters(W, grad_W, b, grad_b, eta)


        val_accuracy, val_loss = evaluate_model(W, b, X_val, y_val, num_hidden_layers, activation_func, weight_decay, loss_func)
        train_accuracy, train_loss = evaluate_model(W, b, X_train, y_train, num_hidden_layers, activation_func, weight_decay, loss_func)
        print(f" val_accuracy:{val_accuracy}, val_loss:{val_loss}, train_accuracy:{train_accuracy}, train_loss:{train_loss}")
        wandb.log({"val_accuracy":val_accuracy, 'val_loss':val_loss, "train_accuracy":train_accuracy, "train_loss":train_loss, "epoch":iteration+1})

    wandb.run.name = run_name
    wandb.run.save()
    wandb.run.finish()

## Question 7

In [25]:
def plot_confusion_matrix(y_pred, y_test):
    wandb.log({"confusion_matrix" : wandb.plot.confusion_matrix(y_true=y_test, preds=y_pred, class_names=None)})
    wandb.finish()

In [26]:
def calc_test_accuracy(X_test, y_test, W, b, num_hidden_layers, activation_func):
    hL, activation, preactivation = forward_propogation(W, b, X_test, num_hidden_layers, activation_func)
    y_pred = []
    for i in range(len(hL[0])):
        y_pred.append(np.argmax(hL[:, i]))

    count = 0
    for i in range(len(y_test)):
        if y_test[i] == y_pred[i]:
            count += 1

    return count * 100 / len(y_test)

In [None]:
X_train, X_val, y_train, y_val = split_train_val_data(X_train, y_train)

sweep_config = {
"name": "Cross Entropy Loss",
"metric": {
    "name":"val_accuracy",
    "goal": "maximize"
},
"method": "bayes",
"parameters": {
        "eta": {
            "values": [1e-3, 1e-4]
        },
        "activation_func": {
            "values": ["sigmoid", "tanh", "relu"]
        },
        "method": {
            "values": ["xavier", "random_uniform", "random_normal"]
        },
        "optimizer": {
            "values": ["sgd", "momentum", "nag", "rmsprop", "adam", "nadam"]
            # "values": ["nadam"]
        },
        "batch_size": {
            "values": [16,32]
        },
        "epochs": {
            "values": [5, 10]
        },
        "weight_decay": {
            "values": [0, 0.0005, 0.05]
        },
        "size_of_hidden_layer": {
            "values": [32, 64, 128]
        },
        "num_hidden_layers": {
            "values": [3, 4, 5]
        }
    }
}

def train():
    with wandb.init(project="DL_Assignment_1") as run:
        config = wandb.config
        gradient_descent(X_train, X_val, y_train, y_val, config)

sweep_id = wandb.sweep(sweep_config, project = "DL_Assignment_1")
wandb.agent(sweep_id, train, count = 50)
wandb.finish()

Create sweep with ID: qxdmzd5h
Sweep URL: https://wandb.ai/space_monkeys/DL_Assignment_1/sweeps/qxdmzd5h


[34m[1mwandb[0m: Agent Starting Run: 1yxw1q8t with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:43<06:34, 43.83s/it]

 val_accuracy:83.55, val_loss:0.4842041820039142, train_accuracy:84.57962962962964, train_loss:0.44242950094050215


 20%|██        | 2/10 [01:16<04:56, 37.00s/it]

 val_accuracy:85.66666666666667, val_loss:0.43105996322116996, train_accuracy:86.7925925925926, train_loss:0.3820517186568632


 30%|███       | 3/10 [01:49<04:06, 35.17s/it]

 val_accuracy:85.33333333333333, val_loss:0.4681147298626789, train_accuracy:86.72777777777777, train_loss:0.40785574981548367


 40%|████      | 4/10 [02:22<03:26, 34.33s/it]

 val_accuracy:86.2, val_loss:0.4614500784708915, train_accuracy:87.28703703703704, train_loss:0.3881529152954845


 50%|█████     | 5/10 [02:55<02:49, 33.90s/it]

 val_accuracy:86.28333333333333, val_loss:0.44946704167531465, train_accuracy:88.11851851851851, train_loss:0.3585718697848538


 60%|██████    | 6/10 [03:27<02:13, 33.30s/it]

 val_accuracy:86.5, val_loss:0.43975598703447727, train_accuracy:88.67777777777778, train_loss:0.35024785540168885


 70%|███████   | 7/10 [04:01<01:40, 33.48s/it]

 val_accuracy:86.93333333333334, val_loss:0.42882793328617286, train_accuracy:88.72962962962963, train_loss:0.34087711126005943


 80%|████████  | 8/10 [04:33<01:06, 33.19s/it]

 val_accuracy:87.1, val_loss:0.4365920927958318, train_accuracy:88.99444444444444, train_loss:0.3359085570417339


 90%|█████████ | 9/10 [05:06<00:33, 33.02s/it]

 val_accuracy:87.38333333333334, val_loss:0.4454425554710036, train_accuracy:89.36481481481482, train_loss:0.3338265115472134


100%|██████████| 10/10 [05:38<00:00, 33.88s/it]


 val_accuracy:87.7, val_loss:0.4586869470076204, train_accuracy:89.03333333333333, train_loss:0.3427085916639764


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▄▅▆▇▇▇██
train_loss,█▄▆▅▃▂▁▁▁▂
val_accuracy,▁▅▄▅▆▆▇▇▇█
val_loss,█▁▆▅▄▂▁▂▃▅

0,1
epoch,10.0
train_accuracy,89.03333
train_loss,0.34271
val_accuracy,87.7
val_loss,0.45869


[34m[1mwandb[0m: Agent Starting Run: 45fi416m with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


 10%|█         | 1/10 [00:15<02:20, 15.64s/it]

 val_accuracy:82.81666666666666, val_loss:0.4922455697911488, train_accuracy:82.81111111111112, train_loss:0.4780895218950884


 20%|██        | 2/10 [00:30<02:00, 15.07s/it]

 val_accuracy:83.91666666666667, val_loss:0.45151119068486684, train_accuracy:84.28703703703704, train_loss:0.4273905416259489


 30%|███       | 3/10 [00:45<01:44, 14.90s/it]

 val_accuracy:84.73333333333333, val_loss:0.42439920162495354, train_accuracy:85.82407407407408, train_loss:0.38922764836716395


 40%|████      | 4/10 [00:59<01:29, 14.86s/it]

 val_accuracy:85.13333333333334, val_loss:0.4112658443399837, train_accuracy:86.49444444444444, train_loss:0.3681053725383026


 50%|█████     | 5/10 [01:14<01:13, 14.76s/it]

 val_accuracy:85.36666666666666, val_loss:0.39734489199366524, train_accuracy:87.12407407407407, train_loss:0.35082169553610437


 60%|██████    | 6/10 [01:28<00:58, 14.70s/it]

 val_accuracy:85.76666666666667, val_loss:0.3827508313068085, train_accuracy:87.81111111111112, train_loss:0.3314280481187672


 70%|███████   | 7/10 [01:43<00:43, 14.63s/it]

 val_accuracy:86.11666666666666, val_loss:0.3785552111631698, train_accuracy:88.21481481481482, train_loss:0.3197455888776821


 80%|████████  | 8/10 [01:58<00:29, 14.68s/it]

 val_accuracy:85.96666666666667, val_loss:0.3814482923800705, train_accuracy:88.16666666666667, train_loss:0.316799964992761


 90%|█████████ | 9/10 [02:16<00:15, 15.67s/it]

 val_accuracy:86.11666666666666, val_loss:0.3778392283304656, train_accuracy:88.55185185185185, train_loss:0.3072282368447079


100%|██████████| 10/10 [02:30<00:00, 15.07s/it]

 val_accuracy:86.58333333333333, val_loss:0.373575382886013, train_accuracy:88.83888888888889, train_loss:0.29815562895192116





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▇▇▇██
train_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▁▃▅▅▆▆▇▇▇█
val_loss,█▆▄▃▂▂▁▁▁▁

0,1
epoch,10.0
train_accuracy,88.83889
train_loss,0.29816
val_accuracy,86.58333
val_loss,0.37358


[34m[1mwandb[0m: Agent Starting Run: pkb3pjzl with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: random_uniform
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


  return np.exp(a)/np.sum(np.exp(a), axis=0)
  return np.exp(a)/np.sum(np.exp(a), axis=0)
 20%|██        | 1/5 [00:14<00:56, 14.14s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 40%|████      | 2/5 [00:37<00:59, 19.75s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 60%|██████    | 3/5 [01:01<00:42, 21.34s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 80%|████████  | 4/5 [01:22<00:21, 21.53s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


100%|██████████| 5/5 [01:46<00:00, 21.28s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
train_accuracy,▁▁▁▁▁
val_accuracy,▁▁▁▁▁

0,1
epoch,5.0
train_accuracy,9.9963
train_loss,
val_accuracy,10.03333
val_loss,


[34m[1mwandb[0m: Agent Starting Run: ybrimj7k with config:
[34m[1mwandb[0m: 	activation_func: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	eta: 0.001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005


 20%|██        | 1/5 [00:09<00:38,  9.63s/it]

 val_accuracy:20.05, val_loss:2.21113181250372, train_accuracy:20.825925925925926, train_loss:2.2098486323084616


 40%|████      | 2/5 [00:18<00:26,  8.93s/it]

 val_accuracy:43.833333333333336, val_loss:1.519848987908129, train_accuracy:44.285185185185185, train_loss:1.5198095588323735


 60%|██████    | 3/5 [00:26<00:17,  8.57s/it]

 val_accuracy:58.266666666666666, val_loss:1.1354651093208052, train_accuracy:58.88703703703704, train_loss:1.1323149477063823


 80%|████████  | 4/5 [00:35<00:09,  9.00s/it]

 val_accuracy:63.35, val_loss:0.9377304278236822, train_accuracy:63.8537037037037, train_loss:0.9361224601467096


100%|██████████| 5/5 [00:44<00:00,  8.94s/it]

 val_accuracy:67.7, val_loss:0.8333642860338902, train_accuracy:68.27777777777777, train_loss:0.8362705288387272





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
train_accuracy,▁▄▇▇█
train_loss,█▄▃▂▁
val_accuracy,▁▄▇▇█
val_loss,█▄▃▂▁

0,1
epoch,5.0
train_accuracy,68.27778
train_loss,0.83627
val_accuracy,67.7
val_loss,0.83336


[34m[1mwandb[0m: Agent Starting Run: b30k5agj with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


 10%|█         | 1/10 [00:35<05:19, 35.46s/it]

 val_accuracy:85.31666666666666, val_loss:0.40051360612297354, train_accuracy:85.98518518518519, train_loss:0.3835945876680503


 20%|██        | 2/10 [01:08<04:31, 33.99s/it]

 val_accuracy:86.68333333333334, val_loss:0.36009949545783176, train_accuracy:87.80555555555556, train_loss:0.33246939421592214


 30%|███       | 3/10 [01:42<03:57, 33.90s/it]

 val_accuracy:87.33333333333333, val_loss:0.3402234474076973, train_accuracy:88.83518518518518, train_loss:0.3046637540161254


 40%|████      | 4/10 [02:15<03:21, 33.61s/it]

 val_accuracy:87.8, val_loss:0.3282334971554472, train_accuracy:89.52777777777777, train_loss:0.2853331972234566


 50%|█████     | 5/10 [02:48<02:47, 33.52s/it]

 val_accuracy:87.93333333333334, val_loss:0.32018707020201265, train_accuracy:90.06111111111112, train_loss:0.27020346692667474


 60%|██████    | 6/10 [03:21<02:13, 33.32s/it]

 val_accuracy:88.23333333333333, val_loss:0.31460127149276557, train_accuracy:90.54074074074074, train_loss:0.25762700536397487


 70%|███████   | 7/10 [03:55<01:40, 33.36s/it]

 val_accuracy:88.4, val_loss:0.31087642242161767, train_accuracy:90.88518518518518, train_loss:0.24692918715396456


 80%|████████  | 8/10 [04:28<01:06, 33.44s/it]

 val_accuracy:88.61666666666666, val_loss:0.3085573898034445, train_accuracy:91.17777777777778, train_loss:0.2377314297525627


 90%|█████████ | 9/10 [05:04<00:34, 34.02s/it]

 val_accuracy:88.81666666666666, val_loss:0.3073591729023831, train_accuracy:91.44074074074074, train_loss:0.2297597598110067


100%|██████████| 10/10 [05:37<00:00, 33.74s/it]

 val_accuracy:88.9, val_loss:0.30708991549845205, train_accuracy:91.67962962962963, train_loss:0.2228180714283711





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▅▆▇▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
epoch,10.0
train_accuracy,91.67963
train_loss,0.22282
val_accuracy,88.9
val_loss,0.30709


[34m[1mwandb[0m: Agent Starting Run: ess7og33 with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:14<02:10, 14.49s/it]

 val_accuracy:83.4, val_loss:0.46578137269632974, train_accuracy:84.02222222222223, train_loss:0.4466217795938441


 20%|██        | 2/10 [00:38<02:39, 19.92s/it]

 val_accuracy:85.33333333333333, val_loss:0.40388801654315404, train_accuracy:86.29629629629629, train_loss:0.37627538217647793


 30%|███       | 3/10 [01:01<02:29, 21.33s/it]

 val_accuracy:86.25, val_loss:0.3797371270844235, train_accuracy:87.5537037037037, train_loss:0.33839133094001705


 40%|████      | 4/10 [01:25<02:14, 22.41s/it]

 val_accuracy:87.11666666666666, val_loss:0.36231010382964934, train_accuracy:88.4, train_loss:0.3131848605914198


 50%|█████     | 5/10 [01:50<01:56, 23.26s/it]

 val_accuracy:87.06666666666666, val_loss:0.3543389736586773, train_accuracy:89.12962962962963, train_loss:0.2951260758409558


 60%|██████    | 6/10 [02:14<01:34, 23.66s/it]

 val_accuracy:87.43333333333334, val_loss:0.34851356936579525, train_accuracy:89.57962962962964, train_loss:0.28076671431269307


 70%|███████   | 7/10 [02:39<01:12, 24.04s/it]

 val_accuracy:87.68333333333334, val_loss:0.34746181262359693, train_accuracy:89.91851851851852, train_loss:0.2713553324225681


 80%|████████  | 8/10 [03:05<00:49, 24.61s/it]

 val_accuracy:87.83333333333333, val_loss:0.34384918254514507, train_accuracy:90.51481481481481, train_loss:0.2545549630692424


 90%|█████████ | 9/10 [03:28<00:24, 24.09s/it]

 val_accuracy:87.7, val_loss:0.3461219368794528, train_accuracy:90.88333333333334, train_loss:0.24284499539747192


100%|██████████| 10/10 [03:52<00:00, 23.26s/it]

 val_accuracy:87.73333333333333, val_loss:0.349675672753118, train_accuracy:91.27962962962962, train_loss:0.23386724804933398





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▆▄▄▃▃▂▂▁▁
val_accuracy,▁▄▅▇▇▇████
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,91.27963
train_loss,0.23387
val_accuracy,87.73333
val_loss,0.34968


[34m[1mwandb[0m: Agent Starting Run: 0yb07941 with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: random_normal
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


 20%|██        | 1/5 [00:28<01:52, 28.21s/it]

 val_accuracy:9.3, val_loss:2.3027507088614394, train_accuracy:10.077777777777778, train_loss:2.3025774098125824


 40%|████      | 2/5 [00:56<01:24, 28.03s/it]

 val_accuracy:9.3, val_loss:2.3028183209201094, train_accuracy:10.077777777777778, train_loss:2.302579016602339


 60%|██████    | 3/5 [01:25<00:57, 28.59s/it]

 val_accuracy:9.3, val_loss:2.3028419027449627, train_accuracy:10.077777777777778, train_loss:2.3025801428258332


 80%|████████  | 4/5 [01:54<00:28, 28.87s/it]

 val_accuracy:9.3, val_loss:2.3028499970120246, train_accuracy:10.077777777777778, train_loss:2.3025805931384524


100%|██████████| 5/5 [02:23<00:00, 28.73s/it]

 val_accuracy:9.3, val_loss:2.302852760686623, train_accuracy:10.077777777777778, train_loss:2.30258075417533





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
train_accuracy,▁▁▁▁▁
train_loss,▁▄▇▇█
val_accuracy,▁▁▁▁▁
val_loss,▁▆▇██

0,1
epoch,5.0
train_accuracy,10.07778
train_loss,2.30258
val_accuracy,9.3
val_loss,2.30285


[34m[1mwandb[0m: Agent Starting Run: mswrz3q3 with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:12<01:55, 12.89s/it]

 val_accuracy:83.36666666666666, val_loss:0.48074410431758496, train_accuracy:83.46666666666667, train_loss:0.4656791880548712


 20%|██        | 2/10 [00:30<02:05, 15.64s/it]

 val_accuracy:85.63333333333334, val_loss:0.41541446478689675, train_accuracy:86.21851851851852, train_loss:0.38784197754162225


 30%|███       | 3/10 [00:48<01:58, 16.92s/it]

 val_accuracy:86.51666666666667, val_loss:0.38641904693763324, train_accuracy:87.5111111111111, train_loss:0.34749920954911767


 40%|████      | 4/10 [01:07<01:45, 17.57s/it]

 val_accuracy:86.88333333333334, val_loss:0.376373227828696, train_accuracy:88.03148148148148, train_loss:0.32942822876880906


 50%|█████     | 5/10 [01:24<01:27, 17.51s/it]

 val_accuracy:87.28333333333333, val_loss:0.3636267372662742, train_accuracy:88.76481481481481, train_loss:0.3077566215851118


 60%|██████    | 6/10 [01:44<01:12, 18.18s/it]

 val_accuracy:87.48333333333333, val_loss:0.3562937180962313, train_accuracy:89.41296296296296, train_loss:0.2901443872366654


 70%|███████   | 7/10 [02:02<00:54, 18.08s/it]

 val_accuracy:87.53333333333333, val_loss:0.3526254435255846, train_accuracy:89.79074074074074, train_loss:0.27865875779205795


 80%|████████  | 8/10 [02:20<00:36, 18.04s/it]

 val_accuracy:87.73333333333333, val_loss:0.34631000370172516, train_accuracy:90.32407407407408, train_loss:0.26451409680244165


 90%|█████████ | 9/10 [02:39<00:18, 18.42s/it]

 val_accuracy:87.83333333333333, val_loss:0.3456073634423113, train_accuracy:90.63518518518518, train_loss:0.2557794252781657


100%|██████████| 10/10 [02:57<00:00, 17.71s/it]

 val_accuracy:88.08333333333333, val_loss:0.3448344921822266, train_accuracy:90.92962962962963, train_loss:0.2468433338313573





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▅▆▇▇▇██
train_loss,█▆▄▄▃▂▂▂▁▁
val_accuracy,▁▄▆▆▇▇▇▇██
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
epoch,10.0
train_accuracy,90.92963
train_loss,0.24684
val_accuracy,88.08333
val_loss,0.34483


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: iojtkd57 with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:26<04:00, 26.68s/it]

 val_accuracy:85.3, val_loss:0.41211258526940386, train_accuracy:85.45925925925926, train_loss:0.40748177995728563


 20%|██        | 2/10 [01:00<04:08, 31.08s/it]

 val_accuracy:86.43333333333334, val_loss:0.382725374476582, train_accuracy:87.0111111111111, train_loss:0.36293909218002957


 30%|███       | 3/10 [01:32<03:39, 31.36s/it]

 val_accuracy:86.93333333333334, val_loss:0.36983797056735096, train_accuracy:87.77037037037037, train_loss:0.3407844080368714


 40%|████      | 4/10 [02:05<03:13, 32.18s/it]

 val_accuracy:87.26666666666667, val_loss:0.3610134582311589, train_accuracy:88.28888888888889, train_loss:0.3272227084242161


 50%|█████     | 5/10 [02:38<02:41, 32.33s/it]

 val_accuracy:87.53333333333333, val_loss:0.35352428574357303, train_accuracy:88.73518518518519, train_loss:0.3160211603863007


 60%|██████    | 6/10 [03:11<02:10, 32.64s/it]

 val_accuracy:87.58333333333333, val_loss:0.34800734210041706, train_accuracy:89.09814814814816, train_loss:0.30478657503368833


 70%|███████   | 7/10 [03:44<01:38, 32.78s/it]

 val_accuracy:87.8, val_loss:0.3442405692803885, train_accuracy:89.40925925925926, train_loss:0.2954556762411069


 80%|████████  | 8/10 [04:18<01:05, 32.95s/it]

 val_accuracy:88.13333333333334, val_loss:0.3414136714252079, train_accuracy:89.66111111111111, train_loss:0.2879707437426921


 90%|█████████ | 9/10 [04:51<00:32, 32.95s/it]

 val_accuracy:88.2, val_loss:0.3389685616900477, train_accuracy:89.94259259259259, train_loss:0.28083973033095166


100%|██████████| 10/10 [05:25<00:00, 32.59s/it]

 val_accuracy:88.23333333333333, val_loss:0.33674943858699175, train_accuracy:90.24444444444444, train_loss:0.2737294489489973





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▄▅▆▆▆▇███
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,90.24444
train_loss,0.27373
val_accuracy,88.23333
val_loss,0.33675


[34m[1mwandb[0m: Agent Starting Run: cp9xj1tr with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:13<02:01, 13.45s/it]

 val_accuracy:81.4, val_loss:0.5249130285338766, train_accuracy:81.81851851851852, train_loss:0.5133009923785634


 20%|██        | 2/10 [00:26<01:47, 13.46s/it]

 val_accuracy:83.21666666666667, val_loss:0.4729287219252038, train_accuracy:83.97222222222223, train_loss:0.4491913830850602


 30%|███       | 3/10 [00:40<01:34, 13.50s/it]

 val_accuracy:84.06666666666666, val_loss:0.4427403382368544, train_accuracy:85.3, train_loss:0.41094749421315596


 40%|████      | 4/10 [00:54<01:21, 13.57s/it]

 val_accuracy:84.61666666666666, val_loss:0.42666184870787005, train_accuracy:85.9888888888889, train_loss:0.3886239040416434


 50%|█████     | 5/10 [01:08<01:08, 13.73s/it]

 val_accuracy:85.33333333333333, val_loss:0.4121433116487144, train_accuracy:86.72962962962963, train_loss:0.3697376681371488


 60%|██████    | 6/10 [01:21<00:54, 13.68s/it]

 val_accuracy:85.46666666666667, val_loss:0.40606092122717546, train_accuracy:87.09444444444445, train_loss:0.3593166170838483


 70%|███████   | 7/10 [01:35<00:40, 13.56s/it]

 val_accuracy:85.65, val_loss:0.4004081056047979, train_accuracy:87.4074074074074, train_loss:0.34949524634762613


 80%|████████  | 8/10 [01:48<00:27, 13.63s/it]

 val_accuracy:85.78333333333333, val_loss:0.39295963706302073, train_accuracy:87.74814814814815, train_loss:0.338806827838241


 90%|█████████ | 9/10 [02:01<00:13, 13.47s/it]

 val_accuracy:86.08333333333333, val_loss:0.38582760847776976, train_accuracy:88.11111111111111, train_loss:0.3288362709957895


100%|██████████| 10/10 [02:14<00:00, 13.50s/it]

 val_accuracy:86.3, val_loss:0.38164491612021767, train_accuracy:88.31851851851852, train_loss:0.3216648794341242





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▅▆▇▇▇██
train_loss,█▆▄▃▃▂▂▂▁▁
val_accuracy,▁▄▅▆▇▇▇▇██
val_loss,█▅▄▃▂▂▂▂▁▁

0,1
epoch,10.0
train_accuracy,88.31852
train_loss,0.32166
val_accuracy,86.3
val_loss,0.38164


[34m[1mwandb[0m: Agent Starting Run: 9c8vuuvx with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:12<01:51, 12.40s/it]

 val_accuracy:82.21666666666667, val_loss:0.5041548558613985, train_accuracy:82.32037037037037, train_loss:0.493082718283198


 20%|██        | 2/10 [00:24<01:39, 12.43s/it]

 val_accuracy:84.45, val_loss:0.43337577422882056, train_accuracy:84.99074074074075, train_loss:0.4159787198159405


 30%|███       | 3/10 [00:37<01:28, 12.60s/it]

 val_accuracy:85.6, val_loss:0.40043923070412296, train_accuracy:86.25, train_loss:0.37654427301281607


 40%|████      | 4/10 [00:50<01:15, 12.63s/it]

 val_accuracy:85.91666666666667, val_loss:0.38116463243943033, train_accuracy:87.29629629629629, train_loss:0.3483435498008214


 50%|█████     | 5/10 [01:04<01:06, 13.21s/it]

 val_accuracy:86.1, val_loss:0.37294185062176444, train_accuracy:87.83703703703704, train_loss:0.3321819353208179


 60%|██████    | 6/10 [01:17<00:52, 13.04s/it]

 val_accuracy:86.45, val_loss:0.3649320551854226, train_accuracy:88.18888888888888, train_loss:0.3199871178938287


 70%|███████   | 7/10 [01:29<00:38, 12.87s/it]

 val_accuracy:86.43333333333334, val_loss:0.36187466929347784, train_accuracy:88.58888888888889, train_loss:0.3079925147428351


 80%|████████  | 8/10 [01:41<00:25, 12.63s/it]

 val_accuracy:86.5, val_loss:0.36347573070454225, train_accuracy:88.92777777777778, train_loss:0.3000061949279452


 90%|█████████ | 9/10 [01:54<00:12, 12.47s/it]

 val_accuracy:86.73333333333333, val_loss:0.3573147325484913, train_accuracy:89.25, train_loss:0.2895799898255606


100%|██████████| 10/10 [02:05<00:00, 12.55s/it]

 val_accuracy:86.7, val_loss:0.35930256189314425, train_accuracy:89.45185185185186, train_loss:0.2838202840698878





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▄▆▇▇█████
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,89.45185
train_loss,0.28382
val_accuracy,86.7
val_loss,0.3593


[34m[1mwandb[0m: Agent Starting Run: to4awfle with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


 10%|█         | 1/10 [00:14<02:09, 14.44s/it]

 val_accuracy:83.2, val_loss:0.49484113612314656, train_accuracy:83.35925925925926, train_loss:0.490513227219734


 20%|██        | 2/10 [00:28<01:54, 14.26s/it]

 val_accuracy:84.85, val_loss:0.4663038209544017, train_accuracy:85.18333333333334, train_loss:0.45476360429020835


 30%|███       | 3/10 [00:42<01:39, 14.23s/it]

 val_accuracy:85.48333333333333, val_loss:0.47773847985208584, train_accuracy:85.58703703703704, train_loss:0.4625263031613412


 40%|████      | 4/10 [00:56<01:25, 14.22s/it]

 val_accuracy:84.86666666666666, val_loss:0.5103286965115086, train_accuracy:85.55555555555556, train_loss:0.4669106201820529


  loss += -1.0 * y[i][j] * np.log(y_hat[i][j])
  loss += -1.0 * y[i][j] * np.log(y_hat[i][j])
 50%|█████     | 5/10 [01:11<01:12, 14.45s/it]

 val_accuracy:83.78333333333333, val_loss:0.5859984713672555, train_accuracy:84.75925925925925, train_loss:nan


 60%|██████    | 6/10 [01:26<00:57, 14.45s/it]

 val_accuracy:83.11666666666666, val_loss:nan, train_accuracy:83.60185185185185, train_loss:nan


 70%|███████   | 7/10 [01:40<00:43, 14.45s/it]

 val_accuracy:83.61666666666666, val_loss:nan, train_accuracy:84.55185185185185, train_loss:nan


 80%|████████  | 8/10 [01:55<00:29, 14.71s/it]

 val_accuracy:84.38333333333334, val_loss:nan, train_accuracy:84.9962962962963, train_loss:nan


 90%|█████████ | 9/10 [02:10<00:14, 14.70s/it]

 val_accuracy:83.43333333333334, val_loss:nan, train_accuracy:84.11666666666666, train_loss:nan


100%|██████████| 10/10 [02:24<00:00, 14.49s/it]

 val_accuracy:83.6, val_loss:nan, train_accuracy:84.44259259259259, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▇██▅▂▅▆▃▄
train_loss,█▁▃▃
val_accuracy,▁▆█▆▃▁▂▅▂▂
val_loss,▃▁▂▄█

0,1
epoch,10.0
train_accuracy,84.44259
train_loss,
val_accuracy,83.6
val_loss,


[34m[1mwandb[0m: Agent Starting Run: a2a8tcmm with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:25<03:45, 25.04s/it]

 val_accuracy:81.73333333333333, val_loss:0.5246260542030747, train_accuracy:82.36851851851851, train_loss:0.49768810616283904


 20%|██        | 2/10 [00:49<03:16, 24.58s/it]

 val_accuracy:84.56666666666666, val_loss:0.4527763551705047, train_accuracy:85.64814814814815, train_loss:0.4204414983676267


 30%|███       | 3/10 [01:13<02:49, 24.20s/it]

 val_accuracy:85.13333333333334, val_loss:0.4699191953369437, train_accuracy:85.4462962962963, train_loss:0.43387801127963177


 40%|████      | 4/10 [01:37<02:25, 24.22s/it]

 val_accuracy:85.51666666666667, val_loss:0.4632853242206708, train_accuracy:85.79629629629629, train_loss:0.4244177519806149


 50%|█████     | 5/10 [02:02<02:02, 24.53s/it]

 val_accuracy:85.25, val_loss:0.4796156730750582, train_accuracy:86.40925925925926, train_loss:0.4150758599882855


 60%|██████    | 6/10 [02:27<01:38, 24.62s/it]

 val_accuracy:84.78333333333333, val_loss:0.5000467801017902, train_accuracy:85.74074074074075, train_loss:0.4396713228796435


 70%|███████   | 7/10 [02:50<01:12, 24.22s/it]

 val_accuracy:86.08333333333333, val_loss:0.4817741960330853, train_accuracy:86.81296296296296, train_loss:0.40729663215559475


 80%|████████  | 8/10 [03:14<00:48, 24.09s/it]

 val_accuracy:85.91666666666667, val_loss:0.49839287980100727, train_accuracy:86.92777777777778, train_loss:0.4201560296208226


 90%|█████████ | 9/10 [03:39<00:24, 24.26s/it]

 val_accuracy:86.06666666666666, val_loss:0.5128953851859511, train_accuracy:87.0425925925926, train_loss:0.41528500911711946


100%|██████████| 10/10 [04:02<00:00, 24.27s/it]

 val_accuracy:85.98333333333333, val_loss:0.5186025520188158, train_accuracy:87.11296296296297, train_loss:0.422060026853765





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▆▆▇▆████
train_loss,█▂▃▂▂▄▁▂▂▂
val_accuracy,▁▆▆▇▇▆████
val_loss,█▁▃▂▄▆▄▅▇▇

0,1
epoch,10.0
train_accuracy,87.11296
train_loss,0.42206
val_accuracy,85.98333
val_loss,0.5186


[34m[1mwandb[0m: Agent Starting Run: eppbjho9 with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


 10%|█         | 1/10 [00:13<01:58, 13.21s/it]

 val_accuracy:83.46666666666667, val_loss:0.4743230548345325, train_accuracy:83.75740740740741, train_loss:0.45987355290781534


 20%|██        | 2/10 [00:31<02:09, 16.23s/it]

 val_accuracy:84.63333333333334, val_loss:0.453535439064145, train_accuracy:85.21296296296296, train_loss:0.43439674228176145


 30%|███       | 3/10 [00:52<02:07, 18.17s/it]

 val_accuracy:85.5, val_loss:0.4431183073354988, train_accuracy:86.18518518518519, train_loss:0.41635890792915353


 40%|████      | 4/10 [01:10<01:50, 18.35s/it]

 val_accuracy:85.5, val_loss:0.4418512828437544, train_accuracy:86.27592592592593, train_loss:0.41563738217291163


 50%|█████     | 5/10 [01:30<01:34, 18.92s/it]

 val_accuracy:85.63333333333334, val_loss:0.4764722739401999, train_accuracy:86.28703703703704, train_loss:0.4342630922637404


 60%|██████    | 6/10 [01:49<01:15, 18.77s/it]

 val_accuracy:86.0, val_loss:0.4712664976176771, train_accuracy:86.93888888888888, train_loss:0.4253802192443663


 70%|███████   | 7/10 [02:08<00:57, 19.01s/it]

 val_accuracy:85.65, val_loss:0.4877607670370363, train_accuracy:86.5425925925926, train_loss:0.44598595639509236


 80%|████████  | 8/10 [02:26<00:37, 18.73s/it]

 val_accuracy:86.08333333333333, val_loss:0.5120378080712623, train_accuracy:86.86481481481482, train_loss:0.44959039437559206


 90%|█████████ | 9/10 [02:44<00:18, 18.55s/it]

 val_accuracy:85.6, val_loss:0.553760462177865, train_accuracy:86.36296296296297, train_loss:nan


100%|██████████| 10/10 [03:05<00:00, 18.50s/it]

 val_accuracy:86.21666666666667, val_loss:nan, train_accuracy:86.78518518518518, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▇▇█▇█▇█
train_loss,█▄▁▁▄▃▆▆
val_accuracy,▁▄▆▆▇▇▇█▆█
val_loss,▃▂▁▁▃▃▄▅█

0,1
epoch,10.0
train_accuracy,86.78519
train_loss,
val_accuracy,86.21667
val_loss,


[34m[1mwandb[0m: Agent Starting Run: kx43pfkd with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:19<02:54, 19.34s/it]

 val_accuracy:79.75, val_loss:0.7027873353187818, train_accuracy:80.44259259259259, train_loss:nan


 20%|██        | 2/10 [00:37<02:27, 18.49s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 30%|███       | 3/10 [00:58<02:18, 19.77s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 40%|████      | 4/10 [01:15<01:52, 18.77s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 50%|█████     | 5/10 [01:33<01:31, 18.33s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 60%|██████    | 6/10 [01:52<01:14, 18.52s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 70%|███████   | 7/10 [02:09<00:54, 18.13s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 80%|████████  | 8/10 [02:27<00:35, 17.98s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 90%|█████████ | 9/10 [02:46<00:18, 18.29s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


100%|██████████| 10/10 [03:03<00:00, 18.34s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,█▁▁▁▁▁▁▁▁▁
val_accuracy,█▁▁▁▁▁▁▁▁▁
val_loss,▁

0,1
epoch,10.0
train_accuracy,9.9963
train_loss,
val_accuracy,10.03333
val_loss,


[34m[1mwandb[0m: Agent Starting Run: cuegghit with config:
[34m[1mwandb[0m: 	activation_func: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.001
[34m[1mwandb[0m: 	method: random_normal
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 32
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:08<01:13,  8.16s/it]

 val_accuracy:9.3, val_loss:2.3051519893543375, train_accuracy:10.077777777777778, train_loss:2.3028606532226337


 20%|██        | 2/10 [00:19<01:19,  9.96s/it]

 val_accuracy:9.3, val_loss:2.306940972400851, train_accuracy:10.077777777777778, train_loss:2.303130333594884


 30%|███       | 3/10 [00:31<01:17, 11.03s/it]

 val_accuracy:9.3, val_loss:2.307248550032974, train_accuracy:10.077777777777778, train_loss:2.3031652503113906


 40%|████      | 4/10 [00:43<01:08, 11.45s/it]

 val_accuracy:9.3, val_loss:2.3074447128392856, train_accuracy:10.077777777777778, train_loss:2.303187069774247


 50%|█████     | 5/10 [00:55<00:57, 11.58s/it]

 val_accuracy:9.3, val_loss:2.3076019527300953, train_accuracy:10.077777777777778, train_loss:2.3032045416617035


 60%|██████    | 6/10 [01:07<00:46, 11.64s/it]

 val_accuracy:9.3, val_loss:2.307743737252416, train_accuracy:10.077777777777778, train_loss:2.3032202955230137


 70%|███████   | 7/10 [01:19<00:35, 11.72s/it]

 val_accuracy:9.3, val_loss:2.307883114147471, train_accuracy:10.077777777777778, train_loss:2.303235781847343


 80%|████████  | 8/10 [01:31<00:23, 11.81s/it]

 val_accuracy:9.3, val_loss:2.3080307961984756, train_accuracy:10.077777777777778, train_loss:2.3032521909738097


 90%|█████████ | 9/10 [01:42<00:11, 11.71s/it]

 val_accuracy:9.3, val_loss:2.308198615545168, train_accuracy:10.077777777777778, train_loss:2.303270837584792


100%|██████████| 10/10 [01:56<00:00, 11.65s/it]

 val_accuracy:9.3, val_loss:2.3084022000583735, train_accuracy:10.077777777777778, train_loss:2.3032934581163405





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▅▆▆▇▇▇▇██
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▅▆▆▆▇▇▇██

0,1
epoch,10.0
train_accuracy,10.07778
train_loss,2.30329
val_accuracy,9.3
val_loss,2.3084


[34m[1mwandb[0m: Agent Starting Run: k1tt8tk8 with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: random_uniform
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:14<02:10, 14.47s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 20%|██        | 2/10 [00:38<02:38, 19.83s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 30%|███       | 3/10 [01:00<02:27, 21.11s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 40%|████      | 4/10 [01:24<02:13, 22.23s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 50%|█████     | 5/10 [01:49<01:56, 23.23s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 60%|██████    | 6/10 [02:14<01:35, 23.84s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 70%|███████   | 7/10 [02:38<01:11, 23.87s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 80%|████████  | 8/10 [03:01<00:47, 23.66s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 90%|█████████ | 9/10 [03:24<00:23, 23.44s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


100%|██████████| 10/10 [03:50<00:00, 23.01s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,9.9963
train_loss,
val_accuracy,10.03333
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 4ktuzr1o with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:12<01:48, 12.11s/it]

 val_accuracy:82.21666666666667, val_loss:0.5139602203297725, train_accuracy:82.32037037037037, train_loss:0.4941722032241284


 20%|██        | 2/10 [00:24<01:36, 12.04s/it]

 val_accuracy:84.45, val_loss:0.4432114676573335, train_accuracy:84.99074074074075, train_loss:0.4170715746413308


 30%|███       | 3/10 [00:36<01:25, 12.18s/it]

 val_accuracy:85.6, val_loss:0.4103109352764315, train_accuracy:86.25, train_loss:0.3776411290764059


 40%|████      | 4/10 [00:48<01:12, 12.17s/it]

 val_accuracy:85.91666666666667, val_loss:0.3910791439298924, train_accuracy:87.29629629629629, train_loss:0.3494451621886505


 50%|█████     | 5/10 [01:01<01:01, 12.28s/it]

 val_accuracy:86.1, val_loss:0.38290486210135716, train_accuracy:87.83703703703704, train_loss:0.33328893659632813


 60%|██████    | 6/10 [01:13<00:49, 12.27s/it]

 val_accuracy:86.45, val_loss:0.3749467435184502, train_accuracy:88.18888888888888, train_loss:0.32109986104194294


 70%|███████   | 7/10 [01:25<00:36, 12.28s/it]

 val_accuracy:86.43333333333334, val_loss:0.37194775108772016, train_accuracy:88.58888888888889, train_loss:0.30911174605330644


 80%|████████  | 8/10 [01:37<00:24, 12.26s/it]

 val_accuracy:86.5, val_loss:0.373609702112433, train_accuracy:88.92777777777778, train_loss:0.3011321917510442


 90%|█████████ | 9/10 [01:50<00:12, 12.38s/it]

 val_accuracy:86.73333333333333, val_loss:0.3675120616105784, train_accuracy:89.25, train_loss:0.2907130263880147


100%|██████████| 10/10 [02:02<00:00, 12.27s/it]

 val_accuracy:86.7, val_loss:0.3695666464647429, train_accuracy:89.45185185185186, train_loss:0.28496073791117654





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▄▆▇▇█████
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,89.45185
train_loss,0.28496
val_accuracy,86.7
val_loss,0.36957


[34m[1mwandb[0m: Agent Starting Run: 30y08wu8 with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:13<02:02, 13.58s/it]

 val_accuracy:83.2, val_loss:0.49494193193827574, train_accuracy:83.35925925925926, train_loss:0.4905244267547484


 20%|██        | 2/10 [00:27<01:48, 13.54s/it]

 val_accuracy:84.85, val_loss:0.46640878194057206, train_accuracy:85.18333333333334, train_loss:0.45477526662200507


 30%|███       | 3/10 [00:41<01:36, 13.79s/it]

 val_accuracy:85.48333333333333, val_loss:0.4778489096623024, train_accuracy:85.58703703703704, train_loss:0.46253857314025415


 40%|████      | 4/10 [00:55<01:22, 13.83s/it]

 val_accuracy:84.86666666666666, val_loss:0.5104457459305839, train_accuracy:85.55555555555556, train_loss:0.4669236256730613


 50%|█████     | 5/10 [01:09<01:09, 13.94s/it]

 val_accuracy:83.78333333333333, val_loss:0.5861232025811044, train_accuracy:84.75925925925925, train_loss:nan


 60%|██████    | 6/10 [01:24<00:57, 14.37s/it]

 val_accuracy:83.11666666666666, val_loss:nan, train_accuracy:83.60185185185185, train_loss:nan


 70%|███████   | 7/10 [01:38<00:42, 14.28s/it]

 val_accuracy:83.61666666666666, val_loss:nan, train_accuracy:84.55185185185185, train_loss:nan


 80%|████████  | 8/10 [01:52<00:28, 14.21s/it]

 val_accuracy:84.38333333333334, val_loss:nan, train_accuracy:84.9962962962963, train_loss:nan


 90%|█████████ | 9/10 [02:07<00:14, 14.39s/it]

 val_accuracy:83.43333333333334, val_loss:nan, train_accuracy:84.11666666666666, train_loss:nan


100%|██████████| 10/10 [02:21<00:00, 14.13s/it]

 val_accuracy:83.6, val_loss:nan, train_accuracy:84.44259259259259, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▇██▅▂▅▆▃▄
train_loss,█▁▃▃
val_accuracy,▁▆█▆▃▁▂▅▂▂
val_loss,▃▁▂▄█

0,1
epoch,10.0
train_accuracy,84.44259
train_loss,
val_accuracy,83.6
val_loss,


[34m[1mwandb[0m: Agent Starting Run: oyinx5q0 with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:36<05:27, 36.43s/it]

 val_accuracy:85.13333333333334, val_loss:0.4102531663796204, train_accuracy:86.01851851851852, train_loss:0.3906107710158986


 20%|██        | 2/10 [01:12<04:48, 36.08s/it]

 val_accuracy:86.45, val_loss:0.37366832512494574, train_accuracy:87.54629629629629, train_loss:0.34826495152424186


 30%|███       | 3/10 [01:48<04:12, 36.02s/it]

 val_accuracy:87.23333333333333, val_loss:0.352824090653081, train_accuracy:88.3962962962963, train_loss:0.3247437572234698


 40%|████      | 4/10 [02:23<03:34, 35.73s/it]

 val_accuracy:87.45, val_loss:0.34757924146285585, train_accuracy:88.86666666666666, train_loss:0.31191246817073337


 50%|█████     | 5/10 [03:01<03:02, 36.55s/it]

 val_accuracy:87.58333333333333, val_loss:0.34193797908519014, train_accuracy:89.29814814814814, train_loss:0.3031744323941878


 60%|██████    | 6/10 [03:38<02:26, 36.71s/it]

 val_accuracy:87.78333333333333, val_loss:0.34273208263429117, train_accuracy:89.50185185185185, train_loss:0.2967372223213973


 70%|███████   | 7/10 [04:14<01:49, 36.38s/it]

 val_accuracy:88.0, val_loss:0.33930023717358765, train_accuracy:89.83148148148148, train_loss:0.28686351730065074


 80%|████████  | 8/10 [04:49<01:12, 36.18s/it]

 val_accuracy:88.2, val_loss:0.3386986740309704, train_accuracy:90.2, train_loss:0.2798460108650953


 90%|█████████ | 9/10 [05:25<00:36, 36.13s/it]

 val_accuracy:88.21666666666667, val_loss:0.33827998097896794, train_accuracy:90.42037037037036, train_loss:0.27271806937108195


100%|██████████| 10/10 [06:01<00:00, 36.20s/it]

 val_accuracy:88.51666666666667, val_loss:0.3377476577388441, train_accuracy:90.64444444444445, train_loss:0.266098823324254





VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.09017917511832318, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▅▆▆▇▇██
train_loss,█▆▄▄▃▃▂▂▁▁
val_accuracy,▁▄▅▆▆▆▇▇▇█
val_loss,█▄▂▂▁▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,90.64444
train_loss,0.2661
val_accuracy,88.51667
val_loss,0.33775


[34m[1mwandb[0m: Agent Starting Run: y5lxxrke with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nag
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:11<01:47, 11.90s/it]

 val_accuracy:80.81666666666666, val_loss:0.5286070826107837, train_accuracy:81.36111111111111, train_loss:0.5271910003651769


 20%|██        | 2/10 [00:22<01:31, 11.42s/it]

 val_accuracy:83.05, val_loss:0.4668805623537296, train_accuracy:83.50555555555556, train_loss:0.46151828289086005


 30%|███       | 3/10 [00:33<01:16, 10.99s/it]

 val_accuracy:84.01666666666667, val_loss:0.4379257328439478, train_accuracy:84.80185185185185, train_loss:0.4266376239628702


 40%|████      | 4/10 [00:43<01:04, 10.80s/it]

 val_accuracy:84.65, val_loss:0.4199469199737782, train_accuracy:85.67777777777778, train_loss:0.4034433607373356


 50%|█████     | 5/10 [00:56<00:57, 11.53s/it]

 val_accuracy:85.28333333333333, val_loss:0.40604575058595604, train_accuracy:86.31851851851852, train_loss:0.3859866798495223


 60%|██████    | 6/10 [01:08<00:46, 11.56s/it]

 val_accuracy:85.55, val_loss:0.3958159207293033, train_accuracy:86.80185185185185, train_loss:0.37197281163345214


 70%|███████   | 7/10 [01:20<00:34, 11.63s/it]

 val_accuracy:85.86666666666666, val_loss:0.3882377299594369, train_accuracy:87.1574074074074, train_loss:0.361350921256192


 80%|████████  | 8/10 [01:32<00:23, 11.70s/it]

 val_accuracy:86.31666666666666, val_loss:0.3819625570512964, train_accuracy:87.50555555555556, train_loss:0.35115993456982897


 90%|█████████ | 9/10 [01:43<00:11, 11.70s/it]

 val_accuracy:86.43333333333334, val_loss:0.3751698465501155, train_accuracy:87.81111111111112, train_loss:0.34112924537019756


100%|██████████| 10/10 [01:55<00:00, 11.57s/it]

 val_accuracy:86.61666666666666, val_loss:0.3712517587296662, train_accuracy:87.93888888888888, train_loss:0.3345007231773848





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▆▆▇▇███
train_loss,█▆▄▄▃▂▂▂▁▁
val_accuracy,▁▄▅▆▆▇▇███
val_loss,█▅▄▃▃▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,87.93889
train_loss,0.3345
val_accuracy,86.61667
val_loss,0.37125


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5wubt7t4 with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:17<02:41, 17.91s/it]

 val_accuracy:83.75, val_loss:0.45400566412684307, train_accuracy:83.92037037037036, train_loss:0.4500210238447983


 20%|██        | 2/10 [00:38<02:37, 19.73s/it]

 val_accuracy:85.5, val_loss:0.4036124271675504, train_accuracy:86.13518518518518, train_loss:0.39156959151394843


 30%|███       | 3/10 [01:01<02:27, 21.10s/it]

 val_accuracy:86.01666666666667, val_loss:0.38390492359809736, train_accuracy:87.2, train_loss:0.3662617664269291


 40%|████      | 4/10 [01:23<02:07, 21.28s/it]

 val_accuracy:86.38333333333334, val_loss:0.3799258637452766, train_accuracy:87.59074074074074, train_loss:0.35469499777979385


 50%|█████     | 5/10 [01:44<01:45, 21.13s/it]

 val_accuracy:86.38333333333334, val_loss:0.3757661816021723, train_accuracy:87.92407407407407, train_loss:0.34549612645273764


 60%|██████    | 6/10 [02:07<01:27, 21.76s/it]

 val_accuracy:86.68333333333334, val_loss:0.37273078972211593, train_accuracy:88.28888888888889, train_loss:0.3370984339558871


 70%|███████   | 7/10 [02:28<01:04, 21.63s/it]

 val_accuracy:86.95, val_loss:0.3667671152527129, train_accuracy:88.60740740740741, train_loss:0.32729537451582863


 80%|████████  | 8/10 [02:49<00:42, 21.49s/it]

 val_accuracy:87.35, val_loss:0.36274710107922087, train_accuracy:88.88518518518518, train_loss:0.31888399391433336


 90%|█████████ | 9/10 [03:12<00:22, 22.07s/it]

 val_accuracy:87.4, val_loss:0.3608063612065917, train_accuracy:89.1, train_loss:0.31326490450254607


100%|██████████| 10/10 [03:34<00:00, 21.43s/it]

 val_accuracy:87.48333333333333, val_loss:0.35913580519382654, train_accuracy:89.26481481481481, train_loss:0.31002891572060537





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▃▂▂▁▁▁
val_accuracy,▁▄▅▆▆▇▇███
val_loss,█▄▃▃▂▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,89.26481
train_loss,0.31003
val_accuracy,87.48333
val_loss,0.35914


[34m[1mwandb[0m: Agent Starting Run: fkdpdn9o with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0


 10%|█         | 1/10 [00:12<01:51, 12.40s/it]

 val_accuracy:83.75, val_loss:0.45698411116810583, train_accuracy:83.96666666666667, train_loss:0.4446859839417465


 20%|██        | 2/10 [00:32<02:13, 16.64s/it]

 val_accuracy:85.78333333333333, val_loss:0.4007565961382921, train_accuracy:86.54444444444445, train_loss:0.3745922323051144


 30%|███       | 3/10 [00:52<02:10, 18.63s/it]

 val_accuracy:86.53333333333333, val_loss:0.3704740968563855, train_accuracy:88.08333333333333, train_loss:0.33206543933016597


 40%|████      | 4/10 [01:12<01:54, 19.11s/it]

 val_accuracy:87.21666666666667, val_loss:0.35331803069426626, train_accuracy:89.00185185185185, train_loss:0.30546971726025207


 50%|█████     | 5/10 [01:34<01:39, 19.91s/it]

 val_accuracy:87.3, val_loss:0.3454010281644992, train_accuracy:89.59444444444445, train_loss:0.2896252886649662


 60%|██████    | 6/10 [01:54<01:19, 19.98s/it]

 val_accuracy:87.65, val_loss:0.3361048268150755, train_accuracy:90.19814814814815, train_loss:0.2720551012480583


 70%|███████   | 7/10 [02:15<01:01, 20.46s/it]

 val_accuracy:87.85, val_loss:0.3318189307005072, train_accuracy:90.61851851851851, train_loss:0.26144661316190393


 80%|████████  | 8/10 [02:36<00:41, 20.56s/it]

 val_accuracy:87.76666666666667, val_loss:0.3345671254517523, train_accuracy:90.86296296296297, train_loss:0.2526587344134978


 90%|█████████ | 9/10 [02:56<00:20, 20.35s/it]

 val_accuracy:87.86666666666666, val_loss:0.3356940072917211, train_accuracy:91.03703703703704, train_loss:0.24485576508039264


100%|██████████| 10/10 [03:19<00:00, 19.94s/it]

 val_accuracy:87.71666666666667, val_loss:0.3343658192149754, train_accuracy:91.44074074074074, train_loss:0.23341832274923244





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▆▆▇▇▇██
train_loss,█▆▄▃▃▂▂▂▁▁
val_accuracy,▁▄▆▇▇█████
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,91.44074
train_loss,0.23342
val_accuracy,87.71667
val_loss,0.33437


[34m[1mwandb[0m: Agent Starting Run: drnf69xj with config:
[34m[1mwandb[0m: 	activation_func: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:16<02:27, 16.43s/it]

 val_accuracy:26.95, val_loss:1.8954879872773627, train_accuracy:27.52962962962963, train_loss:1.8752736087432575


 20%|██        | 2/10 [00:40<02:45, 20.72s/it]

 val_accuracy:51.88333333333333, val_loss:1.2290503426320376, train_accuracy:51.49629629629629, train_loss:1.213634572773553


 30%|███       | 3/10 [01:03<02:32, 21.75s/it]

 val_accuracy:58.55, val_loss:1.0103642128555408, train_accuracy:59.20740740740741, train_loss:0.9908315549891933


 40%|████      | 4/10 [01:25<02:11, 21.98s/it]

 val_accuracy:60.8, val_loss:0.9384816806715066, train_accuracy:61.714814814814815, train_loss:0.9175197421083541


 50%|█████     | 5/10 [01:49<01:52, 22.55s/it]

 val_accuracy:61.36666666666667, val_loss:0.9141045564540673, train_accuracy:62.53703703703704, train_loss:0.8928980473476855


 60%|██████    | 6/10 [02:13<01:32, 23.04s/it]

 val_accuracy:62.61666666666667, val_loss:0.9021232335004469, train_accuracy:64.16296296296296, train_loss:0.8815684220028325


 70%|███████   | 7/10 [02:36<01:09, 23.09s/it]

 val_accuracy:63.65, val_loss:0.8943033968017852, train_accuracy:65.08888888888889, train_loss:0.8748878621633874


 80%|████████  | 8/10 [02:59<00:46, 23.08s/it]

 val_accuracy:64.4, val_loss:0.8881372451100336, train_accuracy:65.73333333333333, train_loss:0.8702819219049523


 90%|█████████ | 9/10 [03:21<00:22, 22.95s/it]

 val_accuracy:65.36666666666666, val_loss:0.8813333490690624, train_accuracy:66.18888888888888, train_loss:0.865449793032676


100%|██████████| 10/10 [03:45<00:00, 22.59s/it]

 val_accuracy:66.08333333333333, val_loss:0.8715185271773012, train_accuracy:66.88333333333334, train_loss:0.8577511538888045





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▇▇▇█████
train_loss,█▃▂▁▁▁▁▁▁▁
val_accuracy,▁▅▇▇▇▇████
val_loss,█▃▂▁▁▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,66.88333
train_loss,0.85775
val_accuracy,66.08333
val_loss,0.87152


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7m74vi80 with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:40<06:04, 40.55s/it]

 val_accuracy:85.83333333333333, val_loss:0.41020328823593855, train_accuracy:86.59074074074074, train_loss:0.3764888716078967


 20%|██        | 2/10 [01:20<05:21, 40.17s/it]

 val_accuracy:86.55, val_loss:0.3918844369065555, train_accuracy:87.68888888888888, train_loss:0.34436588161877785


 30%|███       | 3/10 [02:01<04:43, 40.46s/it]

 val_accuracy:86.93333333333334, val_loss:0.39115673992380173, train_accuracy:87.88148148148149, train_loss:0.3390808736672873


 40%|████      | 4/10 [02:42<04:04, 40.81s/it]

 val_accuracy:86.8, val_loss:0.3978981047935957, train_accuracy:88.16666666666667, train_loss:0.3369492298724995


 50%|█████     | 5/10 [03:24<03:26, 41.20s/it]

 val_accuracy:86.8, val_loss:0.392908366038718, train_accuracy:88.65185185185184, train_loss:0.32629836229107617


 60%|██████    | 6/10 [04:05<02:44, 41.10s/it]

 val_accuracy:87.05, val_loss:0.39223632247323137, train_accuracy:88.9574074074074, train_loss:0.3185821431247941


 70%|███████   | 7/10 [04:47<02:04, 41.35s/it]

 val_accuracy:87.31666666666666, val_loss:0.38763223655200657, train_accuracy:89.42407407407407, train_loss:0.3055556985862049


 80%|████████  | 8/10 [05:28<01:22, 41.27s/it]

 val_accuracy:87.68333333333334, val_loss:0.3932105323127385, train_accuracy:89.56296296296296, train_loss:0.3013362903535378


 90%|█████████ | 9/10 [06:09<00:41, 41.19s/it]

 val_accuracy:87.41666666666667, val_loss:0.3994968691399709, train_accuracy:89.67592592592592, train_loss:0.3014907565336865


100%|██████████| 10/10 [06:51<00:00, 41.18s/it]

 val_accuracy:87.5, val_loss:0.4025652772661843, train_accuracy:89.96111111111111, train_loss:0.2946542192279613





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▄▅▆▇▇▇█
train_loss,█▅▅▅▄▃▂▂▂▁
val_accuracy,▁▄▅▅▅▆▇█▇▇
val_loss,█▂▂▄▃▂▁▃▅▆

0,1
epoch,10.0
train_accuracy,89.96111
train_loss,0.29465
val_accuracy,87.5
val_loss,0.40257


[34m[1mwandb[0m: Agent Starting Run: jpblbh7m with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:12<01:53, 12.66s/it]

 val_accuracy:83.46666666666667, val_loss:0.47451087072987547, train_accuracy:83.75740740740741, train_loss:0.45989442134063124


 20%|██        | 2/10 [00:31<02:12, 16.52s/it]

 val_accuracy:84.63333333333334, val_loss:0.45372499805071204, train_accuracy:85.21296296296296, train_loss:0.43441780439138006


 30%|███       | 3/10 [00:49<02:00, 17.25s/it]

 val_accuracy:85.5, val_loss:0.4433101610372164, train_accuracy:86.18518518518519, train_loss:0.4163802250071222


 40%|████      | 4/10 [01:08<01:45, 17.57s/it]

 val_accuracy:85.5, val_loss:0.442045688231378, train_accuracy:86.27592592592593, train_loss:0.4156589827715365


 50%|█████     | 5/10 [01:29<01:34, 18.87s/it]

 val_accuracy:85.63333333333334, val_loss:0.4766696640343645, train_accuracy:86.28703703703704, train_loss:0.43428502449642536


 60%|██████    | 6/10 [01:46<01:13, 18.45s/it]

 val_accuracy:86.0, val_loss:0.4714669960431445, train_accuracy:86.93888888888888, train_loss:0.425402496847196


 70%|███████   | 7/10 [02:06<00:56, 18.86s/it]

 val_accuracy:85.65, val_loss:0.487964479899841, train_accuracy:86.5425925925926, train_loss:0.4460085911576262


 80%|████████  | 8/10 [02:24<00:37, 18.53s/it]

 val_accuracy:86.08333333333333, val_loss:0.5122448968296072, train_accuracy:86.86481481481482, train_loss:0.4496134042376304


 90%|█████████ | 9/10 [02:43<00:18, 18.70s/it]

 val_accuracy:85.6, val_loss:0.5539709449137487, train_accuracy:86.36296296296297, train_loss:nan


100%|██████████| 10/10 [03:01<00:00, 18.14s/it]

 val_accuracy:86.21666666666667, val_loss:nan, train_accuracy:86.78518518518518, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▇▇█▇█▇█
train_loss,█▄▁▁▄▃▆▆
val_accuracy,▁▄▆▆▇▇▇█▆█
val_loss,▃▂▁▁▃▃▄▅█

0,1
epoch,10.0
train_accuracy,86.78519
train_loss,
val_accuracy,86.21667
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 3khwyof1 with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:11<01:40, 11.14s/it]

 val_accuracy:83.7, val_loss:0.4811351257549647, train_accuracy:83.75, train_loss:0.4607127140831745


 20%|██        | 2/10 [00:27<01:52, 14.05s/it]

 val_accuracy:85.01666666666667, val_loss:0.441163692373571, train_accuracy:85.76481481481481, train_loss:0.41891325251900835


 30%|███       | 3/10 [00:44<01:47, 15.41s/it]

 val_accuracy:85.65, val_loss:0.43389169520655924, train_accuracy:86.33888888888889, train_loss:0.404376986028582


 40%|████      | 4/10 [01:00<01:34, 15.73s/it]

 val_accuracy:85.83333333333333, val_loss:0.45138080739452524, train_accuracy:86.7611111111111, train_loss:0.41012601529466286


 50%|█████     | 5/10 [01:16<01:19, 15.84s/it]

 val_accuracy:86.26666666666667, val_loss:0.45600506007591524, train_accuracy:86.99444444444444, train_loss:0.40709614338835787


 60%|██████    | 6/10 [01:32<01:03, 15.90s/it]

 val_accuracy:86.55, val_loss:0.4571441117656915, train_accuracy:87.34444444444445, train_loss:0.408715990374669


 70%|███████   | 7/10 [01:50<00:49, 16.47s/it]

 val_accuracy:86.6, val_loss:0.4692444433599042, train_accuracy:87.46851851851852, train_loss:0.4137452025265026


 80%|████████  | 8/10 [02:06<00:32, 16.37s/it]

 val_accuracy:86.71666666666667, val_loss:0.4755059790179814, train_accuracy:87.6037037037037, train_loss:0.4180469505866634


 90%|█████████ | 9/10 [02:24<00:16, 16.81s/it]

 val_accuracy:86.33333333333333, val_loss:0.495009409428338, train_accuracy:87.28888888888889, train_loss:0.42947745911264235


100%|██████████| 10/10 [02:41<00:00, 16.15s/it]

 val_accuracy:86.0, val_loss:nan, train_accuracy:87.29814814814814, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▆▇███▇▇
train_loss,█▃▁▂▁▂▂▃▄
val_accuracy,▁▄▆▆▇███▇▆
val_loss,▆▂▁▃▄▄▅▆█

0,1
epoch,10.0
train_accuracy,87.29815
train_loss,
val_accuracy,86.0
val_loss,


[34m[1mwandb[0m: Agent Starting Run: oypv42ug with config:
[34m[1mwandb[0m: 	activation_func: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:28<04:12, 28.06s/it]

 val_accuracy:45.0, val_loss:1.3571917644642046, train_accuracy:44.9537037037037, train_loss:1.3606028620674961


 20%|██        | 2/10 [00:56<03:47, 28.42s/it]

 val_accuracy:61.483333333333334, val_loss:0.9663934672416157, train_accuracy:61.922222222222224, train_loss:0.9677588960754417


 30%|███       | 3/10 [01:25<03:19, 28.50s/it]

 val_accuracy:64.8, val_loss:0.8549532534806066, train_accuracy:65.85185185185185, train_loss:0.8549431538563718


 40%|████      | 4/10 [01:52<02:48, 28.04s/it]

 val_accuracy:67.33333333333333, val_loss:0.8117146811454352, train_accuracy:68.41851851851852, train_loss:0.8111239977661117


 50%|█████     | 5/10 [02:20<02:19, 27.91s/it]

 val_accuracy:68.41666666666667, val_loss:0.7864608569193728, train_accuracy:69.45370370370371, train_loss:0.7860200572527061


 60%|██████    | 6/10 [02:48<01:52, 28.03s/it]

 val_accuracy:69.28333333333333, val_loss:0.7657631309978994, train_accuracy:70.27777777777777, train_loss:0.7662323478297303


 70%|███████   | 7/10 [03:16<01:23, 27.95s/it]

 val_accuracy:70.21666666666667, val_loss:0.745316441649533, train_accuracy:71.12407407407407, train_loss:0.7473968464761812


 80%|████████  | 8/10 [03:47<00:57, 28.92s/it]

 val_accuracy:70.96666666666667, val_loss:0.7241136984692668, train_accuracy:71.94259259259259, train_loss:0.7281256648105405


 90%|█████████ | 9/10 [04:15<00:28, 28.56s/it]

 val_accuracy:72.11666666666666, val_loss:0.7025039833263235, train_accuracy:72.75925925925925, train_loss:0.7083703947960939


100%|██████████| 10/10 [04:43<00:00, 28.34s/it]

 val_accuracy:72.83333333333333, val_loss:0.6819187569560832, train_accuracy:73.53888888888889, train_loss:0.6891656694189152





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▇▇▇▇███
train_loss,█▄▃▂▂▂▂▁▁▁
val_accuracy,▁▅▆▇▇▇▇███
val_loss,█▄▃▂▂▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,73.53889
train_loss,0.68917
val_accuracy,72.83333
val_loss,0.68192


[34m[1mwandb[0m: Agent Starting Run: 1wvj75ng with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:21<03:14, 21.66s/it]

 val_accuracy:84.5, val_loss:0.45260117162757807, train_accuracy:83.9462962962963, train_loss:0.4394056155575667


 20%|██        | 2/10 [00:43<02:55, 21.97s/it]

 val_accuracy:85.75, val_loss:0.4067994350498333, train_accuracy:86.0962962962963, train_loss:0.382795738003683


 30%|███       | 3/10 [01:05<02:33, 21.99s/it]

 val_accuracy:86.4, val_loss:0.3851150591415122, train_accuracy:87.17037037037036, train_loss:0.3530686851849777


 40%|████      | 4/10 [01:27<02:11, 21.84s/it]

 val_accuracy:86.71666666666667, val_loss:0.37126304923486064, train_accuracy:87.81851851851852, train_loss:0.33264742676473


 50%|█████     | 5/10 [01:51<01:53, 22.65s/it]

 val_accuracy:87.03333333333333, val_loss:0.3612689001875675, train_accuracy:88.35185185185185, train_loss:0.3167843939271924


 60%|██████    | 6/10 [02:12<01:28, 22.23s/it]

 val_accuracy:87.33333333333333, val_loss:0.3536330568984663, train_accuracy:88.84444444444445, train_loss:0.3036835166410563


 70%|███████   | 7/10 [02:35<01:07, 22.35s/it]

 val_accuracy:87.56666666666666, val_loss:0.347813170966546, train_accuracy:89.25740740740741, train_loss:0.29263348706512404


 80%|████████  | 8/10 [02:58<00:45, 22.54s/it]

 val_accuracy:87.73333333333333, val_loss:0.3435727122604507, train_accuracy:89.53703703703704, train_loss:0.2831932645131015


 90%|█████████ | 9/10 [03:21<00:22, 22.66s/it]

 val_accuracy:87.9, val_loss:0.34060707408613927, train_accuracy:89.86111111111111, train_loss:0.2749455156741211


100%|██████████| 10/10 [03:43<00:00, 22.36s/it]

 val_accuracy:88.1, val_loss:0.3385781634011917, train_accuracy:90.15555555555555, train_loss:0.2675248276750387





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▅▆▇▇▇██
train_loss,█▆▄▄▃▂▂▂▁▁
val_accuracy,▁▃▅▅▆▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,90.15556
train_loss,0.26752
val_accuracy,88.1
val_loss,0.33858


[34m[1mwandb[0m: Agent Starting Run: 84axqm2u with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:40<06:04, 40.48s/it]

 val_accuracy:85.83333333333333, val_loss:0.41020328823593855, train_accuracy:86.59074074074074, train_loss:0.3764888716078967


 20%|██        | 2/10 [01:20<05:22, 40.32s/it]

 val_accuracy:86.55, val_loss:0.3918844369065555, train_accuracy:87.68888888888888, train_loss:0.34436588161877785


 30%|███       | 3/10 [02:01<04:44, 40.67s/it]

 val_accuracy:86.93333333333334, val_loss:0.39115673992380173, train_accuracy:87.88148148148149, train_loss:0.3390808736672873


 40%|████      | 4/10 [02:43<04:06, 41.11s/it]

 val_accuracy:86.8, val_loss:0.3978981047935957, train_accuracy:88.16666666666667, train_loss:0.3369492298724995


 50%|█████     | 5/10 [03:28<03:32, 42.43s/it]

 val_accuracy:86.8, val_loss:0.392908366038718, train_accuracy:88.65185185185184, train_loss:0.32629836229107617


 60%|██████    | 6/10 [04:09<02:47, 41.83s/it]

 val_accuracy:87.05, val_loss:0.39223632247323137, train_accuracy:88.9574074074074, train_loss:0.3185821431247941


 70%|███████   | 7/10 [04:49<02:04, 41.35s/it]

 val_accuracy:87.31666666666666, val_loss:0.38763223655200657, train_accuracy:89.42407407407407, train_loss:0.3055556985862049


 80%|████████  | 8/10 [05:29<01:21, 40.94s/it]

 val_accuracy:87.68333333333334, val_loss:0.3932105323127385, train_accuracy:89.56296296296296, train_loss:0.3013362903535378


 90%|█████████ | 9/10 [06:09<00:40, 40.70s/it]

 val_accuracy:87.41666666666667, val_loss:0.3994968691399709, train_accuracy:89.67592592592592, train_loss:0.3014907565336865


100%|██████████| 10/10 [06:50<00:00, 41.09s/it]

 val_accuracy:87.5, val_loss:0.4025652772661843, train_accuracy:89.96111111111111, train_loss:0.2946542192279613





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▄▅▆▇▇▇█
train_loss,█▅▅▅▄▃▂▂▂▁
val_accuracy,▁▄▅▅▅▆▇█▇▇
val_loss,█▂▂▄▃▂▁▃▅▆

0,1
epoch,10.0
train_accuracy,89.96111
train_loss,0.29465
val_accuracy,87.5
val_loss,0.40257


[34m[1mwandb[0m: Agent Starting Run: b4o0pkk7 with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 3
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0


 10%|█         | 1/10 [00:09<01:29,  9.96s/it]

 val_accuracy:82.58333333333333, val_loss:0.499882132600061, train_accuracy:82.64814814814815, train_loss:0.4921803117362299


 20%|██        | 2/10 [00:20<01:23, 10.49s/it]

 val_accuracy:84.73333333333333, val_loss:0.44886615889676157, train_accuracy:85.2388888888889, train_loss:0.4320030154301949


 30%|███       | 3/10 [00:30<01:11, 10.26s/it]

 val_accuracy:85.41666666666667, val_loss:0.4420975736232603, train_accuracy:85.93333333333334, train_loss:0.4199549005505863


 40%|████      | 4/10 [00:42<01:05, 10.91s/it]

 val_accuracy:85.9, val_loss:0.4459824168948349, train_accuracy:86.61666666666666, train_loss:0.41477034893513803


 50%|█████     | 5/10 [00:54<00:56, 11.21s/it]

 val_accuracy:86.16666666666667, val_loss:0.45515905875662954, train_accuracy:86.47407407407407, train_loss:0.43168200869015083


 60%|██████    | 6/10 [01:06<00:45, 11.48s/it]

 val_accuracy:85.75, val_loss:0.4684414008499416, train_accuracy:86.48518518518519, train_loss:0.44131103818893336


 70%|███████   | 7/10 [01:18<00:35, 11.67s/it]

 val_accuracy:85.8, val_loss:0.4959727120369318, train_accuracy:85.96481481481482, train_loss:0.47580414945096333


 80%|████████  | 8/10 [01:30<00:23, 11.74s/it]

 val_accuracy:84.93333333333334, val_loss:nan, train_accuracy:85.62962962962963, train_loss:nan


 90%|█████████ | 9/10 [01:42<00:11, 11.72s/it]

 val_accuracy:84.0, val_loss:nan, train_accuracy:84.89259259259259, train_loss:nan


100%|██████████| 10/10 [01:52<00:00, 11.28s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,█████████▁
train_loss,█▃▁▁▃▃▇
val_accuracy,█████████▁
val_loss,█▂▁▁▃▄█

0,1
epoch,10.0
train_accuracy,9.9963
train_loss,
val_accuracy,10.03333
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 6yr578qd with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:11<01:43, 11.51s/it]

 val_accuracy:83.46666666666667, val_loss:0.49310464436882717, train_accuracy:83.75740740740741, train_loss:0.4619603961894037


 20%|██        | 2/10 [00:29<02:03, 15.48s/it]

 val_accuracy:84.63333333333334, val_loss:0.47249133772084756, train_accuracy:85.21296296296296, train_loss:0.4365029532436173


 30%|███       | 3/10 [00:47<01:55, 16.56s/it]

 val_accuracy:85.5, val_loss:0.46230367750726514, train_accuracy:86.18518518518519, train_loss:0.4184906157260165


 40%|████      | 4/10 [01:07<01:46, 17.74s/it]

 val_accuracy:85.5, val_loss:0.46129182160612264, train_accuracy:86.27592592592593, train_loss:0.417797442035397


 50%|█████     | 5/10 [01:24<01:28, 17.66s/it]

 val_accuracy:85.63333333333334, val_loss:0.49621128335665543, train_accuracy:86.28703703703704, train_loss:0.4364563155322354


 60%|██████    | 6/10 [01:43<01:12, 18.10s/it]

 val_accuracy:86.0, val_loss:0.4913163401644175, train_accuracy:86.93888888888888, train_loss:0.4276079795273375


 70%|███████   | 7/10 [02:01<00:53, 18.00s/it]

 val_accuracy:85.65, val_loss:0.5081320533175089, train_accuracy:86.5425925925926, train_loss:0.4482494326484782


 80%|████████  | 8/10 [02:20<00:36, 18.44s/it]

 val_accuracy:86.08333333333333, val_loss:0.5327466839057454, train_accuracy:86.86481481481482, train_loss:0.4518913805794235


 90%|█████████ | 9/10 [02:40<00:18, 18.71s/it]

 val_accuracy:85.6, val_loss:0.574808735766227, train_accuracy:86.36296296296297, train_loss:nan


100%|██████████| 10/10 [02:57<00:00, 17.77s/it]

 val_accuracy:86.21666666666667, val_loss:nan, train_accuracy:86.78518518518518, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▇▇█▇█▇█
train_loss,█▄▁▁▄▃▆▆
val_accuracy,▁▄▆▆▇▇▇█▆█
val_loss,▃▂▁▁▃▃▄▅█

0,1
epoch,10.0
train_accuracy,86.78519
train_loss,
val_accuracy,86.21667
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0a4n88ph with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:15<02:23, 15.91s/it]

 val_accuracy:83.7, val_loss:0.47654495406797326, train_accuracy:84.38518518518518, train_loss:0.44785511826696783


 20%|██        | 2/10 [00:43<03:00, 22.53s/it]

 val_accuracy:84.76666666666667, val_loss:0.4825187272317493, train_accuracy:85.37222222222222, train_loss:0.45315215631777134


 30%|███       | 3/10 [01:09<02:50, 24.38s/it]

 val_accuracy:85.13333333333334, val_loss:0.5016661649101292, train_accuracy:85.8537037037037, train_loss:0.4559754554159153


 40%|████      | 4/10 [01:36<02:33, 25.54s/it]

 val_accuracy:85.3, val_loss:0.5634702146530678, train_accuracy:85.69444444444444, train_loss:nan


 50%|█████     | 5/10 [02:04<02:11, 26.24s/it]

 val_accuracy:83.76666666666667, val_loss:nan, train_accuracy:84.28148148148148, train_loss:nan


 60%|██████    | 6/10 [02:30<01:45, 26.34s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 70%|███████   | 7/10 [02:57<01:19, 26.51s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 80%|████████  | 8/10 [03:25<00:54, 27.02s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 90%|█████████ | 9/10 [03:52<00:26, 26.77s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


100%|██████████| 10/10 [04:18<00:00, 25.86s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,█████▁▁▁▁▁
train_loss,▁▆█
val_accuracy,█████▁▁▁▁▁
val_loss,▁▁▃█

0,1
epoch,10.0
train_accuracy,9.9963
train_loss,
val_accuracy,10.03333
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cn0qf51k with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: random_uniform
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:28<04:13, 28.19s/it]

 val_accuracy:61.15, val_loss:1.57790310363744, train_accuracy:62.257407407407406, train_loss:1.3150391724421855


 20%|██        | 2/10 [01:04<04:23, 32.88s/it]

 val_accuracy:69.73333333333333, val_loss:1.1666075448106268, train_accuracy:71.21666666666667, train_loss:0.9187884697021187


 30%|███       | 3/10 [01:40<04:01, 34.54s/it]

 val_accuracy:74.26666666666667, val_loss:1.012307973138958, train_accuracy:75.29074074074074, train_loss:0.7508531543317397


 40%|████      | 4/10 [02:18<03:34, 35.70s/it]

 val_accuracy:75.56666666666666, val_loss:0.9430566632936157, train_accuracy:77.86296296296297, train_loss:0.6639631101009094


 50%|█████     | 5/10 [02:55<03:01, 36.39s/it]

 val_accuracy:76.71666666666667, val_loss:0.893367594326633, train_accuracy:79.69814814814815, train_loss:0.5968129089064775


 60%|██████    | 6/10 [03:31<02:24, 36.20s/it]

 val_accuracy:77.81666666666666, val_loss:0.861590405859373, train_accuracy:81.13518518518518, train_loss:0.5539552706878418


 70%|███████   | 7/10 [04:08<01:48, 36.21s/it]

 val_accuracy:78.2, val_loss:0.8429261461026056, train_accuracy:82.32037037037037, train_loss:0.5196384198425901


 80%|████████  | 8/10 [04:44<01:12, 36.25s/it]

 val_accuracy:78.9, val_loss:0.8292516992664303, train_accuracy:82.88333333333334, train_loss:0.4992045756202192


 90%|█████████ | 9/10 [05:19<00:36, 36.03s/it]

 val_accuracy:79.03333333333333, val_loss:0.823956460451731, train_accuracy:83.83703703703704, train_loss:0.47167339287365945


100%|██████████| 10/10 [05:56<00:00, 35.68s/it]

 val_accuracy:79.33333333333333, val_loss:0.814487891605304, train_accuracy:84.46666666666667, train_loss:0.45169353649562144





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇███
train_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▄▆▇▇▇████
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,84.46667
train_loss,0.45169
val_accuracy,79.33333
val_loss,0.81449


[34m[1mwandb[0m: Agent Starting Run: 0h5uf4sp with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:06<00:54,  6.06s/it]

 val_accuracy:80.73333333333333, val_loss:0.5663592918985295, train_accuracy:80.78703703703704, train_loss:0.5502928062089822


 20%|██        | 2/10 [00:16<01:08,  8.56s/it]

 val_accuracy:83.33333333333333, val_loss:0.4978480531011543, train_accuracy:83.70370370370371, train_loss:0.47892422940130946


 30%|███       | 3/10 [00:26<01:03,  9.07s/it]

 val_accuracy:84.21666666666667, val_loss:0.47214862929333035, train_accuracy:84.72037037037038, train_loss:0.4541046668932964


 40%|████      | 4/10 [00:37<00:59,  9.93s/it]

 val_accuracy:84.65, val_loss:0.4700074379002479, train_accuracy:85.34074074074074, train_loss:0.4424185526987476


 50%|█████     | 5/10 [00:46<00:47,  9.50s/it]

 val_accuracy:84.95, val_loss:0.47628275281966714, train_accuracy:85.65, train_loss:0.4385678877951816


 60%|██████    | 6/10 [00:56<00:39,  9.79s/it]

 val_accuracy:85.13333333333334, val_loss:0.4712057601459982, train_accuracy:85.97962962962963, train_loss:0.42964242412626075


 70%|███████   | 7/10 [01:06<00:29,  9.93s/it]

 val_accuracy:85.48333333333333, val_loss:0.4822272329147966, train_accuracy:86.19814814814815, train_loss:0.43678675935113037


 80%|████████  | 8/10 [01:16<00:19,  9.78s/it]

 val_accuracy:85.55, val_loss:0.49164408235380497, train_accuracy:86.42962962962963, train_loss:0.4381520219287328


 90%|█████████ | 9/10 [01:24<00:09,  9.39s/it]

 val_accuracy:84.93333333333334, val_loss:0.5013512090458766, train_accuracy:86.46666666666667, train_loss:0.4391680637197631


100%|██████████| 10/10 [01:34<00:00,  9.48s/it]

 val_accuracy:85.03333333333333, val_loss:0.500663035665394, train_accuracy:86.5425925925926, train_loss:0.44376157098652436





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▇▇▇████
train_loss,█▄▂▂▂▁▁▁▂▂
val_accuracy,▁▅▆▇▇▇██▇▇
val_loss,█▃▁▁▁▁▂▃▃▃

0,1
epoch,10.0
train_accuracy,86.54259
train_loss,0.44376
val_accuracy,85.03333
val_loss,0.50066


[34m[1mwandb[0m: Agent Starting Run: pmgk33q6 with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:11<01:47, 11.93s/it]

 val_accuracy:83.46666666666667, val_loss:0.49310464436882717, train_accuracy:83.75740740740741, train_loss:0.4619603961894037


 20%|██        | 2/10 [00:29<02:01, 15.18s/it]

 val_accuracy:84.63333333333334, val_loss:0.47249133772084756, train_accuracy:85.21296296296296, train_loss:0.4365029532436173


 30%|███       | 3/10 [00:47<01:56, 16.62s/it]

 val_accuracy:85.5, val_loss:0.46230367750726514, train_accuracy:86.18518518518519, train_loss:0.4184906157260165


 40%|████      | 4/10 [01:05<01:42, 17.01s/it]

 val_accuracy:85.5, val_loss:0.46129182160612264, train_accuracy:86.27592592592593, train_loss:0.417797442035397


 50%|█████     | 5/10 [01:23<01:26, 17.31s/it]

 val_accuracy:85.63333333333334, val_loss:0.49621128335665543, train_accuracy:86.28703703703704, train_loss:0.4364563155322354


 60%|██████    | 6/10 [01:41<01:10, 17.72s/it]

 val_accuracy:86.0, val_loss:0.4913163401644175, train_accuracy:86.93888888888888, train_loss:0.4276079795273375


 70%|███████   | 7/10 [01:59<00:52, 17.63s/it]

 val_accuracy:85.65, val_loss:0.5081320533175089, train_accuracy:86.5425925925926, train_loss:0.4482494326484782


 80%|████████  | 8/10 [02:18<00:36, 18.04s/it]

 val_accuracy:86.08333333333333, val_loss:0.5327466839057454, train_accuracy:86.86481481481482, train_loss:0.4518913805794235


 90%|█████████ | 9/10 [02:35<00:17, 17.90s/it]

 val_accuracy:85.6, val_loss:0.574808735766227, train_accuracy:86.36296296296297, train_loss:nan


100%|██████████| 10/10 [02:53<00:00, 17.34s/it]

 val_accuracy:86.21666666666667, val_loss:nan, train_accuracy:86.78518518518518, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▇▇█▇█▇█
train_loss,█▄▁▁▄▃▆▆
val_accuracy,▁▄▆▆▇▇▇█▆█
val_loss,▃▂▁▁▃▃▄▅█

0,1
epoch,10.0
train_accuracy,86.78519
train_loss,
val_accuracy,86.21667
val_loss,


[34m[1mwandb[0m: Agent Starting Run: doetwc7l with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:16<02:32, 16.91s/it]

 val_accuracy:83.7, val_loss:0.47654495406797326, train_accuracy:84.38518518518518, train_loss:0.44785511826696783


 20%|██        | 2/10 [00:42<02:58, 22.26s/it]

 val_accuracy:84.76666666666667, val_loss:0.4825187272317493, train_accuracy:85.37222222222222, train_loss:0.45315215631777134


 30%|███       | 3/10 [01:09<02:48, 24.08s/it]

 val_accuracy:85.13333333333334, val_loss:0.5016661649101292, train_accuracy:85.8537037037037, train_loss:0.4559754554159153


 40%|████      | 4/10 [01:35<02:30, 25.08s/it]

 val_accuracy:85.3, val_loss:0.5634702146530678, train_accuracy:85.69444444444444, train_loss:nan


 50%|█████     | 5/10 [02:03<02:09, 25.86s/it]

 val_accuracy:83.76666666666667, val_loss:nan, train_accuracy:84.28148148148148, train_loss:nan


 60%|██████    | 6/10 [02:29<01:43, 25.99s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 70%|███████   | 7/10 [02:55<01:18, 26.07s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 80%|████████  | 8/10 [03:21<00:52, 26.10s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 90%|█████████ | 9/10 [03:47<00:26, 26.11s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


100%|██████████| 10/10 [04:16<00:00, 25.64s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,█████▁▁▁▁▁
train_loss,▁▆█
val_accuracy,█████▁▁▁▁▁
val_loss,▁▁▃█

0,1
epoch,10.0
train_accuracy,9.9963
train_loss,
val_accuracy,10.03333
val_loss,


[34m[1mwandb[0m: Agent Starting Run: phd9yh4f with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:17<02:35, 17.22s/it]

 val_accuracy:82.96666666666667, val_loss:0.47493935054732817, train_accuracy:82.71296296296296, train_loss:0.47683801569286616


 20%|██        | 2/10 [00:39<02:41, 20.14s/it]

 val_accuracy:85.26666666666667, val_loss:0.4082751959877661, train_accuracy:85.73148148148148, train_loss:0.39346008774845087


 30%|███       | 3/10 [01:00<02:25, 20.74s/it]

 val_accuracy:86.01666666666667, val_loss:0.3811974489646543, train_accuracy:87.07222222222222, train_loss:0.35730661004440456


 40%|████      | 4/10 [01:23<02:09, 21.64s/it]

 val_accuracy:86.58333333333333, val_loss:0.3647045234879689, train_accuracy:87.86851851851851, train_loss:0.3342434145789154


 50%|█████     | 5/10 [01:45<01:47, 21.57s/it]

 val_accuracy:86.98333333333333, val_loss:0.35325514993858376, train_accuracy:88.4574074074074, train_loss:0.3169900280894689


 60%|██████    | 6/10 [02:07<01:27, 21.80s/it]

 val_accuracy:87.16666666666667, val_loss:0.3448302665894085, train_accuracy:88.92777777777778, train_loss:0.30308670441186736


 70%|███████   | 7/10 [02:28<01:04, 21.58s/it]

 val_accuracy:87.51666666666667, val_loss:0.3384081361462772, train_accuracy:89.36481481481482, train_loss:0.2912123262606038


 80%|████████  | 8/10 [02:50<00:43, 21.70s/it]

 val_accuracy:87.7, val_loss:0.3335076993454845, train_accuracy:89.71111111111111, train_loss:0.28084951876594066


 90%|█████████ | 9/10 [03:12<00:21, 21.73s/it]

 val_accuracy:87.73333333333333, val_loss:0.3299146550005769, train_accuracy:90.0, train_loss:0.2718233638101938


100%|██████████| 10/10 [03:34<00:00, 21.41s/it]

 val_accuracy:88.01666666666667, val_loss:0.32741887274657716, train_accuracy:90.30185185185185, train_loss:0.26385060502881214





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,90.30185
train_loss,0.26385
val_accuracy,88.01667
val_loss,0.32742


[34m[1mwandb[0m: Agent Starting Run: bohydg8u with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:12<01:53, 12.62s/it]

 val_accuracy:83.75, val_loss:0.4725923629357816, train_accuracy:83.96666666666667, train_loss:0.44642023413815496


 20%|██        | 2/10 [00:33<02:21, 17.67s/it]

 val_accuracy:85.78333333333333, val_loss:0.41642178080396813, train_accuracy:86.54444444444445, train_loss:0.3763328083790784


 30%|███       | 3/10 [00:54<02:12, 18.93s/it]

 val_accuracy:86.53333333333333, val_loss:0.3862161112267977, train_accuracy:88.08333333333333, train_loss:0.33381455203798954


 40%|████      | 4/10 [01:14<01:55, 19.26s/it]

 val_accuracy:87.21666666666667, val_loss:0.36915513616317647, train_accuracy:89.00185185185185, train_loss:0.3072293956456866


 50%|█████     | 5/10 [01:35<01:40, 20.01s/it]

 val_accuracy:87.3, val_loss:0.3613462829146912, train_accuracy:89.59444444444445, train_loss:0.2913969836372097


 60%|██████    | 6/10 [01:54<01:19, 19.83s/it]

 val_accuracy:87.65, val_loss:0.352170093049344, train_accuracy:90.19814814814815, train_loss:0.2738401308296437


 70%|███████   | 7/10 [02:16<01:01, 20.40s/it]

 val_accuracy:87.85, val_loss:0.34801278083138654, train_accuracy:90.61851851851851, train_loss:0.26324592984311274


 80%|████████  | 8/10 [02:37<00:41, 20.52s/it]

 val_accuracy:87.76666666666667, val_loss:0.35089617266032935, train_accuracy:90.86296296296297, train_loss:0.2544730729922286


 90%|█████████ | 9/10 [02:56<00:20, 20.24s/it]

 val_accuracy:87.86666666666666, val_loss:0.3521655925909159, train_accuracy:91.03703703703704, train_loss:0.24668594122474763


100%|██████████| 10/10 [03:17<00:00, 19.80s/it]

 val_accuracy:87.71666666666667, val_loss:0.35098649251056335, train_accuracy:91.44074074074074, train_loss:0.23526506422652





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▅▆▆▇▇▇██
train_loss,█▆▄▃▃▂▂▂▁▁
val_accuracy,▁▄▆▇▇█████
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,91.44074
train_loss,0.23527
val_accuracy,87.71667
val_loss,0.35099


[34m[1mwandb[0m: Agent Starting Run: tobvnb3f with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:07<01:03,  7.04s/it]

 val_accuracy:81.13333333333334, val_loss:0.5513395560442763, train_accuracy:81.12222222222222, train_loss:0.5361065972264722


 20%|██        | 2/10 [00:19<01:20, 10.04s/it]

 val_accuracy:84.25, val_loss:0.45811494279742876, train_accuracy:84.6, train_loss:0.434089637539111


 30%|███       | 3/10 [00:33<01:22, 11.83s/it]

 val_accuracy:85.31666666666666, val_loss:0.421930513054357, train_accuracy:85.95370370370371, train_loss:0.38962297898492204


 40%|████      | 4/10 [00:45<01:12, 12.15s/it]

 val_accuracy:85.78333333333333, val_loss:0.40303934855705104, train_accuracy:86.87222222222222, train_loss:0.361432327630699


 50%|█████     | 5/10 [00:58<01:01, 12.31s/it]

 val_accuracy:86.01666666666667, val_loss:0.38901247211422607, train_accuracy:87.56851851851852, train_loss:0.3410713006211165


 60%|██████    | 6/10 [01:10<00:49, 12.28s/it]

 val_accuracy:86.26666666666667, val_loss:0.3810928844885407, train_accuracy:88.08703703703704, train_loss:0.32560028964556625


 70%|███████   | 7/10 [01:23<00:37, 12.40s/it]

 val_accuracy:86.48333333333333, val_loss:0.3788127304465774, train_accuracy:88.41481481481482, train_loss:0.31531268584249966


 80%|████████  | 8/10 [01:35<00:24, 12.41s/it]

 val_accuracy:86.66666666666667, val_loss:0.3728523725211269, train_accuracy:88.80925925925926, train_loss:0.30299853944584887


 90%|█████████ | 9/10 [01:48<00:12, 12.51s/it]

 val_accuracy:87.05, val_loss:0.3687566568458044, train_accuracy:89.1462962962963, train_loss:0.29252722665613795


100%|██████████| 10/10 [02:00<00:00, 12.07s/it]

 val_accuracy:87.33333333333333, val_loss:0.36200060023836267, train_accuracy:89.53333333333333, train_loss:0.2816803452014879





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▄▃▃▂▂▂▁▁
val_accuracy,▁▅▆▆▇▇▇▇██
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
epoch,10.0
train_accuracy,89.53333
train_loss,0.28168
val_accuracy,87.33333
val_loss,0.362


[34m[1mwandb[0m: Agent Starting Run: 1ywdcqhd with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: random_uniform
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:28<04:16, 28.50s/it]

 val_accuracy:66.35, val_loss:1.1271816776905095, train_accuracy:66.79629629629629, train_loss:1.089359166418258


 20%|██        | 2/10 [01:08<04:44, 35.54s/it]

 val_accuracy:75.25, val_loss:0.7638882446787506, train_accuracy:75.72777777777777, train_loss:0.7367815204464877


 30%|███       | 3/10 [01:50<04:27, 38.27s/it]

 val_accuracy:78.31666666666666, val_loss:0.6632918308112664, train_accuracy:78.9, train_loss:0.6377061369358631


 40%|████      | 4/10 [02:33<04:01, 40.31s/it]

 val_accuracy:79.73333333333333, val_loss:0.6243425035336361, train_accuracy:80.94444444444444, train_loss:0.583768969973603


 50%|█████     | 5/10 [03:14<03:21, 40.33s/it]

 val_accuracy:80.93333333333334, val_loss:0.5860880008004259, train_accuracy:81.70370370370371, train_loss:0.5562030958724347


 60%|██████    | 6/10 [03:54<02:41, 40.29s/it]

 val_accuracy:81.11666666666666, val_loss:0.5798398881172807, train_accuracy:82.61111111111111, train_loss:0.5307902050701362


 70%|███████   | 7/10 [04:36<02:02, 40.69s/it]

 val_accuracy:81.73333333333333, val_loss:0.5760909905678469, train_accuracy:82.86296296296297, train_loss:0.5205909100531289


 80%|████████  | 8/10 [05:16<01:21, 40.76s/it]

 val_accuracy:82.4, val_loss:0.5472755982449905, train_accuracy:83.57037037037037, train_loss:0.4985223017080176


 90%|█████████ | 9/10 [05:58<00:41, 41.11s/it]

 val_accuracy:81.8, val_loss:0.5576665999641579, train_accuracy:84.01296296296296, train_loss:0.48856348867202387


100%|██████████| 10/10 [06:39<00:00, 39.93s/it]

 val_accuracy:82.71666666666667, val_loss:0.5377064264392789, train_accuracy:84.7462962962963, train_loss:0.4738980086819275





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▇▇▇▇███
train_loss,█▄▃▂▂▂▂▁▁▁
val_accuracy,▁▅▆▇▇▇████
val_loss,█▄▂▂▂▂▁▁▁▁

0,1
epoch,10.0
train_accuracy,84.7463
train_loss,0.4739
val_accuracy,82.71667
val_loss,0.53771


[34m[1mwandb[0m: Agent Starting Run: 5vyp63ku with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: random_uniform
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 64
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:07<01:03,  7.04s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 20%|██        | 2/10 [00:17<01:11,  8.89s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 30%|███       | 3/10 [00:25<01:01,  8.82s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 40%|████      | 4/10 [00:35<00:54,  9.06s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 50%|█████     | 5/10 [00:45<00:47,  9.48s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 60%|██████    | 6/10 [00:55<00:38,  9.61s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 70%|███████   | 7/10 [01:03<00:27,  9.19s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 80%|████████  | 8/10 [01:13<00:18,  9.46s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


 90%|█████████ | 9/10 [01:23<00:09,  9.63s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan


100%|██████████| 10/10 [01:33<00:00,  9.34s/it]

 val_accuracy:10.033333333333333, val_loss:nan, train_accuracy:9.996296296296297, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,9.9963
train_loss,
val_accuracy,10.03333
val_loss,


[34m[1mwandb[0m: Agent Starting Run: oagnqjsm with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:18<02:44, 18.32s/it]

 val_accuracy:83.8, val_loss:0.4753171289694281, train_accuracy:84.01481481481481, train_loss:0.4651079516452722


 20%|██        | 2/10 [00:36<02:24, 18.10s/it]

 val_accuracy:85.06666666666666, val_loss:0.44877484830425535, train_accuracy:85.53518518518518, train_loss:0.4313687441626594


 30%|███       | 3/10 [00:55<02:10, 18.63s/it]

 val_accuracy:85.61666666666666, val_loss:0.4474163479887624, train_accuracy:86.35925925925926, train_loss:0.41911491938983103


 40%|████      | 4/10 [01:13<01:49, 18.24s/it]

 val_accuracy:85.7, val_loss:0.4674199947777682, train_accuracy:86.5037037037037, train_loss:0.4221387680339004


 50%|█████     | 5/10 [01:31<01:31, 18.35s/it]

 val_accuracy:85.56666666666666, val_loss:0.493850417900115, train_accuracy:86.5962962962963, train_loss:0.4403530670968574


 60%|██████    | 6/10 [01:49<01:12, 18.16s/it]

 val_accuracy:85.98333333333333, val_loss:0.4865092104273047, train_accuracy:87.1537037037037, train_loss:0.43215500984596966


 70%|███████   | 7/10 [02:08<00:55, 18.50s/it]

 val_accuracy:85.95, val_loss:0.5118822859795168, train_accuracy:87.55185185185185, train_loss:0.4425512446587938


 80%|████████  | 8/10 [02:27<00:37, 18.70s/it]

 val_accuracy:86.21666666666667, val_loss:0.515977597958248, train_accuracy:87.55925925925926, train_loss:nan


 90%|█████████ | 9/10 [02:45<00:18, 18.41s/it]

 val_accuracy:85.9, val_loss:nan, train_accuracy:87.35555555555555, train_loss:nan


100%|██████████| 10/10 [03:04<00:00, 18.44s/it]

 val_accuracy:85.71666666666667, val_loss:nan, train_accuracy:87.22222222222223, train_loss:nan





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▆▆▆▇███▇
train_loss,█▃▁▁▄▃▅
val_accuracy,▁▅▆▇▆▇▇█▇▇
val_loss,▄▁▁▃▆▅██

0,1
epoch,10.0
train_accuracy,87.22222
train_loss,
val_accuracy,85.71667
val_loss,


[34m[1mwandb[0m: Agent Starting Run: qyj63xgs with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: random_uniform
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:41<06:16, 41.85s/it]

 val_accuracy:70.9, val_loss:1.1625231691765734, train_accuracy:71.8, train_loss:0.9314835023179933


 20%|██        | 2/10 [01:22<05:31, 41.42s/it]

 val_accuracy:76.53333333333333, val_loss:0.9454042661070151, train_accuracy:77.41296296296296, train_loss:0.7203229181697168


 30%|███       | 3/10 [02:03<04:46, 40.91s/it]

 val_accuracy:78.76666666666667, val_loss:0.8628898293978075, train_accuracy:79.93333333333334, train_loss:0.6342236190142199


 40%|████      | 4/10 [02:43<04:03, 40.60s/it]

 val_accuracy:80.23333333333333, val_loss:0.8361178040621694, train_accuracy:80.91296296296296, train_loss:0.5944603964857289


 50%|█████     | 5/10 [03:26<03:27, 41.42s/it]

 val_accuracy:80.78333333333333, val_loss:0.8117636023183462, train_accuracy:82.07962962962964, train_loss:0.5711294974093132


 60%|██████    | 6/10 [04:08<02:46, 41.57s/it]

 val_accuracy:82.03333333333333, val_loss:0.7912742397664027, train_accuracy:82.81481481481481, train_loss:0.5444130126994232


 70%|███████   | 7/10 [04:48<02:03, 41.21s/it]

 val_accuracy:81.96666666666667, val_loss:0.7809078434878591, train_accuracy:83.24444444444444, train_loss:0.5312903755456201


 80%|████████  | 8/10 [05:28<01:21, 40.92s/it]

 val_accuracy:81.96666666666667, val_loss:0.7734706585957944, train_accuracy:83.72407407407407, train_loss:0.5157858268095462


 90%|█████████ | 9/10 [06:08<00:40, 40.64s/it]

 val_accuracy:82.06666666666666, val_loss:0.769369982270701, train_accuracy:84.31296296296296, train_loss:0.503429996682942


100%|██████████| 10/10 [06:50<00:00, 41.01s/it]

 val_accuracy:82.81666666666666, val_loss:0.7599214759146459, train_accuracy:85.04814814814814, train_loss:0.4895098349796663





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▄▆▆▇█████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
train_accuracy,85.04815
train_loss,0.48951
val_accuracy,82.81667
val_loss,0.75992


[34m[1mwandb[0m: Agent Starting Run: lm4fq6e9 with config:
[34m[1mwandb[0m: 	activation_func: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:14<02:06, 14.08s/it]

 val_accuracy:83.4, val_loss:0.46578137269632974, train_accuracy:84.02222222222223, train_loss:0.4466217795938441


 20%|██        | 2/10 [00:38<02:42, 20.25s/it]

 val_accuracy:85.33333333333333, val_loss:0.40388801654315404, train_accuracy:86.29629629629629, train_loss:0.37627538217647793


 30%|███       | 3/10 [01:01<02:29, 21.36s/it]

 val_accuracy:86.25, val_loss:0.3797371270844235, train_accuracy:87.5537037037037, train_loss:0.33839133094001705


 40%|████      | 4/10 [01:23<02:10, 21.76s/it]

 val_accuracy:87.11666666666666, val_loss:0.36231010382964934, train_accuracy:88.4, train_loss:0.3131848605914198


 50%|█████     | 5/10 [01:47<01:52, 22.46s/it]

 val_accuracy:87.06666666666666, val_loss:0.3543389736586773, train_accuracy:89.12962962962963, train_loss:0.2951260758409558


 60%|██████    | 6/10 [02:11<01:31, 22.93s/it]

 val_accuracy:87.43333333333334, val_loss:0.34851356936579525, train_accuracy:89.57962962962964, train_loss:0.28076671431269307


 70%|███████   | 7/10 [02:34<01:09, 23.03s/it]

 val_accuracy:87.68333333333334, val_loss:0.34746181262359693, train_accuracy:89.91851851851852, train_loss:0.2713553324225681


 80%|████████  | 8/10 [02:56<00:45, 22.72s/it]

 val_accuracy:87.83333333333333, val_loss:0.34384918254514507, train_accuracy:90.51481481481481, train_loss:0.2545549630692424


 90%|█████████ | 9/10 [03:19<00:22, 22.85s/it]

 val_accuracy:87.7, val_loss:0.3461219368794528, train_accuracy:90.88333333333334, train_loss:0.24284499539747192


100%|██████████| 10/10 [03:43<00:00, 22.35s/it]

 val_accuracy:87.73333333333333, val_loss:0.349675672753118, train_accuracy:91.27962962962962, train_loss:0.23386724804933398





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▆▄▄▃▃▂▂▁▁
val_accuracy,▁▄▅▇▇▇████
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,91.27963
train_loss,0.23387
val_accuracy,87.73333
val_loss,0.34968


[34m[1mwandb[0m: Agent Starting Run: izpv6e55 with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: random_uniform
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.0005


 10%|█         | 1/10 [00:42<06:22, 42.53s/it]

 val_accuracy:70.9, val_loss:0.9377349948064075, train_accuracy:71.8, train_loss:0.906507038499086


 20%|██        | 2/10 [01:23<05:34, 41.87s/it]

 val_accuracy:76.53333333333333, val_loss:0.7207490427964304, train_accuracy:77.41296296296296, train_loss:0.695361226690763


 30%|███       | 3/10 [02:05<04:51, 41.66s/it]

 val_accuracy:78.76666666666667, val_loss:0.6383116427387923, train_accuracy:79.93333333333334, train_loss:0.6092704871632182


 40%|████      | 4/10 [02:45<04:05, 41.00s/it]

 val_accuracy:80.23333333333333, val_loss:0.6115604901815161, train_accuracy:80.91296296296296, train_loss:0.569509583832323


 50%|█████     | 5/10 [03:25<03:22, 40.56s/it]

 val_accuracy:80.78333333333333, val_loss:0.5872289047981702, train_accuracy:82.07962962962964, train_loss:0.5461811976848492


 60%|██████    | 6/10 [04:04<02:41, 40.31s/it]

 val_accuracy:82.03333333333333, val_loss:0.5667278064886262, train_accuracy:82.81481481481481, train_loss:0.5194634090018925


 70%|███████   | 7/10 [04:46<02:02, 40.83s/it]

 val_accuracy:81.96666666666667, val_loss:0.5563242493287478, train_accuracy:83.24444444444444, train_loss:0.5063366428612744


 80%|████████  | 8/10 [05:27<01:21, 40.85s/it]

 val_accuracy:81.96666666666667, val_loss:0.5488578063523312, train_accuracy:83.72407407407407, train_loss:0.4908288432269392


 90%|█████████ | 9/10 [06:10<00:41, 41.32s/it]

 val_accuracy:82.06666666666666, val_loss:0.5446855746417286, train_accuracy:84.31296296296296, train_loss:0.478465062501945


100%|██████████| 10/10 [06:49<00:00, 40.99s/it]

 val_accuracy:82.81666666666666, val_loss:0.5351745559139378, train_accuracy:85.04814814814814, train_loss:0.46453795497958766





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▆▇▇▇██
train_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▄▆▆▇█████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
train_accuracy,85.04815
train_loss,0.46454
val_accuracy,82.81667
val_loss,0.53517


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wwhhpy6y with config:
[34m[1mwandb[0m: 	activation_func: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	method: xavier
[34m[1mwandb[0m: 	num_hidden_layers: 5
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	size_of_hidden_layer: 128
[34m[1mwandb[0m: 	weight_decay: 0.05


 10%|█         | 1/10 [00:40<06:07, 40.85s/it]

 val_accuracy:85.83333333333333, val_loss:0.41020328823593855, train_accuracy:86.59074074074074, train_loss:0.3764888716078967


 20%|██        | 2/10 [01:21<05:26, 40.84s/it]

 val_accuracy:86.55, val_loss:0.3918844369065555, train_accuracy:87.68888888888888, train_loss:0.34436588161877785
