<a href="https://colab.research.google.com/github/subikkshas/DA6401/blob/main/Q4to6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from keras.datasets import fashion_mnist
import numpy as np
from  matplotlib import pyplot as plt
import time
import math
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm

In [None]:
import wandb
!wandb login

[34m[1mwandb[0m: Currently logged in as: [33msubikksha[0m ([33msubikksha-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
dataset= fashion_mnist.load_data()
(X_train_and_validation, y_train_and_validation), (X_test, y_test) = dataset
X_train, X_validation, y_train, y_validation = train_test_split(X_train_and_validation, y_train_and_validation, test_size=0.1, random_state=42)
X_train = (X_train/255.0).astype(np.float32)
X_validation = (X_validation/255.0).astype(np.float32)
X_test = (X_test/255.0).astype(np.float32)

print("Train Dataset Shape: ", X_train.shape)
print("Train Target Vector Shape: ", y_train.shape)
print("Test Dataset Shape:", X_test.shape)
print("Test Target Vector Shape", y_test.shape)
print("Validation Dataset Shape:", X_validation.shape)
print("Validation Target Vector Shape", y_validation.shape)




Train Dataset Shape:  (54000, 28, 28)
Train Target Vector Shape:  (54000,)
Test Dataset Shape: (10000, 28, 28)
Test Target Vector Shape (10000,)
Validation Dataset Shape: (6000, 28, 28)
Validation Target Vector Shape (6000,)


In [None]:
X_train = np.array(X_train.reshape(X_train.shape[0], 784,1))
X_test = np.array(X_test.reshape(X_test.shape[0], 784,1))
X_validation = np.array(X_validation.reshape(X_validation.shape[0], 784,1))

In [None]:
def layer_init(arr,n1,n2,init_type):
    np.random.seed(10)
    if init_type=="random":
        arr.append(np.random.randn(n1,n2)*0.1)
    elif init_type=="xavier":
        arr.append(np.random.randn(n1,n2)*np.sqrt(2/(n1+n2)))
    return arr

def param(layers,init_type):
    W=[]
    B=[]
    for i in range(len(layers)-1):
        W=layer_init(W,layers[i+1],layers[i],init_type)
        B=layer_init(B,layers[i+1],1,init_type)
    return W,B

#Activation function
def activation(activation_function):
    if activation_function == 'sigmoid':
        return sigmoid
    if activation_function == 'tanh':
        return tanh
    if activation_function == 'ReLU':
        return relu

def sigmoid(x, derivative = False):
    if derivative:
        return sigmoid(x)*(1-sigmoid(x))
    return 1/(1 + np.exp(-x))

def tanh(x, derivative = False):
    if derivative:
        return 1 - tanh(x)**2
    return (np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))

def relu(x, derivative = False):
    if derivative:
        return (x>0)*1
    return x*(x>0)

def softmax(x,derivative = False):
    if derivative:
        return softmax(x)*(1- softmax(x))
    return np.exp(x)/np.sum(np.exp(x), axis = 0)

def one_hot(y, num_output_nodes):
    v = np.zeros((num_output_nodes, len(y)))
    for i,j in enumerate(y):
        v[j,i] = 1
    return v


def forward(x, W, B, activation_type):
    h = []
    a = []
    sigma = activation(activation_type)  #activation
    h.append(x)   #h0 = x
    a.append(np.dot(W[0], h[0]) + B[0])
    for i in range(len(W)-1):
        h.append(sigma(a[-1]))
        a.append(np.dot(W[i+1], h[-1]) + B[i+1])
    y_hat = softmax(a[-1])

    return y_hat, h, a



def loss(y,y_hat,l_type,W,reg,n_class):
    if l_type=='cross_entropy':
        err=-1*np.sum(np.multiply(one_hot(y,n_class),np.log(y_hat)))/one_hot(y,n_class).shape[1]
    elif l_type=='squared_error':
        err=np.sum((one_hot(y,n_class)-y_hat)**2)/(2*one_hot(y,n_class)).shape[1]

    if W:
        r=0
        for i in range(len(W)):
            r+=np.sum((np.array(W,dtype=object)**2)[i])
        err=err+reg*r
    return err

def eval_acc(y_hat, y_true):
    return np.mean(np.argmax(y_hat, axis = 0) ==y_true )*100


In [None]:
def back_prop(x, y, y_hat, a, h , W, B, batch_size,l_type,act_type):
    grad_h,grad_a,grad_W,grad_B = [0]*len(h),[0]*len(a),[0]*len(W),[0]*len(B)
    sigma = activation(act_type)

    if l_type == "cross_entropy":
        grad_h[-1] = -1*(y/y_hat)
        grad_a[-1] = -1*(y-y_hat)
    if l_type == "squared_error":   ##### edit this
        grad_h[-1] = y_hat - y
        grad_a[-1] = (y_hat - y)*softmax(a[-1])*(1-softmax(a[-1]))

    for i in range(len(W)-1, -1, -1):
        grad_W[i] = np.dot(grad_a[i], h[i].T)
        grad_B[i] = np.dot(grad_a[i], np.ones((grad_a[i].shape[1], 1)))
        if i > 0:
            grad_h[i-1] = np.dot(W[i].T, grad_a[i])
            grad_a[i-1]  = np.multiply(grad_h[i-1],sigma(a[i-1], derivative = True))

    return grad_W, grad_B, grad_h, grad_a

In [None]:
def sgd_step(W,B,grad_W,grad_B,lr,reg):
    W=np.array(W,dtype=object)
    B=np.array(B,dtype=object)
    W-=lr*reg*W+lr*np.array(grad_W,dtype=object)
    B-=lr*reg*B+lr*np.array(grad_B,dtype=object)

    return W.tolist(),B.tolist()



def momentum_step(w, b, gW, gB, lr=0.001, gamma=0.9, reg=0):
    params = {'w': w, 'b': b}

    Wmoments = [np.zeros_like(p) for p in params['w']]
    Bmoments = [np.zeros_like(p) for p in params['b']]

    Wmoments = gamma * np.array(Wmoments,dtype=object) + lr * np.array(gW,dtype=object)
    W = (1 - lr * reg) * np.array(params['w'],dtype=object) - Wmoments
    Wmoments = Wmoments.tolist()

    Bmoments = gamma * np.array(Bmoments,dtype=object) + lr * np.array(gB,dtype=object)
    B = (1 - lr * reg) * np.array(params['b'],dtype=object) - Bmoments
    Bmoments = Bmoments.tolist()

    return W.tolist(), B.tolist()


def RMSprop_step(w, b, gW, gB, lr=0.01, beta=0.99):
    params = {'w': w, 'b': b}

    vW = [np.zeros_like(p) for p in params['w']]
    vB = [np.zeros_like(p) for p in params['b']]

    vW = beta * np.array(vW, dtype=object) + (1 - beta) * (np.array(gW, dtype=object) ** 2)
    W = np.array(params['w'], dtype=object) - (lr / ((vW + 1e-7) ** 0.5)) * np.array(gW, dtype=object)

    vB = beta * np.array(vB, dtype=object) + (1 - beta) * (np.array(gB, dtype=object) ** 2)
    B = np.array(params['b'], dtype=object) - (lr / ((vB + 1e-7) ** 0.5)) * np.array(gB, dtype=object)

    return W.tolist(), B.tolist()



In [None]:
import wandb

def train(X_train, y_train, x_val, y_val, num_inputs_nodes, hidden_layers, out_num, init_type, epochs,
          batch_size, l_type, act_type, op_name, lr_rate, reg):

    # Ensure WandB starts fresh
    wandb.finish()

    run = wandb.init(project="DA6401-Assignment-1", name="training-run", reinit=True)

    in_node = [num_inputs_nodes]
    layers = []
    layers.extend(in_node)
    layers.extend(hidden_layers)
    layers.extend([out_num])

    W, B = param(layers, init_type)
    N = X_train.shape[0]
    n_batches = int(np.floor(N / batch_size))

    for epoch in range(epochs):
        train_loss, train_accuracy, val_loss, val_accuracy = [], [], [], []
        l, acc, ds, steps = 0, 0, 0, 1

        while ds < N:
            mini_batch_size = min((N - ds), batch_size)
            x = np.squeeze(X_train[ds:ds + mini_batch_size]).T
            y = one_hot(y_train[ds:ds + mini_batch_size], out_num)

            y_hat, h, a = forward(x, W, B, act_type)
            grad_W, grad_B, grad_h, grad_a = back_prop(x, y, y_hat, a, h, W, B, batch_size, l_type, act_type)

            if op_name == 'sgd':
                W, B = sgd_step(W, B, grad_W, grad_B, lr_rate, reg)
            elif op_name == 'momentum':
                W, B = momentum_step(W, B, grad_W, grad_B, lr_rate, reg)
            elif op_name == 'rmsprop':
                W, B = RMSprop_step(W, B, grad_W, grad_B, lr_rate, reg)

            l += loss(y_train[ds:ds + mini_batch_size], y_hat, l_type, W, reg, out_num)
            acc += eval_acc(y_hat, y_train[ds:ds + mini_batch_size])

            steps += 1
            ds += batch_size

        l /= (n_batches + mini_batch_size)
        acc /= steps

        train_loss.append(l)
        train_accuracy.append(acc)

        y_val_hat, _, _ = forward(np.squeeze(x_val).T, W, B, act_type)
        val_acc = eval_acc(y_val_hat, y_val)
        val_l = loss(y_val, y_val_hat, l_type, W=None, reg=reg, n_class=out_num)

        val_accuracy.append(val_acc)
        val_loss.append(val_l)

        wandb.log({"epoch": epoch, "Train_loss": l, "Train_acc": acc, "val_loss": val_l, "val_Accuracy": val_acc})

        #print(f"Epoch {epoch}: Train Loss = {l:.4f}, Train Accuracy = {acc:.4f}, Val Loss = {val_l:.4f}, Val Accuracy = {val_acc:.4f}")

    wandb.finish()  # Explicitly finish the WandB run

    return W, B, train_loss, train_accuracy, val_loss, val_accuracy


In [None]:
hidden_layers=[64,32,16]
init_type='random'
epochs=10
batch_size=50
loss_type='squared_error'
optimizer_name='rmsprop'
learning_rate=1e-3
reg_lamda=0
activation_type='sigmoid'

In [None]:
wandb.init(project="DA6401-Assignment-1")
fit=train(X_train, y_train, X_validation, y_validation,784,hidden_layers, 10, init_type, epochs, batch_size,
                                                                 loss_type, activation_type, optimizer_name, learning_rate,
                                                                 reg_lamda)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msubikksha[0m ([33msubikksha-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▅▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
Train_acc,84.07956
Train_loss,0.2313
epoch,9.0
val_Accuracy,83.65
val_loss,0.24556


In [None]:
import wandb

In [None]:
sweep_configuration = {'method'    : "random",  #Other available--> grid, bayes
                       'metric'    : {'name': 'val_Accuracy','goal':'maximize'},
                       'parameters': {'epochs':{'values':[ 5, 10]},
                                      'hidden_layers':{'values':[[64,32],[64,32,16],[128,64,32],[128,64,32,16],[128,32,32,16]] }, #change needed
                                      'learning_rate':{'values':[1e-3,1e-4]},
                                      'weight_decay':{'values':[0, 0.0005, 0.5]},
                                      'optimizer_name': {'values':['sgd', 'momentum', 'rmsprop', 'nesterov','adam']}, #, 'nesterov','adam', 'nadam'
                                      'batch_size':{'values':[16,32,64]},
                                      'init_type': {'values':['random','xavier']},
                                      'activation_type':{'values':['sigmoid','tanh','ReLU']},
                                      'loss_type': {'values':['cross_entropy']} }}

In [None]:
def sweep_train():
    hyperparameters = dict(epochs=5,
                           hidden_layers=[64, 32],
                           learning_rate=1e-4,
                           weight_decay=0,
                           optimizer_name='sgd',
                           batch_size=16,
                           init_type='random',
                           activation_type='sigmoid',
                           loss_type='cross_entropy',
                           reg_lamda=0)

    wandb.init(project="DA6401-Assignment-1", entity="subikksha", config=hyperparameters)
    config = wandb.config
    epochs = config.epochs
    hidden_layers = config.hidden_layers
    learning_rate = config.learning_rate
    weight_decay = config.weight_decay
    optimizer_name = config.optimizer_name
    batch_size = config.batch_size
    init_type = config.init_type
    activation_type = config.activation_type
    loss_type = config.loss_type
    reg_lamda = config.reg_lamda
    wandb.run.name = "e_{}_hl_{}_lr_{}_wd_{}_o_{}_bs_{}_winit_{}_ac_{}_los_{}_r_{}".format(epochs, \
                                                                                           hidden_layers, \
                                                                                           learning_rate, \
                                                                                           weight_decay, \
                                                                                           optimizer_name, \
                                                                                           batch_size, \
                                                                                           init_type, \
                                                                                           activation_type, \
                                                                                           loss_type, \
                                                                                           reg_lamda)

    _, _, train_loss, train_accuracy, val_loss, val_accuracy = train(X_train, y_train, X_validation, y_validation, 784,
                                                                     hidden_layers, 10, init_type, epochs, batch_size,
                                                                     loss_type, activation_type, optimizer_name,
                                                                     learning_rate, reg_lamda)

In [None]:
import wandb

def sweep_train():
    wandb.finish()  # Ensure no previous session is hanging

    hyperparameters = dict(
        epochs=5,
        hidden_layers=[128, 64, 32],
        learning_rate=1e-3,
        weight_decay=0.0005,
        optimizer_name='nesterov',
        batch_size=32,
        init_type='xavier',
        activation_type='ReLU',
        loss_type='cross_entropy',
        reg_lamda=0
    )

    run = wandb.init(project="DA6401-Assignment-1", config=hyperparameters)

    config = wandb.config
    wandb.run.name = f"hl_{len(config.hidden_layers)}_bs_{config.batch_size}_ac_{config.activation_type}"

    _, _, train_loss, train_accuracy, val_loss, val_accuracy = train(
        X_train, y_train, X_validation, y_validation, 784,
        config.hidden_layers, 10, config.init_type, config.epochs, config.batch_size,
        config.loss_type, config.activation_type, config.optimizer_name,
        config.learning_rate, config.reg_lamda
    )

    wandb.finish()


In [None]:
sweep_id = wandb.sweep(sweep_configuration,project="DA6401-Assignment-1")
wandb.agent(sweep_id,function=sweep_train,project="DA6401-Assignment-1",count=100)

Create sweep with ID: rzaqqqtp
Sweep URL: https://wandb.ai/subikksha-indian-institute-of-technology-madras/DA6401-Assignment-1/sweeps/rzaqqqtp


[34m[1mwandb[0m: Agent Starting Run: iis6e49w with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.00037
Train_loss,2.43545
epoch,4.0
val_Accuracy,9.95
val_loss,2.46971


[34m[1mwandb[0m: Agent Starting Run: tlo3abpm with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,11.57359
Train_loss,2.17743
epoch,9.0
val_Accuracy,11.61667
val_loss,2.29914


[34m[1mwandb[0m: Agent Starting Run: r7zitqzh with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▃▁▂▂▃▄▅▆▇█
Train_loss,█▄▄▃▃▂▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▇▁▅▆▇█████
val_loss,█▇▆▅▅▄▃▃▂▁

0,1
Train_acc,11.90274
Train_loss,2.18018
epoch,9.0
val_Accuracy,10.33333
val_loss,2.30131


[34m[1mwandb[0m: Agent Starting Run: koeoxxri with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▅▆▇▇▇████
Train_loss,█▁▂▁▂▃▄▅▅▅
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▆▆██▇▆
val_loss,▂▁▁▃▅▇▄▅██

0,1
Train_acc,85.79495
Train_loss,0.53149
epoch,9.0
val_Accuracy,84.83333
val_loss,0.57407


[34m[1mwandb[0m: Agent Starting Run: zdsmzd16 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▆▇▇▇▇████
Train_loss,█▃▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▆▆▇█████
val_loss,█▅▃▂▂▁▁▁▁▁

0,1
Train_acc,88.40529
Train_loss,0.31679
epoch,9.0
val_Accuracy,86.75
val_loss,0.37853


[34m[1mwandb[0m: Agent Starting Run: miq8e0s3 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▃▅▆▆▇▇███
Train_loss,█▅▄▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▄▅▆▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
Train_acc,79.81794
Train_loss,0.59958
epoch,9.0
val_Accuracy,79.85
val_loss,0.59721


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: f5j8xm4v with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.00037
Train_loss,2.44052
epoch,4.0
val_Accuracy,9.95
val_loss,2.47489


[34m[1mwandb[0m: Agent Starting Run: 8pmnel5f with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▆▇▇▇█████
Train_loss,▇▁▂▃▄▅▅▆▇█
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▄▅▆▆▇▇▇█
val_loss,▁▁▂▃▅▅▆▇██

0,1
Train_acc,85.17847
Train_loss,0.75381
epoch,9.0
val_Accuracy,85.26667
val_loss,0.77393


[34m[1mwandb[0m: Agent Starting Run: 1xhqdf84 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▇▇██
Train_loss,█▂▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▆▇█
val_loss,█▄▂▁▁

0,1
Train_acc,83.82734
Train_loss,0.47739
epoch,4.0
val_Accuracy,83.95
val_loss,0.47942


[34m[1mwandb[0m: Agent Starting Run: c8rr3722 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▃▅▇██▇▇▇█
Train_loss,██▇▇▇▇▆▅▃▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▅███▇▆▆▆█
val_loss,████▇▇▆▅▃▁

0,1
Train_acc,31.99978
Train_loss,1.91459
epoch,9.0
val_Accuracy,33.93333
val_loss,1.85954


[34m[1mwandb[0m: Agent Starting Run: 6c1zwktu with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▃▅▆▇▇▇███
Train_loss,█▇▄▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▅▆▆▇▇████
val_loss,█▅▃▂▂▂▂▁▁▁

0,1
Train_acc,77.79524
Train_loss,0.58283
epoch,9.0
val_Accuracy,78.11667
val_loss,0.59646


[34m[1mwandb[0m: Agent Starting Run: dt8ywjkk with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▆▆▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
Train_acc,89.06898
Train_loss,0.29849
epoch,9.0
val_Accuracy,87.98333
val_loss,0.33073


[34m[1mwandb[0m: Agent Starting Run: strpjb2b with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.00037
Train_loss,2.54737
epoch,4.0
val_Accuracy,9.95
val_loss,2.57336


[34m[1mwandb[0m: Agent Starting Run: 03qgmqmw with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,11.30847
Train_loss,2.31015
epoch,9.0
val_Accuracy,11.56667
val_loss,2.32829


[34m[1mwandb[0m: Agent Starting Run: usk7fjge with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▆▇▇▇█████
Train_loss,█▃▂▂▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▆▇▇▇█████
val_loss,█▄▂▂▁▁▁▁▁▁

0,1
Train_acc,89.2614
Train_loss,0.29594
epoch,9.0
val_Accuracy,86.9
val_loss,0.37292


[34m[1mwandb[0m: Agent Starting Run: lbtrgqhf with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▇███
Train_loss,█▁▂▃▃
epoch,▁▃▅▆█
val_Accuracy,▁▇▆▇█
val_loss,▁▁▆▆█

0,1
Train_acc,84.33427
Train_loss,0.62456
epoch,4.0
val_Accuracy,84.78333
val_loss,0.63018


[34m[1mwandb[0m: Agent Starting Run: ngniij51 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.00259
Train_loss,2.30882
epoch,4.0
val_Accuracy,9.95
val_loss,2.31714


[34m[1mwandb[0m: Agent Starting Run: wmevk4jy with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▂▂▄▆▆▇▇██
Train_loss,█▆▅▄▃▃▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▂▅▆▇▇▇██
val_loss,█▆▆▅▄▃▂▂▁▁

0,1
Train_acc,68.22423
Train_loss,0.96151
epoch,9.0
val_Accuracy,67.98333
val_loss,0.94041


[34m[1mwandb[0m: Agent Starting Run: sjwf1ulp with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.03821
Train_loss,2.18608
epoch,4.0
val_Accuracy,10.0
val_loss,2.30475


[34m[1mwandb[0m: Agent Starting Run: b20puay4 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▄▄▅▆▆▇▇██
Train_loss,█▇▆▅▄▃▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▂▃▄▅▆▇▇██
val_loss,█▇▆▅▄▃▂▂▁▁

0,1
Train_acc,64.13955
Train_loss,1.09749
epoch,9.0
val_Accuracy,65.28333
val_loss,1.13138


[34m[1mwandb[0m: Agent Starting Run: grij922z with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▅▆▇█
Train_loss,█▅▃▂▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▆▇█
val_loss,█▆▄▂▁

0,1
Train_acc,63.30669
Train_loss,0.92821
epoch,4.0
val_Accuracy,65.86667
val_loss,0.88486


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wo33mxim with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▇▇███████
Train_loss,█▁▁▁▂▂▂▂▃▃
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▅▆▇▇▇████
val_loss,▄▁▂▃▄▄▅▆██

0,1
Train_acc,84.57309
Train_loss,0.72367
epoch,9.0
val_Accuracy,83.9
val_loss,0.74431


[34m[1mwandb[0m: Agent Starting Run: lig8l0pu with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,8.63862
Train_loss,2.34914
epoch,9.0
val_Accuracy,9.05
val_loss,2.36695


[34m[1mwandb[0m: Agent Starting Run: 9hav8p9s with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▂▃▅█
Train_loss,█▅▃▂▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▃▅█
val_loss,█▅▄▂▁

0,1
Train_acc,51.15878
Train_loss,1.4876
epoch,4.0
val_Accuracy,56.38333
val_loss,1.52542


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 41u8ox1p with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▅▆▇▇█████
Train_loss,█▄▃▂▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▃▂▂▂▁▁▁▁

0,1
Train_acc,84.25104
Train_loss,0.43767
epoch,9.0
val_Accuracy,84.33333
val_loss,0.43392


[34m[1mwandb[0m: Agent Starting Run: jqu56ftl with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


  grad_h[-1] = -1*(y/y_hat)
  err=-1*np.sum(np.multiply(one_hot(y,n_class),np.log(y_hat)))/one_hot(y,n_class).shape[1]
  err=-1*np.sum(np.multiply(one_hot(y,n_class),np.log(y_hat)))/one_hot(y,n_class).shape[1]
  return np.exp(x)/np.sum(np.exp(x), axis = 0)


0,1
Train_acc,▇███████▄▁
Train_loss,█▁▂▄▆█
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,████████▁▁
val_loss,▁▁▂▂▄▅█

0,1
Train_acc,9.99384
Train_loss,
epoch,9.0
val_Accuracy,9.95
val_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: za6rl4y3 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,10.00259
Train_loss,2.5087
epoch,9.0
val_Accuracy,9.95
val_loss,2.51474


[34m[1mwandb[0m: Agent Starting Run: zhfz2o33 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,10.00037
Train_loss,2.44052
epoch,9.0
val_Accuracy,9.95
val_loss,2.47489


[34m[1mwandb[0m: Agent Starting Run: yiq9a90e with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▅▆▇▇▇████
Train_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
Train_acc,84.5932
Train_loss,0.41536
epoch,9.0
val_Accuracy,84.53333
val_loss,0.43511


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5ucrrfhr with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.00037
Train_loss,2.44042
epoch,4.0
val_Accuracy,9.95
val_loss,2.47428


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g11t9t5h with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.00259
Train_loss,2.4455
epoch,4.0
val_Accuracy,9.95
val_loss,2.46971


[34m[1mwandb[0m: Agent Starting Run: 712sehjt with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,10.01887
Train_loss,2.44779
epoch,9.0
val_Accuracy,9.95
val_loss,2.46959


[34m[1mwandb[0m: Agent Starting Run: b88yd1og with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▆▇█
val_loss,█▄▂▂▁

0,1
Train_acc,82.43466
Train_loss,0.50379
epoch,4.0
val_Accuracy,82.63333
val_loss,0.51647


[34m[1mwandb[0m: Agent Starting Run: 2zp51j3f with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▅▆▆▇▇████
Train_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▃▅▆▆▇▇███
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
Train_acc,83.40919
Train_loss,0.46397
epoch,9.0
val_Accuracy,83.68333
val_loss,0.45505


[34m[1mwandb[0m: Agent Starting Run: 7bl5id06 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▆▆▇▇▇████
Train_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
Train_acc,84.64127
Train_loss,0.424
epoch,9.0
val_Accuracy,84.23333
val_loss,0.45124


[34m[1mwandb[0m: Agent Starting Run: ngv92og9 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,10.00037
Train_loss,2.64377
epoch,9.0
val_Accuracy,9.95
val_loss,2.66088


[34m[1mwandb[0m: Agent Starting Run: 8xw2asux with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▆▇▇▇█████
Train_loss,█▃▂▂▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▆▇▇▇█████
val_loss,█▄▂▂▁▁▁▁▁▁

0,1
Train_acc,89.07359
Train_loss,0.28682
epoch,9.0
val_Accuracy,87.41667
val_loss,0.36914


[34m[1mwandb[0m: Agent Starting Run: 7f04njx2 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▂▆▇█
Train_loss,█▄▃▂▁
epoch,▁▃▅▆█
val_Accuracy,▁▇▇██
val_loss,█▇▅▄▁

0,1
Train_acc,29.11545
Train_loss,2.26673
epoch,4.0
val_Accuracy,23.25
val_loss,2.27116


[34m[1mwandb[0m: Agent Starting Run: 0gjauz6e with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▆▆▇▇████
val_loss,█▅▃▃▂▁▁▁▁▁

0,1
Train_acc,89.6234
Train_loss,0.26254
epoch,9.0
val_Accuracy,88.03333
val_loss,0.33615


[34m[1mwandb[0m: Agent Starting Run: h3cruu99 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,10.04478
Train_loss,2.28747
epoch,9.0
val_Accuracy,10.0
val_loss,2.30475


[34m[1mwandb[0m: Agent Starting Run: qzfzyntd with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▆▇▇▇█████
Train_loss,█▃▂▂▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▆▆▆▇▇███
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
Train_acc,84.63218
Train_loss,0.43289
epoch,9.0
val_Accuracy,84.6
val_loss,0.43794


[34m[1mwandb[0m: Agent Starting Run: yy9tmomk with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▅▆▇█
Train_loss,█▇▅▃▁
epoch,▁▃▅▆█
val_Accuracy,▁▃▄▆█
val_loss,█▇▅▃▁

0,1
Train_acc,50.77894
Train_loss,1.63389
epoch,4.0
val_Accuracy,53.36667
val_loss,1.57499


[34m[1mwandb[0m: Agent Starting Run: rt4dtfzp with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▄▅▇█
val_loss,█▆▄▃▁

0,1
Train_acc,86.29922
Train_loss,0.39932
epoch,4.0
val_Accuracy,86.35
val_loss,0.40893


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fkq3dg76 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▅▆▆▇▇████
Train_loss,█▅▄▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▃▄▅▆▇▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
Train_acc,81.59948
Train_loss,0.50999
epoch,9.0
val_Accuracy,81.71667
val_loss,0.53647


[34m[1mwandb[0m: Agent Starting Run: 3s5jotvm with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▆▇▇█
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▇██
val_loss,█▄▃▂▁

0,1
Train_acc,87.637
Train_loss,0.34016
epoch,4.0
val_Accuracy,86.93333
val_loss,0.35241


[34m[1mwandb[0m: Agent Starting Run: 8435xs57 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▆▇▇█
Train_loss,█▅▃▂▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▆▇█
val_loss,█▅▃▂▁

0,1
Train_acc,77.5678
Train_loss,0.68184
epoch,4.0
val_Accuracy,79.03333
val_loss,0.69193


[34m[1mwandb[0m: Agent Starting Run: 6xd0yy4u with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,██▆▁▁
Train_loss,▁
epoch,▁▃▅▆█
val_Accuracy,██▁▁▁
val_loss,▁

0,1
Train_acc,10.00037
Train_loss,
epoch,4.0
val_Accuracy,9.95
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 8lhi30bd with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▅▆▆▇▇████
Train_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▃▅▆▆▇▇███
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
Train_acc,83.37278
Train_loss,0.44371
epoch,9.0
val_Accuracy,83.65
val_loss,0.45507


[34m[1mwandb[0m: Agent Starting Run: jvbr6ufu with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,9.99384
Train_loss,2.19385
epoch,4.0
val_Accuracy,9.95
val_loss,2.31994


[34m[1mwandb[0m: Agent Starting Run: ezd8x3wr with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,9.83412
Train_loss,2.32752
epoch,4.0
val_Accuracy,9.8
val_loss,2.33897


[34m[1mwandb[0m: Agent Starting Run: ypcu9ca5 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▅▇▇█
Train_loss,█▄▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▆▇█
val_loss,█▄▂▂▁

0,1
Train_acc,70.70382
Train_loss,0.7759
epoch,4.0
val_Accuracy,73.2
val_loss,0.74334


[34m[1mwandb[0m: Agent Starting Run: itmmht2h with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▄▅▆▆▇▇███
Train_loss,█▆▅▃▃▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▅▆▇▇███
val_loss,█▆▅▄▃▂▂▁▁▁

0,1
Train_acc,78.38773
Train_loss,0.59478
epoch,9.0
val_Accuracy,78.75
val_loss,0.58931


[34m[1mwandb[0m: Agent Starting Run: 80wm0rvr with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,█▆▁▁▁
Train_loss,▁
epoch,▁▃▅▆█
val_Accuracy,█▁▁▁▁
val_loss,▁

0,1
Train_acc,10.00259
Train_loss,
epoch,4.0
val_Accuracy,9.95
val_loss,


[34m[1mwandb[0m: Agent Starting Run: 5ptcw10t with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▃▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▆▆▇██▇██
val_loss,█▄▃▂▂▁▁▂▁▁

0,1
Train_acc,88.94477
Train_loss,0.30275
epoch,9.0
val_Accuracy,87.33333
val_loss,0.37881


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 033eotlr with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▂▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▅▆▆▆▇▇███
val_loss,█▄▃▃▂▂▁▁▁▁

0,1
Train_acc,89.80536
Train_loss,0.28166
epoch,9.0
val_Accuracy,86.71667
val_loss,0.3608


[34m[1mwandb[0m: Agent Starting Run: eh6ti817 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▆▆▇▇▇████
Train_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▃▅▆▆▇▇▇██
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
Train_acc,85.35434
Train_loss,0.4237
epoch,9.0
val_Accuracy,85.0
val_loss,0.4279


[34m[1mwandb[0m: Agent Starting Run: 213q7q08 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▂▄▆█
Train_loss,█▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▇███
val_loss,█▅▄▂▁

0,1
Train_acc,12.9274
Train_loss,2.28058
epoch,4.0
val_Accuracy,20.18333
val_loss,2.30029


[34m[1mwandb[0m: Agent Starting Run: v06f6njo with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.00037
Train_loss,2.64377
epoch,4.0
val_Accuracy,9.95
val_loss,2.66088


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zspgal8p with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.00037
Train_loss,2.5054
epoch,4.0
val_Accuracy,9.95
val_loss,2.52141


[34m[1mwandb[0m: Agent Starting Run: 4g4vdz3x with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▆▇█
val_loss,█▄▃▂▁

0,1
Train_acc,79.75044
Train_loss,0.57618
epoch,4.0
val_Accuracy,80.58333
val_loss,0.56159


[34m[1mwandb[0m: Agent Starting Run: kwf29jlx with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▅▇▇█
Train_loss,█▄▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▆▇█
val_loss,█▄▂▂▁

0,1
Train_acc,70.70382
Train_loss,0.7759
epoch,4.0
val_Accuracy,73.2
val_loss,0.74334


[34m[1mwandb[0m: Agent Starting Run: powpzgah with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,9.99199
Train_loss,2.63953
epoch,4.0
val_Accuracy,9.95
val_loss,2.77984


[34m[1mwandb[0m: Agent Starting Run: z6giaknm with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▃▅▇█
Train_loss,█▅▄▃▁
epoch,▁▃▅▆█
val_Accuracy,▁▅███
val_loss,█▇▅▄▁

0,1
Train_acc,30.94287
Train_loss,2.24765
epoch,4.0
val_Accuracy,33.51667
val_loss,2.25962


[34m[1mwandb[0m: Agent Starting Run: n5xzwewy with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▂▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▅▆▆▇███
val_loss,█▄▃▃▂▂▁▁▁▁

0,1
Train_acc,89.43047
Train_loss,0.27675
epoch,9.0
val_Accuracy,87.68333
val_loss,0.34699


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: w8rilbfv with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,11.58045
Train_loss,2.27843
epoch,9.0
val_Accuracy,11.61667
val_loss,2.29914


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6pwvybii with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▆▆▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
Train_acc,89.06898
Train_loss,0.29849
epoch,9.0
val_Accuracy,87.98333
val_loss,0.33073


[34m[1mwandb[0m: Agent Starting Run: 9iijqo98 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,9.99384
Train_loss,2.18889
epoch,9.0
val_Accuracy,9.95
val_loss,2.31299


[34m[1mwandb[0m: Agent Starting Run: 0z1dzvaw with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▃▆▇█
Train_loss,█▅▃▂▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▇▇█
val_loss,█▅▃▂▁

0,1
Train_acc,62.28908
Train_loss,1.01177
epoch,4.0
val_Accuracy,64.71667
val_loss,0.96104


[34m[1mwandb[0m: Agent Starting Run: nkj4a4f5 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,11.5836
Train_loss,2.28784
epoch,9.0
val_Accuracy,11.61667
val_loss,2.29914


[34m[1mwandb[0m: Agent Starting Run: nz7vmalu with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▆▇█
val_loss,█▄▂▁▁

0,1
Train_acc,81.83467
Train_loss,0.50604
epoch,4.0
val_Accuracy,82.71667
val_loss,0.49757


[34m[1mwandb[0m: Agent Starting Run: 6glogyyk with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.00037
Train_loss,2.44042
epoch,4.0
val_Accuracy,9.95
val_loss,2.47428


[34m[1mwandb[0m: Agent Starting Run: 6hy5rt3b with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,10.00259
Train_loss,2.4455
epoch,9.0
val_Accuracy,9.95
val_loss,2.46971


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: k6ftspem with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,10.00037
Train_loss,2.29481
epoch,9.0
val_Accuracy,9.95
val_loss,2.31914


[34m[1mwandb[0m: Agent Starting Run: 8pmkym9k with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,█▁▃▅█
Train_loss,█▂▂▁▁
epoch,▁▃▅▆█
val_Accuracy,█▁▅▇█
val_loss,█▅▄▂▁

0,1
Train_acc,9.33615
Train_loss,2.28237
epoch,4.0
val_Accuracy,9.85
val_loss,2.30238


[34m[1mwandb[0m: Agent Starting Run: uukwd0x2 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▇██
val_loss,█▄▃▂▁

0,1
Train_acc,87.16306
Train_loss,0.34994
epoch,4.0
val_Accuracy,87.01667
val_loss,0.37109


[34m[1mwandb[0m: Agent Starting Run: jhqn0o7q with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▂▄▆█
Train_loss,██▅▃▁
epoch,▁▃▅▆█
val_Accuracy,▁▂▄▇█
val_loss,█▇▄▂▁

0,1
Train_acc,44.56087
Train_loss,1.36967
epoch,4.0
val_Accuracy,49.91667
val_loss,1.30564


[34m[1mwandb[0m: Agent Starting Run: e573jau5 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.00259
Train_loss,2.54153
epoch,4.0
val_Accuracy,9.95
val_loss,2.56469


[34m[1mwandb[0m: Agent Starting Run: 0nmxmr89 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,8.64188
Train_loss,2.35883
epoch,9.0
val_Accuracy,9.05
val_loss,2.36695


[34m[1mwandb[0m: Agent Starting Run: efexj24k with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▅▁▂▄▅▆▆▇▇█
Train_loss,█▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁█▃▃▃▃▃▃▃▃
val_loss,█▃▂▂▂▂▂▁▁▁

0,1
Train_acc,10.06328
Train_loss,2.28233
epoch,9.0
val_Accuracy,10.36667
val_loss,2.30242


[34m[1mwandb[0m: Agent Starting Run: 880lp7v3 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,11.93165
Train_loss,2.65136
epoch,4.0
val_Accuracy,11.9
val_loss,2.65856


[34m[1mwandb[0m: Agent Starting Run: bcxn1q2u with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▂▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▆▇▇▇██
val_loss,█▄▃▂▂▁▁▁▁▁

0,1
Train_acc,89.79376
Train_loss,0.28459
epoch,9.0
val_Accuracy,87.21667
val_loss,0.36138


[34m[1mwandb[0m: Agent Starting Run: oamwnpfz with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,11.58045
Train_loss,2.27843
epoch,9.0
val_Accuracy,11.61667
val_loss,2.29914


[34m[1mwandb[0m: Agent Starting Run: cmdxi956 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,10.04702
Train_loss,2.29693
epoch,9.0
val_Accuracy,10.0
val_loss,2.30475


[34m[1mwandb[0m: Agent Starting Run: 6dy32qlb with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▅▇██
val_loss,█▄▂▁▁

0,1
Train_acc,86.85428
Train_loss,0.36079
epoch,4.0
val_Accuracy,86.51667
val_loss,0.37868


[34m[1mwandb[0m: Agent Starting Run: zgtpdwo3 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,9.43171
Train_loss,2.54407
epoch,9.0
val_Accuracy,9.41667
val_loss,2.69338


[34m[1mwandb[0m: Agent Starting Run: jb09cyb7 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▅▆▇▇█████
Train_loss,█▃▂▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▆▇▇█████
val_loss,█▄▂▂▁▁▁▁▁▁

0,1
Train_acc,82.64588
Train_loss,0.53098
epoch,9.0
val_Accuracy,82.18333
val_loss,0.53486


[34m[1mwandb[0m: Agent Starting Run: kvc2b2hs with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▅▇██
Train_loss,█▄▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▆▇▇█
val_loss,█▂▂▁▁

0,1
Train_acc,71.13633
Train_loss,0.76604
epoch,4.0
val_Accuracy,72.16667
val_loss,0.73686


[34m[1mwandb[0m: Agent Starting Run: xih22r95 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.00259
Train_loss,2.4455
epoch,4.0
val_Accuracy,9.95
val_loss,2.46971


[34m[1mwandb[0m: Agent Starting Run: pm9xxemm with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.00259
Train_loss,2.30667
epoch,4.0
val_Accuracy,9.95
val_loss,2.31834


[34m[1mwandb[0m: Agent Starting Run: jsvzxito with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 32, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.0211
Train_loss,2.45788
epoch,4.0
val_Accuracy,9.95
val_loss,2.46959


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2wcgadk2 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: nesterov
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▁▁▁▁▁▁▁▁▁
Train_loss,▁▁▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
Train_acc,10.00259
Train_loss,2.55787
epoch,9.0
val_Accuracy,9.95
val_loss,2.57336


[34m[1mwandb[0m: Agent Starting Run: 6e42auna with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.00074
Train_loss,2.77335
epoch,4.0
val_Accuracy,9.95
val_loss,2.77984


[34m[1mwandb[0m: Agent Starting Run: z07ev5ar with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,10.00259
Train_loss,2.31545
epoch,4.0
val_Accuracy,9.95
val_loss,2.32373


[34m[1mwandb[0m: Agent Starting Run: dovrfqsb with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▆▇██
Train_loss,█▃▂▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▆▇██
val_loss,█▃▂▁▁

0,1
Train_acc,85.87648
Train_loss,0.36778
epoch,4.0
val_Accuracy,86.05
val_loss,0.38505


[34m[1mwandb[0m: Agent Starting Run: whx6mko0 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32, 16]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▂▄▅█
Train_loss,█▆▅▄▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,█▇▆▄▁

0,1
Train_acc,10.36859
Train_loss,2.18117
epoch,4.0
val_Accuracy,10.31667
val_loss,2.30074


[34m[1mwandb[0m: Agent Starting Run: ld4oh4ue with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32, 16]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▂▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▅▆▆▇▇████
val_loss,█▅▃▂▂▂▁▁▁▁

0,1
Train_acc,89.37907
Train_loss,0.29626
epoch,9.0
val_Accuracy,87.28333
val_loss,0.35183


[34m[1mwandb[0m: Agent Starting Run: fqu8a020 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▅▆▇▇▇▇███
Train_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▇▇▇███
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
Train_acc,89.49267
Train_loss,0.28802
epoch,9.0
val_Accuracy,88.06667
val_loss,0.33101


[34m[1mwandb[0m: Agent Starting Run: b8o3kwih with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5


0,1
Train_acc,▁▆▇▇▇█████
Train_loss,█▁▁▂▃▃▄▅▅▆
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▅▅▆▆▇▇███
val_loss,▂▁▂▃▄▅▅▆▇█

0,1
Train_acc,84.43054
Train_loss,0.73137
epoch,9.0
val_Accuracy,83.93333
val_loss,0.78327


[34m[1mwandb[0m: Agent Starting Run: 1i154ash with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: adam
[34m[1mwandb[0m: 	weight_decay: 0


0,1
Train_acc,▁▁▁▁▁
Train_loss,▁▁▁▁▁
epoch,▁▃▅▆█
val_Accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
Train_acc,7.06224
Train_loss,2.83865
epoch,4.0
val_Accuracy,6.76667
val_loss,2.85756


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mhwejke0 with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer_name: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
Train_acc,▁▅▆▆▇▇▇███
Train_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▄▅▆▆▇████
val_loss,█▅▄▃▂▂▁▁▁▁

0,1
Train_acc,89.6888
Train_loss,0.28365
epoch,9.0
val_Accuracy,87.83333
val_loss,0.33159
