<a href="https://colab.research.google.com/github/manglesh001/DL-assigment1/blob/main/DL_Ass1_Q3_Q4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [41]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import fashion_mnist
from sklearn.preprocessing import OneHotEncoder
import wandb

In [25]:
# Load the Fashion-MNIST dataset
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [26]:
x_train = x_train.reshape(x_train.shape[0], -1) / 255.0
x_test = x_test.reshape(x_test.shape[0], -1) / 255.0

In [27]:
# One-hot encode the labels
encoder = OneHotEncoder(sparse_output=False)
y_train = encoder.fit_transform(y_train.reshape(-1, 1))
y_test = encoder.transform(y_test.reshape(-1, 1))

In [28]:
# Activation functions and derivatives
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

In [29]:
#tanh activation and its derivative
def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1 - np.tanh(x) ** 2

In [30]:
#relu activation and its derivative

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

In [31]:
# Weight Initialization
def initialize_weights(layers, method="random"):
    weights = []
    biases = []
    for i in range(len(layers) - 1):
        if method == "xavier":
            weights.append(np.random.randn(layers[i], layers[i+1]) * np.sqrt(1 / layers[i]))
        else:  # random
            weights.append(np.random.randn(layers[i], layers[i+1]) * 0.01)
        biases.append(np.zeros((1, layers[i+1])))
    return weights, biases

In [32]:
# Forward Propagation
def forward_propagation(X, weights, biases, activation):
    activations = [X]
    zs = []

    for i in range(len(weights)):
        z = np.dot(activations[-1], weights[i]) + biases[i]
        zs.append(z)

        if activation[i] == "sigmoid":
            activations.append(sigmoid(z))
        elif activation[i] == "relu":
            activations.append(relu(z))
        elif activation[i] == "tanh":
            activations.append(tanh(z))

    return activations, zs

In [33]:
# Backpropagation
def backpropagation(y, activations, zs, weights, activation):
    gradients_w = [None] * len(weights)
    gradients_b = [None] * len(weights)

    # Output layer error
    error = activations[-1] - y

    for i in reversed(range(len(weights))):
        if activation[i] == "sigmoid":
            delta = error * sigmoid_derivative(activations[i+1])
        elif activation[i] == "relu":
            delta = error * relu_derivative(activations[i+1])
        elif activation[i] == "tanh":
            delta = error * tanh_derivative(activations[i+1])

        gradients_w[i] = np.dot(activations[i].T, delta)
        gradients_b[i] = np.sum(delta, axis=0, keepdims=True)

        error = np.dot(delta, weights[i].T)

    return gradients_w, gradients_b

In [34]:
#Optimizer Momentum
def momentum(weights, biases, gradients_w, gradients_b, lr, velocity, beta=0.9):
    for i in range(len(weights)):
        velocity[i] = beta * velocity[i] + (1 - beta) * gradients_w[i]
        weights[i] -= lr * velocity[i]
        biases[i] -= lr * gradients_b[i]
    return weights, biases, velocity

In [35]:
#Adam Optimizer
def adam(weights, biases, gradients_w, gradients_b, lr, m_w, v_w, m_b, v_b, beta1=0.9, beta2=0.999, epsilon=1e-8, t=1):
    for i in range(len(weights)):
        # Update for weights
        m_w[i] = beta1 * m_w[i] + (1 - beta1) * gradients_w[i]
        v_w[i] = beta2 * v_w[i] + (1 - beta2) * (gradients_w[i] ** 2)
        m_hat_w = m_w[i] / (1 - beta1 ** t)
        v_hat_w = v_w[i] / (1 - beta2 ** t)
        weights[i] -= lr * m_hat_w / (np.sqrt(v_hat_w) + epsilon)

        # Update for biases
        m_b[i] = beta1 * m_b[i] + (1 - beta1) * gradients_b[i]
        v_b[i] = beta2 * v_b[i] + (1 - beta2) * (gradients_b[i] ** 2)
        m_hat_b = m_b[i] / (1 - beta1 ** t)
        v_hat_b = v_b[i] / (1 - beta2 ** t)
        biases[i] -= lr * m_hat_b / (np.sqrt(v_hat_b) + epsilon)

    return weights, biases, m_w, v_w, m_b, v_b

In [36]:
#optimizer Nesterov
def nesterov(weights, biases, gradients_w, gradients_b, lr, velocity, beta=0.9):
    for i in range(len(weights)):
        temp_velocity = beta * velocity[i]
        weights[i] -= lr * (beta * temp_velocity + (1 - beta) * gradients_w[i])
        biases[i] -= lr * gradients_b[i]
        velocity[i] = temp_velocity + (1 - beta) * gradients_w[i]
    return weights, biases, velocity

In [37]:
#RMSProp Otimizer
def rmsprop(weights, biases, gradients_w, gradients_b, lr, cache_w, cache_b, beta=0.99, epsilon=1e-8):
    for i in range(len(weights)):
        # Update cache for weights
        cache_w[i] = beta * cache_w[i] + (1 - beta) * (gradients_w[i] ** 2)
        weights[i] -= lr * gradients_w[i] / (np.sqrt(cache_w[i]) + epsilon)

        # Update cache for biases
        cache_b[i] = beta * cache_b[i] + (1 - beta) * (gradients_b[i] ** 2)
        biases[i] -= lr * gradients_b[i] / (np.sqrt(cache_b[i]) + epsilon)

    return weights, biases, cache_w, cache_b


In [18]:
#Nadam Optimizer

# def nadam(weights, biases, gradients_w, gradients_b, lr, m, v, beta1=0.9, beta2=0.999, epsilon=1e-8, t=1):
#     for i in range(len(weights)):
#         m[i] = beta1 * m[i] + (1 - beta1) * gradients_w[i]
#         v[i] = beta2 * v[i] + (1 - beta2) * (gradients_w[i] ** 2)
#         m_hat = (beta1 * m[i] + (1 - beta1) * gradients_w[i]) / (1 - beta1 ** t)
#         v_hat = v[i] / (1 - beta2 ** t)
#         weights[i] -= lr * m_hat / (np.sqrt(v_hat) + epsilon)
#         biases[i] -= lr * gradients_b[i] / (np.sqrt(v_hat) + epsilon)
#     return weights, biases, m, v

In [38]:
# Optimizers SGD
def sgd(weights, biases, gradients_w, gradients_b, lr):
    for i in range(len(weights)):
        weights[i] -= lr * gradients_w[i]
        biases[i] -= lr * gradients_b[i]
    return weights, biases

In [40]:
#Load Fashion-MNIST dataset
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_train = X_train.reshape(X_train.shape[0], -1) / 255.0
X_test = X_test.reshape(X_test.shape[0], -1) / 255.0

y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]

In [42]:

def train_network(X_train, y_train, X_val, y_val, config):
    np.random.seed(42)
    layers = [X_train.shape[1]] + [config['hidden_size']] * config['hidden_layers'] + [10]
    activation = [config['activation']] * config['hidden_layers'] + ['sigmoid']

    weights, biases = initialize_weights(layers, config['weight_init'])
    optimizer = config['optimizer']

    velocity = [np.zeros_like(w) for w in weights]
    cache_w = [np.zeros_like(w) for w in weights]
    cache_b = [np.zeros_like(b) for b in biases]
    m_w = [np.zeros_like(w) for w in weights]  # For weights
    v_w = [np.zeros_like(w) for w in weights]  # For weights
    m_b = [np.zeros_like(b) for b in biases]   # For biases
    v_b = [np.zeros_like(b) for b in biases]   # For biases

    batch_size = config['batch_size']
    epochs = config['epochs']
    lr = config['learning_rate']

    for epoch in range(epochs):
        indices = np.random.permutation(X_train.shape[0])
        X_train_shuffled, y_train_shuffled = X_train[indices], y_train[indices]

        train_loss = 0
        train_correct = 0
        train_total = 0

        for i in range(0, X_train_shuffled.shape[0], batch_size):
            X_batch = X_train_shuffled[i:i+batch_size]
            y_batch = y_train_shuffled[i:i+batch_size]

            # Forward propagation
            activations, zs = forward_propagation(X_batch, weights, biases, activation)

            # Calculate training loss (cross-entropy loss)
            output = activations[-1]

            train_loss += -np.sum(y_batch * np.log(output + 1e-8)) / len(y_batch)
            #calculate mean squared error
           # train_loss += np.mean(np.square(activations[-1] - y_batch))

            # Calculate training accuracy
            train_preds = np.argmax(output, axis=1)
            train_true = np.argmax(y_batch, axis=1)
            train_correct += np.sum(train_preds == train_true)
            train_total += len(y_batch)

            # Backpropagation
            gradients_w, gradients_b = backpropagation(y_batch, activations, zs, weights, activation)

            # Update weights and biases based on optimizer
            if optimizer == 'sgd':
                weights, biases = sgd(weights, biases, gradients_w, gradients_b, lr)
            elif optimizer == 'momentum':
                weights, biases, velocity = momentum(weights, biases, gradients_w, gradients_b, lr, velocity)
            elif optimizer == 'nesterov':
                weights, biases, velocity = nesterov(weights, biases, gradients_w, gradients_b, lr, velocity)
            elif optimizer == 'rmsprop':
                weights, biases, cache_w, cache_b = rmsprop(weights, biases, gradients_w, gradients_b, lr, cache_w, cache_b)
            elif optimizer == 'adam':
                weights, biases, m_w, v_w, m_b, v_b = adam(weights, biases, gradients_w, gradients_b, lr, m_w, v_w, m_b, v_b, t=epoch+1)
            # elif optimizer == 'nadam':
            #     pass

        # Calculate average training loss and accuracy for the epoch
        train_loss /= (X_train_shuffled.shape[0] // batch_size)
        train_accuracy = train_correct / train_total

        # Validate model
        val_activations, _ = forward_propagation(X_val, weights, biases, activation)
        val_loss = -np.sum(y_val * np.log(val_activations[-1] + 1e-8)) / len(y_val)
        val_accuracy = np.mean(np.argmax(val_activations[-1], axis=1) == np.argmax(y_val, axis=1))

        # Log metrics to wandb
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_accuracy": train_accuracy,
            "val_loss": val_loss,
            "val_accuracy": val_accuracy
        })

        # Print metrics
        print(f"Epoch {epoch + 1}/{epochs}, "
              f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")

    return

In [43]:
# Split training data into train and validation
X_train, X_val = X_train[:54000], X_train[54000:]
y_train, y_val = y_train[:54000], y_train[54000:]

In [44]:
# Hyperparameter tuning with wandb
sweep_config = {
    'method': 'random',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        'epochs': {'values': [5, 10]},
        'hidden_layers': {'values': [3, 4, 5]},
        'hidden_size': {'values': [32, 64, 128]},
        'weight_decay': {'values': [0, 0.0005, 0.5]},
        'learning_rate': {'values': [1e-3, 1e-4]},
        'optimizer': {'values': ['sgd', 'momentum', 'nesterov', 'rmsprop', 'adam', 'nadam']},
        'batch_size': {'values': [16, 32, 64]},
        'weight_init': {'values': ['random', 'xavier']},
        'activation': {'values': ['sigmoid', 'tanh', 'relu']}
    }
}

In [45]:
#wandb initize and sweep rename  call train
def train():
    wandb.init()

    sweep_name = (
        f"hl_{wandb.config.hidden_layers}_"
        f"hs_{wandb.config.hidden_size}_"
        f"bs_{wandb.config.batch_size}_"
        f"ac_{wandb.config.activation}_"
        f"opt_{wandb.config.optimizer}_"
        f"lr_{wandb.config.learning_rate}_"
        f"init_{wandb.config.weight_init}"
    )
    wandb.run.name = sweep_name

    config = wandb.config
    train_network(X_train, y_train, X_val, y_val, config)

In [46]:
sweep_id = wandb.sweep(sweep_config, project="fashion-mnist")
wandb.agent(sweep_id, function=train, count=20)
wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: wk1e0itn
Sweep URL: https://wandb.ai/mangleshpatidar2233-iit-madras-alumni-association/fashion-mnist/sweeps/wk1e0itn


[34m[1mwandb[0m: Agent Starting Run: ilbxb9a6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier
[34m[1mwandb[0m: Currently logged in as: [33mmangleshpatidar2233[0m ([33mmangleshpatidar2233-iit-madras-alumni-association[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/5, Train Loss: 2.4835, Train Accuracy: 0.1443, Val Loss: 3.0635, Val Accuracy: 0.1773
Epoch 2/5, Train Loss: 2.9456, Train Accuracy: 0.2178, Val Loss: 2.8786, Val Accuracy: 0.1983
Epoch 3/5, Train Loss: 2.8599, Train Accuracy: 0.2136, Val Loss: 2.9924, Val Accuracy: 0.2512
Epoch 4/5, Train Loss: 3.0924, Train Accuracy: 0.3097, Val Loss: 3.2699, Val Accuracy: 0.3432
Epoch 5/5, Train Loss: 3.2640, Train Accuracy: 0.3722, Val Loss: 3.3433, Val Accuracy: 0.3995


0,1
epoch,▁▃▅▆█
train_accuracy,▁▃▃▆█
train_loss,▁▅▄▆█
val_accuracy,▁▂▃▆█
val_loss,▄▁▃▇█

0,1
epoch,5.0
train_accuracy,0.37219
train_loss,3.26395
val_accuracy,0.3995
val_loss,3.3433


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: aa9yj2oz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/5, Train Loss: 2.2451, Train Accuracy: 0.2535, Val Loss: 2.6418, Val Accuracy: 0.2653
Epoch 2/5, Train Loss: 2.5191, Train Accuracy: 0.2932, Val Loss: 2.4454, Val Accuracy: 0.3317
Epoch 3/5, Train Loss: 2.3627, Train Accuracy: 0.3862, Val Loss: 2.3435, Val Accuracy: 0.4438
Epoch 4/5, Train Loss: 2.2329, Train Accuracy: 0.4518, Val Loss: 2.1645, Val Accuracy: 0.4658
Epoch 5/5, Train Loss: 1.9482, Train Accuracy: 0.4996, Val Loss: 1.7302, Val Accuracy: 0.5773


0,1
epoch,▁▃▅▆█
train_accuracy,▁▂▅▇█
train_loss,▅█▆▄▁
val_accuracy,▁▂▅▅█
val_loss,█▆▆▄▁

0,1
epoch,5.0
train_accuracy,0.49965
train_loss,1.94822
val_accuracy,0.57733
val_loss,1.73018


[34m[1mwandb[0m: Agent Starting Run: f5pay41u with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/10, Train Loss: 7.8140, Train Accuracy: 0.2941, Val Loss: 8.1334, Val Accuracy: 0.4878
Epoch 2/10, Train Loss: 7.8656, Train Accuracy: 0.5053, Val Loss: 8.0094, Val Accuracy: 0.5010
Epoch 3/10, Train Loss: 6.9948, Train Accuracy: 0.5258, Val Loss: 6.2232, Val Accuracy: 0.5362
Epoch 4/10, Train Loss: 6.0058, Train Accuracy: 0.5619, Val Loss: 5.9453, Val Accuracy: 0.5717
Epoch 5/10, Train Loss: 3.7303, Train Accuracy: 0.6534, Val Loss: 2.4914, Val Accuracy: 0.7188
Epoch 6/10, Train Loss: 2.4497, Train Accuracy: 0.7320, Val Loss: 2.5210, Val Accuracy: 0.7180
Epoch 7/10, Train Loss: 2.4220, Train Accuracy: 0.7413, Val Loss: 2.4508, Val Accuracy: 0.7393
Epoch 8/10, Train Loss: 2.3977, Train Accuracy: 0.7470, Val Loss: 2.4220, Val Accuracy: 0.7450
Epoch 9/10, Train Loss: 2.3781, Train Accuracy: 0.7542, Val Loss: 2.4145, Val Accuracy: 0.7493
Epoch 10/10, Train Loss: 2.3547, Train Accuracy: 0.7601, Val Loss: 2.3942, Val Accuracy: 0.7522


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▄▅▆█████
train_loss,██▇▆▃▁▁▁▁▁
val_accuracy,▁▁▂▃▇▇████
val_loss,██▆▅▁▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.76011
train_loss,2.35467
val_accuracy,0.75217
val_loss,2.39421


[34m[1mwandb[0m: Agent Starting Run: szuqd6oc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/10, Train Loss: 2.2026, Train Accuracy: 0.1307, Val Loss: 1.7082, Val Accuracy: 0.3267
Epoch 2/10, Train Loss: 1.3253, Train Accuracy: 0.4286, Val Loss: 1.3040, Val Accuracy: 0.4367
Epoch 3/10, Train Loss: 2.0982, Train Accuracy: 0.3261, Val Loss: 1.6460, Val Accuracy: 0.4367
Epoch 4/10, Train Loss: 1.4616, Train Accuracy: 0.4279, Val Loss: 1.2409, Val Accuracy: 0.4893
Epoch 5/10, Train Loss: 1.2000, Train Accuracy: 0.5210, Val Loss: 1.2725, Val Accuracy: 0.5313
Epoch 6/10, Train Loss: 1.3234, Train Accuracy: 0.5135, Val Loss: 1.2385, Val Accuracy: 0.5408
Epoch 7/10, Train Loss: 1.2385, Train Accuracy: 0.5476, Val Loss: 1.1727, Val Accuracy: 0.5627
Epoch 8/10, Train Loss: 1.2335, Train Accuracy: 0.5657, Val Loss: 1.1289, Val Accuracy: 0.5927
Epoch 9/10, Train Loss: 1.2154, Train Accuracy: 0.5692, Val Loss: 1.2755, Val Accuracy: 0.5490
Epoch 10/10, Train Loss: 1.3463, Train Accuracy: 0.5340, Val Loss: 1.2475, Val Accuracy: 0.5742


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▄▆▇▇███▇
train_loss,█▂▇▃▁▂▁▁▁▂
val_accuracy,▁▄▄▅▆▇▇█▇█
val_loss,█▃▇▂▃▂▂▁▃▂

0,1
epoch,10.0
train_accuracy,0.53398
train_loss,1.34634
val_accuracy,0.57417
val_loss,1.24749


[34m[1mwandb[0m: Agent Starting Run: g5s2hnf2 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/10, Train Loss: 1.6857, Train Accuracy: 0.3598, Val Loss: 1.1366, Val Accuracy: 0.6228
Epoch 2/10, Train Loss: 0.8858, Train Accuracy: 0.7148, Val Loss: 0.7450, Val Accuracy: 0.7567
Epoch 3/10, Train Loss: 0.6767, Train Accuracy: 0.7745, Val Loss: 0.6289, Val Accuracy: 0.7903
Epoch 4/10, Train Loss: 0.6021, Train Accuracy: 0.7986, Val Loss: 0.5693, Val Accuracy: 0.8083
Epoch 5/10, Train Loss: 0.5559, Train Accuracy: 0.8228, Val Loss: 0.5227, Val Accuracy: 0.8257
Epoch 6/10, Train Loss: 0.5233, Train Accuracy: 0.8367, Val Loss: 0.5313, Val Accuracy: 0.8327
Epoch 7/10, Train Loss: 0.5043, Train Accuracy: 0.8445, Val Loss: 0.5048, Val Accuracy: 0.8425
Epoch 8/10, Train Loss: 0.4913, Train Accuracy: 0.8503, Val Loss: 0.4747, Val Accuracy: 0.8468
Epoch 9/10, Train Loss: 0.4789, Train Accuracy: 0.8537, Val Loss: 0.4837, Val Accuracy: 0.8473
Epoch 10/10, Train Loss: 0.4706, Train Accuracy: 0.8571, Val Loss: 0.5074, Val Accuracy: 0.8417


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇██████
train_loss,█▃▂▂▁▁▁▁▁▁
val_accuracy,▁▅▆▇▇█████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.85706
train_loss,0.47059
val_accuracy,0.84167
val_loss,0.5074


[34m[1mwandb[0m: Agent Starting Run: ysy2e8lt with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/10, Train Loss: 2.2247, Train Accuracy: 0.0998, Val Loss: 2.3035, Val Accuracy: 0.1003
Epoch 2/10, Train Loss: 2.3049, Train Accuracy: 0.1006, Val Loss: 2.3036, Val Accuracy: 0.1027
Epoch 3/10, Train Loss: 2.3049, Train Accuracy: 0.1000, Val Loss: 2.3035, Val Accuracy: 0.0925
Epoch 4/10, Train Loss: 2.3050, Train Accuracy: 0.0984, Val Loss: 2.3030, Val Accuracy: 0.1027
Epoch 5/10, Train Loss: 2.3048, Train Accuracy: 0.1016, Val Loss: 2.3040, Val Accuracy: 0.1050
Epoch 6/10, Train Loss: 2.3050, Train Accuracy: 0.0991, Val Loss: 2.3035, Val Accuracy: 0.1055
Epoch 7/10, Train Loss: 2.3049, Train Accuracy: 0.0990, Val Loss: 2.3036, Val Accuracy: 0.1008
Epoch 8/10, Train Loss: 2.3050, Train Accuracy: 0.0975, Val Loss: 2.3030, Val Accuracy: 0.1008
Epoch 9/10, Train Loss: 2.3050, Train Accuracy: 0.0971, Val Loss: 2.3035, Val Accuracy: 0.1003
Epoch 10/10, Train Loss: 2.3048, Train Accuracy: 0.1010, Val Loss: 2.3040, Val Accuracy: 0.0942


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▅▇▆▃█▄▄▂▁▇
train_loss,▁█████████
val_accuracy,▅▆▁▆██▅▅▅▂
val_loss,▅▅▄▁█▄▅▁▅█

0,1
epoch,10.0
train_accuracy,0.10104
train_loss,2.30482
val_accuracy,0.09417
val_loss,2.30396


[34m[1mwandb[0m: Agent Starting Run: 2i6mwy7b with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/10, Train Loss: 0.6936, Train Accuracy: 0.1382, Val Loss: 0.6931, Val Accuracy: 0.1417
Epoch 2/10, Train Loss: 0.6936, Train Accuracy: 0.1382, Val Loss: 0.6931, Val Accuracy: 0.1417
Epoch 3/10, Train Loss: 0.6936, Train Accuracy: 0.1382, Val Loss: 0.6931, Val Accuracy: 0.1417
Epoch 4/10, Train Loss: 0.6936, Train Accuracy: 0.1382, Val Loss: 0.6931, Val Accuracy: 0.1417
Epoch 5/10, Train Loss: 0.6936, Train Accuracy: 0.1382, Val Loss: 0.6931, Val Accuracy: 0.1417
Epoch 6/10, Train Loss: 0.6936, Train Accuracy: 0.1382, Val Loss: 0.6931, Val Accuracy: 0.1417
Epoch 7/10, Train Loss: 0.6936, Train Accuracy: 0.1382, Val Loss: 0.6931, Val Accuracy: 0.1417
Epoch 8/10, Train Loss: 0.6936, Train Accuracy: 0.1382, Val Loss: 0.6931, Val Accuracy: 0.1417
Epoch 9/10, Train Loss: 0.6936, Train Accuracy: 0.1382, Val Loss: 0.6931, Val Accuracy: 0.1417
Epoch 10/10, Train Loss: 0.6936, Train Accuracy: 0.1382, Val Loss: 0.6931, Val Accuracy: 0.1417


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.13824
train_loss,0.69356
val_accuracy,0.14167
val_loss,0.69315


[34m[1mwandb[0m: Agent Starting Run: t9zeem8h with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/5, Train Loss: 0.8237, Train Accuracy: 0.0987, Val Loss: 0.9454, Val Accuracy: 0.0942
Epoch 2/5, Train Loss: 1.0531, Train Accuracy: 0.0996, Val Loss: 1.1514, Val Accuracy: 0.0942
Epoch 3/5, Train Loss: 1.2416, Train Accuracy: 0.1006, Val Loss: 1.3258, Val Accuracy: 0.0942
Epoch 4/5, Train Loss: 1.4092, Train Accuracy: 0.1006, Val Loss: 1.4887, Val Accuracy: 0.0942
Epoch 5/5, Train Loss: 1.5693, Train Accuracy: 0.1006, Val Loss: 1.6450, Val Accuracy: 0.0942


0,1
epoch,▁▃▅▆█
train_accuracy,▁▄███
train_loss,▁▃▅▆█
val_accuracy,▁▁▁▁▁
val_loss,▁▃▅▆█

0,1
epoch,5.0
train_accuracy,0.10065
train_loss,1.56932
val_accuracy,0.09417
val_loss,1.64496


[34m[1mwandb[0m: Agent Starting Run: 9rao6cch with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/10, Train Loss: 0.8584, Train Accuracy: 0.7022, Val Loss: 0.5918, Val Accuracy: 0.7868
Epoch 2/10, Train Loss: 0.5558, Train Accuracy: 0.8157, Val Loss: 0.5226, Val Accuracy: 0.8258
Epoch 3/10, Train Loss: 0.5165, Train Accuracy: 0.8331, Val Loss: 0.5008, Val Accuracy: 0.8365
Epoch 4/10, Train Loss: 0.4972, Train Accuracy: 0.8427, Val Loss: 0.4882, Val Accuracy: 0.8433
Epoch 5/10, Train Loss: 0.4845, Train Accuracy: 0.8473, Val Loss: 0.4696, Val Accuracy: 0.8462
Epoch 6/10, Train Loss: 0.4747, Train Accuracy: 0.8519, Val Loss: 0.4583, Val Accuracy: 0.8465
Epoch 7/10, Train Loss: 0.4638, Train Accuracy: 0.8556, Val Loss: 0.4665, Val Accuracy: 0.8503
Epoch 8/10, Train Loss: 0.4565, Train Accuracy: 0.8584, Val Loss: 0.4597, Val Accuracy: 0.8495
Epoch 9/10, Train Loss: 0.4484, Train Accuracy: 0.8610, Val Loss: 0.4559, Val Accuracy: 0.8560
Epoch 10/10, Train Loss: 0.4433, Train Accuracy: 0.8639, Val Loss: 0.4462, Val Accuracy: 0.8567


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇▇▇████
train_loss,█▃▂▂▂▂▁▁▁▁
val_accuracy,▁▅▆▇▇▇▇▇██
val_loss,█▅▄▃▂▂▂▂▁▁

0,1
epoch,10.0
train_accuracy,0.86385
train_loss,0.44325
val_accuracy,0.85667
val_loss,0.44617


[34m[1mwandb[0m: Agent Starting Run: tywmcyy4 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/10, Train Loss: 2.7023, Train Accuracy: 0.4480, Val Loss: 2.8947, Val Accuracy: 0.6653
Epoch 2/10, Train Loss: 2.6767, Train Accuracy: 0.6878, Val Loss: 2.5356, Val Accuracy: 0.6902
Epoch 3/10, Train Loss: 1.8328, Train Accuracy: 0.7461, Val Loss: 1.6741, Val Accuracy: 0.7650
Epoch 4/10, Train Loss: 1.4944, Train Accuracy: 0.7792, Val Loss: 1.5522, Val Accuracy: 0.7753
Epoch 5/10, Train Loss: 1.4013, Train Accuracy: 0.7860, Val Loss: 1.3719, Val Accuracy: 0.7798
Epoch 6/10, Train Loss: 1.3208, Train Accuracy: 0.7920, Val Loss: 1.3270, Val Accuracy: 0.7860
Epoch 7/10, Train Loss: 1.2193, Train Accuracy: 0.7957, Val Loss: 1.2117, Val Accuracy: 0.7892
Epoch 8/10, Train Loss: 0.9734, Train Accuracy: 0.8061, Val Loss: 0.5560, Val Accuracy: 0.8462
Epoch 9/10, Train Loss: 0.4697, Train Accuracy: 0.8558, Val Loss: 0.4750, Val Accuracy: 0.8535
Epoch 10/10, Train Loss: 0.4412, Train Accuracy: 0.8611, Val Loss: 0.4300, Val Accuracy: 0.8608


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▇▇▇▇▇██
train_loss,██▅▄▄▄▃▃▁▁
val_accuracy,▁▂▅▅▅▅▅▇██
val_loss,█▇▅▄▄▄▃▁▁▁

0,1
epoch,10.0
train_accuracy,0.86109
train_loss,0.44116
val_accuracy,0.86083
val_loss,0.42997


[34m[1mwandb[0m: Agent Starting Run: bcrlz37c with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/10, Train Loss: 1.8288, Train Accuracy: 0.4462, Val Loss: 0.7728, Val Accuracy: 0.7248
Epoch 2/10, Train Loss: 0.6548, Train Accuracy: 0.7821, Val Loss: 0.4965, Val Accuracy: 0.8198
Epoch 3/10, Train Loss: 0.5551, Train Accuracy: 0.8241, Val Loss: 0.5604, Val Accuracy: 0.8218
Epoch 4/10, Train Loss: 0.5059, Train Accuracy: 0.8380, Val Loss: 0.5087, Val Accuracy: 0.8387
Epoch 5/10, Train Loss: 0.4793, Train Accuracy: 0.8488, Val Loss: 0.4802, Val Accuracy: 0.8503
Epoch 6/10, Train Loss: 0.4520, Train Accuracy: 0.8571, Val Loss: 0.4369, Val Accuracy: 0.8578
Epoch 7/10, Train Loss: 0.4338, Train Accuracy: 0.8635, Val Loss: 0.4352, Val Accuracy: 0.8585
Epoch 8/10, Train Loss: 0.4184, Train Accuracy: 0.8676, Val Loss: 0.4632, Val Accuracy: 0.8440
Epoch 9/10, Train Loss: 0.4045, Train Accuracy: 0.8730, Val Loss: 0.4010, Val Accuracy: 0.8643
Epoch 10/10, Train Loss: 0.3959, Train Accuracy: 0.8754, Val Loss: 0.4074, Val Accuracy: 0.8635


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇██████
train_loss,█▂▂▂▁▁▁▁▁▁
val_accuracy,▁▆▆▇▇██▇██
val_loss,█▃▄▃▂▂▂▂▁▁

0,1
epoch,10.0
train_accuracy,0.87535
train_loss,0.39592
val_accuracy,0.8635
val_loss,0.40744


[34m[1mwandb[0m: Agent Starting Run: 0z8v8dcv with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/5, Train Loss: 0.8248, Train Accuracy: 0.0995, Val Loss: 0.9513, Val Accuracy: 0.0925
Epoch 2/5, Train Loss: 1.0711, Train Accuracy: 0.1008, Val Loss: 1.1916, Val Accuracy: 0.0925
Epoch 3/5, Train Loss: 1.3181, Train Accuracy: 0.1007, Val Loss: 1.4470, Val Accuracy: 0.0925
Epoch 4/5, Train Loss: 1.5703, Train Accuracy: 0.1002, Val Loss: 1.6866, Val Accuracy: 0.0925
Epoch 5/5, Train Loss: 1.7834, Train Accuracy: 0.1008, Val Loss: 1.8703, Val Accuracy: 0.0925


0,1
epoch,▁▃▅▆█
train_accuracy,▁█▇▅█
train_loss,▁▃▅▆█
val_accuracy,▁▁▁▁▁
val_loss,▁▃▅▇█

0,1
epoch,5.0
train_accuracy,0.10083
train_loss,1.78339
val_accuracy,0.0925
val_loss,1.87034


[34m[1mwandb[0m: Agent Starting Run: ab5y7d4f with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/5, Train Loss: 2.2285, Train Accuracy: 0.0988, Val Loss: 2.3035, Val Accuracy: 0.0985
Epoch 2/5, Train Loss: 2.3048, Train Accuracy: 0.0990, Val Loss: 2.3035, Val Accuracy: 0.1027
Epoch 3/5, Train Loss: 2.3049, Train Accuracy: 0.0983, Val Loss: 2.3037, Val Accuracy: 0.0985
Epoch 4/5, Train Loss: 2.3048, Train Accuracy: 0.0983, Val Loss: 2.3035, Val Accuracy: 0.0925
Epoch 5/5, Train Loss: 2.3049, Train Accuracy: 0.0992, Val Loss: 2.3033, Val Accuracy: 0.1055


0,1
epoch,▁▃▅▆█
train_accuracy,▅▇▁▁█
train_loss,▁████
val_accuracy,▄▆▄▁█
val_loss,▅▄█▅▁

0,1
epoch,5.0
train_accuracy,0.0992
train_loss,2.30485
val_accuracy,0.1055
val_loss,2.30332


[34m[1mwandb[0m: Agent Starting Run: tamhatpx with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/5, Train Loss: 0.8050, Train Accuracy: 0.7199, Val Loss: 0.5479, Val Accuracy: 0.8138
Epoch 2/5, Train Loss: 0.5148, Train Accuracy: 0.8273, Val Loss: 0.4758, Val Accuracy: 0.8368
Epoch 3/5, Train Loss: 0.4670, Train Accuracy: 0.8434, Val Loss: 0.4564, Val Accuracy: 0.8447
Epoch 4/5, Train Loss: 0.4438, Train Accuracy: 0.8520, Val Loss: 0.4287, Val Accuracy: 0.8462
Epoch 5/5, Train Loss: 0.4285, Train Accuracy: 0.8579, Val Loss: 0.4239, Val Accuracy: 0.8505


0,1
epoch,▁▃▅▆█
train_accuracy,▁▆▇██
train_loss,█▃▂▁▁
val_accuracy,▁▅▇▇█
val_loss,█▄▃▁▁

0,1
epoch,5.0
train_accuracy,0.85785
train_loss,0.42851
val_accuracy,0.8505
val_loss,0.42392


[34m[1mwandb[0m: Agent Starting Run: lr24wepr with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/5, Train Loss: 0.7504, Train Accuracy: 0.7381, Val Loss: 0.5449, Val Accuracy: 0.8097
Epoch 2/5, Train Loss: 0.5186, Train Accuracy: 0.8229, Val Loss: 0.4671, Val Accuracy: 0.8295
Epoch 3/5, Train Loss: 0.4735, Train Accuracy: 0.8390, Val Loss: 0.4536, Val Accuracy: 0.8413
Epoch 4/5, Train Loss: 0.4504, Train Accuracy: 0.8473, Val Loss: 0.4401, Val Accuracy: 0.8450
Epoch 5/5, Train Loss: 0.4352, Train Accuracy: 0.8531, Val Loss: 0.4298, Val Accuracy: 0.8502


0,1
epoch,▁▃▅▆█
train_accuracy,▁▆▇██
train_loss,█▃▂▁▁
val_accuracy,▁▄▆▇█
val_loss,█▃▂▂▁

0,1
epoch,5.0
train_accuracy,0.85309
train_loss,0.43523
val_accuracy,0.85017
val_loss,0.42982


[34m[1mwandb[0m: Agent Starting Run: 9ke2awfh with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/10, Train Loss: 2.2579, Train Accuracy: 0.1009, Val Loss: 2.3053, Val Accuracy: 0.0925
Epoch 2/10, Train Loss: 2.3057, Train Accuracy: 0.1009, Val Loss: 2.3053, Val Accuracy: 0.1003
Epoch 3/10, Train Loss: 2.3058, Train Accuracy: 0.0982, Val Loss: 2.3040, Val Accuracy: 0.1050
Epoch 4/10, Train Loss: 2.3058, Train Accuracy: 0.1007, Val Loss: 2.3044, Val Accuracy: 0.0973
Epoch 5/10, Train Loss: 2.3057, Train Accuracy: 0.1002, Val Loss: 2.3038, Val Accuracy: 0.1032
Epoch 6/10, Train Loss: 2.3058, Train Accuracy: 0.1000, Val Loss: 2.3047, Val Accuracy: 0.0925
Epoch 7/10, Train Loss: 2.3058, Train Accuracy: 0.0988, Val Loss: 2.3049, Val Accuracy: 0.0925
Epoch 8/10, Train Loss: 2.3059, Train Accuracy: 0.0982, Val Loss: 2.3041, Val Accuracy: 0.1055
Epoch 9/10, Train Loss: 2.3057, Train Accuracy: 0.0980, Val Loss: 2.3052, Val Accuracy: 0.0942
Epoch 10/10, Train Loss: 2.3056, Train Accuracy: 0.1010, Val Loss: 2.3060, Val Accuracy: 0.1008


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,██▁▇▆▆▃▁▁█
train_loss,▁█████████
val_accuracy,▁▅█▄▇▁▁█▂▅
val_loss,▆▆▂▃▁▄▅▂▆█

0,1
epoch,10.0
train_accuracy,0.10098
train_loss,2.30561
val_accuracy,0.10083
val_loss,2.30597


[34m[1mwandb[0m: Agent Starting Run: g0rg8tqp with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/10, Train Loss: 4.2955, Train Accuracy: 0.1019, Val Loss: 5.8954, Val Accuracy: 0.0973
Epoch 2/10, Train Loss: 6.5644, Train Accuracy: 0.1279, Val Loss: 7.2184, Val Accuracy: 0.1122
Epoch 3/10, Train Loss: 7.4028, Train Accuracy: 0.2098, Val Loss: 7.9764, Val Accuracy: 0.2153
Epoch 4/10, Train Loss: 7.8283, Train Accuracy: 0.2661, Val Loss: 7.9811, Val Accuracy: 0.2822
Epoch 5/10, Train Loss: 7.7104, Train Accuracy: 0.3603, Val Loss: 7.9413, Val Accuracy: 0.4372
Epoch 6/10, Train Loss: 7.7409, Train Accuracy: 0.4619, Val Loss: 8.0213, Val Accuracy: 0.4688
Epoch 7/10, Train Loss: 7.8120, Train Accuracy: 0.4815, Val Loss: 8.0732, Val Accuracy: 0.4798
Epoch 8/10, Train Loss: 7.8431, Train Accuracy: 0.4885, Val Loss: 8.0716, Val Accuracy: 0.4855
Epoch 9/10, Train Loss: 7.8477, Train Accuracy: 0.4921, Val Loss: 8.0704, Val Accuracy: 0.4893
Epoch 10/10, Train Loss: 7.8407, Train Accuracy: 0.4954, Val Loss: 8.0571, Val Accuracy: 0.4928


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▁▃▄▆▇████
train_loss,▁▅▇███████
val_accuracy,▁▁▃▄▇█████
val_loss,▁▅████████

0,1
epoch,10.0
train_accuracy,0.49544
train_loss,7.84074
val_accuracy,0.49283
val_loss,8.05712


[34m[1mwandb[0m: Agent Starting Run: ba2lt5my with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/10, Train Loss: 1.5865, Train Accuracy: 0.3056, Val Loss: 1.4053, Val Accuracy: 0.4418
Epoch 2/10, Train Loss: 1.1374, Train Accuracy: 0.5966, Val Loss: 0.9395, Val Accuracy: 0.6755
Epoch 3/10, Train Loss: 0.8494, Train Accuracy: 0.6960, Val Loss: 0.7809, Val Accuracy: 0.7343
Epoch 4/10, Train Loss: 0.7633, Train Accuracy: 0.7314, Val Loss: 0.7261, Val Accuracy: 0.7563
Epoch 5/10, Train Loss: 0.7278, Train Accuracy: 0.7493, Val Loss: 0.7244, Val Accuracy: 0.7545
Epoch 6/10, Train Loss: 0.7219, Train Accuracy: 0.7537, Val Loss: 0.7264, Val Accuracy: 0.7545
Epoch 7/10, Train Loss: 0.7229, Train Accuracy: 0.7544, Val Loss: 0.7097, Val Accuracy: 0.7610
Epoch 8/10, Train Loss: 0.7131, Train Accuracy: 0.7534, Val Loss: 0.6933, Val Accuracy: 0.7652
Epoch 9/10, Train Loss: 0.6964, Train Accuracy: 0.7601, Val Loss: 0.6896, Val Accuracy: 0.7662
Epoch 10/10, Train Loss: 0.6908, Train Accuracy: 0.7666, Val Loss: 0.6884, Val Accuracy: 0.7752


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▇▇██████
train_loss,█▄▂▂▁▁▁▁▁▁
val_accuracy,▁▆▇███████
val_loss,█▃▂▁▁▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.76657
train_loss,0.69081
val_accuracy,0.77517
val_loss,0.68844


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zlo99bhw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/5, Train Loss: 0.6587, Train Accuracy: 0.1062, Val Loss: 0.6575, Val Accuracy: 0.1103
Epoch 2/5, Train Loss: 0.6587, Train Accuracy: 0.1062, Val Loss: 0.6575, Val Accuracy: 0.1103
Epoch 3/5, Train Loss: 0.6587, Train Accuracy: 0.1062, Val Loss: 0.6575, Val Accuracy: 0.1103
Epoch 4/5, Train Loss: 0.6587, Train Accuracy: 0.1062, Val Loss: 0.6575, Val Accuracy: 0.1103
Epoch 5/5, Train Loss: 0.6587, Train Accuracy: 0.1062, Val Loss: 0.6575, Val Accuracy: 0.1103


0,1
epoch,▁▃▅▆█
train_accuracy,▁▁▁▁▁
train_loss,▂▂▁▆█
val_accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
epoch,5.0
train_accuracy,0.10622
train_loss,0.65874
val_accuracy,0.11033
val_loss,0.65752


[34m[1mwandb[0m: Agent Starting Run: 3vu6pza9 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/5, Train Loss: 1.6266, Train Accuracy: 0.0986, Val Loss: 2.0157, Val Accuracy: 0.0985
Epoch 2/5, Train Loss: 2.1289, Train Accuracy: 0.0987, Val Loss: 2.2037, Val Accuracy: 0.0942
Epoch 3/5, Train Loss: 2.2389, Train Accuracy: 0.0987, Val Loss: 2.2643, Val Accuracy: 0.0942
Epoch 4/5, Train Loss: 2.2774, Train Accuracy: 0.1000, Val Loss: 2.2873, Val Accuracy: 0.0925
Epoch 5/5, Train Loss: 2.2924, Train Accuracy: 0.1008, Val Loss: 2.2965, Val Accuracy: 0.0925


0,1
epoch,▁▃▅▆█
train_accuracy,▁▁▁▅█
train_loss,▁▆▇██
val_accuracy,█▃▃▁▁
val_loss,▁▆▇██

0,1
epoch,5.0
train_accuracy,0.10081
train_loss,2.2924
val_accuracy,0.0925
val_loss,2.29646
