<a href="https://colab.research.google.com/github/manglesh001/DL-assigment1/blob/main/DL_Ass1_Q8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import fashion_mnist
from sklearn.preprocessing import OneHotEncoder
import wandb

In [2]:
# Load the Fashion-MNIST dataset
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
x_train = x_train.reshape(x_train.shape[0], -1) / 255.0
x_test = x_test.reshape(x_test.shape[0], -1) / 255.0

In [4]:
# One-hot encode the labels
encoder = OneHotEncoder(sparse_output=False)
y_train = encoder.fit_transform(y_train.reshape(-1, 1))
y_test = encoder.transform(y_test.reshape(-1, 1))

In [5]:
# Activation functions and derivatives
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

In [6]:
#tanh activation and its derivative
def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1 - np.tanh(x) ** 2

In [7]:
#relu activation and its derivative

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

In [8]:
# Weight Initialization
def initialize_weights(layers, method="random"):
    weights = []
    biases = []
    for i in range(len(layers) - 1):
        if method == "xavier":
            weights.append(np.random.randn(layers[i], layers[i+1]) * np.sqrt(1 / layers[i]))
        else:  # random
            weights.append(np.random.randn(layers[i], layers[i+1]) * 0.01)
        biases.append(np.zeros((1, layers[i+1])))
    return weights, biases

In [9]:
# Forward Propagation
def forward_propagation(X, weights, biases, activation):
    activations = [X]
    zs = []

    for i in range(len(weights)):
        z = np.dot(activations[-1], weights[i]) + biases[i]
        zs.append(z)

        if activation[i] == "sigmoid":
            activations.append(sigmoid(z))
        elif activation[i] == "relu":
            activations.append(relu(z))
        elif activation[i] == "tanh":
            activations.append(tanh(z))

    return activations, zs

In [10]:
# Backpropagation
def backpropagation(y, activations, zs, weights, activation):
    gradients_w = [None] * len(weights)
    gradients_b = [None] * len(weights)

    # Output layer error
    error = activations[-1] - y

    for i in reversed(range(len(weights))):
        if activation[i] == "sigmoid":
            delta = error * sigmoid_derivative(activations[i+1])
        elif activation[i] == "relu":
            delta = error * relu_derivative(activations[i+1])
        elif activation[i] == "tanh":
            delta = error * tanh_derivative(activations[i+1])

        gradients_w[i] = np.dot(activations[i].T, delta)
        gradients_b[i] = np.sum(delta, axis=0, keepdims=True)

        error = np.dot(delta, weights[i].T)

    return gradients_w, gradients_b

In [11]:
#Optimizer Momentum
def momentum(weights, biases, gradients_w, gradients_b, lr, velocity, beta=0.9):
    for i in range(len(weights)):
        velocity[i] = beta * velocity[i] + (1 - beta) * gradients_w[i]
        weights[i] -= lr * velocity[i]
        biases[i] -= lr * gradients_b[i]
    return weights, biases, velocity

In [12]:
#Adam Optimizer
def adam(weights, biases, gradients_w, gradients_b, lr, m_w, v_w, m_b, v_b, beta1=0.9, beta2=0.999, epsilon=1e-8, t=1):
    for i in range(len(weights)):
        # Update for weights
        m_w[i] = beta1 * m_w[i] + (1 - beta1) * gradients_w[i]
        v_w[i] = beta2 * v_w[i] + (1 - beta2) * (gradients_w[i] ** 2)
        m_hat_w = m_w[i] / (1 - beta1 ** t)
        v_hat_w = v_w[i] / (1 - beta2 ** t)
        weights[i] -= lr * m_hat_w / (np.sqrt(v_hat_w) + epsilon)

        # Update for biases
        m_b[i] = beta1 * m_b[i] + (1 - beta1) * gradients_b[i]
        v_b[i] = beta2 * v_b[i] + (1 - beta2) * (gradients_b[i] ** 2)
        m_hat_b = m_b[i] / (1 - beta1 ** t)
        v_hat_b = v_b[i] / (1 - beta2 ** t)
        biases[i] -= lr * m_hat_b / (np.sqrt(v_hat_b) + epsilon)

    return weights, biases, m_w, v_w, m_b, v_b

In [13]:
#optimizer Nesterov
def nesterov(weights, biases, gradients_w, gradients_b, lr, velocity, beta=0.9):
    for i in range(len(weights)):
        temp_velocity = beta * velocity[i]
        weights[i] -= lr * (beta * temp_velocity + (1 - beta) * gradients_w[i])
        biases[i] -= lr * gradients_b[i]
        velocity[i] = temp_velocity + (1 - beta) * gradients_w[i]
    return weights, biases, velocity

In [14]:
#RMSProp Otimizer
def rmsprop(weights, biases, gradients_w, gradients_b, lr, cache_w, cache_b, beta=0.99, epsilon=1e-8):
    for i in range(len(weights)):
        # Update cache for weights
        cache_w[i] = beta * cache_w[i] + (1 - beta) * (gradients_w[i] ** 2)
        weights[i] -= lr * gradients_w[i] / (np.sqrt(cache_w[i]) + epsilon)

        # Update cache for biases
        cache_b[i] = beta * cache_b[i] + (1 - beta) * (gradients_b[i] ** 2)
        biases[i] -= lr * gradients_b[i] / (np.sqrt(cache_b[i]) + epsilon)

    return weights, biases, cache_w, cache_b


In [15]:
#Nadam Optimizer

# def nadam(weights, biases, gradients_w, gradients_b, lr, m, v, beta1=0.9, beta2=0.999, epsilon=1e-8, t=1):
#     for i in range(len(weights)):
#         m[i] = beta1 * m[i] + (1 - beta1) * gradients_w[i]
#         v[i] = beta2 * v[i] + (1 - beta2) * (gradients_w[i] ** 2)
#         m_hat = (beta1 * m[i] + (1 - beta1) * gradients_w[i]) / (1 - beta1 ** t)
#         v_hat = v[i] / (1 - beta2 ** t)
#         weights[i] -= lr * m_hat / (np.sqrt(v_hat) + epsilon)
#         biases[i] -= lr * gradients_b[i] / (np.sqrt(v_hat) + epsilon)
#     return weights, biases, m, v

In [16]:
# Optimizers SGD
def sgd(weights, biases, gradients_w, gradients_b, lr):
    for i in range(len(weights)):
        weights[i] -= lr * gradients_w[i]
        biases[i] -= lr * gradients_b[i]
    return weights, biases

In [17]:
#Load Fashion-MNIST dataset
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_train = X_train.reshape(X_train.shape[0], -1) / 255.0
X_test = X_test.reshape(X_test.shape[0], -1) / 255.0

y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]

In [18]:

def train_network(X_train, y_train, X_val, y_val, config):
    np.random.seed(42)
    layers = [X_train.shape[1]] + [config['hidden_size']] * config['hidden_layers'] + [10]
    activation = [config['activation']] * config['hidden_layers'] + ['sigmoid']

    weights, biases = initialize_weights(layers, config['weight_init'])
    optimizer = config['optimizer']

    velocity = [np.zeros_like(w) for w in weights]
    cache_w = [np.zeros_like(w) for w in weights]
    cache_b = [np.zeros_like(b) for b in biases]
    m_w = [np.zeros_like(w) for w in weights]  # For weights
    v_w = [np.zeros_like(w) for w in weights]  # For weights
    m_b = [np.zeros_like(b) for b in biases]   # For biases
    v_b = [np.zeros_like(b) for b in biases]   # For biases

    batch_size = config['batch_size']
    epochs = config['epochs']
    lr = config['learning_rate']

    for epoch in range(epochs):
        indices = np.random.permutation(X_train.shape[0])
        X_train_shuffled, y_train_shuffled = X_train[indices], y_train[indices]

        train_loss = 0
        train_correct = 0
        train_total = 0

        for i in range(0, X_train_shuffled.shape[0], batch_size):
            X_batch = X_train_shuffled[i:i+batch_size]
            y_batch = y_train_shuffled[i:i+batch_size]

            # Forward propagation
            activations, zs = forward_propagation(X_batch, weights, biases, activation)

            # Calculate training loss (cross-entropy loss)
            output = activations[-1]

            #train_loss += -np.sum(y_batch * np.log(output + 1e-8)) / len(y_batch)
            #calculate mean squared error
            train_loss += np.mean(np.square(activations[-1] - y_batch))

            # Calculate training accuracy
            train_preds = np.argmax(output, axis=1)
            train_true = np.argmax(y_batch, axis=1)
            train_correct += np.sum(train_preds == train_true)
            train_total += len(y_batch)

            # Backpropagation
            gradients_w, gradients_b = backpropagation(y_batch, activations, zs, weights, activation)

            # Update weights and biases based on optimizer
            if optimizer == 'sgd':
                weights, biases = sgd(weights, biases, gradients_w, gradients_b, lr)
            elif optimizer == 'momentum':
                weights, biases, velocity = momentum(weights, biases, gradients_w, gradients_b, lr, velocity)
            elif optimizer == 'nesterov':
                weights, biases, velocity = nesterov(weights, biases, gradients_w, gradients_b, lr, velocity)
            elif optimizer == 'rmsprop':
                weights, biases, cache_w, cache_b = rmsprop(weights, biases, gradients_w, gradients_b, lr, cache_w, cache_b)
            elif optimizer == 'adam':
                weights, biases, m_w, v_w, m_b, v_b = adam(weights, biases, gradients_w, gradients_b, lr, m_w, v_w, m_b, v_b, t=epoch+1)
            # elif optimizer == 'nadam':
            #     pass

        # Calculate average training loss and accuracy for the epoch
        train_loss /= (X_train_shuffled.shape[0] // batch_size)
        train_accuracy = train_correct / train_total

        # Validate model
        val_activations, _ = forward_propagation(X_val, weights, biases, activation)
        val_loss = -np.sum(y_val * np.log(val_activations[-1] + 1e-8)) / len(y_val)
        val_accuracy = np.mean(np.argmax(val_activations[-1], axis=1) == np.argmax(y_val, axis=1))

        # Log metrics to wandb
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_accuracy": train_accuracy,
            "val_loss": val_loss,
            "val_accuracy": val_accuracy
        })

        # Print metrics
        print(f"Epoch {epoch + 1}/{epochs}, "
              f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")

    return

In [19]:
# Split training data into train and validation
X_train, X_val = X_train[:54000], X_train[54000:]
y_train, y_val = y_train[:54000], y_train[54000:]

In [20]:
# Hyperparameter tuning with wandb
sweep_config = {
    'method': 'random',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        'epochs': {'values': [5, 10]},
        'hidden_layers': {'values': [3, 4, 5]},
        'hidden_size': {'values': [32, 64, 128]},
        'weight_decay': {'values': [0, 0.0005, 0.5]},
        'learning_rate': {'values': [1e-3, 1e-4]},
        'optimizer': {'values': ['sgd', 'momentum', 'nesterov', 'rmsprop', 'adam', 'nadam']},
        'batch_size': {'values': [16, 32, 64]},
        'weight_init': {'values': ['random', 'xavier']},
        'activation': {'values': ['sigmoid', 'tanh', 'relu']}
    }
}

In [21]:
#wandb initize and sweep rename  call train
def train():
    wandb.init()

    sweep_name = (
        f"hl_{wandb.config.hidden_layers}_"
        f"hs_{wandb.config.hidden_size}_"
        f"bs_{wandb.config.batch_size}_"
        f"ac_{wandb.config.activation}_"
        f"opt_{wandb.config.optimizer}_"
        f"lr_{wandb.config.learning_rate}_"
        f"init_{wandb.config.weight_init}"
    )
    wandb.run.name = sweep_name

    config = wandb.config
    train_network(X_train, y_train, X_val, y_val, config)

In [22]:
sweep_id = wandb.sweep(sweep_config, project="fashion-mnist-mse")
wandb.agent(sweep_id, function=train, count=20)
wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: a9cqlvc6
Sweep URL: https://wandb.ai/mangleshpatidar2233-iit-madras-alumni-association/fashion-mnist-mse/sweeps/a9cqlvc6


[34m[1mwandb[0m: Agent Starting Run: hru8mg0q with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random
[34m[1mwandb[0m: Currently logged in as: [33mmangleshpatidar2233[0m ([33mmangleshpatidar2233-iit-madras-alumni-association[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/10, Train Loss: 0.0911, Train Accuracy: 0.1009, Val Loss: 2.3052, Val Accuracy: 0.0925
Epoch 2/10, Train Loss: 0.0900, Train Accuracy: 0.0981, Val Loss: 2.3039, Val Accuracy: 0.1032
Epoch 3/10, Train Loss: 0.0900, Train Accuracy: 0.0993, Val Loss: 2.3046, Val Accuracy: 0.0942
Epoch 4/10, Train Loss: 0.0900, Train Accuracy: 0.0983, Val Loss: 2.3049, Val Accuracy: 0.0925
Epoch 5/10, Train Loss: 0.0900, Train Accuracy: 0.1001, Val Loss: 2.3033, Val Accuracy: 0.1027
Epoch 6/10, Train Loss: 0.0900, Train Accuracy: 0.1006, Val Loss: 2.3041, Val Accuracy: 0.0925
Epoch 7/10, Train Loss: 0.0900, Train Accuracy: 0.0987, Val Loss: 2.3051, Val Accuracy: 0.0942
Epoch 8/10, Train Loss: 0.0900, Train Accuracy: 0.0997, Val Loss: 2.3045, Val Accuracy: 0.1008
Epoch 9/10, Train Loss: 0.0900, Train Accuracy: 0.1004, Val Loss: 2.3036, Val Accuracy: 0.1055
Epoch 10/10, Train Loss: 0.0900, Train Accuracy: 0.0999, Val Loss: 2.3040, Val Accuracy: 0.1003


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,█▁▄▁▆▇▂▅▇▅
train_loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,▁▇▂▁▆▁▂▅█▅
val_loss,█▃▆▇▁▄█▅▂▄

0,1
epoch,10.0
train_accuracy,0.09985
train_loss,0.09001
val_accuracy,0.10033
val_loss,2.304


[34m[1mwandb[0m: Agent Starting Run: 04wyjsub with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/10, Train Loss: 0.2069, Train Accuracy: 0.0989, Val Loss: 0.9486, Val Accuracy: 0.0925
Epoch 2/10, Train Loss: 0.1514, Train Accuracy: 0.0987, Val Loss: 1.1740, Val Accuracy: 0.0925
Epoch 3/10, Train Loss: 0.1217, Train Accuracy: 0.0995, Val Loss: 1.4015, Val Accuracy: 0.0925
Epoch 4/10, Train Loss: 0.1047, Train Accuracy: 0.0994, Val Loss: 1.6298, Val Accuracy: 0.0925
Epoch 5/10, Train Loss: 0.0962, Train Accuracy: 0.1009, Val Loss: 1.8220, Val Accuracy: 0.0925
Epoch 6/10, Train Loss: 0.0927, Train Accuracy: 0.1005, Val Loss: 1.9643, Val Accuracy: 0.0925
Epoch 7/10, Train Loss: 0.0912, Train Accuracy: 0.1008, Val Loss: 2.0646, Val Accuracy: 0.0925
Epoch 8/10, Train Loss: 0.0906, Train Accuracy: 0.1008, Val Loss: 2.1345, Val Accuracy: 0.0925
Epoch 9/10, Train Loss: 0.0903, Train Accuracy: 0.1008, Val Loss: 2.1834, Val Accuracy: 0.0925
Epoch 10/10, Train Loss: 0.0902, Train Accuracy: 0.1008, Val Loss: 2.2178, Val Accuracy: 0.0925


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▁▄▃█▇████
train_loss,█▅▃▂▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▂▃▅▆▇▇███

0,1
epoch,10.0
train_accuracy,0.10083
train_loss,0.09022
val_accuracy,0.0925
val_loss,2.21777


[34m[1mwandb[0m: Agent Starting Run: tvohflih with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/10, Train Loss: 0.1012, Train Accuracy: 0.1001, Val Loss: 2.2395, Val Accuracy: 0.1003
Epoch 2/10, Train Loss: 0.0901, Train Accuracy: 0.0990, Val Loss: 2.2998, Val Accuracy: 0.1027
Epoch 3/10, Train Loss: 0.0901, Train Accuracy: 0.0998, Val Loss: 2.3030, Val Accuracy: 0.0925
Epoch 4/10, Train Loss: 0.0901, Train Accuracy: 0.0970, Val Loss: 2.3028, Val Accuracy: 0.1008
Epoch 5/10, Train Loss: 0.0901, Train Accuracy: 0.1010, Val Loss: 2.3031, Val Accuracy: 0.0925
Epoch 6/10, Train Loss: 0.0901, Train Accuracy: 0.0987, Val Loss: 2.3029, Val Accuracy: 0.1055
Epoch 7/10, Train Loss: 0.0901, Train Accuracy: 0.0994, Val Loss: 2.3029, Val Accuracy: 0.0985
Epoch 8/10, Train Loss: 0.0901, Train Accuracy: 0.0978, Val Loss: 2.3028, Val Accuracy: 0.1008
Epoch 9/10, Train Loss: 0.0901, Train Accuracy: 0.0974, Val Loss: 2.3030, Val Accuracy: 0.0973
Epoch 10/10, Train Loss: 0.0901, Train Accuracy: 0.1003, Val Loss: 2.3031, Val Accuracy: 0.0973


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▆▅▆▁█▄▅▂▂▇
train_loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,▅▆▁▅▁█▄▅▄▄
val_loss,▁█████████

0,1
epoch,10.0
train_accuracy,0.10026
train_loss,0.09006
val_accuracy,0.09733
val_loss,2.30308


[34m[1mwandb[0m: Agent Starting Run: ztgq4xgx with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/10, Train Loss: 0.2388, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 2/10, Train Loss: 0.2388, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 3/10, Train Loss: 0.2388, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 4/10, Train Loss: 0.2388, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 5/10, Train Loss: 0.2388, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 6/10, Train Loss: 0.2388, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 7/10, Train Loss: 0.2388, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 8/10, Train Loss: 0.2388, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 9/10, Train Loss: 0.2388, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 10/10, Train Loss: 0.2388, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,▇▅▁▁▆▃█▁▁▅
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.10352
train_loss,0.23884
val_accuracy,0.11017
val_loss,0.7267


[34m[1mwandb[0m: Agent Starting Run: 06bel8rs with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/5, Train Loss: 0.2069, Train Accuracy: 0.0984, Val Loss: 0.9454, Val Accuracy: 0.0942
Epoch 2/5, Train Loss: 0.1529, Train Accuracy: 0.0997, Val Loss: 1.1515, Val Accuracy: 0.0942
Epoch 3/5, Train Loss: 0.1262, Train Accuracy: 0.1006, Val Loss: 1.3265, Val Accuracy: 0.0942
Epoch 4/5, Train Loss: 0.1111, Train Accuracy: 0.1006, Val Loss: 1.4905, Val Accuracy: 0.0942
Epoch 5/5, Train Loss: 0.1018, Train Accuracy: 0.1006, Val Loss: 1.6477, Val Accuracy: 0.0942


0,1
epoch,▁▃▅▆█
train_accuracy,▁▅███
train_loss,█▄▃▂▁
val_accuracy,▁▁▁▁▁
val_loss,▁▃▅▆█

0,1
epoch,5.0
train_accuracy,0.10065
train_loss,0.10182
val_accuracy,0.09417
val_loss,1.6477


[34m[1mwandb[0m: Agent Starting Run: h5boukuo with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/10, Train Loss: 0.0914, Train Accuracy: 0.1002, Val Loss: 2.3051, Val Accuracy: 0.0925
Epoch 2/10, Train Loss: 0.0901, Train Accuracy: 0.0986, Val Loss: 2.3034, Val Accuracy: 0.0988
Epoch 3/10, Train Loss: 0.0901, Train Accuracy: 0.1016, Val Loss: 2.3038, Val Accuracy: 0.0942
Epoch 4/10, Train Loss: 0.0901, Train Accuracy: 0.1012, Val Loss: 2.3039, Val Accuracy: 0.0925
Epoch 5/10, Train Loss: 0.0901, Train Accuracy: 0.1039, Val Loss: 2.3018, Val Accuracy: 0.1027
Epoch 6/10, Train Loss: 0.0901, Train Accuracy: 0.1040, Val Loss: 2.3022, Val Accuracy: 0.1050
Epoch 7/10, Train Loss: 0.0900, Train Accuracy: 0.1027, Val Loss: 2.3028, Val Accuracy: 0.0942
Epoch 8/10, Train Loss: 0.0900, Train Accuracy: 0.1049, Val Loss: 2.3021, Val Accuracy: 0.1008
Epoch 9/10, Train Loss: 0.0900, Train Accuracy: 0.1054, Val Loss: 2.3008, Val Accuracy: 0.1055
Epoch 10/10, Train Loss: 0.0900, Train Accuracy: 0.1055, Val Loss: 2.3005, Val Accuracy: 0.1682


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▃▁▄▄▆▆▅▇██
train_loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,▁▂▁▁▂▂▁▂▂█
val_loss,█▅▆▆▃▄▅▃▁▁

0,1
epoch,10.0
train_accuracy,0.10552
train_loss,0.09003
val_accuracy,0.16817
val_loss,2.30053


[34m[1mwandb[0m: Agent Starting Run: l1egowhq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/5, Train Loss: 0.1121, Train Accuracy: 0.1743, Val Loss: 2.8783, Val Accuracy: 0.2313
Epoch 2/5, Train Loss: 0.0906, Train Accuracy: 0.2719, Val Loss: 2.7068, Val Accuracy: 0.2505
Epoch 3/5, Train Loss: 0.0818, Train Accuracy: 0.2800, Val Loss: 2.6822, Val Accuracy: 0.3257
Epoch 4/5, Train Loss: 0.0734, Train Accuracy: 0.4110, Val Loss: 2.4915, Val Accuracy: 0.4575
Epoch 5/5, Train Loss: 0.0671, Train Accuracy: 0.5015, Val Loss: 2.1464, Val Accuracy: 0.5223


0,1
epoch,▁▃▅▆█
train_accuracy,▁▃▃▆█
train_loss,█▅▃▂▁
val_accuracy,▁▁▃▆█
val_loss,█▆▆▄▁

0,1
epoch,5.0
train_accuracy,0.50148
train_loss,0.06714
val_accuracy,0.52233
val_loss,2.14643


[34m[1mwandb[0m: Agent Starting Run: a1cgbxwq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/10, Train Loss: 0.0666, Train Accuracy: 0.4893, Val Loss: 1.5242, Val Accuracy: 0.7003
Epoch 2/10, Train Loss: 0.0388, Train Accuracy: 0.7349, Val Loss: 0.6292, Val Accuracy: 0.7883
Epoch 3/10, Train Loss: 0.0291, Train Accuracy: 0.8059, Val Loss: 0.5516, Val Accuracy: 0.8018
Epoch 4/10, Train Loss: 0.0257, Train Accuracy: 0.8266, Val Loss: 0.5242, Val Accuracy: 0.8285
Epoch 5/10, Train Loss: 0.0240, Train Accuracy: 0.8391, Val Loss: 0.4877, Val Accuracy: 0.8395
Epoch 6/10, Train Loss: 0.0229, Train Accuracy: 0.8464, Val Loss: 0.4746, Val Accuracy: 0.8467
Epoch 7/10, Train Loss: 0.0220, Train Accuracy: 0.8540, Val Loss: 0.4598, Val Accuracy: 0.8500
Epoch 8/10, Train Loss: 0.0213, Train Accuracy: 0.8578, Val Loss: 0.4718, Val Accuracy: 0.8490
Epoch 9/10, Train Loss: 0.0207, Train Accuracy: 0.8617, Val Loss: 0.4250, Val Accuracy: 0.8525
Epoch 10/10, Train Loss: 0.0202, Train Accuracy: 0.8656, Val Loss: 0.4376, Val Accuracy: 0.8557


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▆▇▇██████
train_loss,█▄▂▂▂▁▁▁▁▁
val_accuracy,▁▅▆▇▇█████
val_loss,█▂▂▂▁▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.86559
train_loss,0.02021
val_accuracy,0.85567
val_loss,0.43757


[34m[1mwandb[0m: Agent Starting Run: rt6ozzbl with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/5, Train Loss: 0.0941, Train Accuracy: 0.0994, Val Loss: 2.3009, Val Accuracy: 0.0973
Epoch 2/5, Train Loss: 0.0901, Train Accuracy: 0.1001, Val Loss: 2.3031, Val Accuracy: 0.1003
Epoch 3/5, Train Loss: 0.0901, Train Accuracy: 0.0979, Val Loss: 2.3033, Val Accuracy: 0.0942
Epoch 4/5, Train Loss: 0.0901, Train Accuracy: 0.1001, Val Loss: 2.3033, Val Accuracy: 0.1008
Epoch 5/5, Train Loss: 0.0901, Train Accuracy: 0.0998, Val Loss: 2.3030, Val Accuracy: 0.1008


0,1
epoch,▁▃▅▆█
train_accuracy,▆█▁█▇
train_loss,█▁▁▁▁
val_accuracy,▄▇▁██
val_loss,▁▇██▇

0,1
epoch,5.0
train_accuracy,0.09978
train_loss,0.09011
val_accuracy,0.10083
val_loss,2.30298


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0eg9bx7m with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/5, Train Loss: 0.2501, Train Accuracy: 0.0924, Val Loss: 0.6931, Val Accuracy: 0.0927
Epoch 2/5, Train Loss: 0.2501, Train Accuracy: 0.0924, Val Loss: 0.6931, Val Accuracy: 0.0927
Epoch 3/5, Train Loss: 0.2501, Train Accuracy: 0.0924, Val Loss: 0.6931, Val Accuracy: 0.0927
Epoch 4/5, Train Loss: 0.2501, Train Accuracy: 0.0924, Val Loss: 0.6931, Val Accuracy: 0.0927
Epoch 5/5, Train Loss: 0.2501, Train Accuracy: 0.0924, Val Loss: 0.6931, Val Accuracy: 0.0927


0,1
epoch,▁▃▅▆█
train_accuracy,▁▁▁▁▁
train_loss,▁▁▁▁▁
val_accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
epoch,5.0
train_accuracy,0.09235
train_loss,0.25015
val_accuracy,0.09267
val_loss,0.69315


[34m[1mwandb[0m: Agent Starting Run: p0jan6jo with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/10, Train Loss: 0.0318, Train Accuracy: 0.7683, Val Loss: 0.4651, Val Accuracy: 0.8307
Epoch 2/10, Train Loss: 0.0211, Train Accuracy: 0.8531, Val Loss: 0.4381, Val Accuracy: 0.8522
Epoch 3/10, Train Loss: 0.0190, Train Accuracy: 0.8697, Val Loss: 0.4397, Val Accuracy: 0.8517
Epoch 4/10, Train Loss: 0.0179, Train Accuracy: 0.8775, Val Loss: 0.3765, Val Accuracy: 0.8692
Epoch 5/10, Train Loss: 0.0168, Train Accuracy: 0.8846, Val Loss: 0.3655, Val Accuracy: 0.8800
Epoch 6/10, Train Loss: 0.0162, Train Accuracy: 0.8883, Val Loss: 0.3588, Val Accuracy: 0.8810
Epoch 7/10, Train Loss: 0.0156, Train Accuracy: 0.8936, Val Loss: 0.3642, Val Accuracy: 0.8832
Epoch 8/10, Train Loss: 0.0151, Train Accuracy: 0.8970, Val Loss: 0.3555, Val Accuracy: 0.8830
Epoch 9/10, Train Loss: 0.0146, Train Accuracy: 0.8998, Val Loss: 0.4188, Val Accuracy: 0.8727
Epoch 10/10, Train Loss: 0.0143, Train Accuracy: 0.9036, Val Loss: 0.3738, Val Accuracy: 0.8767


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▅▆▇▇▇▇███
train_loss,█▄▃▂▂▂▂▁▁▁
val_accuracy,▁▄▄▆████▇▇
val_loss,█▆▆▂▂▁▂▁▅▂

0,1
epoch,10.0
train_accuracy,0.90356
train_loss,0.01426
val_accuracy,0.87667
val_loss,0.37381


[34m[1mwandb[0m: Agent Starting Run: awcgwhiy with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/10, Train Loss: 0.1157, Train Accuracy: 0.0981, Val Loss: 2.1328, Val Accuracy: 0.0942
Epoch 2/10, Train Loss: 0.0901, Train Accuracy: 0.0968, Val Loss: 2.2882, Val Accuracy: 0.0973
Epoch 3/10, Train Loss: 0.0901, Train Accuracy: 0.0987, Val Loss: 2.3016, Val Accuracy: 0.0925
Epoch 4/10, Train Loss: 0.0901, Train Accuracy: 0.0979, Val Loss: 2.3027, Val Accuracy: 0.0925
Epoch 5/10, Train Loss: 0.0901, Train Accuracy: 0.0998, Val Loss: 2.3028, Val Accuracy: 0.1008
Epoch 6/10, Train Loss: 0.0901, Train Accuracy: 0.0996, Val Loss: 2.3029, Val Accuracy: 0.0925
Epoch 7/10, Train Loss: 0.0901, Train Accuracy: 0.0983, Val Loss: 2.3028, Val Accuracy: 0.0925
Epoch 8/10, Train Loss: 0.0901, Train Accuracy: 0.0978, Val Loss: 2.3029, Val Accuracy: 0.0925
Epoch 9/10, Train Loss: 0.0901, Train Accuracy: 0.0997, Val Loss: 2.3028, Val Accuracy: 0.1027
Epoch 10/10, Train Loss: 0.0901, Train Accuracy: 0.0991, Val Loss: 2.3029, Val Accuracy: 0.0925


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▄▁▅▄██▅▃█▆
train_loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,▂▄▁▁▇▁▁▁█▁
val_loss,▁▇████████

0,1
epoch,10.0
train_accuracy,0.09907
train_loss,0.09006
val_accuracy,0.0925
val_loss,2.30287


[34m[1mwandb[0m: Agent Starting Run: 061s6xdl with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/10, Train Loss: 0.1117, Train Accuracy: 0.1754, Val Loss: 2.8846, Val Accuracy: 0.2315
Epoch 2/10, Train Loss: 0.0908, Train Accuracy: 0.2724, Val Loss: 2.7094, Val Accuracy: 0.2480
Epoch 3/10, Train Loss: 0.0819, Train Accuracy: 0.2807, Val Loss: 2.6853, Val Accuracy: 0.3248
Epoch 4/10, Train Loss: 0.0735, Train Accuracy: 0.4110, Val Loss: 2.4923, Val Accuracy: 0.4615
Epoch 5/10, Train Loss: 0.0671, Train Accuracy: 0.5031, Val Loss: 2.1687, Val Accuracy: 0.5282
Epoch 6/10, Train Loss: 0.0603, Train Accuracy: 0.5571, Val Loss: 2.1483, Val Accuracy: 0.5725
Epoch 7/10, Train Loss: 0.0553, Train Accuracy: 0.5821, Val Loss: 2.1217, Val Accuracy: 0.5997
Epoch 8/10, Train Loss: 0.0523, Train Accuracy: 0.6061, Val Loss: 2.0016, Val Accuracy: 0.6157
Epoch 9/10, Train Loss: 0.0497, Train Accuracy: 0.6499, Val Loss: 1.5113, Val Accuracy: 0.7055
Epoch 10/10, Train Loss: 0.0454, Train Accuracy: 0.7141, Val Loss: 1.2900, Val Accuracy: 0.7240


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▂▂▄▅▆▆▇▇█
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▁▂▄▅▆▆▆██
val_loss,█▇▇▆▅▅▅▄▂▁

0,1
epoch,10.0
train_accuracy,0.71411
train_loss,0.04542
val_accuracy,0.724
val_loss,1.29002


[34m[1mwandb[0m: Agent Starting Run: 9ifsa1ln with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/5, Train Loss: 0.2700, Train Accuracy: 0.1000, Val Loss: 0.6660, Val Accuracy: 0.1003
Epoch 2/5, Train Loss: 0.2700, Train Accuracy: 0.1000, Val Loss: 0.6660, Val Accuracy: 0.1003
Epoch 3/5, Train Loss: 0.2700, Train Accuracy: 0.1000, Val Loss: 0.6660, Val Accuracy: 0.1003
Epoch 4/5, Train Loss: 0.2700, Train Accuracy: 0.1000, Val Loss: 0.6660, Val Accuracy: 0.1003
Epoch 5/5, Train Loss: 0.2700, Train Accuracy: 0.1000, Val Loss: 0.6660, Val Accuracy: 0.1003


0,1
epoch,▁▃▅▆█
train_accuracy,▁▁▁▁▁
train_loss,▁▁▁▁▁
val_accuracy,▁▁▁▁▁
val_loss,▁▁▁▁▁

0,1
epoch,5.0
train_accuracy,0.09996
train_loss,0.27
val_accuracy,0.10033
val_loss,0.66602


[34m[1mwandb[0m: Agent Starting Run: iu6960ae with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/10, Train Loss: 0.2386, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 2/10, Train Loss: 0.2386, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 3/10, Train Loss: 0.2386, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 4/10, Train Loss: 0.2386, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 5/10, Train Loss: 0.2386, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 6/10, Train Loss: 0.2386, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 7/10, Train Loss: 0.2386, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 8/10, Train Loss: 0.2386, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 9/10, Train Loss: 0.2386, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102
Epoch 10/10, Train Loss: 0.2386, Train Accuracy: 0.1035, Val Loss: 0.7267, Val Accuracy: 0.1102


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.10352
train_loss,0.23855
val_accuracy,0.11017
val_loss,0.7267


[34m[1mwandb[0m: Agent Starting Run: zqimjf61 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 3
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/5, Train Loss: 0.0936, Train Accuracy: 0.1069, Val Loss: 2.2988, Val Accuracy: 0.0802
Epoch 2/5, Train Loss: 0.0900, Train Accuracy: 0.1330, Val Loss: 2.2948, Val Accuracy: 0.3003
Epoch 3/5, Train Loss: 0.0900, Train Accuracy: 0.1801, Val Loss: 2.2893, Val Accuracy: 0.1685
Epoch 4/5, Train Loss: 0.0899, Train Accuracy: 0.2119, Val Loss: 2.2827, Val Accuracy: 0.2652
Epoch 5/5, Train Loss: 0.0898, Train Accuracy: 0.2186, Val Loss: 2.2740, Val Accuracy: 0.1643


0,1
epoch,▁▃▅▆█
train_accuracy,▁▃▆██
train_loss,█▁▁▁▁
val_accuracy,▁█▄▇▄
val_loss,█▇▅▃▁

0,1
epoch,5.0
train_accuracy,0.21863
train_loss,0.08984
val_accuracy,0.16433
val_loss,2.27399


[34m[1mwandb[0m: Agent Starting Run: 6yarsydn with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0.0005
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/10, Train Loss: 0.2501, Train Accuracy: 0.1176, Val Loss: 0.6963, Val Accuracy: 0.1127
Epoch 2/10, Train Loss: 0.2501, Train Accuracy: 0.1176, Val Loss: 0.6963, Val Accuracy: 0.1127
Epoch 3/10, Train Loss: 0.2501, Train Accuracy: 0.1176, Val Loss: 0.6963, Val Accuracy: 0.1127
Epoch 4/10, Train Loss: 0.2501, Train Accuracy: 0.1176, Val Loss: 0.6963, Val Accuracy: 0.1127
Epoch 5/10, Train Loss: 0.2501, Train Accuracy: 0.1176, Val Loss: 0.6963, Val Accuracy: 0.1127
Epoch 6/10, Train Loss: 0.2501, Train Accuracy: 0.1176, Val Loss: 0.6963, Val Accuracy: 0.1127
Epoch 7/10, Train Loss: 0.2501, Train Accuracy: 0.1176, Val Loss: 0.6963, Val Accuracy: 0.1127
Epoch 8/10, Train Loss: 0.2501, Train Accuracy: 0.1176, Val Loss: 0.6963, Val Accuracy: 0.1127
Epoch 9/10, Train Loss: 0.2501, Train Accuracy: 0.1176, Val Loss: 0.6963, Val Accuracy: 0.1127
Epoch 10/10, Train Loss: 0.2501, Train Accuracy: 0.1176, Val Loss: 0.6963, Val Accuracy: 0.1127


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▁▁▁▁▁▁▁▁▁
train_loss,▅▄▂▄▆▄▃█▄▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,10.0
train_accuracy,0.11757
train_loss,0.25014
val_accuracy,0.11267
val_loss,0.69629


[34m[1mwandb[0m: Agent Starting Run: 3ry9iz3s with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/5, Train Loss: 0.0977, Train Accuracy: 0.2711, Val Loss: 1.8656, Val Accuracy: 0.3130
Epoch 2/5, Train Loss: 0.0803, Train Accuracy: 0.3425, Val Loss: 1.6379, Val Accuracy: 0.3868
Epoch 3/5, Train Loss: 0.0734, Train Accuracy: 0.4326, Val Loss: 1.4040, Val Accuracy: 0.4862
Epoch 4/5, Train Loss: 0.0667, Train Accuracy: 0.5461, Val Loss: 1.1978, Val Accuracy: 0.6018
Epoch 5/5, Train Loss: 0.0604, Train Accuracy: 0.6218, Val Loss: 1.0468, Val Accuracy: 0.6342


0,1
epoch,▁▃▅▆█
train_accuracy,▁▂▄▆█
train_loss,█▅▃▂▁
val_accuracy,▁▃▅▇█
val_loss,█▆▄▂▁

0,1
epoch,5.0
train_accuracy,0.62178
train_loss,0.06045
val_accuracy,0.63417
val_loss,1.04681


[34m[1mwandb[0m: Agent Starting Run: pmscw423 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: momentum
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: xavier


Epoch 1/5, Train Loss: 0.1061, Train Accuracy: 0.0941, Val Loss: 2.0949, Val Accuracy: 0.1008
Epoch 2/5, Train Loss: 0.0902, Train Accuracy: 0.0951, Val Loss: 2.2426, Val Accuracy: 0.0748
Epoch 3/5, Train Loss: 0.0901, Train Accuracy: 0.0964, Val Loss: 2.2832, Val Accuracy: 0.0925
Epoch 4/5, Train Loss: 0.0901, Train Accuracy: 0.0939, Val Loss: 2.2960, Val Accuracy: 0.0918
Epoch 5/5, Train Loss: 0.0901, Train Accuracy: 0.1010, Val Loss: 2.3003, Val Accuracy: 0.0925


0,1
epoch,▁▃▅▆█
train_accuracy,▁▂▃▁█
train_loss,█▁▁▁▁
val_accuracy,█▁▆▆▆
val_loss,▁▆▇██

0,1
epoch,5.0
train_accuracy,0.101
train_loss,0.09005
val_accuracy,0.0925
val_loss,2.30032


[34m[1mwandb[0m: Agent Starting Run: etp0vp95 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimizer: nesterov
[34m[1mwandb[0m: 	weight_decay: 0
[34m[1mwandb[0m: 	weight_init: random


Epoch 1/5, Train Loss: 0.2067, Train Accuracy: 0.0988, Val Loss: 0.9483, Val Accuracy: 0.0925
Epoch 2/5, Train Loss: 0.1515, Train Accuracy: 0.0992, Val Loss: 1.1697, Val Accuracy: 0.0925
Epoch 3/5, Train Loss: 0.1226, Train Accuracy: 0.0996, Val Loss: 1.3838, Val Accuracy: 0.0925
Epoch 4/5, Train Loss: 0.1060, Train Accuracy: 0.0992, Val Loss: 1.5959, Val Accuracy: 0.0925
Epoch 5/5, Train Loss: 0.0973, Train Accuracy: 0.1008, Val Loss: 1.7807, Val Accuracy: 0.0925


0,1
epoch,▁▃▅▆█
train_accuracy,▁▂▄▃█
train_loss,█▄▃▂▁
val_accuracy,▁▁▁▁▁
val_loss,▁▃▅▆█

0,1
epoch,5.0
train_accuracy,0.10083
train_loss,0.0973
val_accuracy,0.0925
val_loss,1.7807
