<a href="https://colab.research.google.com/github/subikkshas/DA6401/blob/main/DLass1q4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import wandb
from tensorflow.keras.datasets import fashion_mnist
from sklearn.model_selection import train_test_split

wandb.login()

# Load Fashion MNIST dataset
(X_train_and_validation, y_train_and_validation), (X_test, y_test) = fashion_mnist.load_data()

# Split into train and validation
X_train, X_validation, y_train, y_validation = train_test_split(
    X_train_and_validation, y_train_and_validation, test_size=0.1, random_state=42)

# Normalize data
X_train = (X_train / 255.0).astype(np.float32)
X_validation = (X_validation / 255.0).astype(np.float32)
X_test = (X_test / 255.0).astype(np.float32)

# Reshape for input layer
X_train = X_train.reshape(X_train.shape[0], 784, 1)
X_validation = X_validation.reshape(X_validation.shape[0], 784, 1)
X_test = X_test.reshape(X_test.shape[0], 784, 1)

# One-hot encode labels
def one_hot(y, num_classes=10):
    return np.eye(num_classes)[y].T

def layer_init(arr, n1, n2, init_type):
    np.random.seed(10)
    if init_type == "random":
        arr.append(np.random.randn(n1, n2) * 0.1)
    elif init_type == "xavier":
        arr.append(np.random.randn(n1, n2) * np.sqrt(2 / (n1 + n2)))
    return arr

def param(layers, init_type):
    W, B = [], []
    for i in range(len(layers) - 1):
        W = layer_init(W, layers[i + 1], layers[i], init_type)
        B = layer_init(B, layers[i + 1], 1, init_type)
    return W, B

def activation(act_type):
    return {
        'sigmoid': lambda x, deriv=False: x * (1 - x) if deriv else 1 / (1 + np.exp(-x)),
        'tanh': lambda x, deriv=False: 1 - np.tanh(x) ** 2 if deriv else np.tanh(x),
        'ReLU': lambda x, deriv=False: (x > 0) * 1 if deriv else x * (x > 0)
    }[act_type]

def softmax(x):
    exps = np.exp(x - np.max(x))
    return exps / np.sum(exps, axis=0)

def forward(x, W, B, act_type):
    h, a = [x], []
    sigma = activation(act_type)
    for i in range(len(W)):
        a.append(np.dot(W[i], h[-1]) + B[i])
        h.append(sigma(a[-1]) if i < len(W) - 1 else softmax(a[-1]))
    return h[-1], h, a

def loss(y, y_hat, l_type):
    if l_type == 'cross_entropy':
        return -np.sum(y * np.log(y_hat)) / y.shape[1]

def eval_acc(y_hat, y_true):
    return np.mean(np.argmax(y_hat, axis=0) == y_true) * 100

def backprop(x, y, y_hat, a, h, W, B, batch_size, act_type):
    grad_h, grad_a, grad_W, grad_B = [0] * len(h), [0] * len(a), [0] * len(W), [0] * len(B)
    sigma = activation(act_type)
    grad_h[-1] = y_hat - y
    grad_a[-1] = grad_h[-1] * softmax(a[-1]) * (1 - softmax(a[-1]))
    for i in range(len(W) - 1, -1, -1):
        grad_W[i] = np.dot(grad_a[i], h[i].T)
        grad_B[i] = np.sum(grad_a[i], axis=1, keepdims=True)
        if i > 0:
            grad_h[i - 1] = np.dot(W[i].T, grad_a[i])
            grad_a[i - 1] = grad_h[i - 1] * sigma(a[i - 1], deriv=True)
    return grad_W, grad_B

def optimizer_step(optimizer, W, B, grad_W, grad_B, lr):
    if optimizer == 'sgd':
        W = [w - lr * gw for w, gw in zip(W, grad_W)]
        B = [b - lr * gb for b, gb in zip(B, grad_B)]
    return W, B

def train(X_train, y_train, X_val, y_val, layers, init_type, epochs, batch_size, loss_type, act_type, optimizer, lr):
    W, B = param(layers, init_type)
    wandb.init(project="DA6401-Assignment-1")

    for epoch in range(epochs):
        train_loss, train_acc = [], []
        for i in range(0, X_train.shape[0], batch_size):
            x_batch = X_train[i:i + batch_size].reshape(784, -1)
            y_batch = one_hot(y_train[i:i + batch_size])
            y_hat, h, a = forward(x_batch, W, B, act_type)
            grad_W, grad_B = backprop(x_batch, y_batch, y_hat, a, h, W, B, batch_size, act_type)
            W, B = optimizer_step(optimizer, W, B, grad_W, grad_B, lr)
            train_loss.append(loss(y_batch, y_hat, loss_type))
            train_acc.append(eval_acc(y_hat, y_train[i:i + batch_size]))

        val_hat, _, _ = forward(X_val.reshape(784, -1), W, B, act_type)
        val_acc = eval_acc(val_hat, y_val)

        wandb.log({"epoch": epoch, "train_loss": np.mean(train_loss), "train_acc": np.mean(train_acc), "val_acc": val_acc})

    return W, B

def sweep_train():
    wandb.init(project="DA6401-Assignment-1", id="Question-4", config=sweep_config)
    config = wandb.config
    layers = [784] + config.hidden_layers + [10]
    train(X_train, y_train, X_validation, y_validation, layers, 'random', config.epochs, config.batch_size, 'cross_entropy', config.activation_type, config.optimizer, config.learning_rate)

sweep_config = {
    'method': 'random',
    'metric': {'name': 'val_Accuracy', 'goal': 'maximize'},
    'parameters': {
        'epochs': {'values': [5, 10]},
        'hidden_layers': {'values': [[64, 32], [128, 64, 32]]},  # Ensure valid structure
        'learning_rate': {'values': [1e-3, 1e-4]},
        'optimizer': {'values': ['sgd', 'momentum', 'nesterov', 'rmsprop', 'adam', 'nadam']},
        'batch_size': {'values': [16, 32, 64]},
        'init_type': {'values': ['random', 'xavier']},
        'activation_type': {'values': ['sigmoid', 'tanh', 'ReLU']},
        'loss_type': {'values': ['cross_entropy']}
    }
}


sweep_id = wandb.sweep(sweep_config, project="DA6401-Assignment-1")
wandb.agent(sweep_id, function=sweep_train, count=10)


Create sweep with ID: 6v09fkzk
Sweep URL: https://wandb.ai/subikksha-indian-institute-of-technology-madras/DA6401-Assignment-1/sweeps/6v09fkzk


[34m[1mwandb[0m: Agent Starting Run: 4z8svcnc with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer: nesterov


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,9.0
train_acc,9.88333
train_loss,2.34843
val_acc,9.9


[34m[1mwandb[0m: Agent Starting Run: stcafski with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer: nadam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,9.0
train_acc,10.12593
train_loss,2.33754
val_acc,10.26667


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fjxjzokj with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer: nadam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,9.0
train_acc,10.09701
train_loss,2.33905
val_acc,10.26667


[34m[1mwandb[0m: Agent Starting Run: hdcukpaa with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer: rmsprop


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,9.0
train_acc,10.19698
train_loss,2.33243
val_acc,9.91667


[34m[1mwandb[0m: Agent Starting Run: mx9lxb8m with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer: adam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,9.0
train_acc,10.1507
train_loss,2.33662
val_acc,10.53333


[34m[1mwandb[0m: Agent Starting Run: 99xt64x4 with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [128, 64, 32]
[34m[1mwandb[0m: 	init_type: xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer: sgd


  'sigmoid': lambda x, deriv=False: x * (1 - x) if deriv else 1 / (1 + np.exp(-x)),
  'sigmoid': lambda x, deriv=False: x * (1 - x) if deriv else 1 / (1 + np.exp(-x)),
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


0,1
epoch,▁▃▅▆█
train_acc,█▁▁▃▅
train_loss,█▁▁▁
val_acc,█▃▁▃▁

0,1
epoch,4.0
train_acc,9.68787
train_loss,
val_acc,9.95


[34m[1mwandb[0m: Agent Starting Run: q4zp3zf1 with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer: nadam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,9.0
train_acc,9.96371
train_loss,2.34974
val_acc,9.9


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d831ifdz with config:
[34m[1mwandb[0m: 	activation_type: ReLU
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer: adam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▁▁▁▁▁▁▁▁▁
train_loss,▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁▁▁▁▁▁▁▁▁

0,1
epoch,9.0
train_acc,9.96371
train_loss,2.34974
val_acc,9.9


[34m[1mwandb[0m: Agent Starting Run: bw7ekzfn with config:
[34m[1mwandb[0m: 	activation_type: sigmoid
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer: momentum


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁▁▁▁▁
val_acc,▁▁▁▁▁

0,1
epoch,4.0
train_acc,10.00629
train_loss,2.32553
val_acc,9.95


[34m[1mwandb[0m: Agent Starting Run: sfomgght with config:
[34m[1mwandb[0m: 	activation_type: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layers: [64, 32]
[34m[1mwandb[0m: 	init_type: random
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_type: cross_entropy
[34m[1mwandb[0m: 	optimizer: rmsprop


0,1
epoch,▁▃▅▆█
train_acc,▁▁▁▁▁
train_loss,▁▁▁▁▁
val_acc,▁▁▁▁▁

0,1
epoch,4.0
train_acc,10.19698
train_loss,2.33243
val_acc,9.91667
