In [None]:
import torchvision
import numpy as np
import matplotlib
import wandb
from src import preprocess, activation_functions, initializers, layer, loss_functions, optimizers, FFNN, training, wandblogger, dropout, batchnorm



In [None]:
# Load the Fashion-MNIST train and test splits from ./data.
# download=True only fetches the files when they are missing from `root`, so the
# notebook also runs on a fresh checkout instead of failing on an empty ./data.
train_dataset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True)
test_dataset = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True)

In [None]:
# Split the training set into train/validation parts, flatten every image into
# one row, and one-hot encode the labels with the project's OneHotEncoder.
validation_percent = 0.2
one_hot_encoder = preprocess.OneHotEncoder()


def _flatten_and_scale(images):
    """Return `images` as a float32 NumPy array of shape (n, -1) scaled to [0, 1].

    The dataset stores pixels as unsigned 8-bit values in 0..255. Feeding those
    raw values to the network risks integer wrap-around and very large
    activations, so they are converted to floats and normalised here.
    """
    pixels = np.asarray(images, dtype=np.float32)
    return pixels.reshape(len(pixels), -1) / 255.0


# The first (1 - validation_percent) of the samples train the model; the rest validate it.
train_size = int((1 - validation_percent) * len(train_dataset))
validation_size = len(train_dataset) - train_size

x_train = _flatten_and_scale(train_dataset.data[:train_size])
targets_train = np.array(train_dataset.targets[:train_size])
y_train = one_hot_encoder.transform(targets_train)

x_valid = _flatten_and_scale(train_dataset.data[train_size:])
targets_valid = np.array(train_dataset.targets[train_size:])
y_valid = one_hot_encoder.transform(targets_valid)

x_test = _flatten_and_scale(test_dataset.data)
targets_test = np.array(test_dataset.targets)
y_test = one_hot_encoder.transform(targets_test)

# Quick look at the container types and shapes that go into training.
print(type(x_train), type(y_train))
print(x_train.shape, y_train.shape)

In [None]:
# Sanity check: report the smallest and largest entries of y_train.
y_train_min = np.min(y_train)
y_train_max = np.max(y_train)
print("y_train min:", y_train_min, "y_train max:", y_train_max)

In [None]:


# Shared components for the hand-built model.
activation_function = activation_functions.ReLU()
identity_activation = activation_functions.Identity()

weight_init = initializers.HeInitializer()
bias_init = initializers.ConstantInitializer(0.0)

loss_fn = loss_functions.CrossEntropyLoss()
optimizer = optimizers.SGD(learning_rate=0.01)


def _dense(n_in, n_out):
    """Build one Layer with its own Identity activation and the shared initializers."""
    return layer.Layer(
        input_size=n_in,
        output_size=n_out,
        activation_function=activation_functions.Identity(),
        weight_initializer=weight_init,
        bias_initializer=bias_init,
    )


def _hidden_block(n_in, n_out):
    """Return [Layer, BatchNorm, ReLU, Dropout(0.5)] for one hidden stage, in that order."""
    return [
        _dense(n_in, n_out),
        batchnorm.BatchNorm(n_out),
        activation_functions.ReLU(),
        dropout.Dropout(0.5),
    ]


# 784 -> 128 -> 64 -> 10; the final Layer has no BatchNorm/ReLU/Dropout after it.
layers = [
    *_hidden_block(28 * 28, 128),
    *_hidden_block(128, 64),
    _dense(64, 10),
]

model = FFNN.FFNN(layers=layers, loss_function=loss_fn, optimizer=optimizer)

In [None]:
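# NOTE(review): this whole cell is disabled code for a single-Layer model
# (hidden_size is assigned but never used). Nothing here executes; delete it or restore it deliberately.
# activation_func = activation_functions.ReLU()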
# loss_func = loss_functions.CrossEntropyLoss()
# optimizer = optimizers.SGD()
# W_initializers = initializers.HeInitializer()
# B_initializer = initializers.ConstantInitializer(0)
# input_size = x_train.shape[1]
# hidden_size = 392
# output_size = 10

# Input_layer = layer.Layer(input_size=input_size, output_size=output_size, activation_function=activation_func, weight_initializer=W_initializers, bias_initializer=B_initializer)
# layers = [Input_layer]
# FeedForward = FFNN.FFNN(layers=layers, loss_function=loss_func, optimizer=optimizer)


In [None]:
# Training settings for the hand-built model.
epochs = 10
batch_size = 32

# Build the trainer first, then start the run; the call stays the cell's last
# expression so whatever train() returns is still displayed.
trainer = training.Trainer(model, loss_function=loss_fn, optimizer=optimizer)
trainer.train(
    X=x_train,
    y=y_train,
    x_val=x_valid,
    y_val=y_valid,
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
def _build_sweep_layers(layer_sizes, use_batchnorm, dropout_rate, input_size=28 * 28, output_size=10):
    """Assemble the layer list for one sweep configuration.

    Each hidden size contributes Layer -> [BatchNorm] -> ReLU -> [Dropout]; a
    final Layer maps to `output_size`. Every Layer is given an explicit
    Identity activation, matching the hand-built model in this notebook, so the
    only non-linearity is the separate ReLU entry.
    """
    layers = []
    prev_size = input_size

    for hidden_size in layer_sizes:
        layers.append(
            layer.Layer(
                input_size=prev_size,
                output_size=hidden_size,
                activation_function=activation_functions.Identity(),
                weight_initializer=initializers.HeInitializer(),
                bias_initializer=initializers.ConstantInitializer(0.0),
            )
        )

        if use_batchnorm:
            layers.append(batchnorm.BatchNorm(hidden_size))

        layers.append(activation_functions.ReLU())

        if dropout_rate > 0:
            layers.append(dropout.Dropout(dropout_rate))

        prev_size = hidden_size

    # Output layer
    layers.append(
        layer.Layer(
            input_size=prev_size,
            output_size=output_size,
            activation_function=activation_functions.Identity(),
            weight_initializer=initializers.HeInitializer(),
            bias_initializer=initializers.ConstantInitializer(0.0),
        )
    )
    return layers


def train_sweep():
    """Run one sweep trial: build a model from `wandb.config`, train it, close the run.

    Reads layer_sizes, use_batchnorm, dropout_rate, optimizer, learning_rate,
    epochs and batch_size from the sweep config, and trains on the notebook-level
    x_train/y_train with x_valid/y_valid as validation data.

    Raises:
        ValueError: if `cfg.optimizer` is neither "sgd" nor "adam".
    """
    wandb.init()        # load sweep config
    exit_code = 1       # stays 1 unless training completes, so failed trials are marked as failed
    try:
        cfg = wandb.config  # hyperparameters

        layers = _build_sweep_layers(cfg.layer_sizes, cfg.use_batchnorm, cfg.dropout_rate)

        if cfg.optimizer == "sgd":
            optimizer = optimizers.SGD(learning_rate=cfg.learning_rate)
        elif cfg.optimizer == "adam":
            optimizer = optimizers.Adam(learning_rate=cfg.learning_rate)
        else:
            # Previously an unknown name left `optimizer` unbound and failed later
            # with an unrelated UnboundLocalError.
            raise ValueError(f"Unsupported optimizer: {cfg.optimizer!r}")

        # One loss instance shared by model and trainer, as in the manual training cell.
        criterion = loss_functions.CrossEntropyLoss()

        model = FFNN.FFNN(
            layers=layers,
            loss_function=criterion,
            optimizer=optimizer,
        )

        logger = wandblogger.Logger("FashionMnist")
        trainer = training.Trainer(model=model, loss_function=criterion, optimizer=optimizer, logger=logger)

        trainer.train(
            x_train,
            y_train,
            x_val=x_valid,
            y_val=y_valid,
            epochs=cfg.epochs,
            batch_size=cfg.batch_size,
            shuffle=True,
        )
        exit_code = 0
    finally:
        # Always close the run, even when building or training raised.
        wandb.finish(exit_code=exit_code)


In [None]:
# Candidate hidden-layer stacks: each inner list gives the hidden widths in order,
# so the depth of the network varies between trials.
_hidden_layer_choices = [
    [128],
    [128, 64],
    [256, 128, 64],
    [64, 64, 64, 64],
]

# Hyperparameters sampled per trial.
# NOTE(review): learning_rate has only min/max set, with no explicit distribution —
# confirm that is the intended sampling over 0.0005..0.1.
_search_space = dict(
    optimizer=dict(values=["sgd"]),
    learning_rate=dict(min=0.0005, max=0.1),
    batch_size=dict(values=[32, 64, 128]),
    epochs=dict(value=20),
    layer_sizes=dict(values=_hidden_layer_choices),
    use_batchnorm=dict(values=[True, False]),
    dropout_rate=dict(values=[0.0, 0.3, 0.5]),
)

# Random search that minimises the metric named "val_loss".
sweep_config = dict(
    method="random",
    metric=dict(name="val_loss", goal="minimize"),
    parameters=_search_space,
)

In [None]:
# Register the sweep, then run trials in this kernel.
SWEEP_PROJECT = "numpy-ffnn-sweep"
SWEEP_TRIALS = 10  # number of train_sweep() runs the agent executes

sweep_id = wandb.sweep(sweep_config, project=SWEEP_PROJECT)
wandb.agent(sweep_id, function=train_sweep, count=SWEEP_TRIALS)