CIFAR-10

Download data

In [None]:
import nbformat
import wandb
import numpy as np
from src import (
    FFNN,
    layer,
    activation_functions,
    initializers,
    loss_functions,
    optimizers,
    batchnorm,
    dropout,
    preprocess,
    sweeping,   
    training,  
    wandblogger
)

In [None]:
from torchvision.datasets import CIFAR10

# Both splits live under the same on-disk root; download=True asks torchvision
# to fetch the data into that directory.
cifar10_root = "./data/cifar10/"
cifar10_trainset, cifar10_testset = (
    CIFAR10(cifar10_root, train=is_train_split, download=True)
    for is_train_split in (True, False)
)

Split data into train, validation and test sets

In [None]:


# Fraction of the official training set that is held out for validation.
validation_percent = 0.2

scaler = preprocess.ScaleToUnit()
one_hot_encoder = preprocess.OneHotEncoder()


def _prepare_split(images, labels, n_samples):
    """Flatten, scale and one-hot encode a single data split.

    Args:
        images: Image array whose first axis indexes the samples.
        labels: Sequence of class labels, one per sample.
        n_samples: Number of samples in the split; used as the leading
            dimension when flattening each image into one row.

    Returns:
        A tuple ``(x, targets, y)`` holding the output of
        ``scaler.transform`` on the flattened images, the labels as a NumPy
        array, and the output of ``one_hot_encoder.transform`` on those labels.
    """
    flat_inputs = scaler.transform(images.reshape(n_samples, -1))
    label_array = np.array(labels)
    return flat_inputs, label_array, one_hot_encoder.transform(label_array)


# The first (1 - validation_percent) share of the training set is used for
# training; the remaining tail becomes the validation set.
n_official_train = len(cifar10_trainset)
train_size = int((1 - validation_percent) * n_official_train)
validation_size = n_official_train - train_size

x_train, targets_train, y_train = _prepare_split(
    cifar10_trainset.data[:train_size],
    cifar10_trainset.targets[:train_size],
    train_size,
)

x_valid, targets_valid, y_valid = _prepare_split(
    cifar10_trainset.data[train_size:],
    cifar10_trainset.targets[train_size:],
    validation_size,
)

x_test, targets_test, y_test = _prepare_split(
    cifar10_testset.data,
    cifar10_testset.targets,
    len(cifar10_testset),
)

# Quick sanity check of container types, shapes and the encoded labels.
print(type(x_train), type(y_train))
print(x_train.shape, y_train.shape)

print(y_train)

Set up neural network, loss function, optimizer and logger

In [None]:


loss_fn = loss_functions.CrossEntropyLoss()

# One initializer instance of each kind is shared by every dense layer.
weight_init = initializers.HeInitializer()
bias_init = initializers.ConstantInitializer(0.0)

optimizer = optimizers.SGD(learning_rate=0.01)


def _dense(n_in, n_out):
    """Build a dense layer that uses the shared weight and bias initializers."""
    return layer.Layer(
        input_size=n_in,
        output_size=n_out,
        weight_initializer=weight_init,
        bias_initializer=bias_init,
    )


def _hidden_block(n_in, n_out):
    """Build one hidden stage: dense layer, batch norm, ReLU, then dropout(0.5)."""
    return [
        _dense(n_in, n_out),
        batchnorm.BatchNorm(n_out),
        activation_functions.ReLU(),
        dropout.Dropout(0.5),
    ]


# Flattened 32x32x3 input -> 128 -> 64 -> 10 outputs.
layers = [
    *_hidden_block(32 * 32 * 3, 128),
    *_hidden_block(128, 64),
    _dense(64, 10),
]

model = FFNN.FFNN(layers=layers, loss_function=loss_fn, optimizer=optimizer)

Training the model

In [None]:
# NOTE(review): this whole cell is a bare triple-quoted string literal, so none
# of the code inside it runs; it appears to be a single-run training example
# that was disabled this way. As the cell's last expression, the string is
# echoed as the cell output. Consider deleting the cell or restoring it as
# real code; `training` and `wandblogger` are already imported in the first cell.
"""from src import training, wandblogger

epochs = 10
batch_size = 64

logger = wandblogger.Logger("CIFAR10")
trainer = training.Trainer(model=model, loss_function=loss_fn, optimizer=optimizer, logger=logger)


history = trainer.train(
    x_train,
    y_train,
    x_valid,
    y_valid,
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True)"""

In [None]:
# Weights & Biases destination used when creating and running the sweep:
# project name first, owning entity second.
project, entity = "CIFAR10", "DTU-Deep-Learning-Project"

In [None]:
# Candidate hidden-layer layouts: each entry lists the hidden layer widths,
# so both the depth and the width of the network vary across trials.
_hidden_layer_options = [
    [128],
    [128, 64],
    [256, 128, 64],
    [64, 64, 64, 64],
]

# Hyperparameter search space. "values" lists discrete choices, "value" pins a
# constant, and "min"/"max" bound a continuous range.
_search_space = {
    "optimizer": {"values": ["sgd"]},
    "learning_rate": {"min": 0.0005, "max": 0.1},
    "batch_size": {"values": [32, 64, 128]},
    "epochs": {"value": 20},
    "layer_sizes": {"values": _hidden_layer_options},
    "use_batchnorm": {"values": [True, False]},
    "dropout_rate": {"values": [0.0, 0.3, 0.5]},
}

# Random search that minimizes the logged "val_loss" metric.
sweep_config = dict(
    method="random",
    metric=dict(name="val_loss", goal="minimize"),
    parameters=_search_space,
)

In [None]:
# Number of trials this agent executes before it stops.
sweep_trial_count = 10


def _run_sweep_trial():
    """Run one sweep trial on the prepared CIFAR-10 train/validation arrays.

    The agent calls this function with no arguments, once per trial. It
    forwards the notebook-level data and sweep settings to
    ``sweeping.train_sweep`` and returns whatever that call returns.

    NOTE(review): ``config=sweep_config`` passes the full sweep definition, not
    the hyperparameters sampled for this trial; presumably ``train_sweep``
    reads the sampled values from the active wandb run - confirm in
    ``src/sweeping``.
    """
    return sweeping.train_sweep(
        entity,
        project,
        config=sweep_config,
        x_train=x_train,
        y_train=y_train,
        x_valid=x_valid,
        y_valid=y_valid,
    )


# Register the sweep under the configured entity/project.
sweep_id = wandb.sweep(sweep_config, entity=entity, project=project)

# Pass entity/project explicitly so the agent looks the sweep up in the same
# place it was created, instead of relying on ambient wandb defaults.
wandb.agent(
    sweep_id,
    function=_run_sweep_trial,
    entity=entity,
    project=project,
    count=sweep_trial_count,
)