Import libraries

In [7]:
from mltrainer import ReportTypes, Trainer, TrainerSettings, metrics, rnn_models
from mltrainer.preprocessors import BasePreprocessor
from pathlib import Path
import torch
from mads_datasets import DatasetFactoryProvider, DatasetType

Setting seeds for isolated testing, but doesnt fix all randomness unfortunately?

In [8]:
import numpy as np
import random

# Set random seeds for reproducibility
seed = 42
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

Get flowers data into a streamer

In [16]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import BasePreprocessor
preprocessor = BasePreprocessor()

fashion = DatasetFactoryProvider.create_factory(DatasetType.FASHION)
batchsize = 64
streamers = fashion.create_datastreamer(batchsize=batchsize, preprocessor=preprocessor)
train = streamers['train']
valid = streamers['valid']

[32m2025-10-28 20:41:38.213[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at C:\Users\tycoh\.cache\mads_datasets\fashionmnist[0m
[32m2025-10-28 20:41:38.216[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m124[0m - [1mFile already exists at C:\Users\tycoh\.cache\mads_datasets\fashionmnist\fashionmnist.pt[0m


In [17]:
trainstreamer = train.stream()
validstreamer = valid.stream()
x, y = next(iter(trainstreamer))
x.shape, y.shape

(torch.Size([64, 1, 28, 28]), torch.Size([64]))

In [18]:
y

tensor([8, 8, 2, 5, 8, 9, 7, 8, 5, 0, 8, 9, 4, 0, 4, 0, 4, 3, 0, 8, 2, 2, 1, 5,
        0, 6, 5, 6, 2, 7, 7, 6, 1, 3, 4, 4, 6, 1, 7, 5, 5, 4, 5, 8, 4, 1, 3, 9,
        3, 0, 6, 5, 6, 5, 6, 0, 3, 4, 0, 5, 2, 9, 8, 2], dtype=torch.uint8)

Create a configurable model that can be hypertuned for the flowers dataset classification

Show you can
1. Make a hypothesis based on the theory (use the book)
1. Design experiments to test your hypothesis
1. Work iterative: eg start with a small experiment to get a direction, then reduce the search space and run a more extensive experiment

For classifying flowers we need a convolutional neural network because images are high dimensional, nearby pixels are statistically related and if pictures shifts a little al pixels values are different but it is still the same picture. By using a convolutional neural network we make sure we can use weight sharing to deal with the high dimensions, the kernel also takes care of nearby related pixels and takes care of recognizing the geomtric transformations. There are multiple architectures to choose from like LeNet, AlexNet (8 layers), VGG (19 layers), GoogLeNet (22 layers, inception), ResNet(152 layers, skip layers), SqueezeNet (less parameters, 50x less then alexnet). I am working on a simple laptop with cpu so i would like the model which is trained te fastest.   

Hypothesis
- Increasing the number of batchnorm layers increases the accuracy of the TestCNN model.


settings, trainer, ml flow logging

In [24]:
import torch.nn as nn
from typing import List

# make a CNN class
class TestCNN(nn.Module):
    # initialise class
    def __init__(self, num_classes: int, filters: int, dropout: float) -> None:
        # inherent functions from module
        super().__init__()
        self.filters = filters

        self.features = nn.Sequential(
                nn.Conv2d(1, filters, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2),

                nn.Conv2d(filters, filters, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2),
        
                nn.Conv2d(filters, filters, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2),       
        )
        self.agg = nn.AdaptiveAvgPool2d((1,1))

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(filters, filters),
            nn.ReLU(),
            nn.Dropout(p = dropout),
            nn.Linear(filters, num_classes)
        )

    def forward(self, x):
            x = self.features(x)
            x = self.agg(x)
            x = self.classifier(x)
            return x

TEST

In [12]:
import mlflow
experiment = "exercise_4"
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment(experiment)

<Experiment: artifact_location='file:c:/Users/tycoh/Desktop/MADS-ML-Tyco/4-hypertuning-ray/mlruns/1', creation_time=1761489130988, experiment_id='1', last_update_time=1761489130988, lifecycle_stage='active', name='exercise_4', tags={}>

In [19]:
from mltrainer import imagemodels, Trainer, TrainerSettings, ReportTypes, metrics
import torch.optim as optim

with mlflow.start_run():
    settings = TrainerSettings(
        epochs=2,
        metrics=[metrics.Accuracy()],
        logdir='modellogs',
        train_steps= 50,
        valid_steps= 50,
        reporttypes=[ReportTypes.MLFLOW]
    )
    
    loss_fn =nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau
    dropout = 0.5
    model = TestCNN(num_classes=10, filters = 32, dropout=dropout)
    
    mlflow.log_params({
        "epochs": settings.epochs,
        "metrics": settings.metrics,
        "train_steps": settings.train_steps,
        "valid_steps": settings.valid_steps,
        "dropout": dropout,
        "loss_fn": loss_fn,
        "optimizer": optimizer,
        "scheduler": scheduler
    })
    
    trainer = Trainer(
        model = model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer= optimizer,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=scheduler
    )
    trainer.loop()
    mlflow.end_run()

[32m2025-10-28 20:42:13.409[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to modellogs\20251028-204213[0m
[32m2025-10-28 20:42:13.409[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 50/50 [00:01<00:00, 30.64it/s]
[32m2025-10-28 20:42:15.809[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 0 train 2.2202 test 1.8888 metric ['0.3653'][0m
100%|[38;2;30;71;6m██████████[0m| 50/50 [00:02<00:00, 24.99it/s]
[32m2025-10-28 20:42:18.791[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 1 train 1.5027 test 1.1760 metric ['0.5587'][0m
100%|[38;2;30;71;6m██████████[0m| 2/2 [00:05<00:00,  2.70s/it]


Hypothesis
- Model has an increase in accuracy by using dropout regularization because this makes the model less dependent on any given hidden unit and encourages weights to have smaller magnitudes and therefore not overfit and thus generalize better

Experiment
- We set a seed so it is an isolated experiment with na randomness
- First, we run 3 epochs for quick testing with TestCNN to examine accuracy with dropout set to 0.0
- Next, we run 3 epochs for quick testing with TestCNN with dropout set to 0.5 and examine the results
- Lastly, we run 3 epochs for quick testing with TestCNN with dropout set to 1 and examine the results

Results
- Dropout 0.0 gives max accuracy 0.48
- Dropout 0.5 gives max accuracy 0.46
- Dropout 1.0 gives max accuracy 0.21

Conclusion
- Without dropout the model has the best performance. This rejects the hypothesis that it should generalize better with dropout. I believe this is a bad experiment, because this does not match with the theory (which has been tested as well). Dropout is used to prevent overfitting and this simple CNN with a low number of filters is not yet overfitting. I am therefore using regularisation on a model that is not yet performing very well and therefore making the model worse. The next experiment should be focussed on an overfitting CNN and then trying out dropout to increase increase accuracy.


Hypothesis 2
- Adding dropout should increase accuracy on an overfitted CNN

Experiment
- train an CNN till it is overfitting with dropout 0.0 and check accuracy
- train an overfitting CNN, add dropout 0.2 and check accuracy
- train an overfitting CNN, add dropout 0.5 and check accuracy

Results
- 
- 
- 

Conclusion

In [None]:
from mltrainer import imagemodels, Trainer, TrainerSettings, ReportTypes, metrics
import torch.optim as optim

with mlflow.start_run():
    settings = TrainerSettings(
        epochs=20,
        metrics=[metrics.Accuracy()],
        logdir='modellogs',
        train_steps= 50,
        valid_steps= 50,
        reporttypes=[ReportTypes.MLFLOW]
    )
    
    loss_fn =nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau
    dropout = 0.0
    model = TestCNN(num_classes=10, filters=512, dropout=dropout)
    
    mlflow.log_params({
        "epochs": settings.epochs,
        "metrics": settings.metrics,
        "train_steps": settings.train_steps,
        "valid_steps": settings.valid_steps,
        "dropout": dropout,
        "loss_fn": loss_fn,
        "optimizer": optimizer,
        "scheduler": scheduler
    })
    
    trainer = Trainer(
        model = model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer= optimizer,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=scheduler
    )
    trainer.loop()
    mlflow.end_run()

[32m2025-10-28 21:07:51.478[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to modellogs\20251028-210751[0m
[32m2025-10-28 21:07:51.478[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
  0%|[38;2;30;71;6m          [0m| 0/20 [00:00<?, ?it/s]

In [1]:
modeldir = Path("models").resolve()
if not modeldir.exists():
    modeldir.mkdir()
    print(f"Created {modeldir}")

NameError: name 'Path' is not defined

In [None]:
import torch.optim as optim
from mltrainer import metrics, Trainer, TrainerSettings, ReportTypes
from datetime import datetime
from mltrainer.imagemodels import CNNConfig, CNNblocks

optimizer = optim.Adam
loss_fn = torch.nn.CrossEntropyLoss()
accuracy = metrics.Accuracy()

# Define the hyperparameter search space
settings = TrainerSettings(
    epochs=3,
    metrics=[accuracy],
    logdir=modeldir,
    train_steps=100,
    valid_steps=100,
    reporttypes=[ReportTypes.MLFLOW, ReportTypes.TOML],
)


# Define the objective function for hyperparameter optimization
def objective(params):
    # Start a new MLflow run for tracking the experiment
    with mlflow.start_run():
        # Set MLflow tags to record metadata about the model and developer
        mlflow.set_tag("model", "cnn")
        # Log hyperparameters to MLflow
        mlflow.log_params(params)
        mlflow.log_param("batchsize", f"{batchsize}")


        # Initialize the optimizer, loss function, and accuracy metric

        config = CNNConfig(
            matrixshape = (224, 224), # every image is 224x224
            batchsize = batchsize,
            input_channels = 3, 
            hidden = params["filters"], 
            kernel_size = params["kernel_size"],
            maxpool = 3, # kernel size of the maxpool
            num_layers = params["num_layers"], 
            num_classes = 5,
        )

        # Instantiate the CNN model with the given hyperparameters
        model = CNNblocks(config)
        # Train the model using a custom train loop
        trainer = Trainer(
            model=model,
            settings=settings,
            loss_fn=loss_fn,
            optimizer=optimizer,
            traindataloader=trainstreamer,
            validdataloader=validstreamer,
            scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        )
        trainer.loop()

        # Save the trained model with a timestamp
        tag = datetime.now().strftime("%Y%m%d-%H%M")
        modelpath = modeldir / (tag + "model.pt")
        torch.save(model, modelpath)

        # Log the saved model as an artifact in MLflow
        mlflow.log_artifact(local_path=modelpath, artifact_path="pytorch_models")
        return {'loss' : trainer.test_loss}

In [None]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

search_space = {
    'filters' : scope.int(hp.quniform('filters', 16, 128, 8)),
    'kernel_size' : scope.int(hp.quniform('kernel_size', 2, 5, 1)),
    'num_layers' : scope.int(hp.quniform('num_layers', 1, 10, 1)),
}

In [None]:
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=3,
    trials=Trials()
)

Calculated matrix size: 12544                        
Caluclated flatten size: 1304576                     
  0%|          | 0/3 [00:00<?, ?trial/s, best loss=?]

[32m2025-10-26 16:25:43.238[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to C:\Users\tycoh\Desktop\MADS-ML-Tyco\4-hypertuning-ray\models\20251026-162543[0m
[32m2025-10-26 16:25:43.238[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  1%|[38;2;30;71;6m1         [0m| 1/100 [00:30<51:07, 30.98s/it][A
  2%|[38;2;30;71;6m2         [0m| 2/100 [00:58<46:53, 28.71s/it][A
  3%|[38;2;30;71;6m3         [0m| 3/100 [01:21<42:32, 26.32s/it][A
  4%|[38;2;30;71;6m4         [0m| 4/100 [01:44<40:13, 25.14s/it][A
  5%|[38;2;30;71;6m5         [0m| 5/100 [02:08<39:00, 24.64s/it][A
  6%|[38;2;30;71;6m6         [0m| 6/100 [02:32<38:09, 24.36s/it][A
  7%|[38;2;30;71;6m7         [0m| 7/100 [

In [None]:
best_result

NameError: name 'best_result' is not defined