In [1]:
import sys
sys.path.insert(0, "../..")

import torch
import torch.nn as nn
from src.data import make_dataset
from pathlib import Path
from loguru import logger

  warn(


Let's start with our good ol' Fashion-MNIST.

In [2]:
from src.data.make_dataset import DatasetFactoryProvider, DatasetType

# Batch size used for both streamers (and logged to MLflow further down).
batchsize = 64

# Ask the provider for the Fashion dataset factory and build its streamers.
fashionfactory = DatasetFactoryProvider.get_factory(DatasetType.FASHION)
streamers = fashionfactory.create_datastreamer(batchsize=batchsize)

# Unpack the two splits, then open one stream per split.
train, valid = streamers["train"], streamers["valid"]
trainstreamer, validstreamer = train.stream(), valid.stream()

We can obtain an item:

In [3]:
# Pull a single batch from the training stream and show the shapes of
# the inputs and the labels.
x, y = next(iter(trainstreamer))
x.shape, y.shape

(torch.Size([64, 1, 28, 28]), torch.Size([64]))

The batch follows PyTorch's channels-first convention: (batch, channel, height, width). Each label is an integer, so `y` has shape (batch,).

Let's re-use the model we had:

In [4]:
import torch
from torch import nn

# Pick the compute device: the GPU when torch can see one, the CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

# Define model
class CNN(nn.Module):
    """Convolutional classifier with a global-average-pooled dense head.

    Three conv / ReLU / max-pool stages are followed by an average pool that
    collapses the remaining activation map, and a three-layer dense head
    that produces 10 logits.

    Args:
        filters: number of output channels of every convolution.
        units1: width of the first hidden dense layer.
        units2: width of the second hidden dense layer.
        input_size: (batch, channels, height, width) of a representative
            input. The channel entry sizes the first convolution; the
            spatial entries are used to size the average pool.
    """

    def __init__(self, filters, units1, units2, input_size=(32, 1, 28, 28)):
        super().__init__()

        # Read the channel count from input_size instead of hard-coding 1,
        # so the shape probe below also works for non-grayscale inputs.
        in_channels = input_size[1]

        self.convolutions = nn.Sequential(
            nn.Conv2d(in_channels, filters, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(filters, filters, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(filters, filters, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Probe the conv stack once to learn the spatial size it produces;
        # the average pool uses that size as its kernel so each channel is
        # reduced to a single value before the dense head.
        activation_map_size = self._conv_test(input_size)
        logger.info(f"Aggregating activationmap with size {activation_map_size}")
        self.agg = nn.AvgPool2d(activation_map_size)

        self.dense = nn.Sequential(
            nn.Flatten(),
            # After pooling there is one value per filter, hence `filters` inputs.
            nn.Linear(filters, units1),
            nn.ReLU(),
            nn.Linear(units1, units2),
            nn.ReLU(),
            nn.Linear(units2, 10)
        )

    def _conv_test(self, input_size = (32, 1, 28, 28)):
        """Return the (height, width) of the conv stack's output for `input_size`."""
        x = torch.ones(input_size)
        # Only the output shape is needed, so skip building an autograd graph.
        with torch.no_grad():
            x = self.convolutions(x)
        return x.shape[-2:]

    def forward(self, x):
        """Map a (batch, channels, height, width) tensor to (batch, 10) logits."""
        x = self.convolutions(x)
        x = self.agg(x)
        logits = self.dense(x)
        return logits

# Build the network and move its parameters to the selected device.
model = CNN(filters=32, units1=128, units2=64).to(device)

2023-06-02 13:19:24.350 | INFO     | __main__:__init__:26 - Aggregating activationmap with size torch.Size([2, 2])


Using cpu device


In [5]:
from torchsummary import summary
# Print a layer-by-layer overview (output shapes and parameter counts)
# for a single 1x28x28 input.
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
              ReLU-2           [-1, 32, 28, 28]               0
         MaxPool2d-3           [-1, 32, 14, 14]               0
            Conv2d-4           [-1, 32, 12, 12]           9,248
              ReLU-5           [-1, 32, 12, 12]               0
         MaxPool2d-6             [-1, 32, 6, 6]               0
            Conv2d-7             [-1, 32, 4, 4]           9,248
              ReLU-8             [-1, 32, 4, 4]               0
         MaxPool2d-9             [-1, 32, 2, 2]               0
        AvgPool2d-10             [-1, 32, 1, 1]               0
          Flatten-11                   [-1, 32]               0
           Linear-12                  [-1, 128]           4,224
             ReLU-13                  [-1, 128]               0
           Linear-14                   

And set up the optimizer, loss and accuracy.

In [6]:
import torch.optim as optim
from src.models import metrics
# Note: `optimizer` holds the Adam class itself, not an instance.
optimizer = optim.Adam
loss_fn = torch.nn.CrossEntropyLoss()
# This metric instance is reused below for a quick check on one batch
# and in the trainer settings.
accuracy = metrics.Accuracy()

In [7]:
# Sanity check on the untrained model (expect roughly chance level).
# The model was moved to `device`, so the batch has to live there as well;
# on a CPU-only machine `.to(device)` is a no-op.
yhat = model(x.to(device))
accuracy(y.to(device), yhat)

tensor(0.1094)

# MLflow
MLflow is an open-source platform designed to manage the entire Machine Learning (ML) lifecycle, including experimentation, reproducibility, deployment, and governance. It provides a set of APIs and tools to streamline ML workflows, making it easier to track experiments, package code, manage model versions, and deploy models.

Reasons to use MLflow over TensorBoard, gin-config, or Ray:

- End-to-end ML lifecycle management: While TensorBoard focuses on visualizing model training metrics and gin-config on hyperparameter configuration, MLflow covers a broader range of tasks, such as experiment tracking, model packaging, and deployment.

- Framework agnostic: MLflow is not tied to a specific ML framework, making it suitable for projects using different libraries or even multiple libraries.

- Model Registry: MLflow provides a centralized model registry, allowing you to version, track, and manage your models, which is not available in TensorBoard or gin-config.

- Deployment support: MLflow facilitates model deployment to various platforms, such as local, cloud, or Kubernetes environments, whereas TensorBoard and gin-config are not built for deployment tasks.

- Integration with other tools: MLflow integrates with popular tools and platforms like Databricks, AWS, and Azure, making it easy to incorporate into existing workflows.

However, the choice between MLflow and other tools like TensorBoard, gin-config, or Ray depends on your specific use case and the scope of the ML workflow you want to manage.

In [8]:
# Name of the MLflow experiment under which the runs in this notebook are grouped.
experiment_path = "test_tune"

In [9]:
import mlflow
# Keep the tracking data in a local SQLite file (mlflow.db).
mlflow.set_tracking_uri("sqlite:///mlflow.db")
# Make `experiment_path` the active experiment for the runs started below.
mlflow.set_experiment(experiment_path)

2023/06/02 13:19:25 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/06/02 13:19:25 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 451aebb31d03, add metric step
INFO  [alembic.runtime.migration] Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
INFO  [alembic.runtime.migration] Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
INFO  [alembic.runtime.migration] Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
INFO  [alembic.runtime.migration] Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
INFO  [alembic.runtime.migration] Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
INFO  [89d4b8295536_create_latest_metrics_table_py] Migration complete!
INFO  

<Experiment: artifact_location='/workspaces/ML22/notebooks/2_convolutions/mlruns/1', creation_time=1685711969081, experiment_id='1', last_update_time=1685711969081, lifecycle_stage='active', name='test_tune', tags={}>

In the code above, we set the MLflow tracking URI to a local SQLite database file. This is done to configure the storage location for MLflow's experiment tracking data, such as metrics, parameters, and artifacts. By specifying a SQLite database, we enable a lightweight and easy-to-use storage solution for tracking the experiments and their associated information.

The line `mlflow.set_experiment(experiment_path)` sets the active MLflow experiment to the name stored in `experiment_path` (here `"test_tune"`). This is useful for organizing and grouping your runs, as it allows you to associate the upcoming ML training runs with a specific experiment name, making it easier to search, compare, and analyze the results later.

In [10]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

We import functions and classes from the hyperopt library to perform hyperparameter optimization. This library helps us find the best hyperparameter values for our machine learning model by searching through a defined search space and using optimization algorithms like Tree-structured Parzen Estimator (TPE). The goal is to improve our model's performance by tuning its hyperparameters.

Advantages of TPE:

- Model-based approach: TPE is a Bayesian optimization method that models the objective function as a probability distribution. It learns from previous evaluations to decide which points in the search space to explore next, making it more efficient in finding optimal hyperparameters.

- Exploration-exploitation trade-off: TPE balances the trade-off between exploration (searching in new regions of the search space) and exploitation (refining around the current best points). This can lead to better results in problems with complex search spaces.

- Continuous hyperparameter optimization: TPE can handle continuous hyperparameters more naturally, as it builds a probability model to estimate the performance for any given point in the search space.

Let's set up an objective function and start logging some useful things we might want to track:

In [11]:
# Directory where the trained models of every trial are written.
modeldir = Path("../../models/mnist").resolve()
if not modeldir.exists():
    # parents=True: on a fresh checkout ../../models may not exist yet, and a
    # plain mkdir() would raise FileNotFoundError in that case.
    modeldir.mkdir(parents=True, exist_ok=True)
    print(f"Created {modeldir}")

Created /workspaces/ML22/models/mnist


In [12]:
import torch.optim as optim
from src.models import metrics
from src.models import train_model
from datetime import datetime

from src.settings import TrainerSettings, ReportTypes

# Trainer configuration, created once and shared by every hyperopt trial.
settings = TrainerSettings(
    epochs=3,
    metrics=[accuracy],  # the Accuracy instance created in an earlier cell
    logdir="modellog",
    train_steps=100,  # presumably batches per training epoch (progress bars count to 100) - TODO confirm
    valid_steps=100,  # presumably batches per validation pass - TODO confirm
    tunewrite=[ReportTypes.MLFLOW],
)


# Define the objective function for hyperparameter optimization
def objective(params):
    """Train one CNN configuration and report its loss to hyperopt.

    Every call runs inside its own MLflow run: the sampled hyperparameters
    and the batch size are logged, a model is trained with the shared
    `settings`, and the trained model is saved and attached as an artifact.

    Args:
        params: dict of keyword arguments for `CNN`, as sampled from the
            search space.

    Returns:
        dict with the key 'loss' (the trainer's `test_loss` after training)
        and 'status' set to `STATUS_OK`.
    """
    # Start a new MLflow run for tracking the experiment
    with mlflow.start_run() as run:
        # Set MLflow tags to record metadata about the model and developer
        mlflow.set_tag("model", "convnet")
        mlflow.set_tag("dev", "raoul")
        # Log hyperparameters to MLflow
        mlflow.log_params(params)
        mlflow.log_param("batchsize", f"{batchsize}")

        # Optimizer class and loss function for this trial. The metrics are
        # taken from the shared `settings`, so none is created here.
        optimizer = optim.Adam
        loss_fn = torch.nn.CrossEntropyLoss()

        # Instantiate the CNN model with the given hyperparameters
        model = CNN(**params)
        # Train the model using a custom train loop
        trainer = train_model.Trainer(
            model=model,
            settings=settings,
            loss_fn=loss_fn,
            optimizer=optimizer,
            traindataloader=trainstreamer,
            validdataloader=validstreamer,
            scheduler=optim.lr_scheduler.ReduceLROnPlateau
        )
        trainer.loop()

        # Save the trained model under a name that is unique per trial.
        # A minute-resolution timestamp alone is not enough: trials that
        # finish within the same minute would overwrite each other's file.
        tag = datetime.now().strftime("%Y%m%d-%H%M%S")
        modelpath = modeldir / f"{tag}-{run.info.run_id}-model.pt"
        torch.save(model, modelpath)

        # Log the saved model as an artifact in MLflow
        mlflow.log_artifact(local_path=str(modelpath), artifact_path="pytorch_models")
        return {'loss' : trainer.test_loss, 'status': STATUS_OK}

In [13]:
# Every hyperparameter is drawn from a quantized uniform distribution with
# step 8 between (low, high) and cast to int before it reaches the model.
search_space = {
    label: scope.int(hp.quniform(label, low, high, 8))
    for label, low, high in (
        ('filters', 16, 128),
        ('units1', 32, 128),
        ('units2', 32, 128),
    )
}

We define a search space for hyperparameter optimization using Hyperopt. The search space specifies the range and distribution of hyperparameters to explore during the optimization process. This is crucial for finding the optimal set of hyperparameters that yield the best performance for the machine learning model. The search space defined here includes the number of filters in the convolutional layers, and the number of units in two fully connected layers, allowing Hyperopt to find the best combination within the given ranges.


Now, finally, let us perform the hyperparameter search using the fmin function from hyperopt. The function takes the following arguments:

- `fn=objective`: The objective function to minimize, which is defined earlier to train the model and return the test loss.
- `space=search_space`: The search space defined earlier, containing the range of hyperparameters to explore.
- `algo=tpe.suggest`: The optimization algorithm to use, in this case, the Tree-structured Parzen Estimator (TPE) method.
- `max_evals=10`: The maximum number of function evaluations, i.e., the maximum number of hyperparameter combinations to try.
- `trials=Trials()`: A Trials object to store the results of each evaluation.

The fmin function searches for the best hyperparameters within the given search space using the TPE algorithm, aiming to minimize the objective function (test loss). Once the optimization process is completed, the best hyperparameters found are stored in the best_result variable.

In [14]:
# Keep a handle on the Trials object: it stores the result of every
# evaluation, and creating it inline would make it unreachable after the search.
trials = Trials()
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=10,
    trials=trials,
)

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]

2023-06-02 13:19:30.487 | INFO     | __main__:__init__:26 - Aggregating activationmap with size torch.Size([2, 2])
2023-06-02 13:19:30.490 | INFO     | src.data.data_tools:dir_add_timestamp:146 - Logging to modellog/20230602-1319
2023-06-02 13:19:30.502 | INFO     | src.models.train_model:__init__:108 - Found earlystop_kwargs in settings.Set to None if you dont want earlystopping.
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  5%|[38;2;30;71;6m5         [0m| 5/100 [00:00<00:02, 42.85it/s][A
 10%|[38;2;30;71;6m#         [0m| 10/100 [00:00<00:02, 41.54it/s][A
 15%|[38;2;30;71;6m#5        [0m| 15/100 [00:00<00:02, 38.42it/s][A
 19%|[38;2;30;71;6m#9        [0m| 19/100 [00:00<00:02, 37.76it/s][A
 23%|[38;2;30;71;6m##3       [0m| 23/100 [00:00<00:02, 37.13it/s][A
 27%|[38;2;30;71;6m##7       [0m| 27/100 [00:00<00:01, 36.97it/s][A
 32%|[38;2;30;71;6m###2      [0m| 32/100 [00:00<00:01, 39.39it/s][A
 

 10%|█         | 1/10 [00:16<02:30, 16.67s/trial, best loss: 0.8098279416561127]

2023-06-02 13:19:47.149 | INFO     | __main__:__init__:26 - Aggregating activationmap with size torch.Size([2, 2])
2023-06-02 13:19:47.157 | INFO     | src.data.data_tools:dir_add_timestamp:146 - Logging to modellog/20230602-1319
2023-06-02 13:19:47.160 | INFO     | src.models.train_model:__init__:108 - Found earlystop_kwargs in settings.Set to None if you dont want earlystopping.
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  1%|[38;2;30;71;6m1         [0m| 1/100 [00:00<00:29,  3.31it/s][A
  3%|[38;2;30;71;6m3         [0m| 3/100 [00:00<00:13,  7.05it/s][A
  4%|[38;2;30;71;6m4         [0m| 4/100 [00:00<00:12,  7.43it/s][A
  5%|[38;2;30;71;6m5         [0m| 5/100 [00:00<00:11,  8.04it/s][A
  6%|[38;2;30;71;6m6         [0m| 6/100 [00:00<00:11,  8.50it/s][A
  8%|[38;2;30;71;6m8         [0m| 8/100 [00:00<00:09,  9.74it/s][A
  9%|[38;2;30;71;6m9         [0m| 9/100 [00:01<00:12,  7.38it/s][A
 10%|[

 20%|██        | 2/10 [01:07<04:54, 36.78s/trial, best loss: 0.8004965114593506]

2023-06-02 13:20:37.805 | INFO     | __main__:__init__:26 - Aggregating activationmap with size torch.Size([2, 2])
2023-06-02 13:20:37.807 | INFO     | src.data.data_tools:dir_add_timestamp:146 - Logging to modellog/20230602-1320
2023-06-02 13:20:37.823 | INFO     | src.models.train_model:__init__:108 - Found earlystop_kwargs in settings.Set to None if you dont want earlystopping.
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  4%|[38;2;30;71;6m4         [0m| 4/100 [00:00<00:02, 34.92it/s][A
  9%|[38;2;30;71;6m9         [0m| 9/100 [00:00<00:02, 41.79it/s][A
 14%|[38;2;30;71;6m#4        [0m| 14/100 [00:00<00:01, 43.25it/s][A
 19%|[38;2;30;71;6m#9        [0m| 19/100 [00:00<00:01, 45.54it/s][A
 24%|[38;2;30;71;6m##4       [0m| 24/100 [00:00<00:01, 46.96it/s][A
 29%|[38;2;30;71;6m##9       [0m| 29/100 [00:00<00:01, 44.58it/s][A
 34%|[38;2;30;71;6m###4      [0m| 34/100 [00:00<00:01, 43.44it/s][A
 3

 30%|███       | 3/10 [01:18<02:55, 25.05s/trial, best loss: 0.8004965114593506]

2023-06-02 13:20:48.933 | INFO     | __main__:__init__:26 - Aggregating activationmap with size torch.Size([2, 2])
2023-06-02 13:20:48.936 | INFO     | src.data.data_tools:dir_add_timestamp:146 - Logging to modellog/20230602-1320
2023-06-02 13:20:48.938 | INFO     | src.models.train_model:__init__:108 - Found earlystop_kwargs in settings.Set to None if you dont want earlystopping.
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  1%|[38;2;30;71;6m1         [0m| 1/100 [00:00<00:10,  9.85it/s][A
  3%|[38;2;30;71;6m3         [0m| 3/100 [00:00<00:08, 11.66it/s][A
  5%|[38;2;30;71;6m5         [0m| 5/100 [00:00<00:09, 10.36it/s][A
  7%|[38;2;30;71;6m7         [0m| 7/100 [00:00<00:12,  7.53it/s][A
  8%|[38;2;30;71;6m8         [0m| 8/100 [00:01<00:13,  6.94it/s][A
 10%|[38;2;30;71;6m#         [0m| 10/100 [00:01<00:10,  8.34it/s][A
 11%|[38;2;30;71;6m#1        [0m| 11/100 [00:01<00:10,  8.58it/s][A
 12%|

 40%|████      | 4/10 [01:55<02:58, 29.74s/trial, best loss: 0.7625156235694885]

2023-06-02 13:21:25.920 | INFO     | __main__:__init__:26 - Aggregating activationmap with size torch.Size([2, 2])
2023-06-02 13:21:25.922 | INFO     | src.data.data_tools:dir_add_timestamp:146 - Logging to modellog/20230602-1321
2023-06-02 13:21:25.929 | INFO     | src.models.train_model:__init__:108 - Found earlystop_kwargs in settings.Set to None if you dont want earlystopping.
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  2%|[38;2;30;71;6m2         [0m| 2/100 [00:00<00:05, 18.37it/s][A
  5%|[38;2;30;71;6m5         [0m| 5/100 [00:00<00:04, 22.20it/s][A
  8%|[38;2;30;71;6m8         [0m| 8/100 [00:00<00:03, 23.44it/s][A
 11%|[38;2;30;71;6m#1        [0m| 11/100 [00:00<00:03, 23.04it/s][A
 14%|[38;2;30;71;6m#4        [0m| 14/100 [00:00<00:03, 22.94it/s][A
 17%|[38;2;30;71;6m#7        [0m| 17/100 [00:00<00:04, 20.47it/s][A
 20%|[38;2;30;71;6m##        [0m| 20/100 [00:00<00:03, 21.61it/s][A
 23

 50%|█████     | 5/10 [02:16<02:12, 26.51s/trial, best loss: 0.7495398753881455]

2023-06-02 13:21:46.666 | INFO     | __main__:__init__:26 - Aggregating activationmap with size torch.Size([2, 2])
2023-06-02 13:21:46.670 | INFO     | src.data.data_tools:dir_add_timestamp:146 - Logging to modellog/20230602-1321
2023-06-02 13:21:46.672 | INFO     | src.models.train_model:__init__:108 - Found earlystop_kwargs in settings.Set to None if you dont want earlystopping.
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  2%|[38;2;30;71;6m2         [0m| 2/100 [00:00<00:06, 14.69it/s][A
  4%|[38;2;30;71;6m4         [0m| 4/100 [00:00<00:05, 17.12it/s][A
  7%|[38;2;30;71;6m7         [0m| 7/100 [00:00<00:04, 19.24it/s][A
 10%|[38;2;30;71;6m#         [0m| 10/100 [00:00<00:04, 20.75it/s][A
 13%|[38;2;30;71;6m#3        [0m| 13/100 [00:00<00:04, 17.82it/s][A
 15%|[38;2;30;71;6m#5        [0m| 15/100 [00:00<00:04, 18.03it/s][A
 17%|[38;2;30;71;6m#7        [0m| 17/100 [00:01<00:05, 15.22it/s][A
 19

 60%|██████    | 6/10 [02:42<01:45, 26.44s/trial, best loss: 0.7495398753881455]

2023-06-02 13:22:13.000 | INFO     | __main__:__init__:26 - Aggregating activationmap with size torch.Size([2, 2])
2023-06-02 13:22:13.007 | INFO     | src.data.data_tools:dir_add_timestamp:146 - Logging to modellog/20230602-1322
2023-06-02 13:22:13.030 | INFO     | src.models.train_model:__init__:108 - Found earlystop_kwargs in settings.Set to None if you dont want earlystopping.
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  1%|[38;2;30;71;6m1         [0m| 1/100 [00:00<00:12,  8.21it/s][A
  3%|[38;2;30;71;6m3         [0m| 3/100 [00:00<00:08, 10.91it/s][A
  5%|[38;2;30;71;6m5         [0m| 5/100 [00:00<00:06, 13.70it/s][A
  7%|[38;2;30;71;6m7         [0m| 7/100 [00:00<00:06, 15.09it/s][A
  9%|[38;2;30;71;6m9         [0m| 9/100 [00:00<00:06, 14.53it/s][A
 11%|[38;2;30;71;6m#1        [0m| 11/100 [00:00<00:06, 14.54it/s][A
 13%|[38;2;30;71;6m#3        [0m| 13/100 [00:00<00:05, 15.05it/s][A
 15%|

 70%|███████   | 7/10 [03:15<01:25, 28.63s/trial, best loss: 0.7495398753881455]

2023-06-02 13:22:46.115 | INFO     | __main__:__init__:26 - Aggregating activationmap with size torch.Size([2, 2])
2023-06-02 13:22:46.120 | INFO     | src.data.data_tools:dir_add_timestamp:146 - Logging to modellog/20230602-1322
2023-06-02 13:22:46.122 | INFO     | src.models.train_model:__init__:108 - Found earlystop_kwargs in settings.Set to None if you dont want earlystopping.
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  4%|[38;2;30;71;6m4         [0m| 4/100 [00:00<00:03, 30.87it/s][A
  8%|[38;2;30;71;6m8         [0m| 8/100 [00:00<00:02, 32.23it/s][A
 13%|[38;2;30;71;6m#3        [0m| 13/100 [00:00<00:02, 37.93it/s][A
 17%|[38;2;30;71;6m#7        [0m| 17/100 [00:00<00:02, 29.65it/s][A
 21%|[38;2;30;71;6m##1       [0m| 21/100 [00:00<00:02, 31.59it/s][A
 25%|[38;2;30;71;6m##5       [0m| 25/100 [00:00<00:02, 33.36it/s][A
 29%|[38;2;30;71;6m##9       [0m| 29/100 [00:00<00:02, 34.97it/s][A
 3

 80%|████████  | 8/10 [03:32<00:49, 24.92s/trial, best loss: 0.7495398753881455]

2023-06-02 13:23:03.022 | INFO     | __main__:__init__:26 - Aggregating activationmap with size torch.Size([2, 2])
2023-06-02 13:23:03.025 | INFO     | src.data.data_tools:dir_add_timestamp:146 - Logging to modellog/20230602-1323
2023-06-02 13:23:03.035 | INFO     | src.models.train_model:__init__:108 - Found earlystop_kwargs in settings.Set to None if you dont want earlystopping.
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  4%|[38;2;30;71;6m4         [0m| 4/100 [00:00<00:02, 33.45it/s][A
  8%|[38;2;30;71;6m8         [0m| 8/100 [00:00<00:03, 27.90it/s][A
 12%|[38;2;30;71;6m#2        [0m| 12/100 [00:00<00:02, 29.89it/s][A
 16%|[38;2;30;71;6m#6        [0m| 16/100 [00:00<00:02, 31.11it/s][A
 20%|[38;2;30;71;6m##        [0m| 20/100 [00:00<00:03, 26.27it/s][A
 24%|[38;2;30;71;6m##4       [0m| 24/100 [00:00<00:02, 28.40it/s][A
 27%|[38;2;30;71;6m##7       [0m| 27/100 [00:00<00:02, 25.76it/s][A
 3

 90%|█████████ | 9/10 [03:53<00:23, 23.49s/trial, best loss: 0.7495398753881455]

2023-06-02 13:23:23.386 | INFO     | __main__:__init__:26 - Aggregating activationmap with size torch.Size([2, 2])
2023-06-02 13:23:23.389 | INFO     | src.data.data_tools:dir_add_timestamp:146 - Logging to modellog/20230602-1323
2023-06-02 13:23:23.391 | INFO     | src.models.train_model:__init__:108 - Found earlystop_kwargs in settings.Set to None if you dont want earlystopping.
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  2%|[38;2;30;71;6m2         [0m| 2/100 [00:00<00:05, 16.35it/s][A
  4%|[38;2;30;71;6m4         [0m| 4/100 [00:00<00:05, 17.29it/s][A
  7%|[38;2;30;71;6m7         [0m| 7/100 [00:00<00:04, 19.84it/s][A
  9%|[38;2;30;71;6m9         [0m| 9/100 [00:00<00:05, 16.09it/s][A
 11%|[38;2;30;71;6m#1        [0m| 11/100 [00:00<00:05, 17.18it/s][A
 14%|[38;2;30;71;6m#4        [0m| 14/100 [00:00<00:04, 19.87it/s][A
 17%|[38;2;30;71;6m#7        [0m| 17/100 [00:00<00:04, 17.75it/s][A
 19%

100%|██████████| 10/10 [04:16<00:00, 25.69s/trial, best loss: 0.7495398753881455]


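After running this, you can look at `best_result`. Note that `fmin` returns the raw sampled values as floats (e.g. `56.0`), so cast them to `int` (or use `hyperopt.space_eval(search_space, best_result)`) before passing them to `CNN`.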

In [16]:
# Show the best hyperparameter values found by the search.
best_result

{'filters': 56.0, 'units1': 48.0, 'units2': 104.0}

But you can also explore the UI from MLflow. It is pretty nice. To help you out, you can use the Makefile by first navigating to `/notebooks/2_convolutions` in the terminal and then typing `make show_logs`. This starts a server you can open at `localhost:5000`. Also, have a look at the `Makefile` in this folder to see what you execute. It saves the user from typing an inconveniently long and complex command every time.