# 07. PyTorch Experiment Tracking

Machine Learning is very experimental

In order to figure out which experminets are worth pursuing, that's where **experiment tracking** comes in, it helps you to figure out what doesn't work so you can figure out what does work

In this notebook, we're going to see an example of programmatically tracking experiments

## Resources

**Course book:** https://www.learnpytorch.io/07_pytorch_experiment_tracking/

## 0. Prepare libraries

In [1]:
import torch
import torchvision

print(torch.__version__)
print(torchvision.__version__)

2.7.1
0.22.1


In [2]:
import matplotlib.pyplot as plt
from torchinfo import summary

from src import get_data, setup_data, engine

In [3]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else (
    "mps" if torch.mps.is_available() else "cpu"
)

device

'mps'

In [4]:
# Set seeds
def set_seeds(seed: int=42):
    """Sets random seeds for torch operations

    Args:
        seed (int, optional): Random seed to set. Defaults to 42.
    """
    
    # Set the see for general torch operations
    torch.manual_seed(seed)
    
    # Set the seed for CUDA torch operations
    torch.cuda.manual_seed(seed)
    
    # Set the seed for MPS torch operations
    torch.mps.manual_seed(seed)

## 1. Get data

Want to get pizza, steak, sushi images.

So we can run epxeriments buidling FoodVision Mini and see which model performs best

In [5]:
get_data.get_data(
    data_dir_str="data/",
    image_path_str="pizza_steak_sushi",
    data_url_str="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
    file_name_str="pizza_steak_sushi.zip"
)

data/pizza_steak_sushi exists...
Data in data/pizza_steak_sushi already exits, skipping downloading and unzipping...
Finished getting data...


## 2. Create DataSets and DataLoaders

### 2.1 Creating DataLoaders with manual transforms

The goal with transforms is to ensure your cusotm data is formatted in a reproducible way as well as a way thta will suit pretrained models

In [6]:
# Setup direcotires
from pathlib import Path

image_path = Path("data/pizza_steak_sushi")
train_dir = image_path / "train"
test_dir = image_path / "test"

train_dir, test_dir

(PosixPath('data/pizza_steak_sushi/train'),
 PosixPath('data/pizza_steak_sushi/test'))

In [7]:
# Setup ImageNet normalization levels
from torchvision import transforms

normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)

# Create transform pipeline manually
manual_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

print(f"Manually created transforms: {manual_transforms}")

# Create DataLoaders
train_dataloader, test_dataloader, class_names = setup_data.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=manual_transforms,
    batch_size=32,
    num_workers=1,
)

print(f"Size of train_dataloader: {len(train_dataloader)} | Size of test_dataloader {len(test_dataloader)}")
print(f"Class names: {class_names}")

Manually created transforms: Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)
Size of train_dataloader: 8 | Size of test_dataloader 3
Class names: ['pizza', 'steak', 'sushi']


### 2.2 Create DataLoader using automatically created transforms

The same principle applies for automatic transforms: we want our custom data in the same format as a pretrained model was trained on

In [8]:
# Setup directories
from pathlib import Path

image_path = Path("data/pizza_steak_sushi")
train_dir = image_path / "train"
test_dir = image_path / "test"

train_dir, test_dir

(PosixPath('data/pizza_steak_sushi/train'),
 PosixPath('data/pizza_steak_sushi/test'))

In [9]:
# Setup pretrained weights (plenty of these weights available in torchvision.models)
from torchvision import models

weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT # best available

# Get transforms from weights (these are the transforms used to train a particualr or obtained particular set of weights)
automatic_transforms = weights.transforms()

print(f"Automatically created transforms: {automatic_transforms}")

# Create DataLoaders
train_dataloader, test_dataloader, class_names = setup_data.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=automatic_transforms,
    batch_size=32,
    num_workers=1,
)

print(f"Size of train_dataloader: {len(train_dataloader)} | Size of test_dataloader {len(test_dataloader)}")
print(f"Class names: {class_names}")

Automatically created transforms: ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)
Size of train_dataloader: 8 | Size of test_dataloader 3
Class names: ['pizza', 'steak', 'sushi']


## 3. Getting a pretrained model, freeze the base layers and change the classifier head

In [10]:
# Old way
model = torchvision.models.efficientnet_b0(pretrained=True).to(device)



In [11]:
# New way
# Download the pretrained weights for EfficientNet_B0
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT

# Setup the model with pretrained weights and send it to the target model
model = torchvision.models.efficientnet_b0(
    weights=weights
).to(device)

In [12]:
model

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [13]:
# Freeze all base layers by setting their requires_grad attribute to False
for param in model.features.parameters():
    param.requires_grad = False
    
# Change classifier head
set_seeds()
model.classifier = torch.nn.Sequential(
    torch.nn.Dropout(p=0.2, inplace=True),
    torch.nn.Linear(in_features=1280, out_features=len(class_names), bias=True)
).to(device)

model

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [14]:
from torchinfo import summary
summary(
    model=model,
    input_size=(1, 3, 224, 224),
    verbose=0,
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [1, 3, 224, 224]     [1, 3]               --                   Partial
├─Sequential (features)                                      [1, 3, 224, 224]     [1, 1280, 7, 7]      --                   False
│    └─Conv2dNormActivation (0)                              [1, 3, 224, 224]     [1, 32, 112, 112]    --                   False
│    │    └─Conv2d (0)                                       [1, 3, 224, 224]     [1, 32, 112, 112]    (864)                False
│    │    └─BatchNorm2d (1)                                  [1, 32, 112, 112]    [1, 32, 112, 112]    (64)                 False
│    │    └─SiLU (2)                                         [1, 32, 112, 112]    [1, 32, 112, 112]    --                   --
│    └─Sequential (1)                                        [1, 32, 112, 112]    [1, 1

## 4. Train a single model and track results

In [15]:
# Define a loss function and optimizer
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=0.001,
)

In [16]:
# Setup a SummaryWriter
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()
writer

<torch.utils.tensorboard.writer.SummaryWriter at 0x118ebb230>

In [17]:
from src.engine import train_step, test_step
from tqdm.auto import tqdm
from typing import Dict, List, Tuple

def train(
    model: torch.nn.Module, 
    train_dataloader: torch.utils.data.DataLoader, 
    test_dataloader: torch.utils.data.DataLoader, 
    optimizer: torch.optim.Optimizer,
    loss_fn: torch.nn.Module,
    epochs: int,
    device: torch.device,
) -> Dict[str, List[float]]:
    """Trains and tests a PyTorch model.

    Args:
        model (torch.nn.Module): A PyTorch model to be trained
            and tested.
        train_dataloader (torch.utils.data.DataLoader): A DataLoader
            instance for the model to be trained on.
        test_dataloader (torch.utils.data.DataLoader): A DataLoader
            instance for the model to be tested on.
        optimizer (torch.optim.Optimizer): A PyTorch optimizer to
            help minimize the loss function.
        loss_fn (torch.nn.Module): A PyTorch loss function to
            calculate loss on both datasets.
        epochs (int): An integer indicating how many epochs
            to train for.
        device (torch.device): A target device to compute
            on (e.g. "cuda" or "cpu").

    Returns:
        Dict[str, List[float]]: A dictionary of training and
        testing loss as well as training and
        testing accuracy metrics. Each metric has a value in
        a list for each epoch.
    """
    # Create empty results dictionary
    results = {"train_loss": [],
        "train_acc": [],
        "test_loss": [],
        "test_acc": []
    }

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                            dataloader=train_dataloader,
                                            loss_fn=loss_fn,
                                            optimizer=optimizer,
                                            device=device)
        test_loss, test_acc = test_step(model=model,
            dataloader=test_dataloader,
            loss_fn=loss_fn,
            device=device)

        # Print out what's happening
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)
        
        ### New: Experiment tracking ###
        
        writer.add_scalars(
            main_tag="Loss",
            tag_scalar_dict={
                "train_loss": train_loss,
                "test_loss": test_loss
            },
            global_step=epoch
        )
        
        writer.add_scalars(
            main_tag="Accuracy",
            tag_scalar_dict={
                "train_acc": train_acc,
                "test_acc": test_acc
            },
            global_step=epoch
        )
        writer.add_graph(
            model=model,
            input_to_model=torch.randn(32, 3, 224, 224).to(device),
        )
        
    # Close the writer
    writer.close()
    
    ### End new ###
        
    # Return the filled results at the end of the epochs
    return results

In [18]:
# Train model
set_seeds()

results = train(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=5,
    device="cpu",
)

  0%|          | 0/5 [00:00<?, ?it/s]



Epoch: 1 | train_loss: 1.0883 | train_acc: 0.4180 | test_loss: 0.8914 | test_acc: 0.6818
Epoch: 2 | train_loss: 0.8937 | train_acc: 0.6641 | test_loss: 0.8082 | test_acc: 0.7746
Epoch: 3 | train_loss: 0.7450 | train_acc: 0.8438 | test_loss: 0.7433 | test_acc: 0.7538
Epoch: 4 | train_loss: 0.7797 | train_acc: 0.6992 | test_loss: 0.6849 | test_acc: 0.8040
Epoch: 5 | train_loss: 0.6322 | train_acc: 0.7695 | test_loss: 0.6428 | test_acc: 0.8362


In [19]:
results

{'train_loss': [1.0882933884859085,
  0.8936692178249359,
  0.7449564784765244,
  0.7797179967164993,
  0.6321720480918884],
 'train_acc': [0.41796875, 0.6640625, 0.84375, 0.69921875, 0.76953125],
 'test_loss': [0.8914491534233093,
  0.8082305391629537,
  0.7433454990386963,
  0.6849217017491659,
  0.6428378820419312],
 'test_acc': [0.6818181818181818,
  0.774621212121212,
  0.7537878787878788,
  0.8039772727272728,
  0.8361742424242425]}

## 5. View our model's results with TensorBoard

There are a fwe ways to view TensorBoard results

In [20]:
# Let's view our experiments from withing the notebook
%load_ext tensorboard
%tensorboard --logdir runs

## 6. Create a functino to prepare a `SummaryWriter()` instance

By default our `SummaryWriter()` class saves to `log_dir` parameter

How about if we wanted to save different experiments to different folders

In essence, one experiment = one folder

For example, we'd like to track
* Experiment date/timestamp
* Experiment name
* Model name
* Extra - is there anything else that should be tracker?

Let's create a function to create a `SummaryWriter()` instance to take all of these things into account.

So ideally we end up tracking experiments to a directory:

`runs/YYYY-MM-DD/experiment_name/model_name/extra`

In [21]:
from torch.utils.tensorboard import SummaryWriter

def create_writer(
    experiment_name: str,
    model_name: str,
    extra: str = None
):
    """Creates a torch.utils.tensorboard.writer.SummaryWriter() instance tracking to a specific directory."""
    
    from datetime import datetime
    import os
    
    # Get timestamp of current date in reverse order
    timestamp = datetime.now().strftime("%Y-%m-%d")
    
    if extra:
        # Create log directory path
        log_dir = os.path.join("runs", timestamp, experiment_name, model_name, extra)
    else:
        log_dir = os.path.join("runs", timestamp, experiment_name, model_name)
    print(f"[INFO] Created SummaryWriter saving to {log_dir}")


    return SummaryWriter(log_dir=log_dir)

In [22]:
example_writer = create_writer(
    experiment_name="data_10_percent",
    model_name="effnetb0",
    extra="5_epochs"
)
example_writer

[INFO] Created SummaryWriter saving to runs/2025-08-18/data_10_percent/effnetb0/5_epochs


<torch.utils.tensorboard.writer.SummaryWriter at 0x118efbd90>

### 6.1 Update `train()` function to include `writer` parameter

In [23]:
from src.engine import train_step, test_step
from tqdm.auto import tqdm
from typing import Dict, List, Tuple

def train(
    model: torch.nn.Module, 
    train_dataloader: torch.utils.data.DataLoader, 
    test_dataloader: torch.utils.data.DataLoader, 
    optimizer: torch.optim.Optimizer,
    loss_fn: torch.nn.Module,
    epochs: int,
    device: torch.device,
    writer: torch.utils.tensorboard.writer.SummaryWriter = None
) -> Dict[str, List[float]]:
    """Trains and tests a PyTorch model.

    Args:
        model (torch.nn.Module): A PyTorch model to be trained
            and tested.
        train_dataloader (torch.utils.data.DataLoader): A DataLoader
            instance for the model to be trained on.
        test_dataloader (torch.utils.data.DataLoader): A DataLoader
            instance for the model to be tested on.
        optimizer (torch.optim.Optimizer): A PyTorch optimizer to
            help minimize the loss function.
        loss_fn (torch.nn.Module): A PyTorch loss function to
            calculate loss on both datasets.
        epochs (int): An integer indicating how many epochs
            to train for.
        device (torch.device): A target device to compute
            on (e.g. "cuda" or "cpu").

    Returns:
        Dict[str, List[float]]: A dictionary of training and
        testing loss as well as training and
        testing accuracy metrics. Each metric has a value in
        a list for each epoch.
    """
    # Create empty results dictionary
    results = {
        "train_loss": [],
        "train_acc": [],
        "test_loss": [],
        "test_acc": [],
    }

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                            dataloader=train_dataloader,
                                            loss_fn=loss_fn,
                                            optimizer=optimizer,
                                            device=device)
        test_loss, test_acc = test_step(model=model,
            dataloader=test_dataloader,
            loss_fn=loss_fn,
            device=device)

        # Print out what's happening
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)
        
        ### New: Experiment tracking ###
        if writer:
        
            writer.add_scalars(
                main_tag="Loss",
                tag_scalar_dict={
                    "train_loss": train_loss,
                    "test_loss": test_loss
                },
                global_step=epoch
            )
            
            writer.add_scalars(
                main_tag="Accuracy",
                tag_scalar_dict={
                    "train_acc": train_acc,
                    "test_acc": test_acc
                },
                global_step=epoch
            )
            writer.add_graph(
                model=model,
                input_to_model=torch.randn(32, 3, 224, 224).to(device),
            )
            
            # Close the writer
            writer.close()
        else:
            pass
    
    ### End new ###
        
    # Return the filled results at the end of the epochs
    return results

## 7. Setting up a series of modelling experiments

* Setup 2x modeling experiments with effnetb0, pizza, steak, sushi data and one model for 5 epochs and another model for 10 epochs

### 7.1 What kind of experiments should you rum?

Tje number of machine learning experiments you can run, is like the number of different models you can build... almost limitless

However, you can't test everything...

So what should you test?
* Change the number of epochs
* Change the number of hidden layers / units
* Change the amount of data (right now we're using 10% of the Food101 dataset for pizza, steak, sushi)
* Change the learning rate
* Try different kinds of data augmentation
* Choose a different model architecture

This is why transfer learning is so powerful, because it's a working model you can apply to your own problem

### 7.2 What experiments are we going to run?

We're going to turn three dials:
1. Model size - EffNetB0 vs EffNetB2 (in terms of number of parameters)
2. Dataset size - 10% of pizza, steak, sushi images vs 20% (generally mode data = better results)
3. Training time - 5 epochs vs 10 epochs (generally longer training time = better reults, up to a point)

To begin, we're sill keeping things relatively small so that our experiment run quickly

Our goal: a model that is well performing but sill small enougth to run on a mobile device or web browser, so FoodVision Mini can come to life

If you had infinite computer + time, you should basically always choose the biggest model and the biggest dataset you can, see: https://www.cs.utexas.edu/~eunsol/courses/data/bitter_lesson.pdf

### 7.3 Download different datasets

We want two datasets:
1. Pizza, steak, sushi 10%
2. Pizza, steak, sushi 20%

In [27]:
from src import get_data

get_data.get_data(
    data_dir_str="data/",
    image_path_str="pizza_steak_sushi_10",
    data_url_str="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
    file_name_str="pizza_steak_sushi.zip",
)

data/pizza_steak_sushi_10 does not exists, creating...
Downloading pizza_steak_sushi.zip...
Extracting pizza_steak_sushi.zip...
Deleting pizza_steak_sushi.zip...
Finished getting data...


In [28]:
get_data.get_data(
    data_dir_str="data/",
    image_path_str="pizza_steak_sushi_20",
    data_url_str="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip",
    file_name_str="pizza_steak_sushi_20_percent.zip",
)

data/pizza_steak_sushi_20 does not exists, creating...
Downloading pizza_steak_sushi_20_percent.zip...
Extracting pizza_steak_sushi_20_percent.zip...
Deleting pizza_steak_sushi_20_percent.zip...
Finished getting data...


### 7.4 Transforms Datasets and create DataLoaders

We'll to transform our data in a few ways:

1. Resize the images to (224, 224)
2. Make sure image tensor values are between [0, 1]
3. Normalize the images so they have the same data distribution as ImageNet

In [31]:
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)

# Compose transforms into a pipeline
simple_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize
])

In [32]:
from src import setup_data

train_dataloader_10, test_dataloader_10, class_names_10 = setup_data.create_dataloaders(
    train_dir="data/pizza_steak_sushi_10/train",
    test_dir="data/pizza_steak_sushi_10/test",
    transform=simple_transform,
    batch_size=32,
    num_workers=1
)

len(train_dataloader_10), len(test_dataloader_10), class_names_10

(8, 3, ['pizza', 'steak', 'sushi'])

In [33]:
train_dataloader_20, test_dataloader_20, class_names_20 = setup_data.create_dataloaders(
    train_dir="data/pizza_steak_sushi_20/train",
    test_dir="data/pizza_steak_sushi_20/test",
    transform=simple_transform,
    batch_size=32,
    num_workers=1
)

len(train_dataloader_20), len(test_dataloader_20), class_names_20

(15, 5, ['pizza', 'steak', 'sushi'])

### 7.5 Create feature extractor models

We want two functions
1. Create a `torchvision.models.efficientnet_b0()` feature extractor with a frozen backbone/base layer and a custom classifier head
2. Create a `torchvision.models.efficientnet_b2()` feature extractor with a frozen backbone/base layer and a custom classifier head

In [None]:
OUT_FEATURE = len(class_names)

# Create and EffNetB0 feature extractor
def create_effnetb0() -> torch.nn.Module:
    # Get the weights and setup a model
    weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
    model = torchvision.models.efficientnet_b0(weights=weights).to(device)
    
    # Freeze the base model layers
    for param in model.parameters():
        param.requires_grad = False
        
    # Change the classifier head
    set_seeds()
    model.classifier = torch.nn.Sequential(
        torch.nn.Dropout(p=0.2, inplace=True),
        torch.nn.Linear(in_features=1280, out_features=OUT_FEATURE)
    ).to(device)
    
    # Give the model a name
    model.name = "effnetb0"
    
    print(f"[INFO] Create new {model.name} model...")
    
    return model

In [45]:
OUT_FEATURE = len(class_names)

# Create and EffNetB0 feature extractor
def create_effnetb2() -> torch.nn.Module:
    # Get the weights and setup a model
    weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    model = torchvision.models.efficientnet_b2(weights=weights).to(device)
    
    # Freeze the base model layers
    for param in model.parameters():
        param.requires_grad = False
        
    # Change the classifier head
    set_seeds()
    model.classifier = torch.nn.Sequential(
        torch.nn.Dropout(p=0.3, inplace=True),
        torch.nn.Linear(in_features=1408, out_features=OUT_FEATURE)
    ).to(device)
    
    # Give the model a name
    model.name = "effnetb2"
    
    print(f"[INFO] Create new {model.name} model...")
    
    return model

In [46]:
test_effnetb0 = create_effnetb0()
test_effnetb2 = create_effnetb2()

[INFO] Create new effnetb0 model...
Downloading: "https://download.pytorch.org/models/efficientnet_b2_rwightman-c35c1473.pth" to /Users/mchojna/.cache/torch/hub/checkpoints/efficientnet_b2_rwightman-c35c1473.pth


100%|██████████| 35.2M/35.2M [00:00<00:00, 64.3MB/s]


[INFO] Create new effnetb2 model...


In [47]:
summary(test_effnetb0)

Layer (type:depth-idx)                                  Param #
EfficientNet                                            --
├─Sequential: 1-1                                       --
│    └─Conv2dNormActivation: 2-1                        --
│    │    └─Conv2d: 3-1                                 (864)
│    │    └─BatchNorm2d: 3-2                            (64)
│    │    └─SiLU: 3-3                                   --
│    └─Sequential: 2-2                                  --
│    │    └─MBConv: 3-4                                 (1,448)
│    └─Sequential: 2-3                                  --
│    │    └─MBConv: 3-5                                 (6,004)
│    │    └─MBConv: 3-6                                 (10,710)
│    └─Sequential: 2-4                                  --
│    │    └─MBConv: 3-7                                 (15,350)
│    │    └─MBConv: 3-8                                 (31,290)
│    └─Sequential: 2-5                                  --
│    │    └─MBConv

In [48]:
summary(test_effnetb2)

Layer (type:depth-idx)                                  Param #
EfficientNet                                            --
├─Sequential: 1-1                                       --
│    └─Conv2dNormActivation: 2-1                        --
│    │    └─Conv2d: 3-1                                 (864)
│    │    └─BatchNorm2d: 3-2                            (64)
│    │    └─SiLU: 3-3                                   --
│    └─Sequential: 2-2                                  --
│    │    └─MBConv: 3-4                                 (1,448)
│    │    └─MBConv: 3-5                                 (612)
│    └─Sequential: 2-3                                  --
│    │    └─MBConv: 3-6                                 (6,004)
│    │    └─MBConv: 3-7                                 (10,710)
│    │    └─MBConv: 3-8                                 (10,710)
│    └─Sequential: 2-4                                  --
│    │    └─MBConv: 3-9                                 (16,518)
│    │    └─MBC

### 7.6 Create experiments and set up training code

In [52]:
# Create epoch list
num_epochs = [5, 10]

# Create models list (need to create a new model for each experiment)
models = ["effnetb0", "effnetb2"]

# Create a DataLoaders dictionary
train_dataloaders = {
    "data_10_percent": train_dataloader_10,
    "data_20_percent": train_dataloader_20,
}

In [53]:
from src import utils

# Set seeds
set_seeds()

# Keep track of experiment numberes
experiment_number = 0

# Loop through each DataLoader
for dataloader_name, train_dataloader in train_dataloaders.items():
    # Loop through the epochs
    for epochs in num_epochs:
        # Loop through each model name and create a new model instance
        for model_name in models:
            
            # Print out info
            experiment_number += 1
            print(f"[INFO] Experiment number: {experiment_number}")
            print(f"[INFO] Model: {model_name}")
            print(f"[INFO] DataLoader: {dataloader_name}")
            print(f"[INFO] Number of epochs: {epochs}")
            
            # Select and create model
            if model_name == "effnetb0":
                model = create_effnetb0()
            else:
                model = create_effnetb2()
                
            # Create a new loss and optimizer for every model
            loss_fn = torch.nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(
                params=model.parameters(),
                lr=0.001,
            )
            
            # Train target model with target dataloader and track experiments
            train(
                model=model, 
                train_dataloader=train_dataloader, 
                test_dataloader=test_dataloader, 
                optimizer=optimizer,
                loss_fn=loss_fn,
                epochs=epochs,
                device=device,
                writer=create_writer(
                    experiment_name=dataloader_name,
                    model_name=model.name,
                    extra=f"{epochs}_epochs"
                )
            )
            
            # Save model to file so we can import it later if need be
            save_filepath = f"07_{model_name}_{dataloader_name}_{epochs}_epochs.pth"
            utils.save_model(
                model=model,
                target_dir="models",
                model_name=model.name
            )
            
            print("\n")

[INFO] Experiment number: 1
[INFO] Model: effnetb0
[INFO] DataLoader: data_10_percent
[INFO] Number of epochs: 5
[INFO] Create new effnetb0 model...
[INFO] Created SummaryWriter saving to runs/2025-08-18/data_10_percent/effnetb0/5_epochs


  0%|          | 0/5 [00:00<?, ?it/s]



RuntimeError: linear(): input and weight.T shapes cannot be multiplied (32x1280 and 1024x3)