# 07. PyTorch Experiment Tracking Exercises

Resource: https://www.learnpytorch.io/07_pytorch_experiment_tracking/

## 0. Get imports and helper function

In [1]:
import torch
import torchvision

from torch import nn
from torchvision import transforms

from torch.utils.tensorboard import SummaryWriter

In [2]:
import matplotlib.pyplot as plt

In [3]:
from src import get_data, setup_data, engine

In [4]:
# Set device agnostic code
device = "cuda" if torch.cuda.is_available() else (
    "mps" if torch.mps.is_available() else "cpu"
)
device

'cuda'

In [5]:
# Set seeds
def set_seeds(seed: int=42):
    """Sets random sets for torch operations.

    Args:
        seed (int, optional): Random seed to set. Defaults to 42.
    """
    # Set the seed for general torch operations
    torch.manual_seed(seed)
    # Set the seed for CUDA torch operations (ones that happen on the GPU)
    torch.cuda.manual_seed(seed)


In [6]:
def create_writer(
    experiment_name: str,
    model_name: str,
    extra: str=None,
):
    """Creates a torch.utils.tensorboard.SummaryWriter() instance saving to a specific log_dir"""

    from datetime import datetime
    import os
    
    timestampe = datetime.now().strftime("%y-%m-%d")
    
    if extra:
        log_dir = os.path.join("runs", timestampe, experiment_name, model_name, extra)
    else:
        log_dir = os.path.join("runs", timestampe, experiment_name, model_name)
    
    print(f"[INFO] Created SummaryWriter, saving to {log_dir}...")
    return SummaryWriter(log_dir=log_dir)

In [7]:
from typing import Dict, List
import torch.utils.tensorboard
from tqdm.auto import tqdm

def train(
    model: torch.nn.Module,
    train_dataloader: torch.utils.data.DataLoader,
    test_dataloader: torch.utils.data.DataLoader,
    optimizer: torch.optim.Optimizer,
    loss_fn: torch.nn.Module,
    epochs: int,
    device: torch.device,
    writer: torch.utils.tensorboard.writer.SummaryWriter
) -> Dict[str, List]:
    """Trains and test PyTorch model"""
    # Create empty results dictionary
    results = {
        "train_loss": [],
        "train_acc": [],
        "test_loss": [],
        "test_acc": []
    }
    
    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = engine.train_step(
            model=model,
            dataloader=train_dataloader,
            loss_fn=loss_fn,
            optimizer=optimizer,
            device=device
        )
        
        test_loss, test_acc = engine.test_step(
            model=model,
            dataloader=test_dataloader,
            loss_fn=loss_fn,
            device=device
        )

        # Print out what's happening
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)


        ### New: Use the writer parameter to track experiments ###
        # See if there's a writer, if so, log to it
        if writer:
            # Add results to SummaryWriter
            writer.add_scalars(
                main_tag="Loss", 
                tag_scalar_dict={
                    "train_loss": train_loss,
                    "test_loss": test_loss
                },
                global_step=epoch
            )
            
            writer.add_scalars(
                main_tag="Accuracy", 
                tag_scalar_dict={
                    "train_acc": train_acc,
                    "test_acc": test_acc
                }, 
                global_step=epoch
            )

            # Close the writer
            writer.close()
        else:
            pass
    ### End new ###

    # Return the filled results at the end of the epochs
    return results

## 1. Download data

In [8]:
get_data.get_data(
    data_dir_str="data/",
    image_path_str="pizza_steak_sushi_10_percent",
    data_url_str="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
    file_name_str="pizza_steak_sushi.zip"
)

get_data.get_data(
    data_dir_str="data/",
    image_path_str="pizza_steak_sushi_20_percent",
    data_url_str="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip",
    file_name_str="pizza_steak_sushi_20_percent.zip"
)

data/pizza_steak_sushi_10_percent exists...
Data in data/pizza_steak_sushi_10_percent already exits, skipping downloading and unzipping...
Finished getting data...
data/pizza_steak_sushi_20_percent exists...
Data in data/pizza_steak_sushi_20_percent already exits, skipping downloading and unzipping...
Finished getting data...


In [9]:
from pathlib import Path

data_10_percent_path = Path("data/pizza_steak_sushi_10_percent")
data_20_percent_path = Path("data/pizza_steak_sushi_20_percent")

# Setup training directory paths
train_dir_10_percent = data_10_percent_path / "train"
train_dir_20_percent = data_20_percent_path / "train"

# Setup testing directory paths (note: use the same test dataset for both to compare the results)
test_dir = data_10_percent_path / "test"

# Check the directories
print(f"Training directory 10%: {train_dir_10_percent}")
print(f"Training directory 20%: {train_dir_20_percent}")
print(f"Testing directory: {test_dir}")

Training directory 10%: data/pizza_steak_sushi_10_percent/train
Training directory 20%: data/pizza_steak_sushi_20_percent/train
Testing directory: data/pizza_steak_sushi_10_percent/test


In [10]:

# Create a transform to normalize data distribution to be inline with ImageNet
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406], # values per colour channel [red, green, blue]
    std=[0.229, 0.224, 0.225]
)

# Create a transform pipeline
simple_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(), # get image values between 0 & 1
    normalize
])

## 2. Turn data into DataLoaders

In [11]:
BATCH_SIZE = 32

# Create 10% training and test DataLoaders
train_dataloader_10_percent, test_dataloader, class_names = setup_data.create_dataloaders(
    train_dir=train_dir_10_percent,
    test_dir=test_dir,
    transform=simple_transform,
    batch_size=BATCH_SIZE
)

# Create 20% training and test DataLoaders
train_dataloader_20_percent, test_dataloader, class_names = setup_data.create_dataloaders(
    train_dir=train_dir_20_percent,
    test_dir=test_dir,
    transform=simple_transform,
    batch_size=BATCH_SIZE
)

# Find the number of samples/batches per dataloader (using the same test_dataloader for both experiments)
print(f"Number of batches of size {BATCH_SIZE} in 10 percent training data: {len(train_dataloader_10_percent)}")
print(f"Number of batches of size {BATCH_SIZE} in 20 percent training data: {len(train_dataloader_20_percent)}")
print(f"Number of batches of size {BATCH_SIZE} in testing data: {len(train_dataloader_10_percent)} (all experiments will use the same test set)")
print(f"Number of classes: {len(class_names)}, class names: {class_names}")

Number of batches of size 32 in 10 percent training data: 8
Number of batches of size 32 in 20 percent training data: 15
Number of batches of size 32 in testing data: 8 (all experiments will use the same test set)
Number of classes: 3, class names: ['pizza', 'steak', 'sushi']


## 3. Exercise 1: Pick a larger model from torchvision.models to add to the list of experiments (for example, EffNetB3 or higher)

In [12]:
def create_effnetb0() -> nn.Module:
    weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
    model = torchvision.models.efficientnet_b0(weights=weights).to(device)

    for param in model.features.parameters():
        param.requires_grad=False
    
    set_seeds()
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2, inplace=True),
        nn.Linear(in_features=1280, out_features=len(class_names), bias=True)
    ).to(device)
    
    print(f"[INFO] Created EfficientNetB0...")
    return model
    
def create_effnetb1() -> nn.Module:
    weights = torchvision.models.EfficientNet_B1_Weights.DEFAULT
    model = torchvision.models.efficientnet_b1(weights=weights).to(device)

    for param in model.features.parameters():
        param.requires_grad=False
    
    set_seeds()
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2, inplace=True),
        nn.Linear(in_features=1280, out_features=len(class_names), bias=True)
    ).to(device)
    
    print(f"[INFO] Created EfficientNetB1...")
    return model

def create_effnetb2() -> nn.Module:
    weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    model = torchvision.models.efficientnet_b2(weights=weights).to(device)

    for param in model.features.parameters():
        param.requires_grad=False
    
    set_seeds()
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True),
        nn.Linear(in_features=1408, out_features=len(class_names), bias=True)
    ).to(device)
    
    print(f"[INFO] Created EfficientNetB2...")
    return model

In [13]:
from src import utils
import gc

def run_experiment(experiments: Dict):

    for model_name, model_ in experiments["models"].items():
        
        for epochs_num, epochs in experiments["epochs"].items():
            
            for data_name, data in experiments["data"].items():
                
                model = model_()
                        
                optimizer = torch.optim.Adam(
                    params=model.parameters(),
                    lr=0.001
                )
                loss_fn = torch.nn.CrossEntropyLoss()
                
                train(
                    model=model,
                    train_dataloader=data,
                    test_dataloader=test_dataloader,
                    optimizer=optimizer,
                    loss_fn=loss_fn,
                    epochs=epochs,
                    device=device,
                    writer=create_writer(
                        experiment_name=data_name,
                        model_name=model_name,
                        extra=f"{epochs}_epochs"
                    )
                )
                
                # Save model to file so we can import it later if need be
                save_filepath = f"07_{model_name}_{data_name}_{epochs}_epochs.pth"
                utils.save_model(
                    model=model,
                    target_dir="models",
                    model_name=save_filepath
                )

                if device == "cuda":
                    torch.cuda.empty_cache()

                del model, optimizer, loss_fn, data
                gc.collect()

                print("\n")

In [14]:
experiments = {
    "models": {
        "effnetb0": create_effnetb0,
        "effnetb1": create_effnetb1,
    },
    "epochs": {
        "5": 5,
        "10": 10
    },
    "data": {
        "10_percent": train_dataloader_10_percent,
        "20_percent": train_dataloader_20_percent
    }
}

In [15]:
run_experiment(experiments=experiments)

[INFO] Created EfficientNetB0...
[INFO] Created SummaryWriter, saving to runs/25-08-19/10_percent/effnetb0/5_epochs...


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0434 | train_acc: 0.4844 | test_loss: 0.9274 | test_acc: 0.4782
Epoch: 2 | train_loss: 0.8841 | train_acc: 0.6602 | test_loss: 0.8153 | test_acc: 0.6108
Epoch: 3 | train_loss: 0.7880 | train_acc: 0.6719 | test_loss: 0.7110 | test_acc: 0.8352
Epoch: 4 | train_loss: 0.7326 | train_acc: 0.7461 | test_loss: 0.5842 | test_acc: 0.8864
Epoch: 5 | train_loss: 0.6158 | train_acc: 0.9102 | test_loss: 0.5591 | test_acc: 0.8968
[INFO] Saving model to: models/07_effnetb0_10_percent_5_epochs.pth


[INFO] Created EfficientNetB0...
[INFO] Created SummaryWriter, saving to runs/25-08-19/20_percent/effnetb0/5_epochs...


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9632 | train_acc: 0.6062 | test_loss: 0.6667 | test_acc: 0.8864
Epoch: 2 | train_loss: 0.7031 | train_acc: 0.8063 | test_loss: 0.5944 | test_acc: 0.8665
Epoch: 3 | train_loss: 0.5831 | train_acc: 0.8438 | test_loss: 0.4716 | test_acc: 0.9072
Epoch: 4 | train_loss: 0.5008 | train_acc: 0.8438 | test_loss: 0.4583 | test_acc: 0.8968
Epoch: 5 | train_loss: 0.5042 | train_acc: 0.8479 | test_loss: 0.4054 | test_acc: 0.9176
[INFO] Saving model to: models/07_effnetb0_20_percent_5_epochs.pth


[INFO] Created EfficientNetB0...
[INFO] Created SummaryWriter, saving to runs/25-08-19/10_percent/effnetb0/10_epochs...


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0434 | train_acc: 0.4844 | test_loss: 0.9274 | test_acc: 0.4782
Epoch: 2 | train_loss: 0.8841 | train_acc: 0.6602 | test_loss: 0.8153 | test_acc: 0.6108
Epoch: 3 | train_loss: 0.7880 | train_acc: 0.6719 | test_loss: 0.7110 | test_acc: 0.8352
Epoch: 4 | train_loss: 0.7326 | train_acc: 0.7461 | test_loss: 0.5842 | test_acc: 0.8864
Epoch: 5 | train_loss: 0.6158 | train_acc: 0.9102 | test_loss: 0.5591 | test_acc: 0.8968
Epoch: 6 | train_loss: 0.5523 | train_acc: 0.8945 | test_loss: 0.5858 | test_acc: 0.8968
Epoch: 7 | train_loss: 0.5588 | train_acc: 0.8008 | test_loss: 0.5410 | test_acc: 0.8968
Epoch: 8 | train_loss: 0.4760 | train_acc: 0.9336 | test_loss: 0.5043 | test_acc: 0.8864
Epoch: 9 | train_loss: 0.6039 | train_acc: 0.7656 | test_loss: 0.5024 | test_acc: 0.8873
Epoch: 10 | train_loss: 0.4965 | train_acc: 0.8047 | test_loss: 0.4587 | test_acc: 0.9176
[INFO] Saving model to: models/07_effnetb0_10_percent_10_epochs.pth


[INFO] Created EfficientNetB0...
[INFO]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9632 | train_acc: 0.6062 | test_loss: 0.6667 | test_acc: 0.8864
Epoch: 2 | train_loss: 0.7031 | train_acc: 0.8063 | test_loss: 0.5944 | test_acc: 0.8665
Epoch: 3 | train_loss: 0.5831 | train_acc: 0.8438 | test_loss: 0.4716 | test_acc: 0.9072
Epoch: 4 | train_loss: 0.5008 | train_acc: 0.8438 | test_loss: 0.4583 | test_acc: 0.8968
Epoch: 5 | train_loss: 0.5042 | train_acc: 0.8479 | test_loss: 0.4054 | test_acc: 0.9176
Epoch: 6 | train_loss: 0.3748 | train_acc: 0.9021 | test_loss: 0.3729 | test_acc: 0.8968
Epoch: 7 | train_loss: 0.3685 | train_acc: 0.9167 | test_loss: 0.3322 | test_acc: 0.9072
Epoch: 8 | train_loss: 0.3660 | train_acc: 0.8958 | test_loss: 0.3458 | test_acc: 0.9072
Epoch: 9 | train_loss: 0.3014 | train_acc: 0.9313 | test_loss: 0.3143 | test_acc: 0.9072
Epoch: 10 | train_loss: 0.3551 | train_acc: 0.8854 | test_loss: 0.2844 | test_acc: 0.9072
[INFO] Saving model to: models/07_effnetb0_20_percent_10_epochs.pth


[INFO] Created EfficientNetB1...
[INFO]

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0542 | train_acc: 0.5625 | test_loss: 0.9792 | test_acc: 0.6932
Epoch: 2 | train_loss: 0.9665 | train_acc: 0.7031 | test_loss: 0.9296 | test_acc: 0.6629
Epoch: 3 | train_loss: 0.9260 | train_acc: 0.5312 | test_loss: 0.8595 | test_acc: 0.7652
Epoch: 4 | train_loss: 0.8831 | train_acc: 0.7773 | test_loss: 0.7717 | test_acc: 0.9489
Epoch: 5 | train_loss: 0.7697 | train_acc: 0.8945 | test_loss: 0.7457 | test_acc: 0.9176
[INFO] Saving model to: models/07_effnetb1_10_percent_5_epochs.pth


[INFO] Created EfficientNetB1...
[INFO] Created SummaryWriter, saving to runs/25-08-19/20_percent/effnetb1/5_epochs...


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0039 | train_acc: 0.6042 | test_loss: 0.8834 | test_acc: 0.8873
Epoch: 2 | train_loss: 0.8129 | train_acc: 0.8458 | test_loss: 0.7443 | test_acc: 0.8570
Epoch: 3 | train_loss: 0.6999 | train_acc: 0.8396 | test_loss: 0.6074 | test_acc: 0.9489
Epoch: 4 | train_loss: 0.6056 | train_acc: 0.8667 | test_loss: 0.5538 | test_acc: 0.9280
Epoch: 5 | train_loss: 0.5495 | train_acc: 0.8771 | test_loss: 0.4986 | test_acc: 0.9280
[INFO] Saving model to: models/07_effnetb1_20_percent_5_epochs.pth


[INFO] Created EfficientNetB1...
[INFO] Created SummaryWriter, saving to runs/25-08-19/10_percent/effnetb1/10_epochs...


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0542 | train_acc: 0.5625 | test_loss: 0.9792 | test_acc: 0.6932
Epoch: 2 | train_loss: 0.9665 | train_acc: 0.7031 | test_loss: 0.9296 | test_acc: 0.6629
Epoch: 3 | train_loss: 0.9260 | train_acc: 0.5312 | test_loss: 0.8595 | test_acc: 0.7652
Epoch: 4 | train_loss: 0.8831 | train_acc: 0.7773 | test_loss: 0.7717 | test_acc: 0.9489
Epoch: 5 | train_loss: 0.7697 | train_acc: 0.8945 | test_loss: 0.7457 | test_acc: 0.9176
Epoch: 6 | train_loss: 0.7382 | train_acc: 0.8867 | test_loss: 0.7189 | test_acc: 0.9072
Epoch: 7 | train_loss: 0.7297 | train_acc: 0.7578 | test_loss: 0.6579 | test_acc: 0.9280
Epoch: 8 | train_loss: 0.6705 | train_acc: 0.9414 | test_loss: 0.6407 | test_acc: 0.9186
Epoch: 9 | train_loss: 0.6554 | train_acc: 0.7734 | test_loss: 0.6249 | test_acc: 0.8570
Epoch: 10 | train_loss: 0.5845 | train_acc: 0.9180 | test_loss: 0.5879 | test_acc: 0.9280
[INFO] Saving model to: models/07_effnetb1_10_percent_10_epochs.pth


[INFO] Created EfficientNetB1...
[INFO]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0039 | train_acc: 0.6042 | test_loss: 0.8834 | test_acc: 0.8873
Epoch: 2 | train_loss: 0.8129 | train_acc: 0.8458 | test_loss: 0.7443 | test_acc: 0.8570
Epoch: 3 | train_loss: 0.6999 | train_acc: 0.8396 | test_loss: 0.6074 | test_acc: 0.9489
Epoch: 4 | train_loss: 0.6056 | train_acc: 0.8667 | test_loss: 0.5538 | test_acc: 0.9280
Epoch: 5 | train_loss: 0.5495 | train_acc: 0.8771 | test_loss: 0.4986 | test_acc: 0.9280
Epoch: 6 | train_loss: 0.4680 | train_acc: 0.9292 | test_loss: 0.4342 | test_acc: 0.9384
Epoch: 7 | train_loss: 0.4479 | train_acc: 0.9167 | test_loss: 0.4046 | test_acc: 0.9280
Epoch: 8 | train_loss: 0.4591 | train_acc: 0.8875 | test_loss: 0.4458 | test_acc: 0.9072
Epoch: 9 | train_loss: 0.3866 | train_acc: 0.9187 | test_loss: 0.4164 | test_acc: 0.8977
Epoch: 10 | train_loss: 0.4144 | train_acc: 0.8896 | test_loss: 0.3480 | test_acc: 0.9384
[INFO] Saving model to: models/07_effnetb1_20_percent_10_epochs.pth




In [16]:
experiments = {
    "models": {
        "effnetb2": create_effnetb2,
    },
    "epochs": {
        "5": 5,
        "10": 10
    },
    "data": {
        "10_percent": train_dataloader_10_percent,
        "20_percent": train_dataloader_20_percent
    }
}

In [17]:
run_experiment(experiments=experiments)

[INFO] Created EfficientNetB2...
[INFO] Created SummaryWriter, saving to runs/25-08-19/10_percent/effnetb2/5_epochs...


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0869 | train_acc: 0.3984 | test_loss: 0.9611 | test_acc: 0.6903
Epoch: 2 | train_loss: 0.9222 | train_acc: 0.6445 | test_loss: 0.8637 | test_acc: 0.8144
Epoch: 3 | train_loss: 0.8365 | train_acc: 0.7227 | test_loss: 0.7447 | test_acc: 0.9688
Epoch: 4 | train_loss: 0.7069 | train_acc: 0.8906 | test_loss: 0.7121 | test_acc: 0.9081
Epoch: 5 | train_loss: 0.6768 | train_acc: 0.7812 | test_loss: 0.7047 | test_acc: 0.8873
[INFO] Saving model to: models/07_effnetb2_10_percent_5_epochs.pth


[INFO] Created EfficientNetB2...
[INFO] Created SummaryWriter, saving to runs/25-08-19/20_percent/effnetb2/5_epochs...


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9894 | train_acc: 0.5292 | test_loss: 0.7819 | test_acc: 0.8049
Epoch: 2 | train_loss: 0.7408 | train_acc: 0.7604 | test_loss: 0.6632 | test_acc: 0.8873
Epoch: 3 | train_loss: 0.6080 | train_acc: 0.8229 | test_loss: 0.5614 | test_acc: 0.9384
Epoch: 4 | train_loss: 0.5478 | train_acc: 0.8458 | test_loss: 0.5675 | test_acc: 0.8674
Epoch: 5 | train_loss: 0.4397 | train_acc: 0.8708 | test_loss: 0.4475 | test_acc: 0.9489
[INFO] Saving model to: models/07_effnetb2_20_percent_5_epochs.pth


[INFO] Created EfficientNetB2...
[INFO] Created SummaryWriter, saving to runs/25-08-19/10_percent/effnetb2/10_epochs...


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0869 | train_acc: 0.3984 | test_loss: 0.9611 | test_acc: 0.6903
Epoch: 2 | train_loss: 0.9222 | train_acc: 0.6445 | test_loss: 0.8637 | test_acc: 0.8144
Epoch: 3 | train_loss: 0.8365 | train_acc: 0.7227 | test_loss: 0.7447 | test_acc: 0.9688
Epoch: 4 | train_loss: 0.7069 | train_acc: 0.8906 | test_loss: 0.7121 | test_acc: 0.9081
Epoch: 5 | train_loss: 0.6768 | train_acc: 0.7812 | test_loss: 0.7047 | test_acc: 0.8873
Epoch: 6 | train_loss: 0.6048 | train_acc: 0.7773 | test_loss: 0.6300 | test_acc: 0.9280
Epoch: 7 | train_loss: 0.5829 | train_acc: 0.8086 | test_loss: 0.6334 | test_acc: 0.8873
Epoch: 8 | train_loss: 0.5261 | train_acc: 0.9336 | test_loss: 0.6156 | test_acc: 0.8977
Epoch: 9 | train_loss: 0.5385 | train_acc: 0.8125 | test_loss: 0.6248 | test_acc: 0.8466
Epoch: 10 | train_loss: 0.4922 | train_acc: 0.9219 | test_loss: 0.5833 | test_acc: 0.9176
[INFO] Saving model to: models/07_effnetb2_10_percent_10_epochs.pth


[INFO] Created EfficientNetB2...
[INFO]

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9894 | train_acc: 0.5292 | test_loss: 0.7819 | test_acc: 0.8049
Epoch: 2 | train_loss: 0.7408 | train_acc: 0.7604 | test_loss: 0.6632 | test_acc: 0.8873
Epoch: 3 | train_loss: 0.6080 | train_acc: 0.8229 | test_loss: 0.5614 | test_acc: 0.9384
Epoch: 4 | train_loss: 0.5478 | train_acc: 0.8458 | test_loss: 0.5675 | test_acc: 0.8674
Epoch: 5 | train_loss: 0.4397 | train_acc: 0.8708 | test_loss: 0.4475 | test_acc: 0.9489
Epoch: 6 | train_loss: 0.3890 | train_acc: 0.9104 | test_loss: 0.4610 | test_acc: 0.9280
Epoch: 7 | train_loss: 0.3699 | train_acc: 0.9062 | test_loss: 0.4225 | test_acc: 0.9384
Epoch: 8 | train_loss: 0.3872 | train_acc: 0.8896 | test_loss: 0.4414 | test_acc: 0.8674
Epoch: 9 | train_loss: 0.3256 | train_acc: 0.9250 | test_loss: 0.4315 | test_acc: 0.8977
Epoch: 10 | train_loss: 0.3702 | train_acc: 0.9021 | test_loss: 0.3958 | test_acc: 0.9280
[INFO] Saving model to: models/07_effnetb2_20_percent_10_epochs.pth




In [18]:
# Let's view oru experiments from within notebook
%load_ext tensorboard
%tensorboard --logdir runs

## 4. Exercise 2. Introduce data augmentation to the list of experiments using the 20% pizza, steak, sushi training and test datasets, does this change anything?

In [19]:
simple_transform

Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)

In [20]:
non_aug_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225]
    )
])

In [None]:
non_aug_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.TrivialAugmentWide(num_magnitude_bins=31),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225]
    )
])

In [None]:
BATCH_SIZE = 32

# Create train dataloader *without* data augmentation
train_dataloader_non_aug, test_dataloader, class_names = setup_data.create_dataloaders(
    train_dir=train_dir_20_percent,
    test_dir=test_dir,
    transform=simple_transform,
    batch_size=BATCH_SIZE
)

# Create train dataloader *with* data augmentation
train_dataloader_aug, test_dataloader_aug, class_names = setup_data.create_dataloaders(
    train_dir=train_dir_20_percent,
    test_dir=test_dir,
    transform=simple_transform,
    batch_size=BATCH_SIZE
)

# Find the number of samples/batches per dataloader (using the same test_dataloader for both experiments)
print(f"Number of batches of size {BATCH_SIZE} in 20 percent training data w/o augmentation: {len(train_dataloader_non_aug)}")
print(f"Number of batches of size {BATCH_SIZE} in 20 percent training data with augomentation: {len(train_dataloader_aug)}")
print(f"Number of batches of size {BATCH_SIZE} in testing data: {len(train_dataloader_10_percent)} (all experiments will use the same test set)")
print(f"Number of classes: {len(class_names)}, class names: {class_names}")

Number of batches of size 32 in 20 percent training data w/o augmentation: 15
Number of batches of size 32 in 20 percent training data with augomentation: 15
Number of batches of size 32 in testing data: 8 (all experiments will use the same test set)
Number of classes: 3, class names: ['pizza', 'steak', 'sushi']


In [23]:
experiments = {
    "models": {
        "effnetb0": create_effnetb0,
        "effnetb1": create_effnetb1,
    },
    "epochs": {
        "5": 5,
        "10": 10
    },
    "data": {
        "non_aug": train_dataloader_non_aug,
        "aug": train_dataloader_aug
    }
}

In [24]:
run_experiment(experiments=experiments)

[INFO] Created EfficientNetB0...
[INFO] Created SummaryWriter, saving to runs/25-08-19/non_aug/effnetb0/5_epochs...


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9632 | train_acc: 0.6062 | test_loss: 0.6667 | test_acc: 0.8864
Epoch: 2 | train_loss: 0.7031 | train_acc: 0.8063 | test_loss: 0.5944 | test_acc: 0.8665
Epoch: 3 | train_loss: 0.5831 | train_acc: 0.8438 | test_loss: 0.4716 | test_acc: 0.9072
Epoch: 4 | train_loss: 0.5008 | train_acc: 0.8438 | test_loss: 0.4583 | test_acc: 0.8968
Epoch: 5 | train_loss: 0.5042 | train_acc: 0.8479 | test_loss: 0.4054 | test_acc: 0.9176
[INFO] Saving model to: models/07_effnetb0_non_aug_5_epochs.pth


[INFO] Created EfficientNetB0...
[INFO] Created SummaryWriter, saving to runs/25-08-19/aug/effnetb0/5_epochs...


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9632 | train_acc: 0.6062 | test_loss: 0.6667 | test_acc: 0.8864
Epoch: 2 | train_loss: 0.7031 | train_acc: 0.8063 | test_loss: 0.5944 | test_acc: 0.8665
Epoch: 3 | train_loss: 0.5831 | train_acc: 0.8438 | test_loss: 0.4716 | test_acc: 0.9072
Epoch: 4 | train_loss: 0.5008 | train_acc: 0.8438 | test_loss: 0.4583 | test_acc: 0.8968
Epoch: 5 | train_loss: 0.5042 | train_acc: 0.8479 | test_loss: 0.4054 | test_acc: 0.9176
[INFO] Saving model to: models/07_effnetb0_aug_5_epochs.pth


[INFO] Created EfficientNetB0...
[INFO] Created SummaryWriter, saving to runs/25-08-19/non_aug/effnetb0/10_epochs...


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9632 | train_acc: 0.6062 | test_loss: 0.6667 | test_acc: 0.8864
Epoch: 2 | train_loss: 0.7031 | train_acc: 0.8063 | test_loss: 0.5944 | test_acc: 0.8665
Epoch: 3 | train_loss: 0.5831 | train_acc: 0.8438 | test_loss: 0.4716 | test_acc: 0.9072
Epoch: 4 | train_loss: 0.5008 | train_acc: 0.8438 | test_loss: 0.4583 | test_acc: 0.8968
Epoch: 5 | train_loss: 0.5042 | train_acc: 0.8479 | test_loss: 0.4054 | test_acc: 0.9176
Epoch: 6 | train_loss: 0.3748 | train_acc: 0.9021 | test_loss: 0.3729 | test_acc: 0.8968
Epoch: 7 | train_loss: 0.3685 | train_acc: 0.9167 | test_loss: 0.3322 | test_acc: 0.9072
Epoch: 8 | train_loss: 0.3660 | train_acc: 0.8958 | test_loss: 0.3458 | test_acc: 0.9072
Epoch: 9 | train_loss: 0.3014 | train_acc: 0.9313 | test_loss: 0.3143 | test_acc: 0.9072
Epoch: 10 | train_loss: 0.3551 | train_acc: 0.8854 | test_loss: 0.2844 | test_acc: 0.9072
[INFO] Saving model to: models/07_effnetb0_non_aug_10_epochs.pth


[INFO] Created EfficientNetB0...
[INFO] Cr

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9632 | train_acc: 0.6062 | test_loss: 0.6667 | test_acc: 0.8864
Epoch: 2 | train_loss: 0.7031 | train_acc: 0.8063 | test_loss: 0.5944 | test_acc: 0.8665
Epoch: 3 | train_loss: 0.5831 | train_acc: 0.8438 | test_loss: 0.4716 | test_acc: 0.9072
Epoch: 4 | train_loss: 0.5008 | train_acc: 0.8438 | test_loss: 0.4583 | test_acc: 0.8968
Epoch: 5 | train_loss: 0.5042 | train_acc: 0.8479 | test_loss: 0.4054 | test_acc: 0.9176
Epoch: 6 | train_loss: 0.3748 | train_acc: 0.9021 | test_loss: 0.3729 | test_acc: 0.8968
Epoch: 7 | train_loss: 0.3685 | train_acc: 0.9167 | test_loss: 0.3322 | test_acc: 0.9072
Epoch: 8 | train_loss: 0.3660 | train_acc: 0.8958 | test_loss: 0.3458 | test_acc: 0.9072
Epoch: 9 | train_loss: 0.3014 | train_acc: 0.9313 | test_loss: 0.3143 | test_acc: 0.9072
Epoch: 10 | train_loss: 0.3551 | train_acc: 0.8854 | test_loss: 0.2844 | test_acc: 0.9072
[INFO] Saving model to: models/07_effnetb0_aug_10_epochs.pth


[INFO] Created EfficientNetB1...
[INFO] Create

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0039 | train_acc: 0.6042 | test_loss: 0.8834 | test_acc: 0.8873
Epoch: 2 | train_loss: 0.8129 | train_acc: 0.8458 | test_loss: 0.7443 | test_acc: 0.8570
Epoch: 3 | train_loss: 0.6999 | train_acc: 0.8396 | test_loss: 0.6074 | test_acc: 0.9489
Epoch: 4 | train_loss: 0.6056 | train_acc: 0.8667 | test_loss: 0.5538 | test_acc: 0.9280
Epoch: 5 | train_loss: 0.5495 | train_acc: 0.8771 | test_loss: 0.4986 | test_acc: 0.9280
[INFO] Saving model to: models/07_effnetb1_non_aug_5_epochs.pth


[INFO] Created EfficientNetB1...
[INFO] Created SummaryWriter, saving to runs/25-08-19/aug/effnetb1/5_epochs...


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0039 | train_acc: 0.6042 | test_loss: 0.8834 | test_acc: 0.8873
Epoch: 2 | train_loss: 0.8129 | train_acc: 0.8458 | test_loss: 0.7443 | test_acc: 0.8570
Epoch: 3 | train_loss: 0.6999 | train_acc: 0.8396 | test_loss: 0.6074 | test_acc: 0.9489
Epoch: 4 | train_loss: 0.6056 | train_acc: 0.8667 | test_loss: 0.5538 | test_acc: 0.9280
Epoch: 5 | train_loss: 0.5495 | train_acc: 0.8771 | test_loss: 0.4986 | test_acc: 0.9280
[INFO] Saving model to: models/07_effnetb1_aug_5_epochs.pth


[INFO] Created EfficientNetB1...
[INFO] Created SummaryWriter, saving to runs/25-08-19/non_aug/effnetb1/10_epochs...


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0039 | train_acc: 0.6042 | test_loss: 0.8834 | test_acc: 0.8873
Epoch: 2 | train_loss: 0.8129 | train_acc: 0.8458 | test_loss: 0.7443 | test_acc: 0.8570
Epoch: 3 | train_loss: 0.6999 | train_acc: 0.8396 | test_loss: 0.6074 | test_acc: 0.9489
Epoch: 4 | train_loss: 0.6056 | train_acc: 0.8667 | test_loss: 0.5538 | test_acc: 0.9280
Epoch: 5 | train_loss: 0.5495 | train_acc: 0.8771 | test_loss: 0.4986 | test_acc: 0.9280
Epoch: 6 | train_loss: 0.4680 | train_acc: 0.9292 | test_loss: 0.4342 | test_acc: 0.9384
Epoch: 7 | train_loss: 0.4479 | train_acc: 0.9167 | test_loss: 0.4046 | test_acc: 0.9280
Epoch: 8 | train_loss: 0.4591 | train_acc: 0.8875 | test_loss: 0.4458 | test_acc: 0.9072
Epoch: 9 | train_loss: 0.3866 | train_acc: 0.9187 | test_loss: 0.4164 | test_acc: 0.8977
Epoch: 10 | train_loss: 0.4144 | train_acc: 0.8896 | test_loss: 0.3480 | test_acc: 0.9384
[INFO] Saving model to: models/07_effnetb1_non_aug_10_epochs.pth


[INFO] Created EfficientNetB1...
[INFO] Cr

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0039 | train_acc: 0.6042 | test_loss: 0.8834 | test_acc: 0.8873
Epoch: 2 | train_loss: 0.8129 | train_acc: 0.8458 | test_loss: 0.7443 | test_acc: 0.8570
Epoch: 3 | train_loss: 0.6999 | train_acc: 0.8396 | test_loss: 0.6074 | test_acc: 0.9489
Epoch: 4 | train_loss: 0.6056 | train_acc: 0.8667 | test_loss: 0.5538 | test_acc: 0.9280
Epoch: 5 | train_loss: 0.5495 | train_acc: 0.8771 | test_loss: 0.4986 | test_acc: 0.9280
Epoch: 6 | train_loss: 0.4680 | train_acc: 0.9292 | test_loss: 0.4342 | test_acc: 0.9384
Epoch: 7 | train_loss: 0.4479 | train_acc: 0.9167 | test_loss: 0.4046 | test_acc: 0.9280
Epoch: 8 | train_loss: 0.4591 | train_acc: 0.8875 | test_loss: 0.4458 | test_acc: 0.9072
Epoch: 9 | train_loss: 0.3866 | train_acc: 0.9187 | test_loss: 0.4164 | test_acc: 0.8977
Epoch: 10 | train_loss: 0.4144 | train_acc: 0.8896 | test_loss: 0.3480 | test_acc: 0.9384
[INFO] Saving model to: models/07_effnetb1_aug_10_epochs.pth




In [25]:
# Let's view oru experiments from within notebook
%load_ext tensorboard
%tensorboard --logdir runs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 60567), started 0:02:11 ago. (Use '!kill 60567' to kill it.)

## Exercise 3. Scale up the dataset to turn FoodVision Mini into FoodVision Big using the entire Food101 dataset from torchvision.models

In [None]:
# Download and transform Food101 data
train_data = torchvision.datasets.Food101(
    root="data",
    # download=True,
    split="train",
    transform=simple_transform,
)

test_data = torchvision.datasets.Food101(
    root="data",
    # download=True,
    split="test",
    transform=simple_transform,
)

In [None]:
# Check sample numbers
len(train_data), len(test_data)

(75750, 25250)

In [None]:
import os

# Create DataLoaders
BATCH_SIZE = 512

train_dataloader_big = torch.utils.data.DataLoader(
    train_data,
    shuffle=True,
    batch_size=BATCH_SIZE,
    num_workers=os.cpu_count(),
    pin_memory=True
)

test_dataloader_big = torch.utils.data.DataLoader(
    test_data,
    shuffle=False,
    batch_size=BATCH_SIZE,
    num_workers=os.cpu_count(),
    pin_memory=True
)

In [33]:
len(train_dataloader_big), len(test_dataloader_big)

(148, 50)

In [34]:
# Create model
effnetv2_s_weights = torchvision.models.EfficientNet_V2_S_Weights.DEFAULT
foodvision_big_model = torchvision.models.efficientnet_v2_s(weights=effnetv2_s_weights).to(device)

# Freeze the base layers
for param in foodvision_big_model.features.parameters():
  param.requires_grad = False

# Change the classifier head to suit 101 different classes
foodvision_big_model.classifier = nn.Sequential(
    nn.Dropout(p=0.2),
    nn.Linear(in_features=1280, out_features=101) # 101 output classes for Food101 
).to(device)

from torchinfo import summary

summary(
  model=foodvision_big_model,
    input_size=(1, 3, 224, 224)
)

Downloading: "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth" to /home/michal-chojna/.cache/torch/hub/checkpoints/efficientnet_v2_s-dd5fe13b.pth


100%|██████████| 82.7M/82.7M [00:08<00:00, 9.95MB/s]


Layer (type:depth-idx)                                  Output Shape              Param #
EfficientNet                                            [1, 101]                  --
├─Sequential: 1-1                                       [1, 1280, 7, 7]           --
│    └─Conv2dNormActivation: 2-1                        [1, 24, 112, 112]         --
│    │    └─Conv2d: 3-1                                 [1, 24, 112, 112]         (648)
│    │    └─BatchNorm2d: 3-2                            [1, 24, 112, 112]         (48)
│    │    └─SiLU: 3-3                                   [1, 24, 112, 112]         --
│    └─Sequential: 2-2                                  [1, 24, 112, 112]         --
│    │    └─FusedMBConv: 3-4                            [1, 24, 112, 112]         (5,232)
│    │    └─FusedMBConv: 3-5                            [1, 24, 112, 112]         (5,232)
│    └─Sequential: 2-3                                  [1, 48, 56, 56]           --
│    │    └─FusedMBConv: 3-6                 

In [None]:
foodvision_big_results = train(
    model=foodvision_big_model,
    train_dataloader=train_dataloader_big,
    test_dataloader=test_dataloader_big,
    optimizer=torch.optim.Adam(params=foodvision_big_model.parameters(), lr=0.001),
    loss_fn=torch.nn.CrossEntropyLoss(),
    epochs=5,
    device=device,
    writer=create_writer(
        experiment_name="food101_all_data",
        model_name="foodvision_big",
        extra=f"{5}_epochs")
)

[INFO] Created SummaryWriter, saving to runs/25-08-19/food101_all_data/foodvision_big/5_epochs...


  0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
%load_ext tensorboard
%tensorboard --logdir runs