In [None]:
!pip install --upgrade torch torchvision torchdata  torchtext

In [1]:
import torch
import torchvision
# Print the installed library versions so the environment used for this run
# is recorded alongside the results.
print(f"torch version: {torch.__version__}")
print(f"torchvision version: {torchvision.__version__}")

torch version: 2.6.0+cu124
torchvision version: 0.21.0+cu124


In [2]:
# Continue with regular imports
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms

# Try to get torchinfo, install it if it doesn't work
try:
    from torchinfo import summary
except:
    print("[INFO] Couldn't find torchinfo... installing it.")
    !pip install -q torchinfo
    from torchinfo import summary

# Try to import the going_modular directory, download it from GitHub if it doesn't work
try:
    from going_modular.going_modular import data_setup, engine
except:
    # Get the going_modular scripts
    print("[INFO] Couldn't find going_modular scripts... downloading them from GitHub.")
    !git clone https://github.com/mrdbourke/pytorch-deep-learning
    !mv pytorch-deep-learning/going_modular .
    !rm -rf pytorch-deep-learning
    from going_modular.going_modular import data_setup, engine

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [4]:
# Set seeds
def set_seeds(seed: int = 42):
    """Seeds the torch random number generators for reproducible runs.

    Args:
        seed (int, optional): Value used to seed the generators. Defaults to 42.
    """
    # Apply the same seed first to general torch operations, then to CUDA
    # torch operations (the ones that happen on the GPU).
    for apply_seed in (torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)

In [5]:
import os
import zipfile

from pathlib import Path
# Root directory (relative to the notebook's working directory) that holds the datasets.
data_path = Path("data/")



In [6]:
data_path

PosixPath('data')

In [7]:

# Name of the zipped image archive and its location inside the data directory.
target_file = "images_data.zip"
source = data_path.joinpath(target_file)
source

PosixPath('data/images_data.zip')

In [8]:
# image_path = Path("data")

# # Unzip the image dataset archive
# with zipfile.ZipFile(source, "r") as zip_ref:
#     zip_ref.extractall(image_path)
            

In [9]:
from pathlib import Path

# Dataset layout: <image_path>/train and <image_path>/test
image_path = Path("data/version2")
train_dir, test_dir = (image_path.joinpath(split) for split in ("train", "test"))

train_dir


PosixPath('data/version2/train')

In [10]:
# Setup ImageNet normalization levels (turns all images into similar distribution as ImageNet)
# normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
#                                  std=[0.229, 0.224, 0.225])

# # Create transform pipeline manually
# manual_transforms = transforms.Compose([
#     transforms.Resize((224, 224)),
#     transforms.ToTensor(),
#     normalize
# ])

# print(f"Manually created transforms: {manual_transforms}")

# image_size=(224,224)
# transforms_train = transforms.Compose([transforms.ToPILImage(),
#                                        transforms.Resize(image_size),
#                                        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
#                                        transforms.RandomHorizontalFlip(p=0.5),
#                                        transforms.RandomVerticalFlip(p=0.1),
#                                        transforms.RandomRotation(degrees=30),
#                                        transforms.ToTensor()
#                                        ])

# transforms_test = transforms.Compose([transforms.ToPILImage(),
#                                       transforms.Resize(image_size),
#                                       transforms.ToTensor()
#                                        ])


# Pretrained EfficientNet-B0 weights (plenty of other weights are available in torchvision.models)
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT

# Reuse the transforms bundled with the weights (the ones used to obtain them)
automatic_transforms = weights.transforms()
print(f"Automatically created transforms: {automatic_transforms}")

# Build the train/test data loaders; both splits use the automatically created transforms.
# NOTE(review): `transforms_train`/`transforms_test` are the keyword names this
# notebook relies on - confirm they match the installed going_modular.data_setup.
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transforms_train=automatic_transforms,
    transforms_test=automatic_transforms,
    batch_size=32,
)

train_dataloader, test_dataloader, class_names

Automatically created transforms: ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)


(<torch.utils.data.dataloader.DataLoader at 0x7f319c1c3550>,
 <torch.utils.data.dataloader.DataLoader at 0x7f319dd79c60>,
 ['airplane', 'car', 'cat', 'dog', 'flower', 'motorbike', 'person'])

In [11]:
class_names

['airplane', 'car', 'cat', 'dog', 'flower', 'motorbike', 'person']

In [12]:
# OLD: legacy API from before torchvision 0.13 (the `pretrained` flag is deprecated)
# model = torchvision.models.efficientnet_b0(pretrained=True).to(device)

# NEW in torchvision 0.13: weights API, where "DEFAULT" means "best weights available"
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT

# Build EfficientNet-B0 with the pretrained weights, then send it to the target device
model = torchvision.models.efficientnet_b0(weights=weights)
model = model.to(device)

# Uncomment to view the model architecture
# model

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /home/workbench/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 28.4MB/s]


In [13]:
# Launch the MLflow UI on port 5000 as a background shell process (trailing "&"),
# so the cell returns immediately instead of blocking the notebook.
get_ipython().system_raw("mlflow ui --port 5000 &")

In [14]:
from pyngrok import ngrok

In [15]:
# NOTE(review): presumably stops any ngrok process left over from an earlier
# run so a fresh tunnel can be opened below - confirm against the pyngrok docs.
ngrok.kill()

In [16]:
# Register the ngrok authtoken without storing the secret in the notebook.
# The token is read from the NGROK_AUTHTOKEN environment variable; the token
# that was previously hardcoded here should be revoked and replaced.
import os

ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN")
if not ngrok_authtoken:
    raise RuntimeError(
        "Set the NGROK_AUTHTOKEN environment variable before running this cell."
    )
ngrok.set_auth_token(ngrok_authtoken)

Downloading ngrok: 30%

[2025-02-07 05:39:36 +0000] [4073] [INFO] Starting gunicorn 23.0.0
[2025-02-07 05:39:36 +0000] [4073] [INFO] Listening at: http://127.0.0.1:5000 (4073)
[2025-02-07 05:39:36 +0000] [4073] [INFO] Using worker: sync
[2025-02-07 05:39:36 +0000] [4074] [INFO] Booting worker with pid: 4074
[2025-02-07 05:39:36 +0000] [4075] [INFO] Booting worker with pid: 4075
[2025-02-07 05:39:36 +0000] [4076] [INFO] Booting worker with pid: 4076
[2025-02-07 05:39:36 +0000] [4077] [INFO] Booting worker with pid: 4077


Authtoken saved to configuration file: /home/workbench/.config/ngrok/ngrok.yml                      


In [17]:
# Open an ngrok HTTP tunnel to local port 5000 (the port passed to `mlflow ui` earlier).
# NOTE(review): bind_tls=True presumably requests an https-only public URL -
# confirm against the pyngrok/ngrok version in use.
ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)

In [18]:
print("MLflow UI ", ngrok_tunnel.public_url)

MLflow UI  https://fd50-67-254-220-141.ngrok-free.app


In [19]:
from IPython.display import IFrame

# Embed the page served at the tunnel's public URL (the MLflow UI) in the cell output.
url = ngrok_tunnel.public_url
IFrame(url, width=1400, height=600)

In [20]:
# Freeze all base layers: no parameter under model.features requires gradients
model.features.requires_grad_(False)

# The new torch.nn.Linear layer is created with random weights,
# so set the seeds before building it
set_seeds()

# Swap in a classifier head sized for our classes and send it to the target device
model.classifier = torch.nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280,
              out_features=len(class_names),
              bias=True),
).to(device)

In [21]:
from torchinfo import summary


In [22]:
# Loss function and optimizer used for training
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [23]:
try:
    from torch.utils.tensorboard import SummaryWriter
except:
    print("[INFO] Couldn't find tensorboard... installing it.")
    !pip install -q tensorboard
    from torch.utils.tensorboard import SummaryWriter


# Create a writer with all default settings
writer = SummaryWriter()

In [24]:
from typing import Dict, List
from tqdm.auto import tqdm
from typing import List, Tuple, Dict  # Type hinting for clarity
from going_modular.going_modular.engine import train_step, test_step

# Import train() function from: 
# https://github.com/mrdbourke/pytorch-deep-learning/blob/main/going_modular/going_modular/engine.py
def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List]:
    """Trains and tests a PyTorch model, tracking the experiment as it goes.

    Passes a target PyTorch model through train_step() and test_step()
    functions for a number of epochs, training and testing the model
    in the same epoch loop.

    Calculates, prints and stores evaluation metrics throughout. The loss
    and accuracy of every epoch are also written to the module-level
    ``writer`` (a SummaryWriter created outside this function). After the
    last epoch the model graph is written once, and the writer is closed
    before returning, even if training raises.

    Args:
      model: A PyTorch model to be trained and tested.
      train_dataloader: A DataLoader instance for the model to be trained on.
      test_dataloader: A DataLoader instance for the model to be tested on.
      optimizer: A PyTorch optimizer to help minimize the loss function.
      loss_fn: A PyTorch loss function to calculate loss on both datasets.
      epochs: An integer indicating how many epochs to train for.
      device: A target device to compute on (e.g. "cuda" or "cpu").

    Returns:
      A dictionary of training and testing loss as well as training and
      testing accuracy metrics. Each metric has a value in a list for
      each epoch.
      In the form: {train_loss: [...],
                train_acc: [...],
                test_loss: [...],
                test_acc: [...]}
      For example if training for epochs=2:
              {train_loss: [2.0616, 1.0537],
                train_acc: [0.3945, 0.3945],
                test_loss: [1.2641, 1.5706],
                test_acc: [0.3400, 0.2973]}
    """

    # Create empty results dictionary
    results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []
    }

    try:
        # Loop through training and testing steps for a number of epochs
        for epoch in tqdm(range(epochs)):
            train_loss, train_acc = train_step(model=model,
                                               dataloader=train_dataloader,
                                               loss_fn=loss_fn,
                                               optimizer=optimizer,
                                               device=device)
            test_loss, test_acc = test_step(model=model,
                                            dataloader=test_dataloader,
                                            loss_fn=loss_fn,
                                            device=device)

            # Print out what's happening
            print(
              f"Epoch: {epoch+1} | "
              f"train_loss: {train_loss:.4f} | "
              f"train_acc: {train_acc:.4f} | "
              f"test_loss: {test_loss:.4f} | "
              f"test_acc: {test_acc:.4f}"
            )

            # Update results dictionary
            results["train_loss"].append(train_loss)
            results["train_acc"].append(train_acc)
            results["test_loss"].append(test_loss)
            results["test_acc"].append(test_acc)

            ### Experiment tracking ###
            # Add loss results to SummaryWriter
            writer.add_scalars(main_tag="Loss",
                               tag_scalar_dict={"train_loss": train_loss,
                                                "test_loss": test_loss},
                               global_step=epoch)

            # Add accuracy results to SummaryWriter
            writer.add_scalars(main_tag="Accuracy",
                               tag_scalar_dict={"train_acc": train_acc,
                                                "test_acc": test_acc},
                               global_step=epoch)

        # Track the PyTorch model architecture. The architecture is the same
        # every epoch, so it is traced once after training instead of being
        # re-traced inside the loop. Skipped when no epoch ran, as before.
        if epochs > 0:
            writer.add_graph(model=model,
                             # Pass in an example input
                             input_to_model=torch.randn(32, 3, 224, 224).to(device))
    finally:
        # Close the writer even when training is interrupted or fails
        writer.close()

    # Return the filled results at the end of the epochs
    return results

In [26]:
import random
import string
import yaml
import requests
import json

In [27]:
def generate_random_experiment_name():
    """Return a name of the form ``experiment-<suffix>`` with a random suffix.

    The suffix is 8 characters drawn from the lowercase ASCII letters and digits.
    """
    alphabet = string.ascii_lowercase + string.digits
    prefix, random_suffix = "experiment", "".join(random.choices(alphabet, k=8))
    return f"{prefix}-{random_suffix}"

In [29]:
%%time

import mlflow
import mlflow.pytorch
from going_modular.going_modular.utils import save_model
#mlflow.pytorch.autolog()

# Create (or select) a randomly named MLflow experiment for this run
#mlflow.tracking.set_tracking_uri("https://a1e3-67-254-220-141.ngrok-free.app/")
experiment_name=generate_random_experiment_name()
print(experiment_name)
mlflow.set_experiment(experiment_name=experiment_name)


# Set the random seeds
set_seeds(seed=42)


# The context manager ends the run when the block exits, so no explicit
# mlflow.end_run() call is needed afterwards.
with mlflow.start_run(run_name = f"{experiment_name}-run") as run:
    results = train(model=model,
                    train_dataloader=train_dataloader,
                    test_dataloader=test_dataloader,
                    optimizer=optimizer,
                    loss_fn=loss_fn,
                    epochs=5,
                    device=device)
    # Log every per-epoch metric value, using the 1-based epoch number as the step
    for key, values in results.items():
        for i, val in enumerate(values, start=1): # start=1 makes the index start at 1
            mlflow.log_metric(key,val, step=i, synchronous=False)

    # Save the model to file (under "models/") and record the file name as a run parameter
    save_filepath = f"{experiment_name}.pth"
    mlflow.log_param("save_filepath", save_filepath)
    save_model(model=model,
               target_dir="models",
               model_name=save_filepath)
    # Also log the model itself to the MLflow run
    mlflow.pytorch.log_model(model, "models")

print("-"*50 + "\n")

2025/02/07 05:47:47 INFO mlflow.tracking.fluent: Experiment with name 'experiment-0z2i5zza' does not exist. Creating a new experiment.


experiment-0z2i5zza


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.7861 | train_acc: 0.4292 | test_loss: 1.5758 | test_acc: 0.7083


 20%|██        | 1/5 [00:08<00:33,  8.31s/it]

Epoch: 2 | train_loss: 1.3432 | train_acc: 0.8479 | test_loss: 1.2386 | test_acc: 0.9375


 40%|████      | 2/5 [00:15<00:22,  7.65s/it]

Epoch: 3 | train_loss: 0.9790 | train_acc: 0.9500 | test_loss: 0.9561 | test_acc: 0.9625


 60%|██████    | 3/5 [00:23<00:15,  7.70s/it]

Epoch: 4 | train_loss: 0.7770 | train_acc: 0.9771 | test_loss: 0.7671 | test_acc: 0.9688


 80%|████████  | 4/5 [00:30<00:07,  7.71s/it]

Epoch: 5 | train_loss: 0.5903 | train_acc: 1.0000 | test_loss: 0.6403 | test_acc: 0.9688


100%|██████████| 5/5 [00:39<00:00,  7.86s/it]


[INFO] Saving model to: models/experiment-0z2i5zza.pth




--------------------------------------------------

CPU times: user 21.8 s, sys: 17.7 s, total: 39.6 s
Wall time: 49.7 s


In [30]:
results

{'train_loss': [1.7860800743103027,
  1.3431644201278687,
  0.9789897680282593,
  0.777011489868164,
  0.5903068900108337],
 'train_acc': [0.4291666666666667,
  0.8479166666666667,
  0.95,
  0.9770833333333334,
  1.0],
 'test_loss': [1.5757891178131103,
  1.238572120666504,
  0.9560967803001403,
  0.7671110212802887,
  0.6403304934501648],
 'test_acc': [0.7083333333333333, 0.9375, 0.9625, 0.96875, 0.96875]}