<a href="https://colab.research.google.com/github/pouyarahmti/Pytorch-Deep-Learning-Bootcamp/blob/main/07_PyTorch_Experiment_Tracking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 07 PyTorch Experiment Tracking

Machine Learning is very experimental.
To figure out which experiments are worth pursuing, you need **Experiment Tracking**. It helps you keep track of what does not work so you can work out what does.


In [1]:
import torch
import torchvision

# Print the installed library versions so the output records the environment used
for library in (torch, torchvision):
    print(library.__version__)

2.3.0+cu121
0.18.0+cu121


In [2]:
# Continue with regular imports
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms

# Try to get torchinfo, install it if it doesn't work
try:
    from torchinfo import summary
except:
    print("[INFO] Couldn't find torchinfo... installing it.")
    !pip install -q torchinfo
    from torchinfo import summary

# Try to import the going_modular directory, download it from GitHub if it doesn't work
try:
    from going_modular.going_modular import data_setup, engine
except:
    # Get the going_modular scripts
    print("[INFO] Couldn't find going_modular scripts... downloading them from GitHub.")
    !git clone https://github.com/mrdbourke/pytorch-deep-learning
    !mv pytorch-deep-learning/going_modular .
    !rm -rf pytorch-deep-learning
    from going_modular.going_modular import data_setup, engine

[INFO] Couldn't find torchinfo... installing it.
[INFO] Couldn't find going_modular scripts... downloading them from GitHub.
Cloning into 'pytorch-deep-learning'...
remote: Enumerating objects: 4056, done.[K
remote: Total 4056 (delta 0), reused 0 (delta 0), pack-reused 4056[K
Receiving objects: 100% (4056/4056), 646.90 MiB | 34.36 MiB/s, done.
Resolving deltas: 100% (2371/2371), done.
Updating files: 100% (248/248), done.


In [3]:
# Device-agnostic setup: use the GPU when torch can see one, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [4]:
# Set seeds
def set_seeds(seed: int = 42):
    """Seed torch's random number generators so runs are repeatable.

    Args:
        seed (int, optional): Value passed to each seeding function. Defaults to 42.
    """
    # General torch operations first, then CUDA operations (the ones that run on the GPU)
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

## 1. Get data

In [15]:
import os
import zipfile

from pathlib import Path

import requests

def download_data(source: str,
                  destination: str,
                  remove_source: bool = True) -> Path:
    """Download a zip archive and extract it under the local ``data/`` folder.

    If ``data/<destination>`` already exists, nothing is downloaded and the
    existing folder is returned.

    Args:
        source: URL of the zip archive to download.
        destination: Name of the folder (relative to ``data/``) the archive is
            extracted into. This is a local folder name, not a URL.
        remove_source: Whether to delete the downloaded ``.zip`` after it has
            been extracted. Defaults to True.

    Returns:
        Path to the folder holding the extracted data (``data/<destination>``).

    Raises:
        requests.RequestException: If the download fails (connection error,
            timeout, or an HTTP error status).
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    # Setup path to data folder
    data_path = Path("data/")
    image_path = data_path / destination

    # Nothing to do when the image folder is already in place
    if image_path.is_dir():
        print("Found existing image folder")
        return image_path

    print("Did not find existing image folder, downloading...")
    image_path.parent.mkdir(parents=True, exist_ok=True)

    # The archive keeps the file name used in the URL
    target_file = Path(source).name
    archive_path = data_path / target_file

    # Fetch before touching the disk so a failed request leaves no empty file behind
    print(f"Downloading {target_file} from {source}")
    response = requests.get(source, timeout=60)
    # Fail loudly on 4xx/5xx instead of saving an error page as a .zip
    response.raise_for_status()
    archive_path.write_bytes(response.content)

    # Unzip the target file
    with zipfile.ZipFile(archive_path, "r") as zip_file:
        print("Unzipping data")
        zip_file.extractall(image_path)

    # Remove the .zip file
    if remove_source:
        os.remove(archive_path)

    return image_path

In [16]:
# Fetch the pizza/steak/sushi images (download_data skips the download when the folder exists)
image_path = download_data(
    source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
    destination="pizza_steak_sushi",
    remove_source=True,
)
image_path

Found existing image folder


PosixPath('data/pizza_steak_sushi')

## 2. Create DataSet & DataLoader

### 2.1 Create Dataloaders with manual transforms

The goal of the transforms is to make sure our custom data is formatted in a way that suits our pretrained models.

In [17]:
# Setup directories: the train and test splits are sub-folders of the image folder
train_dir, test_dir = (image_path / split_name for split_name in ("train", "test"))

train_dir, test_dir

(PosixPath('data/pizza_steak_sushi/train'),
 PosixPath('data/pizza_steak_sushi/test'))

In [18]:
# Import before first use so this cell does not depend on an earlier cell's import
from torchvision import transforms

# Setup ImageNet Normalization mean and std (one value per colour channel)
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Create transform pipeline manually
manual_transforms = transforms.Compose([
    transforms.Resize((224, 224)),  # resize every image to 224x224
    transforms.ToTensor(),          # convert the image to a tensor
    normalize                       # apply the ImageNet normalization defined above
])

print(f"Manually created transforms: {manual_transforms}")

Manually created transforms: Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)


In [20]:
# Build the train/test DataLoaders and collect the class names
from going_modular.going_modular import data_setup

train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir, test_dir=test_dir, transform=manual_transforms, batch_size=32
)

train_dataloader, test_dataloader, class_names

(<torch.utils.data.dataloader.DataLoader at 0x7932ceac8370>,
 <torch.utils.data.dataloader.DataLoader at 0x7932ceac8130>,
 ['pizza', 'steak', 'sushi'])