<a href="https://colab.research.google.com/github/sachinthadilshann/pytorch_course_by_DanielBourke/blob/main/CIFAR10_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [95]:
import torch
import torchvision

print(f"PyTorch version: {torch.__version__}")
print(f"TorchVision version: {torchvision.__version__}")

# Set the target device
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Using device: {device}")

PyTorch version: 2.9.0+cu126
TorchVision version: 0.24.0+cu126
Using device: cuda


In [96]:
# Create model weights and transforms
model_weights = torchvision.models.ResNet50_Weights.IMAGENET1K_V2 # <- use the latest weights (could also use .DEFAULT)
transforms = model_weights.transforms()

# Setup model
model = torchvision.models.resnet50(weights=model_weights)

# Count the number of parameters in the model
total_params = sum(
    param.numel() for param in model.parameters()
)

print(f"Total parameters of model: {total_params} (the more parameters, the more GPU memory the model will use, the more *relative* of a speedup you'll get)")
print(f"Model transforms:\n{transforms}")

Total parameters of model: 25557032 (the more parameters, the more GPU memory the model will use, the more *relative* of a speedup you'll get)
Model transforms:
ImageClassification(
    crop_size=[224]
    resize_size=[232]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)


In [97]:
def create_model(num_classes=10):

  model_weights = torchvision.models.ResNet50_Weights.IMAGENET1K_V2
  transforms = model_weights.transforms()
  model = torchvision.models.resnet50(weights=model_weights)

  model.fc = torch.nn.Linear(in_features=2048,
                             out_features=num_classes)
  return model, transforms

model, transforms = create_model()

In [98]:
import torch

total_free_gpu_memory, total_gpu_memory = torch.cuda.mem_get_info()
print(f"Total free GPU memory: {round(total_free_gpu_memory * 1e-9, 3)} GB")
print(f"Total GPU memory: {round(total_gpu_memory * 1e-9, 3)} GB")

Total free GPU memory: 14.433 GB
Total GPU memory: 15.828 GB


In [99]:

total_free_gpu_memory_gb = round(total_free_gpu_memory * 1e-9, 3)
if total_free_gpu_memory_gb >= 16:
  BATCH_SIZE = 128
  IMAGE_SIZE = 224
  print(f"GPU memory available is {total_free_gpu_memory_gb} GB, using batch size of {BATCH_SIZE} and image size {IMAGE_SIZE}")
else:
  BATCH_SIZE = 32
  IMAGE_SIZE = 128
  print(f"GPU memory available is {total_free_gpu_memory_gb} GB, using batch size of {BATCH_SIZE} and image size {IMAGE_SIZE}")

GPU memory available is 14.433 GB, using batch size of 32 and image size 128


In [100]:
transforms.crop_size = IMAGE_SIZE
transforms.resize_size = IMAGE_SIZE
print(f"Updated data transforms:\n{transforms}")

Updated data transforms:
ImageClassification(
    crop_size=128
    resize_size=128
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)


In [101]:
import torch
import torchvision

train_dataset = torchvision.datasets.CIFAR10(root='',
                                             train=True,
                                             download=True,
                                             transform=torchvision.transforms.ToTensor())

test_datset = torchvision.datasets.CIFAR10(root='',
                                           train=False,
                                           download=True,
                                           transform=torchvision.transforms.ToTensor())



In [102]:
train_len = len(train_dataset)
test_len = len(test_datset)

print(f"Train dataset length: {train_len}")
print(f"Test dataset length: {test_len}")

Train dataset length: 50000
Test dataset length: 10000


In [103]:
train_dataset[0][1]

6

In [104]:
from torch.utils.data import DataLoader
import os

NUM_WORKERS= os.cpu_count()
NUM_WORKERS

2

In [105]:


train_dataloader = DataLoader(dataset=train_dataset,
                             batch_size=BATCH_SIZE,
                             shuffle=True,
                             num_workers=NUM_WOKERS)

train_dataloader = DataLoader(dataset=test_datset,
                             batch_size=BATCH_SIZE,
                             shuffle=True,
                             num_workers=NUM_WOKERS)

print(f"Train dataloader length: {len(train_dataloader)} batches of size {BATCH_SIZE}")
print(f"Test dataloader length: {len(test_dataloader)} batches of size {BATCH_SIZE}")
print(f"Using number of workers: {NUM_WORKERS} (generally more workers means faster dataloading from CPU to GPU)")

Train dataloader length: 313 batches of size 32
Test dataloader length: 313 batches of size 32
Using number of workers: 2 (generally more workers means faster dataloading from CPU to GPU)


In [106]:
from prompt_toolkit.shortcuts import progress_bar
import time
from tqdm.auto import tqdm
from typing import Dict, List, Tuple
import torch

def train_step(epoch: int,
               model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn:torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device,
               disable_progress_bar: bool = False) -> Tuple[float,float]:

  model.train()

  train_loss,train_acc = 0,0

  ProgressBar = tqdm(enumerate(dataloader),
                   desc=f"Training Epoch {epoch}",
                   total = len(dataloader),
                   disable = disable_progress_bar )


  for batch,(X,y) in ProgressBar:

    X,y = X.to(device), y.to(device)

    y_pred = model(X)

    loss = loss_fn(y_pred,y)
    train_loss += loss.item()

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

    y_pred_class = torch.argmax(torch.softmax(y_pred,dim=1),dim=1)
    train_acc += (y_pred_class == y).sum().item()/len(y_pred)

    ProgressBar.set_postfix(
            {
                "train_loss": train_loss / (batch + 1),
                "train_acc": train_acc / (batch + 1),
            }
        )


  train_loss  = train_loss / len(dataloader)
  train_acc = train_acc / len(dataloader)

  return train_loss, train_acc


def test_step(  epoch:int,
                model: torch.nn.Module,
                dataloader: torch.utils.data.DataLoader,
                loss_fn: torch.nn.Module,
                device: torch.device,
                disable_progress_bar: bool = False)  -> Tuple[float,float]:

    model.eval()

    test_loss, test_acc = 0,0

    progress_bar = tqdm(enumerate(dataloader),
                        desc=f"Testing Epoch {epoch}",
                        total=len(dataloader),
                        disable=disable_progress_bar)

    with torch.inference_mode():
      for batch,(X,y) in progress_bar:
        X,y = X.to(device), y.to(device)

        test_pred_logits = model(X)

        loss = loss_fn(test_pred_logits,y)
        test_loss += loss.item()

        test_pred_labels = test_pred_logits.argmax(dim=1)
        test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))

        progress_bar.set_postfix(
            {
                  "test_loss": test_loss / (batch + 1),
                  "test_acc": test_acc / (batch + 1),
              }
          )

      test_loss = test_loss/len(dataloader)
      test_acc = test_acc/len(dataloader)
      return test_loss, test_acc


def train(model: torch.nn.Module,
           train_dataloader: torch.utils.data.DataLoader,
           test_dataloader: torch.utils.data.DataLoader,
           optimizer: torch.optim.Optimizer,
           loss_fn: torch.nn.Module,
           epochs: int,
           device: torch.device,
           disable_progress_bar: bool = False) -> Dict[str, List]:


  results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": [],
               "training_epochs_time":[],
               "testing_epochs_time":[]
    }

  for epoch in tqdm(range(epochs),disable=disable_progress_bar):

      train_epochs_start_time = time.time()
      train_loss, train_acc = train_step(epoch=epoch,
                                         model = model,
                                         dataloader = train_dataloader,
                                         loss_fn = loss_fn,
                                         optimizer = optimizer,
                                         device = device,
                                         disable_progress_bar = disable_progress_bar)


      train_epochs_end_time = time.time()
      train_epoch_time = train_epochs_end_time - train_epochs_start_time

      test_epoch_start_time = time.time()
      test_loss, test_acc = test_step(epoch=epoch,
                                      model=model,
                                      dataloader=test_dataloader,
                                      loss_fn=loss_fn,
                                      device=device,
                                      disable_progress_bar=disable_progress_bar)

      test_epoch_end_time = time.time()
      test_epoch_time = test_epoch_end_time - test_epoch_start_time


      print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f} | "
          f"train_time: {train_epoch_time:.3f} | "
          f"test_time: {test_epoch_time:.3f}"
      )


      results["train_loss"].append(train_loss)
      results["train_acc"].append(train_acc)
      results["test_loss"].append(test_loss)
      results["test_acc"].append(test_acc)
      results["training_epochs_time"].append(train_epoch_time)
      results["testing_epochs_time"].append(test_epoch_time)


  return results

In [110]:
NUM_EPOCHS = 15
LEARNING_RATE = 0.003


model,transforms = create_model()
model.to(device)

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=LEARNING_RATE)

compile_start_time = time.time()

compiled_model = torch.compile(model)

compile_end_time = time.time()
compile_time = compile_end_time - compile_start_time
print(f"Model compiled in {compile_time:.3f} seconds")

single_run_compile_results = train(model = compiled_model,
                                   train_dataloader = train_dataloader,
                                   test_dataloader = test_dataloader,
                                   loss_fn = loss_fn,
                                   optimizer = optimizer,
                                   epochs = NUM_EPOCHS,
                                   device = device)


Model compiled in 0.003 seconds


  0%|          | 0/20 [00:00<?, ?it/s]

Training Epoch 0:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 0:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 2.0422 | train_acc: 0.2528 | test_loss: 2.3967 | test_acc: 0.2921 | train_time: 12.772 | test_time: 3.986


Training Epoch 1:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 1:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 2 | train_loss: 1.7510 | train_acc: 0.3395 | test_loss: 1.6065 | test_acc: 0.3978 | train_time: 12.792 | test_time: 3.918


Training Epoch 2:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 2:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 3 | train_loss: 1.6741 | train_acc: 0.3797 | test_loss: 1.4178 | test_acc: 0.4712 | train_time: 12.693 | test_time: 4.491


Training Epoch 3:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 3:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 4 | train_loss: 1.4732 | train_acc: 0.4540 | test_loss: 1.3510 | test_acc: 0.4986 | train_time: 12.665 | test_time: 3.878


Training Epoch 4:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 4:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 5 | train_loss: 1.3153 | train_acc: 0.5280 | test_loss: 1.2971 | test_acc: 0.5304 | train_time: 12.586 | test_time: 3.900


Training Epoch 5:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 5:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 6 | train_loss: 1.2305 | train_acc: 0.5620 | test_loss: 1.1934 | test_acc: 0.5677 | train_time: 12.641 | test_time: 4.622


Training Epoch 6:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 6:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 7 | train_loss: 1.1165 | train_acc: 0.6137 | test_loss: 1.0594 | test_acc: 0.6290 | train_time: 12.593 | test_time: 3.927


Training Epoch 7:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 7:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 8 | train_loss: 1.0144 | train_acc: 0.6430 | test_loss: 1.1978 | test_acc: 0.6228 | train_time: 12.776 | test_time: 4.200


Training Epoch 8:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 8:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 9 | train_loss: 0.9411 | train_acc: 0.6759 | test_loss: 1.0604 | test_acc: 0.6215 | train_time: 12.826 | test_time: 4.266


Training Epoch 9:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 9:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 10 | train_loss: 0.8311 | train_acc: 0.7131 | test_loss: 0.8861 | test_acc: 0.6904 | train_time: 12.693 | test_time: 4.044


Training Epoch 10:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 10:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 11 | train_loss: 0.7585 | train_acc: 0.7446 | test_loss: 0.8000 | test_acc: 0.7182 | train_time: 12.801 | test_time: 4.761


Training Epoch 11:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 11:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 12 | train_loss: 0.7779 | train_acc: 0.7384 | test_loss: 0.8587 | test_acc: 0.6949 | train_time: 13.511 | test_time: 4.068


Training Epoch 12:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 12:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 13 | train_loss: 0.6745 | train_acc: 0.7700 | test_loss: 0.6077 | test_acc: 0.8051 | train_time: 12.829 | test_time: 4.156


Training Epoch 13:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 13:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 14 | train_loss: 0.6231 | train_acc: 0.7903 | test_loss: 0.7258 | test_acc: 0.7512 | train_time: 12.970 | test_time: 4.474


Training Epoch 14:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 14:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 15 | train_loss: 0.5051 | train_acc: 0.8275 | test_loss: 0.4307 | test_acc: 0.8526 | train_time: 12.903 | test_time: 4.055


Training Epoch 15:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 15:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 16 | train_loss: 0.4505 | train_acc: 0.8499 | test_loss: 0.5456 | test_acc: 0.8216 | train_time: 12.866 | test_time: 4.782


Training Epoch 16:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 16:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 17 | train_loss: 0.4133 | train_acc: 0.8640 | test_loss: 0.4325 | test_acc: 0.8562 | train_time: 12.890 | test_time: 4.117


Training Epoch 17:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 17:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 18 | train_loss: 0.3712 | train_acc: 0.8737 | test_loss: 0.2321 | test_acc: 0.9309 | train_time: 12.852 | test_time: 4.097


Training Epoch 18:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 18:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 19 | train_loss: 0.3125 | train_acc: 0.8968 | test_loss: 0.3662 | test_acc: 0.8834 | train_time: 12.920 | test_time: 4.668


Training Epoch 19:   0%|          | 0/313 [00:00<?, ?it/s]

Testing Epoch 19:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 20 | train_loss: 0.3136 | train_acc: 0.8974 | test_loss: 0.2932 | test_acc: 0.9028 | train_time: 12.886 | test_time: 4.109


In [111]:
import pandas as pd

result_single_df = pd.DataFrame(single_run_compile_results)
result_single_df

Unnamed: 0,train_loss,train_acc,test_loss,test_acc,training_epochs_time,testing_epochs_time
0,2.042188,0.252796,2.396663,0.292133,12.771522,3.986284
1,1.750984,0.339457,1.606473,0.397764,12.79208,3.918298
2,1.674118,0.379692,1.41778,0.471246,12.693152,4.49083
3,1.473203,0.453974,1.350956,0.498602,12.664521,3.877919
4,1.315313,0.527955,1.297112,0.530351,12.585737,3.899777
5,1.230505,0.562001,1.193399,0.567692,12.641139,4.621962
6,1.116492,0.613718,1.059357,0.628994,12.593165,3.927472
7,1.014353,0.642971,1.197773,0.622804,12.776303,4.199967
8,0.941093,0.675919,1.060442,0.621506,12.825904,4.26598
9,0.831081,0.713059,0.886091,0.690395,12.693098,4.043967


In [111]:
model = create_compiled_model()

# Create an empty list to store compiled model results
compiled_results_multiple_runs = []

# Run compiled model for multiple runs
for i in tqdm(range(NUM_RUNS)):
    print(f"[INFO] Run {i+1} of {NUM_RUNS} for compiled model")

    # Train the compiled model (note: the model will only be compiled once and then re-used for subsequent runs)
    results = train_compiled_model(model=model, epochs=NUM_EPOCHS, disable_progress_bar=True)
    compiled_results_multiple_runs.append(results)