<a href="https://colab.research.google.com/github/sachinthadilshann/pytorch_course_by_DanielBourke/blob/main/CIFAR10_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
import torch
import torchvision

print(f"PyTorch version: {torch.__version__}")
print(f"TorchVision version: {torchvision.__version__}")

# Set the target device
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Using device: {device}")

PyTorch version: 2.9.0+cu126
TorchVision version: 0.24.0+cu126
Using device: cuda


In [22]:
# Create model weights and transforms
model_weights = torchvision.models.ResNet50_Weights.IMAGENET1K_V2 # <- use the latest weights (could also use .DEFAULT)
transforms = model_weights.transforms()

# Setup model
model = torchvision.models.resnet50(weights=model_weights)

# Count the number of parameters in the model
total_params = sum(
    param.numel() for param in model.parameters()
)

print(f"Total parameters of model: {total_params} (the more parameters, the more GPU memory the model will use, the more *relative* of a speedup you'll get)")
print(f"Model transforms:\n{transforms}")

Total parameters of model: 25557032 (the more parameters, the more GPU memory the model will use, the more *relative* of a speedup you'll get)
Model transforms:
ImageClassification(
    crop_size=[224]
    resize_size=[232]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)


In [23]:
def create_model(num_classes=10):

  model_weights = torchvision.models.ResNet50_Weights.IMAGENET1K_V2
  transforms = model_weights.transforms()
  model = torchvision.models.resnet50(weights=model_weights)

  model.fc = torch.nn.Linear(in_features=2048,
                             out_features=num_classes)
  return model, transforms

model, transforms = create_model()

In [24]:
import torch

total_free_gpu_memory, total_gpu_memory = torch.cuda.mem_get_info()
print(f"Total free GPU memory: {round(total_free_gpu_memory * 1e-9, 3)} GB")
print(f"Total GPU memory: {round(total_gpu_memory * 1e-9, 3)} GB")

Total free GPU memory: 14.921 GB
Total GPU memory: 15.828 GB


In [25]:

total_free_gpu_memory_gb = round(total_free_gpu_memory * 1e-9, 3)
if total_free_gpu_memory_gb >= 16:
  BATCH_SIZE = 128
  IMAGE_SIZE = 224
  print(f"GPU memory available is {total_free_gpu_memory_gb} GB, using batch size of {BATCH_SIZE} and image size {IMAGE_SIZE}")
else:
  BATCH_SIZE = 32
  IMAGE_SIZE = 128
  print(f"GPU memory available is {total_free_gpu_memory_gb} GB, using batch size of {BATCH_SIZE} and image size {IMAGE_SIZE}")

GPU memory available is 14.921 GB, using batch size of 32 and image size 128


In [26]:
transforms.crop_size = IMAGE_SIZE
transforms.resize_size = IMAGE_SIZE
print(f"Updated data transforms:\n{transforms}")

Updated data transforms:
ImageClassification(
    crop_size=128
    resize_size=128
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)


In [27]:
import torch
import torchvision

train_dataset = torchvision.datasets.CIFAR10(root='.',
                                             train=True,
                                             download=True,
                                             transform=torchvision.transforms.ToTensor())

test_datset = torchvision.datasets.CIFAR10(root='.',
                                           train=False,
                                           download=True,
                                           transform=torchvision.transforms.ToTensor())



In [28]:
train_len = len(train_dataset)
test_len = len(test_datset)

print(f"Train dataset length: {train_len}")
print(f"Test dataset length: {test_len}")

Train dataset length: 50000
Test dataset length: 10000


In [29]:
train_dataset[0][1]

6

In [30]:
from torch.utils.data import DataLoader
import os

NUM_WOKERS= os.cpu_count()
NUM_WOKERS

2

In [31]:


train_dataloader = DataLoader(dataset=train_dataset,
                             batch_size=BATCH_SIZE,
                             shuffle=True,
                             num_workers=NUM_WOKERS)

test_dataloader = DataLoader(dataset=test_datset,
                             batch_size=BATCH_SIZE,
                             shuffle=True,
                             num_workers=NUM_WOKERS)

print(f"Train dataloader length: {len(train_dataloader)} batches of size {BATCH_SIZE}")
print(f"Test dataloader length: {len(test_dataloader)} batches of size {BATCH_SIZE}")
print(f"Using number of workers: {NUM_WOKERS} (generally more workers means faster dataloading from CPU to GPU)")

Train dataloader length: 1563 batches of size 32
Test dataloader length: 313 batches of size 32
Using number of workers: 2 (generally more workers means faster dataloading from CPU to GPU)


In [32]:
from prompt_toolkit.shortcuts import progress_bar
import time
from tqdm.auto import tqdm
from typing import Dict, List, Tuple
import torch

def train_step(epoch: int,
               model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn:torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device,
               disable_progress_bar: bool = False) -> Tuple[float,float]:

  model.train()

  train_loss,train_acc = 0,0

  ProgressBar = tqdm(enumerate(dataloader),
                   desc=f"Training Epoch {epoch}",
                   total = len(dataloader),
                   disable = disable_progress_bar )


  for batch,(X,y) in ProgressBar:

    X,y = X.to(device), y.to(device)

    y_pred = model(X)

    loss = loss_fn(y_pred,y)
    train_loss += loss.item()

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

    y_pred_class = torch.argmax(torch.softmax(y_pred,dim=1),dim=1)
    train_acc += (y_pred_class == y).sum().item()/len(y_pred)

    ProgressBar.set_postfix(
            {
                "train_loss": train_loss / (batch + 1),
                "train_acc": train_acc / (batch + 1),
            }
        )


  train_loss  = train_loss / len(dataloader)
  train_acc = train_acc / len(dataloader)

  return train_loss, train_acc


def test_step(  epoch:int,
                model: torch.nn.Module,
                dataloader: torch.utils.data.DataLoader,
                loss_fn: torch.nn.Module,
                device: torch.device,
                disable_progress_bar: bool = False)  -> Tuple[float,float]:

    model.eval()

    test_loss, test_acc = 0,0

    progress_bar = tqdm(enumerate(dataloader),
                        desc=f"Testing Epoch {epoch}",
                        total=len(dataloader),
                        disable=disable_progress_bar)

    with torch.inference_mode():
      for batch,(X,y) in progress_bar:
        X,y = X.to(device), y.to(device)

        test_pred_logits = model(X)

        loss = loss_fn(test_pred_logits,y)
        test_loss += loss.item()

        test_pred_labels = test_pred_logits.argmax(dim=1)
        test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))

        progress_bar.set_postfix(
            {
                  "test_loss": test_loss / (batch + 1),
                  "test_acc": test_acc / (batch + 1),
              }
          )

      test_loss = test_loss/len(dataloader)
      test_acc = test_acc/len(dataloader)
      return test_loss, test_acc


def train(model: torch.nn.Module,
           train_dataloader: torch.utils.data.DataLoader,
           test_dataloader: torch.utils.data.DataLoader,
           optimizer: torch.optim.Optimizer,
           loss_fn: torch.nn.Module,
           epochs: int,
           device: torch.device,
           disable_progress_bar: bool = False) -> Dict[str, List]:


  results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": [],
               "training_epochs_time":[],
               "testing_epochs_time":[]
    }

  for epoch in tqdm(range(epochs),disable=disable_progress_bar):

      train_epochs_start_time = time.time()
      train_loss, train_acc = train_step(epoch=epoch,
                                         model = model,
                                         dataloader = train_dataloader,
                                         loss_fn = loss_fn,
                                         optimizer = optimizer,
                                         device = device,
                                         disable_progress_bar = disable_progress_bar)


      train_epochs_end_time = time.time()
      train_epoch_time = train_epochs_end_time - train_epochs_start_time

      test_epoch_start_time = time.time()
      test_loss, test_acc = test_step(epoch=epoch,
                                      model=model,
                                      dataloader=test_dataloader,
                                      loss_fn=loss_fn,
                                      device=device,
                                      disable_progress_bar=disable_progress_bar)

      test_epoch_end_time = time.time()
      test_epoch_time = test_epoch_end_time - test_epoch_start_time


      print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f} | "
          f"train_time: {train_epoch_time:.3f} | "
          f"test_time: {test_epoch_time:.3f}"
      )


      results["train_loss"].append(train_loss)
      results["train_acc"].append(train_acc)
      results["test_loss"].append(test_loss)
      results["test_acc"].append(test_acc)
      results["training_epochs_time"].append(train_epoch_time)
      results["testing_epochs_time"].append(test_epoch_time)


  return results

In [33]:
NUM_EPOCHS = 10
LEARNING_RATE = 0.003


model,transforms = create_model()
model.to(device)

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=LEARNING_RATE)

compile_start_time = time.time()

compiled_model = torch.compile(model)

compile_end_time = time.time()
compile_time = compile_end_time - compile_start_time
print(f"Model compiled in {compile_time:.3f} seconds")

single_run_compile_results = train(model = compiled_model,
                                   train_dataloader = train_dataloader,
                                   test_dataloader = test_dataloader,
                                   loss_fn = loss_fn,
                                   optimizer = optimizer,
                                   epochs = NUM_EPOCHS,
                                   device = device)


Model compiled in 0.003 seconds


  0%|          | 0/10 [00:00<?, ?it/s]

Training Epoch 0:   0%|          | 0/1563 [00:00<?, ?it/s]

Testing Epoch 0:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.8421 | train_acc: 0.3216 | test_loss: 1.9882 | test_acc: 0.3779 | train_time: 61.552 | test_time: 3.883


Training Epoch 1:   0%|          | 0/1563 [00:00<?, ?it/s]

Testing Epoch 1:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 2 | train_loss: 1.2343 | train_acc: 0.5638 | test_loss: 1.1926 | test_acc: 0.5886 | train_time: 61.627 | test_time: 4.471


Training Epoch 2:   0%|          | 0/1563 [00:00<?, ?it/s]

Testing Epoch 2:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 3 | train_loss: 1.0032 | train_acc: 0.6532 | test_loss: 1.0581 | test_acc: 0.6322 | train_time: 62.178 | test_time: 3.971


Training Epoch 3:   0%|          | 0/1563 [00:00<?, ?it/s]

Testing Epoch 3:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 4 | train_loss: 0.8677 | train_acc: 0.7043 | test_loss: 0.9678 | test_acc: 0.6646 | train_time: 62.546 | test_time: 3.956


Training Epoch 4:   0%|          | 0/1563 [00:00<?, ?it/s]

Testing Epoch 4:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 5 | train_loss: 0.7562 | train_acc: 0.7459 | test_loss: 0.8452 | test_acc: 0.7105 | train_time: 62.491 | test_time: 4.679


Training Epoch 5:   0%|          | 0/1563 [00:00<?, ?it/s]

Testing Epoch 5:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 6 | train_loss: 0.6523 | train_acc: 0.7796 | test_loss: 0.7663 | test_acc: 0.7486 | train_time: 62.110 | test_time: 3.968


Training Epoch 6:   0%|          | 0/1563 [00:00<?, ?it/s]

Testing Epoch 6:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 7 | train_loss: 0.5769 | train_acc: 0.8068 | test_loss: 0.7139 | test_acc: 0.7690 | train_time: 62.184 | test_time: 4.047


Training Epoch 7:   0%|          | 0/1563 [00:00<?, ?it/s]

Testing Epoch 7:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 8 | train_loss: 0.5117 | train_acc: 0.8283 | test_loss: 0.7462 | test_acc: 0.7569 | train_time: 62.332 | test_time: 4.530


Training Epoch 8:   0%|          | 0/1563 [00:00<?, ?it/s]

Testing Epoch 8:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 9 | train_loss: 0.4414 | train_acc: 0.8510 | test_loss: 0.7097 | test_acc: 0.7746 | train_time: 62.230 | test_time: 4.268


Training Epoch 9:   0%|          | 0/1563 [00:00<?, ?it/s]

Testing Epoch 9:   0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 10 | train_loss: 0.4054 | train_acc: 0.8653 | test_loss: 0.6586 | test_acc: 0.7953 | train_time: 62.086 | test_time: 4.594


In [34]:
import pandas as pd

result_single_df = pd.DataFrame(single_run_compile_results)
result_single_df

Unnamed: 0,train_loss,train_acc,test_loss,test_acc,training_epochs_time,testing_epochs_time
0,1.84213,0.321597,1.988219,0.377895,61.551506,3.882736
1,1.234294,0.56376,1.192572,0.588558,61.626789,4.470502
2,1.003203,0.653211,1.058069,0.632188,62.178335,3.971338
3,0.867737,0.704315,0.967818,0.664637,62.546393,3.956136
4,0.756172,0.745901,0.84522,0.710463,62.490665,4.679305
5,0.652319,0.779551,0.766333,0.748602,62.110089,3.968225
6,0.576868,0.806762,0.713863,0.76897,62.183524,4.046636
7,0.511691,0.828275,0.746231,0.756889,62.33177,4.529892
8,0.441436,0.851048,0.709745,0.774561,62.230251,4.267669
9,0.405375,0.865343,0.658639,0.795327,62.085791,4.593671


In [35]:
def create_compiled_model():
  model,transforms = create_model()
  model.to(device)

  compiled_model = torch.compile(model)
  return compiled_model


def train_compiled_model(model=compiled_model,
                         epochs = NUM_EPOCHS,
                         lerninig_rate = LEARNING_RATE,
                         disable_progress_bar = False):




  loss_fn = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(),
                               lr=LEARNING_RATE)


  results = train(model = compiled_model,
                  train_dataloader = train_dataloader,
                  test_dataloader = test_dataloader,
                  loss_fn = loss_fn,
                  optimizer = optimizer,
                  epochs = NUM_EPOCHS,
                  device = device,
                  disable_progress_bar = disable_progress_bar)
  return results



In [36]:
NUM_RUNS = 5
NUM_EPOCHS = 5



In [37]:
model = create_compiled_model()


compiled_results_multiple_runs = []

# Run compiled model for multiple runs
for i in tqdm(range(NUM_RUNS)):
    print(f"[INFO] Run {i+1} of {NUM_RUNS} for compiled model")

    # Train the compiled model (note: the model will only be compiled once and then re-used for subsequent runs)
    results = train_compiled_model(model=model, epochs=NUM_EPOCHS, disable_progress_bar=True)
    compiled_results_multiple_runs.append(results)

  0%|          | 0/5 [00:00<?, ?it/s]

[INFO] Run 1 of 5 for compiled model
Epoch: 1 | train_loss: 0.3233 | train_acc: 0.8934 | test_loss: 0.6258 | test_acc: 0.8030 | train_time: 49.812 | test_time: 3.591
Epoch: 2 | train_loss: 0.3204 | train_acc: 0.8929 | test_loss: 0.6192 | test_acc: 0.8007 | train_time: 49.750 | test_time: 3.501
Epoch: 3 | train_loss: 0.3229 | train_acc: 0.8929 | test_loss: 0.6157 | test_acc: 0.8014 | train_time: 50.148 | test_time: 4.063
Epoch: 4 | train_loss: 0.3202 | train_acc: 0.8930 | test_loss: 0.6303 | test_acc: 0.8012 | train_time: 49.936 | test_time: 3.475
Epoch: 5 | train_loss: 0.3207 | train_acc: 0.8941 | test_loss: 0.6250 | test_acc: 0.8017 | train_time: 50.035 | test_time: 3.499
[INFO] Run 2 of 5 for compiled model
Epoch: 1 | train_loss: 0.3203 | train_acc: 0.8925 | test_loss: 0.6191 | test_acc: 0.8052 | train_time: 50.288 | test_time: 3.846
Epoch: 2 | train_loss: 0.3222 | train_acc: 0.8936 | test_loss: 0.6154 | test_acc: 0.8028 | train_time: 50.040 | test_time: 3.471
Epoch: 3 | train_loss: 

In [38]:
compile_result = []

for result in compiled_results_multiple_runs:
  result_df = pd.DataFrame(result)
  compile_result.append(result_df)
compile_result = pd.concat(compile_result)

compile_result

Unnamed: 0,train_loss,train_acc,test_loss,test_acc,training_epochs_time,testing_epochs_time
0,0.323257,0.893354,0.625777,0.803015,49.811639,3.590881
1,0.320428,0.892854,0.619221,0.800719,49.750396,3.500628
2,0.322929,0.892934,0.615659,0.801418,50.147866,4.062627
3,0.320155,0.893014,0.630317,0.801218,49.936329,3.474859
4,0.32072,0.894114,0.624963,0.801717,50.035484,3.499135
0,0.320261,0.892514,0.619073,0.805212,50.28779,3.8465
1,0.322167,0.893594,0.615373,0.802815,50.039864,3.471402
2,0.320948,0.894054,0.614857,0.802416,50.114424,4.071576
3,0.318341,0.894974,0.609097,0.801717,50.000984,3.506621
4,0.320259,0.894794,0.618774,0.803115,49.828467,3.585568
