<a href="https://colab.research.google.com/github/sachinthadilshann/pytorch_course_by_DanielBourke/blob/main/CIFAR10_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Colab link: https://colab.research.google.com/drive/1Odhiw6Y3F5LyV7YltHC9MdL6PxBw7edC?usp=sharing

In [None]:
import torch
import torchvision

# Report the library builds in use so results can be tied to specific versions
print(f"PyTorch version: {torch.__version__}",
      f"TorchVision version: {torchvision.__version__}",
      sep="\n")

# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using device: {device}")

In [None]:
# Pretrained weight set and the preprocessing pipeline that ships with it
model_weights = torchvision.models.ResNet50_Weights.IMAGENET1K_V2 # <- use the latest weights (could also use .DEFAULT)
transforms = model_weights.transforms()

# Instantiate ResNet50 initialised from those weights
model = torchvision.models.resnet50(weights=model_weights)

# Tally the element count of every parameter tensor in the model
parameter_sizes = [weight.numel() for weight in model.parameters()]
total_params = sum(parameter_sizes)

print(f"Total parameters of model: {total_params} (the more parameters, the more GPU memory the model will use, the more *relative* of a speedup you'll get)")
print(f"Model transforms:\n{transforms}")

In [None]:
def create_model(num_classes=10):
  """Build a pretrained ResNet50 with a new classification head.

  Args:
    num_classes: Number of outputs of the replacement ``fc`` layer
      (defaults to 10).

  Returns:
    A ``(model, transforms)`` tuple: the ResNet50 loaded with the
    ``IMAGENET1K_V2`` weights whose final ``fc`` layer has been replaced by a
    newly initialised ``torch.nn.Linear``, and the preprocessing transforms
    obtained from those weights.
  """
  model_weights = torchvision.models.ResNet50_Weights.IMAGENET1K_V2
  transforms = model_weights.transforms()
  model = torchvision.models.resnet50(weights=model_weights)

  # Take the head's input width from the layer being replaced instead of
  # hard-coding it, so the two can never drift apart.
  model.fc = torch.nn.Linear(in_features=model.fc.in_features,
                             out_features=num_classes)
  return model, transforms

# Build the model with the default 10 output classes and keep the transforms returned alongside it
model, transforms = create_model()

In [None]:
import torch

# torch.cuda.mem_get_info() needs a CUDA device, but `device` is allowed to fall
# back to "cpu". Report zero GPU memory in that case so later cells still run
# (they then select the smaller batch/image configuration).
if torch.cuda.is_available():
  total_free_gpu_memory, total_gpu_memory = torch.cuda.mem_get_info()
else:
  total_free_gpu_memory, total_gpu_memory = 0, 0
print(f"Total free GPU memory: {round(total_free_gpu_memory * 1e-9, 3)} GB")
print(f"Total GPU memory: {round(total_gpu_memory * 1e-9, 3)} GB")

In [None]:

# Free GPU memory expressed in gigabytes (bytes * 1e-9), rounded to 3 decimals
total_free_gpu_memory_gb = round(total_free_gpu_memory * 1e-9, 3)

# 16 GB or more free selects the larger configuration; anything below selects the smaller one
BATCH_SIZE, IMAGE_SIZE = (128, 224) if total_free_gpu_memory_gb >= 16 else (32, 128)
print(f"GPU memory available is {total_free_gpu_memory_gb} GB, using batch size of {BATCH_SIZE} and image size {IMAGE_SIZE}")

In [None]:
# Point both the crop and the resize targets of the transforms at IMAGE_SIZE
transforms.crop_size = transforms.resize_size = IMAGE_SIZE
print(f"Updated data transforms:\n{transforms}")

In [None]:
import torch
import torchvision

# Use the `transforms` object that came with the pretrained weights (already
# adjusted to IMAGE_SIZE in the previous cell) instead of a bare ToTensor(),
# so the model actually receives the preprocessing configured for it.
train_dataset = torchvision.datasets.CIFAR10(root='.',
                                             train=True,
                                             download=True,
                                             transform=transforms)

# NOTE: the `test_datset` spelling is kept because later cells refer to this name.
test_datset = torchvision.datasets.CIFAR10(root='.',
                                           train=False,
                                           download=True,
                                           transform=transforms)



In [None]:
# Number of samples in each split
train_len, test_len = len(train_dataset), len(test_datset)

print(f"Train dataset length: {train_len}",
      f"Test dataset length: {test_len}",
      sep="\n")

In [None]:
# Show the second element of the first training sample
# (presumably the class label, following torchvision's (image, target) convention — confirm)
train_dataset[0][1]

In [None]:
from torch.utils.data import DataLoader
import os

# os.cpu_count() can return None when the CPU count is undetermined, and
# DataLoader(num_workers=...) needs an integer, so fall back to 0 (load in the
# main process). The `NUM_WOKERS` spelling is kept because later cells use it.
NUM_WOKERS = os.cpu_count() or 0
NUM_WOKERS

In [None]:


# Training batches are reshuffled every epoch
train_dataloader = DataLoader(dataset=train_dataset,
                             batch_size=BATCH_SIZE,
                             shuffle=True,
                             num_workers=NUM_WOKERS)

# Evaluation does not benefit from shuffling; a fixed order keeps test batches
# identical from epoch to epoch and run to run
test_dataloader = DataLoader(dataset=test_datset,
                             batch_size=BATCH_SIZE,
                             shuffle=False,
                             num_workers=NUM_WOKERS)

print(f"Train dataloader length: {len(train_dataloader)} batches of size {BATCH_SIZE}")
print(f"Test dataloader length: {len(test_dataloader)} batches of size {BATCH_SIZE}")
print(f"Using number of workers: {NUM_WOKERS} (generally more workers means faster dataloading from CPU to GPU)")

In [None]:
from prompt_toolkit.shortcuts import progress_bar
import time
from tqdm.auto import tqdm
from typing import Dict, List, Tuple
import torch

def train_step(epoch: int,
               model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn:torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device,
               disable_progress_bar: bool = False) -> Tuple[float,float]:
  """Run one training pass of `model` over every batch in `dataloader`.

  Args:
    epoch: Epoch number, used only in the progress-bar description.
    model: Model to train; it is switched to training mode.
    dataloader: Iterable of ``(X, y)`` batches.
    loss_fn: Callable computing the loss from ``(predictions, targets)``.
    optimizer: Optimizer stepped once per batch.
    device: Device each batch is moved to before the forward pass.
    disable_progress_bar: When True, the tqdm progress bar is not shown.

  Returns:
    ``(train_loss, train_acc)``: the per-batch loss and the per-batch accuracy,
    each averaged over the number of batches in `dataloader`.
  """
  model.train()

  train_loss, train_acc = 0, 0

  progress_bar = tqdm(enumerate(dataloader),
                      desc=f"Training Epoch {epoch}",
                      total=len(dataloader),
                      disable=disable_progress_bar)

  for batch, (X, y) in progress_bar:

    X, y = X.to(device), y.to(device)

    # Forward pass and loss
    y_pred = model(X)
    loss = loss_fn(y_pred, y)
    train_loss += loss.item()

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Take the argmax of the raw outputs directly, as test_step does: softmax is
    # monotonic, so applying it first cannot change which class wins.
    y_pred_class = y_pred.argmax(dim=1)
    train_acc += (y_pred_class == y).sum().item()/len(y_pred)

    # Running averages over the batches seen so far
    progress_bar.set_postfix(
            {
                "train_loss": train_loss / (batch + 1),
                "train_acc": train_acc / (batch + 1),
            }
        )

  train_loss = train_loss / len(dataloader)
  train_acc = train_acc / len(dataloader)

  return train_loss, train_acc


def test_step(epoch: int,
              model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device: torch.device,
              disable_progress_bar: bool = False) -> Tuple[float, float]:
  """Evaluate `model` on every batch of `dataloader` without updating it.

  Args:
    epoch: Epoch number, used only in the progress-bar description.
    model: Model to evaluate; it is switched to eval mode.
    dataloader: Iterable of ``(X, y)`` batches.
    loss_fn: Callable computing the loss from ``(predictions, targets)``.
    device: Device each batch is moved to before the forward pass.
    disable_progress_bar: When True, the tqdm progress bar is not shown.

  Returns:
    ``(test_loss, test_acc)``: the per-batch loss and the per-batch accuracy,
    each averaged over the number of batches in `dataloader`.
  """
  model.eval()

  loss_total, acc_total = 0, 0
  num_batches = len(dataloader)

  batches = tqdm(enumerate(dataloader),
                 desc=f"Testing Epoch {epoch}",
                 total=num_batches,
                 disable=disable_progress_bar)

  # Forward passes run under inference mode
  with torch.inference_mode():
    for batch_idx, (inputs, targets) in batches:
      inputs, targets = inputs.to(device), targets.to(device)

      logits = model(inputs)
      loss_total += loss_fn(logits, targets).item()

      # Fraction of this batch whose highest-scoring class matches the target
      predicted = logits.argmax(dim=1)
      acc_total += ((predicted == targets).sum().item() / len(predicted))

      # Running averages over the batches seen so far
      batches_seen = batch_idx + 1
      batches.set_postfix(
          {
              "test_loss": loss_total / batches_seen,
              "test_acc": acc_total / batches_seen,
          }
      )

  return loss_total / num_batches, acc_total / num_batches


def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device,
          disable_progress_bar: bool = False) -> Dict[str, List]:
  """Alternate train_step and test_step for `epochs` epochs and collect metrics.

  Args:
    model: Model passed to both the training and the testing step.
    train_dataloader: Batches used by train_step.
    test_dataloader: Batches used by test_step.
    optimizer: Optimizer handed to train_step.
    loss_fn: Loss callable handed to both steps.
    epochs: Number of epochs to run.
    device: Device handed to both steps.
    disable_progress_bar: When True, all tqdm progress bars are disabled.

  Returns:
    A dict with one list per metric, each holding one entry per epoch:
    "train_loss", "train_acc", "test_loss", "test_acc",
    "training_epochs_time" and "testing_epochs_time" (wall-clock seconds).
  """
  metric_names = ("train_loss",
                  "train_acc",
                  "test_loss",
                  "test_acc",
                  "training_epochs_time",
                  "testing_epochs_time")
  results = {name: [] for name in metric_names}

  for epoch in tqdm(range(epochs), disable=disable_progress_bar):

    # Time the training pass
    started_at = time.time()
    train_loss, train_acc = train_step(epoch=epoch,
                                       model=model,
                                       dataloader=train_dataloader,
                                       loss_fn=loss_fn,
                                       optimizer=optimizer,
                                       device=device,
                                       disable_progress_bar=disable_progress_bar)
    train_epoch_time = time.time() - started_at

    # Time the evaluation pass
    started_at = time.time()
    test_loss, test_acc = test_step(epoch=epoch,
                                    model=model,
                                    dataloader=test_dataloader,
                                    loss_fn=loss_fn,
                                    device=device,
                                    disable_progress_bar=disable_progress_bar)
    test_epoch_time = time.time() - started_at

    print(
        f"Epoch: {epoch+1} | "
        f"train_loss: {train_loss:.4f} | "
        f"train_acc: {train_acc:.4f} | "
        f"test_loss: {test_loss:.4f} | "
        f"test_acc: {test_acc:.4f} | "
        f"train_time: {train_epoch_time:.3f} | "
        f"test_time: {test_epoch_time:.3f}"
    )

    # Record this epoch's values, in the same order as metric_names
    epoch_values = (train_loss,
                    train_acc,
                    test_loss,
                    test_acc,
                    train_epoch_time,
                    test_epoch_time)
    for name, value in zip(metric_names, epoch_values):
      results[name].append(value)

  return results

In [None]:
# Hyperparameters for the single compiled-model training run below
NUM_EPOCHS = 10
LEARNING_RATE = 0.003


# Start from a freshly created model; note this also rebinds the notebook-level `transforms`
model,transforms = create_model()
model.to(device)

# The optimizer is built from the uncompiled model's parameters
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=LEARNING_RATE)

# Time only the torch.compile() call itself
# NOTE(review): torch.compile is documented as compiling lazily on first call, so this
# timing likely excludes the actual compilation done during the first batches — confirm
compile_start_time = time.time()

compiled_model = torch.compile(model)

compile_end_time = time.time()
compile_time = compile_end_time - compile_start_time
print(f"Model compiled in {compile_time:.3f} seconds")

# Train/evaluate the compiled model for NUM_EPOCHS epochs and keep the per-epoch metrics
single_run_compile_results = train(model = compiled_model,
                                   train_dataloader = train_dataloader,
                                   test_dataloader = test_dataloader,
                                   loss_fn = loss_fn,
                                   optimizer = optimizer,
                                   epochs = NUM_EPOCHS,
                                   device = device)


In [None]:
import pandas as pd

# Tabulate the single-run metrics: one column per metric list returned by train(), one row per epoch
result_single_df = pd.DataFrame(single_run_compile_results)
result_single_df

In [None]:
def create_compiled_model():
  """Create a new model, move it to `device`, and return it wrapped by torch.compile.

  The transforms returned by create_model() are not needed here and are discarded.
  """
  fresh_model, _ = create_model()
  fresh_model.to(device)
  return torch.compile(fresh_model)


def train_compiled_model(model=compiled_model,
                         epochs = NUM_EPOCHS,
                         lerninig_rate = LEARNING_RATE,
                         disable_progress_bar = False):
  """Train `model` with a new Adam optimizer and return the per-epoch metrics.

  Every argument is now honoured. Previously the body trained the notebook-level
  `compiled_model` with the global `NUM_EPOCHS` and `LEARNING_RATE`, regardless
  of what was passed in, while the optimizer was built from the `model`
  argument. When a different model was passed, the optimizer therefore updated
  a model that was not the one being trained.

  Args:
    model: Model to train. The default is the `compiled_model` object that
      existed when this function was defined.
    epochs: Number of epochs to train for. The default is the value
      `NUM_EPOCHS` had when this function was defined.
    lerninig_rate: Learning rate for the Adam optimizer (the misspelled name is
      kept so existing keyword callers keep working). The default is the value
      `LEARNING_RATE` had when this function was defined.
    disable_progress_bar: When True, tqdm progress bars are disabled.

  Returns:
    The results dict produced by train().
  """
  loss_fn = torch.nn.CrossEntropyLoss()
  # Build the optimizer from the same model that is handed to train() below
  optimizer = torch.optim.Adam(model.parameters(),
                               lr=lerninig_rate)

  results = train(model = model,
                  train_dataloader = train_dataloader,
                  test_dataloader = test_dataloader,
                  loss_fn = loss_fn,
                  optimizer = optimizer,
                  epochs = epochs,
                  device = device,
                  disable_progress_bar = disable_progress_bar)
  return results



In [None]:
# Multi-run experiment size: number of training runs, and epochs per run
# (this rebinds the NUM_EPOCHS value set for the earlier single run)
NUM_RUNS, NUM_EPOCHS = 5, 5



In [None]:
# Create one compiled model up front; the same `model` object is passed to every run below
model = create_compiled_model()


# One results dict (as returned by train_compiled_model) is collected per run
compiled_results_multiple_runs = []

# Run compiled model for multiple runs
for i in tqdm(range(NUM_RUNS)):
    print(f"[INFO] Run {i+1} of {NUM_RUNS} for compiled model")

    # Train the compiled model (note: the model will only be compiled once and then re-used for subsequent runs)
    # Per-epoch progress bars are disabled; only the outer per-run bar is shown
    results = train_compiled_model(model=model, epochs=NUM_EPOCHS, disable_progress_bar=True)
    compiled_results_multiple_runs.append(results)

In [None]:
# One DataFrame per run (a row per epoch), stacked into a single table.
# The per-run index is preserved, so index values repeat across runs.
per_run_frames = [pd.DataFrame(run_metrics) for run_metrics in compiled_results_multiple_runs]
compile_result = pd.concat(per_run_frames)

compile_result