<a href="https://colab.research.google.com/github/sc-nun216/5m-data-3.8-computer-vision/blob/main/notebooks/computer_vision_gpu_lesson.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# We'll use the `torchvision` library to load a dataset and apply data augmentation techniques:
from torchvision import datasets, transforms, models

In [None]:
# Define transformations
# This notebook fine-tunes an ImageNet-pretrained ResNet-18 with a frozen backbone,
# so the inputs must be normalised with the ImageNet channel statistics the backbone
# was trained on; a generic (0.5, 0.5, 0.5) normalisation shifts the input
# distribution away from what the frozen layers expect.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to match the input size of the model
    transforms.ToTensor(),  # Convert PIL images to float tensors scaled to [0, 1]
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)  # Per-channel ImageNet normalisation
])

# Load the CIFAR-10 dataset (downloaded into ./data on first use)
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Create data loaders: shuffle only the training split so evaluation order stays fixed
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

100%|██████████| 170M/170M [00:03<00:00, 43.0MB/s]


In [None]:
# Check whether PyTorch can see a CUDA device in this runtime
torch.cuda.is_available()

True

In [None]:
# Pick the compute device first: the first CUDA GPU when available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Start from a ResNet-18 initialised with its default pre-trained weights
model = models.resnet18(weights='DEFAULT')

# Freeze the whole backbone so none of its parameters receive gradients
model.requires_grad_(False)

# Swap the final fully connected layer for a fresh 10-way head (CIFAR-10 has 10 classes).
# The new layer is created after the freeze, so its parameters remain trainable.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=10)

# Move the model onto the chosen device
model.to(device)

# Loss for multi-class classification
criterion = nn.CrossEntropyLoss()
# The optimizer is handed only the new head's parameters
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 190MB/s]


In [None]:
# Display the model's repr to inspect its layer structure
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
# Function to train the CNN
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    After every epoch the mean per-batch loss is printed, followed by a final
    'Finished Training' line once all epochs are done.

    Args:
        model: The ``nn.Module`` to train; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable computing the loss from ``(outputs, labels)``.
        optimizer: Optimizer whose ``zero_grad``/``step`` drive the update.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        None. Progress is reported through ``print``.
    """
    # Move batches to wherever the model's weights live instead of relying on a
    # notebook-level `device` global that may be undefined or out of sync.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    # NOTE: train mode also makes BatchNorm layers update their running statistics,
    # even when their parameters are frozen.
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0  # counted while iterating, so loaders without __len__ work too
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        # An empty loader has no defined mean loss; report nan instead of dividing by zero
        mean_loss = running_loss / num_batches if num_batches else float('nan')
        print(f'Epoch {epoch + 1}, Loss: {mean_loss}')
    print('Finished Training')

# Evaluating the CNN
def evaluate_model(model, test_loader):
    """Print the classification accuracy of ``model`` over ``test_loader``.

    The model is switched to evaluation mode and gradients are disabled while
    predictions are compared with the labels. The accuracy is printed as a
    whole-number percentage (floored by integer division).

    Args:
        model: The ``nn.Module`` to evaluate.
        test_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        None. The result is reported through ``print``.
    """
    # Move batches to wherever the model's weights live instead of relying on a
    # notebook-level `device` global that may be undefined or out of sync.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(target_device), labels.to(target_device)
            outputs = model(images)
            # Class index with the highest raw score/logit for each image
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        # Nothing was evaluated, so avoid dividing by zero
        print('No test images were provided; accuracy is undefined.')
        return
    print(f'Accuracy of the network on the test images: {100 * correct // total} %')

In [None]:
train_model(model, train_loader, criterion, optimizer, num_epochs=2)

Epoch 1, Loss: 0.7942947456928987
Epoch 2, Loss: 0.6364268329154202
Finished Training


In [None]:
evaluate_model(model, test_loader)

Accuracy of the network on the test images: 79 %


> Try training for more epochs and/or adding data augmentation, and see if you can increase the accuracy.

In [None]:
train_model(model, train_loader, criterion, optimizer, num_epochs=10)

Epoch 1, Loss: 0.6081058047497341
Epoch 2, Loss: 0.6049900157037486
Epoch 3, Loss: 0.6011827529067804
Epoch 4, Loss: 0.5972037499082905
Epoch 5, Loss: 0.5938974065671574
Epoch 6, Loss: 0.5873403680663953
Epoch 7, Loss: 0.5835435379031981
Epoch 8, Loss: 0.587487579316797
Epoch 9, Loss: 0.5829463099354135
Epoch 10, Loss: 0.5900997756596033
Finished Training


In [None]:
evaluate_model(model, test_loader)

Accuracy of the network on the test images: 80 %
