<a href="https://colab.research.google.com/github/manuelescobar-dev/Advanced-Machine-Learning/blob/main/Homework1_AML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initialization


## Install Requirements

In [None]:
!pip3 install torch torchvision torchaudio



In [None]:
!pip3 install 'tqdm'



## Import libraries

In [None]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet

from PIL import Image
from tqdm import tqdm

## Set Arguments

In [None]:
# Set arguments

# Device selection: prefer CUDA, then Apple's MPS backend, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"

# 101 object categories + 1: there is an extra Background class that should be removed
NUM_CLASSES = 102

# Larger batches allow larger learning rates. Empirical heuristic: when the batch size
# changes, scale the learning rate by the same factor to get comparable results.
BATCH_SIZE = 256

LR = 1e-3            # Initial learning rate
MOMENTUM = 0.9       # SGD momentum; keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization strength; the default can be kept

NUM_EPOCHS = 30      # Number of passes over the training set
STEP_SIZE = 20       # Epochs between learning-rate decreases (step-down policy)
GAMMA = 0.1          # Factor the learning rate is multiplied by at each step-down

LOG_FREQUENCY = 10   # The training loop prints the loss every LOG_FREQUENCY steps

## Prepare Data

In [None]:
from torchvision.datasets import VisionDataset

from PIL import Image

import os
import os.path
import sys


def pil_loader(path):
    """Load the image stored at ``path`` and return it converted to RGB.

    The file is opened explicitly and handed to PIL so that it is closed when
    the ``with`` block exits, which avoids a ResourceWarning
    (https://github.com/python-pillow/Pillow/issues/835).
    """
    with open(path, 'rb') as handle:
        return Image.open(handle).convert('RGB')


class Caltech(VisionDataset):
    """Dataset of images listed in a ``<split>.txt`` file under ``root``.

    Each non-empty line of the split file is an image path relative to
    ``<root>/101_ObjectCategories``; the name of the image's parent directory
    is used as its class name.

    NOTE(review): class indices are assigned in order of first appearance in
    the split file, independently for every instance. Labels of the 'train'
    and 'test' datasets therefore only agree if both files introduce the
    classes in the same order -- confirm against the split files.
    """

    def __init__(self, root, split='train', transform=None, target_transform=None):
        """
        Args:
            root (str): Directory containing the split files and the
                ``101_ObjectCategories`` image folder.
            split (str): Name of the split file without extension
                (split files are called 'train.txt' and 'test.txt').
            transform (callable, optional): Applied to every loaded image.
            target_transform (callable, optional): Applied to every label.
        """
        super().__init__(root, transform=transform, target_transform=target_transform)

        self.split = split

        self.images = []   # image paths, relative to <root>/101_ObjectCategories
        self.labels = []   # integer class index of each entry in self.images
        self.classes = {}  # class name -> integer class index

        # Read the split file and assign labels
        split_file = os.path.join(root, f"{split}.txt")
        with open(split_file, "r") as f:
            for line in f:
                image_path = line.strip()
                if not image_path:
                    # A blank line would otherwise register an empty class name
                    # and a sample that cannot be loaded.
                    continue
                class_name = os.path.basename(os.path.dirname(image_path))
                # New class names receive the next free index
                label = self.classes.setdefault(class_name, len(self.classes))
                self.images.append(image_path)
                self.labels.append(label)

    def __getitem__(self, index):
        '''
        Load the sample at position ``index``.

        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        '''
        image = pil_loader(
            os.path.join(self.root, "101_ObjectCategories", self.images[index])
        )
        label = self.labels[index]

        # Preprocessing is applied lazily, every time a sample is accessed
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label

    def __len__(self):
        '''
        Return the number of samples in the dataset.
        It is mandatory, as this is used by several other components
        '''
        return len(self.images)

In [None]:
# Clone github repository with data
import pathlib
from sklearn.model_selection import train_test_split
import numpy as np
print(pathlib.Path().resolve())
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/MachineLearning2020/Homework2-Caltech101.git
  !mv 'Homework2-Caltech101' 'Caltech101'

DATA_DIR = 'Caltech101'

/content
Cloning into 'Homework2-Caltech101'...
remote: Enumerating objects: 9256, done.[K
remote: Total 9256 (delta 0), reused 0 (delta 0), pack-reused 9256[K
Receiving objects: 100% (9256/9256), 129.48 MiB | 30.86 MiB/s, done.
Resolving deltas: 100% (4/4), done.
Updating files: 100% (9149/9149), done.


In [None]:
def load_data(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5), validation=False):
    """Build the Caltech datasets and wrap them in DataLoaders.

    Args:
        mean: Per-channel mean passed to ``transforms.Normalize``.
        std: Per-channel standard deviation passed to ``transforms.Normalize``.
        validation (bool): When True, 25% of the training split is held out
            (stratified by label, fixed random_state) as a validation set.

    Returns:
        ``(train, test)`` when ``validation`` is False, or ``(train, val, test)``
        when it is True. Every element is a ``(dataloader, n_samples)`` tuple,
        where ``n_samples`` is the size of the dataset behind that dataloader.
    """
    # Define transforms for training phase
    train_transform = transforms.Compose([
        transforms.Resize(256),      # Resizes short side of the PIL image to 256
        transforms.CenterCrop(224),  # Crops a central square patch of the image.
                                     # 224 because torchvision's AlexNet needs a 224x224 input!
                                     # Remember this when applying different transformations
        transforms.ToTensor(),       # Turn PIL Image to torch.Tensor
        transforms.Normalize(mean, std),  # Normalizes tensor with mean and standard deviation
    ])
    # Define transforms for the evaluation phase
    eval_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    # Prepare Pytorch train/test Datasets
    train_dataset = Caltech(DATA_DIR, split='train', transform=train_transform)
    test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

    def _make_loader(dataset, training):
        # Training loaders shuffle and drop the last incomplete batch;
        # evaluation loaders keep every sample in a fixed order.
        return DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=training,
                          num_workers=4, drop_last=training)

    if validation:
        train_indexes, val_indexes = train_test_split(
            np.arange(len(train_dataset)),
            stratify=train_dataset.labels,
            test_size=0.25,
            random_state=42,
        )

        # Both subsets wrap train_dataset, so validation images go through
        # train_transform (currently identical to eval_transform).
        new_train_dataset = Subset(train_dataset, train_indexes)
        val_dataset = Subset(train_dataset, val_indexes)

        # Check dataset sizes
        print('Train Dataset: {}'.format(len(new_train_dataset)))
        print('Valid Dataset: {}'.format(len(val_dataset)))
        print('Test Dataset: {}'.format(len(test_dataset)))

        # Report the size of the reduced training subset, not of the full split
        return (
            (_make_loader(new_train_dataset, training=True), len(new_train_dataset)),
            (_make_loader(val_dataset, training=False), len(val_dataset)),
            (_make_loader(test_dataset, training=False), len(test_dataset)),
        )

    # Check dataset sizes
    print('Train Dataset: {}'.format(len(train_dataset)))
    print('Test Dataset: {}'.format(len(test_dataset)))

    return (
        (_make_loader(train_dataset, training=True), len(train_dataset)),
        (_make_loader(test_dataset, training=False), len(test_dataset)),
    )

## Training

In [None]:
def train(train_dataloader, model, loss_fn, optimizer, scheduler, size, val_dataloader=None):
    """Train ``model`` for NUM_EPOCHS epochs on ``train_dataloader``.

    Args:
        train_dataloader: Iterable yielding ``(images, labels)`` batches.
        model: Network to train; it is moved to DEVICE.
        loss_fn: Callable computing the loss from ``(outputs, labels)``.
        optimizer: Optimizer updating the parameters of ``model``.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        size: Unused; kept so existing callers keep working.
        val_dataloader: Optional DataLoader. When given, the model is evaluated
            after every epoch and saved to "best_model.pth" each time the
            validation accuracy improves.
    """
    model = model.to(DEVICE)  # this will bring the network to GPU if DEVICE is cuda
    cudnn.benchmark = True  # must be assigned: merely reading the attribute has no effect

    best_val_accuracy = float("-inf")
    current_step = 0
    # Start iterating over the epochs
    for epoch in range(NUM_EPOCHS):
        # get_last_lr() reports the learning rate actually in use
        print(
            "Starting epoch {}/{}, LR = {}".format(
                epoch + 1, NUM_EPOCHS, scheduler.get_last_lr()
            )
        )

        # Validation leaves the model in evaluation mode, so switch back every epoch
        model.train()

        # Iterate over the dataset
        for images, labels in train_dataloader:
            # Bring data over the device of choice
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            # PyTorch accumulates gradients across backward passes by default,
            # so they are reset before each new iteration
            optimizer.zero_grad()

            # Forward pass and loss against the ground truth
            outputs = model(images)
            loss = loss_fn(outputs, labels)

            # Log loss
            if current_step % LOG_FREQUENCY == 0:
                print("Step {}, Loss {}".format(current_step, loss.item()))

            loss.backward()   # backward pass: computes gradients
            optimizer.step()  # update weights based on the gradients

            current_step += 1

        if val_dataloader is not None:
            # Accuracy is measured over every sample of the validation dataset
            val_accuracy = validation(val_dataloader, model, len(val_dataloader.dataset))
            # Higher accuracy is better: checkpoint only on improvement
            if val_accuracy > best_val_accuracy:
                best_val_accuracy = val_accuracy
                torch.save(model, "best_model.pth")

        # Step the scheduler
        scheduler.step()

## Validation

In [None]:
def validation(val_dataloader, model, size=None):
    """Evaluate ``model`` on ``val_dataloader`` and return its accuracy.

    Args:
        val_dataloader: Iterable yielding ``(images, labels)`` batches.
        model: Network to evaluate; it is moved to DEVICE and left in
            evaluation mode.
        size: Number of samples used as the accuracy denominator. Defaults to
            ``len(val_dataloader.dataset)`` when omitted.

    Returns:
        float: number of correct predictions divided by ``size``.
    """
    model = model.to(DEVICE)  # this will bring the network to GPU if DEVICE is cuda
    model.eval()  # Set Network to evaluation mode

    if size is None:
        size = len(val_dataloader.dataset)

    running_corrects = 0
    # Gradients are not needed for evaluation
    with torch.no_grad():
        for images, labels in tqdm(val_dataloader):
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            # Forward Pass
            outputs = model(images)

            # Predicted class = index of the largest output along dim 1
            _, preds = torch.max(outputs, 1)

            # Update Corrects
            running_corrects += torch.sum(preds == labels).item()

    # Calculate Accuracy
    accuracy = running_corrects / float(size)

    print("Validation Accuracy: {}".format(accuracy))
    return accuracy

## Testing

In [None]:
def test(test_dataloader, model, size=None):
    """Evaluate ``model`` on ``test_dataloader``, print and return its accuracy.

    Args:
        test_dataloader: Iterable yielding ``(images, labels)`` batches.
        model: Network to evaluate; it is moved to DEVICE and left in
            evaluation mode.
        size: Number of samples used as the accuracy denominator. Defaults to
            ``len(test_dataloader.dataset)`` when omitted.

    Returns:
        float: number of correct predictions divided by ``size``.
    """
    model = model.to(DEVICE)  # this will bring the network to GPU if DEVICE is cuda
    model.eval()  # Set Network to evaluation mode

    if size is None:
        size = len(test_dataloader.dataset)

    running_corrects = 0
    # Gradients are not needed for evaluation
    with torch.no_grad():
        for images, labels in tqdm(test_dataloader):
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            # Forward pass through the model that was passed in (not a global)
            outputs = model(images)

            # Predicted class = index of the largest output along dim 1
            _, preds = torch.max(outputs, 1)

            # Update Corrects
            running_corrects += torch.sum(preds == labels).item()

    # Calculate Accuracy
    accuracy = running_corrects / float(size)

    print("Test Accuracy: {}".format(accuracy))
    return accuracy

# From Scratch

**Prepare Network**

In [None]:
# Build AlexNet; no pretrained weights are requested here
net = alexnet()

# torchvision's AlexNet ends in a fully connected layer with 1000 outputs
# (one per ImageNet class). Replace it with a new fully connected layer
# (nn.Linear; convolutional layers are nn.Conv2d) that has NUM_CLASSES outputs,
# reusing the input width of the layer being replaced.
net.classifier[6] = nn.Linear(net.classifier[6].in_features, NUM_CLASSES)

# It is strongly suggested to study torchvision.models.alexnet source code

In [None]:
# Loss function: cross entropy, the usual choice for classification
criterion = nn.CrossEntropyLoss()

# Parameters handed to the optimizer. Here every parameter of AlexNet is optimized.
# Submodules expose their own parameters (nn.Module objects, like AlexNet,
# implement the Composite Pattern), e.g. net.classifier.parameters() for the
# fully connected layers; see alexnet's source code for the convolutional ones.
parameters_to_optimize = net.parameters()

# Optimizer: updates the weights based on the loss. SGD with momentum is used.
optimizer = optim.SGD(
    parameters_to_optimize,
    lr=LR,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)

# Scheduler: changes the learning rate during training. The step(-down) policy
# multiplies the learning rate by GAMMA every STEP_SIZE epochs.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=STEP_SIZE,
    gamma=GAMMA,
)

In [None]:
import time

start = time.time()
(train_dataloader, train_size), (val_dataloader, val_size), (test_dataloader, test_size) = load_data(validation=True)
# train's sixth positional parameter is `size`, so the validation loader is passed by keyword
train(train_dataloader, net, criterion, optimizer, scheduler, train_size, val_dataloader=val_dataloader)
# test needs the number of test samples to compute the accuracy
test(test_dataloader, net, test_size)
end = time.time()
print("Time taken:", end - start)

Train Dataset: 4572
Valid Dataset: 1524
Test Dataset: 3049




Starting epoch 1/30, LR = [0.001]




Step 0, Loss 4.627164840698242
Step 10, Loss 4.625743865966797


100%|██████████| 6/6 [00:05<00:00,  1.12it/s]


Validation Accuracy: 2.0
Starting epoch 2/30, LR = [0.001]
Step 20, Loss 4.6240925788879395
Step 30, Loss 4.621675968170166


100%|██████████| 6/6 [00:05<00:00,  1.16it/s]

Validation Accuracy: 22.166666666666668
Starting epoch 3/30, LR = [0.001]





Step 40, Loss 4.616789817810059
Step 50, Loss 4.615464687347412


100%|██████████| 6/6 [00:05<00:00,  1.14it/s]

Validation Accuracy: 22.166666666666668
Starting epoch 4/30, LR = [0.001]





Step 60, Loss 4.613379955291748


 67%|██████▋   | 4/6 [00:05<00:02,  1.35s/it]


KeyboardInterrupt: 

# Transfer Learning