
**Install requirements**

In [14]:
!pip3 install torch torchvision torchaudio



In [15]:
!pip3 install 'tqdm'



**Import libraries**

In [16]:
import os
import logging
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet

from PIL import Image
from tqdm import tqdm

**Set Arguments**

In [17]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 102 # 101 + 1: There is am extra Background class that should be removed

BATCH_SIZE = 256     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 1e-3            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 10      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

**Define Data Preprocessing**

In [18]:
# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) # Normalizes tensor with mean and standard deviation
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

**Prepare Dataset**

In [19]:
from typing import Hashable
from torchvision.datasets import VisionDataset

from PIL import Image

import os
import os.path
import sys


def pil_loader(path):
    #print(path)
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')


class Caltech(VisionDataset):
    def __init__(self, root, split='train', transform=None, target_transform=None):
        super(Caltech, self).__init__(root, transform=transform, target_transform=target_transform)
        # root is the directory from which we take the data

        self.split = split # This defines the split you are going to use
                           # (split files are called 'train.txt' and 'test.txt')
                            # for ex. split="train" means we are currently defining a Caltech object for the training phase
                            #the alternative would be "test"
        self.samples = []
        self.labels = []
        self.classes = {}
        self.indice = 0


        if split == "train":
          listdir = os.path.join("Caltech101", "train.txt")
        else:
          listdir = os.path.join("Caltech101", "test.txt")

        #print("list_dir: ",listdir)
        file_list = open(listdir,'r')
        # Itera su tutte le sottocartelle nella cartella root
        for file_name in file_list:
            #print("file_name: ",file_name)
            dir_name = file_name.split("/")[0]
            if dir_name not in self.classes:
              self.classes[dir_name] = self.indice
              self.indice +=1
            #print("dir_name: ",dir_name)
            if dir_name != "BACKGROUND_Google":
                dir_path = os.path.join(root, dir_name)
                #print("dir_path: ",dir_path)
                # Assicurati che sia effettivamente una cartella e non un file
                if os.path.isdir(dir_path):
                    # Itera su tutti i file nella sottocartella
                        file_path = os.path.join(root, file_name).split("\n")[0]
                        #print("file_path: ",file_path)
                        # Aggiungi il percorso del file a samples
                        self.samples.append(file_path)
                        # Aggiungi il nome della sottocartella a classes
                        self.labels.append(self.classes[dir_name])

        '''
        - Here you should implement the logic for reading the splits files and accessing elements
        - If the RAM size allows it, it is faster to store all data in memory
        - PyTorch Dataset classes use indexes to read elements
        - You should provide a way for the __getitem__ method to access the image-label pair
          through the index
        - Labels should start from 0, so for Caltech you will have lables 0...100 (excluding the background class)
        '''



    def __getitem__(self, index):
        '''
        __getitem__ should access an element through its index
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        '''

       # Provide a way to access image and label via index
                           # Image should be a PIL Image
                           # label can be int
        image_path = self.samples[index]
        label = self.labels[index]

        image = pil_loader(image_path)
        # Applies preprocessing when accessing the image
        if self.transform is not None:
            image = self.transform(image)



        return image, label

    def __len__(self):
        '''
        The __len__ method returns the length of the dataset
        It is mandatory, as this is used by several other components
        '''
        length = len(self.samples) # Provide a way to get the length (number of elements) of the dataset
        return length

In [20]:
from numpy.lib.index_tricks import IndexExpression
# Clone github repository with data
import pathlib
import sklearn.model_selection as ms
print(pathlib.Path().resolve())
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/MachineLearning2020/Homework2-Caltech101.git
  !mv 'Homework2-Caltech101' 'Caltech101'

DATA_DIR = 'Caltech101/101_ObjectCategories'

# Prepare Pytorch train/test Datasets
train_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform)
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

print('Initial Train Dataset: {}'.format(len(train_dataset)))
#img,label= train_dataset.__getitem__(42)
indexes = list(range(train_dataset.__len__()))
train_indexes,val_indexes = ms.train_test_split(indexes) # split the indices for your train split

val_dataset = Subset(train_dataset, val_indexes)
train_dataset = Subset(train_dataset, train_indexes)


# Check dataset sizes
print('Actual Train Dataset: {}'.format(len(train_dataset)))
print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))


/content
Initial Train Dataset: 5784
Actual Train Dataset: 4338
Valid Dataset: 1446
Test Dataset: 2893


**Prepare Dataloaders**

In [21]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4,drop_last=True)

test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)



**Prepare Network**

In [22]:
net = alexnet(weights= 'IMAGENET1K_V1') # Loading AlexNet model

# AlexNet has 1000 output neurons, corresponding to the 1000 ImageNet's classes
# We need 101 outputs for Caltech-101

#4096 is the number of features the FNN accepts as input
net.classifier[6] = nn.Linear(4096, NUM_CLASSES) # nn.Linear in pytorch is a fully connected layer
                                                 # The convolutional layer is nn.Conv2d

# We just changed the last layer of AlexNet with a new fully connected layer with 101 outputs
# It is strongly suggested to study torchvision.models.alexnet source code

**Prepare Training**

In [23]:
# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;)
parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet

# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)
print(len(train_dataloader)) #it will print the number of batches
#here prints 16, because BATCH_SIZE=265 so 265*16=496 as the arg in Linear above

16


**Train**

In [24]:
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

cudnn.benchmark # Calling this optimizes runtime

current_step = 0
# Start iterating over the epochs
for epoch in range(NUM_EPOCHS):
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))

  # Iterate over the dataset
  for images, labels in train_dataloader:   #iterate over the batches
    # Bring data over the device of choice

    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    net.train() # Sets module in training mode

    # PyTorch, by default, accumulates gradients after each backward pass
    # We need to manually set the gradients to zero before starting a new iteration
    optimizer.zero_grad() # Zero-ing the gradients

    # Forward pass to the network
    outputs = net(images)

    # Compute loss based on output and ground truth
    loss = criterion(outputs, labels)

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    # Compute gradients for each layer and update weights
    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on accumulated gradients

    current_step += 1

  # Step the scheduler
  scheduler.step()

Starting epoch 1/10, LR = [0.001]




Step 0, Loss 4.709515571594238
Step 10, Loss 3.298811674118042
Starting epoch 2/10, LR = [0.001]
Step 20, Loss 2.473912477493286
Step 30, Loss 1.8796838521957397
Starting epoch 3/10, LR = [0.001]
Step 40, Loss 1.3452523946762085
Starting epoch 4/10, LR = [0.001]
Step 50, Loss 0.9331698417663574
Step 60, Loss 0.5287448763847351
Starting epoch 5/10, LR = [0.001]
Step 70, Loss 0.4764983057975769
Starting epoch 6/10, LR = [0.001]
Step 80, Loss 0.29053783416748047
Step 90, Loss 0.358356237411499
Starting epoch 7/10, LR = [0.001]
Step 100, Loss 0.3183382451534271
Step 110, Loss 0.23424965143203735
Starting epoch 8/10, LR = [0.001]
Step 120, Loss 0.2037646323442459
Starting epoch 9/10, LR = [0.001]
Step 130, Loss 0.20551839470863342
Step 140, Loss 0.1752626746892929
Starting epoch 10/10, LR = [0.001]
Step 150, Loss 0.1000041663646698


In [25]:
print(len(val_dataset.dataset))

5784


**Validation**

In [26]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
#print("Ciao",len(val_dataloader))

# for images, labels in tqdm(train_dataloader):

#  # print(len(images))
#   #print(len(labels))
#   images = images.to(DEVICE)
#   labels = labels.to(DEVICE)

for images, labels in tqdm(val_dataloader):
  #print(images)
  #print(labels)
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = net(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(val_dataset))
print('Validation Accuracy: {}'.format(accuracy))

100%|██████████| 5/5 [00:06<00:00,  1.36s/it]

Validation Accuracy: 0.7510373443983402





**Test**

In [27]:
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(test_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = net(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(test_dataset))

print('Test Accuracy: {}'.format(accuracy))

100%|██████████| 12/12 [00:14<00:00,  1.21s/it]

Test Accuracy: 0.8461804355340476



