## 1 - Data preparation

In [8]:
import copy

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.backends import cudnn
from torch.utils.data import Subset, DataLoader
from torchvision import transforms
from torchvision.models import alexnet
from torchvision.models import resnet34, vgg16
from tqdm import tqdm

from caltech_dataset import Caltech

In [9]:
# Global configuration for the notebook. Every value below is read by later
# cells at the moment they build their objects (dataloaders, optimizer,
# scheduler), so re-run those cells after changing anything here.

# Use the GPU when one is visible, otherwise fall back to the CPU so the
# notebook still runs (slowly) on a machine without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

NUM_CLASSES = 101      # size of the replacement classification head
BATCH_SIZE = 64        # mini-batch size used by all three dataloaders
LR = 0.001             # initial SGD learning rate
MOMENTUM = 0.9         # SGD momentum
WEIGHT_DECAY = 5e-5    # L2 penalty passed to SGD
NUM_EPOCHS = 30        # number of passes over the training subset
STEP_SIZE = 20         # StepLR: epochs between learning-rate decays
GAMMA = 0.1            # StepLR: multiplicative decay factor
LOG_FREQUENCY = 10     # print the training loss every LOG_FREQUENCY steps
DATA_DIR = '101_ObjectCategories/'

#### Define transformations

In [3]:
# Preprocessing for the from-scratch experiments: resize the shorter side to
# 256, take the central 224x224 crop, convert to a tensor and normalise every
# channel with mean 0.5 / std 0.5.
train_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Evaluation uses the same deterministic pipeline; it is kept as a separate
# object so the training pipeline can later be changed independently.
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

#### Data preparation

In [4]:
# The "train" split is loaded twice: once with the training transform and once
# with the evaluation transform. Both views index the same samples, but the
# validation subset must never see training-time augmentation.
# NOTE(review): this assumes Caltech builds its sample list deterministically
# for identical arguments; the assertion below guards that assumption.
train_val_dataset = Caltech(DATA_DIR, split='train', transform=train_transform)
train_val_dataset_eval = Caltech(DATA_DIR, split='train', transform=eval_transform)
assert list(train_val_dataset_eval.labels) == list(train_val_dataset.labels), \
    'The two views of the train split are not aligned'

test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)

# Stratified 50/50 split of the train indices; the fixed seed keeps the split
# reproducible across runs.
train_indexes, val_indexes = train_test_split(train_val_dataset.indexes, train_size=0.5, random_state=42,
                                              stratify=train_val_dataset.labels)

# Training samples go through train_transform, validation samples through
# eval_transform.
train_dataset = Subset(train_val_dataset, train_indexes)
val_dataset = Subset(train_val_dataset_eval, val_indexes)

# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

# Only the training loader shuffles and drops the last incomplete batch;
# evaluation loaders must visit every sample exactly once.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

Train Dataset: 2892
Valid Dataset: 2892
Test Dataset: 2893


## 2 - Training from scratch

#### Model loading

In [19]:
# AlexNet with randomly initialised weights (no pre-training).
net = alexnet()
# Replace the final classifier layer with a NUM_CLASSES-way head. The input
# width is read from the layer being replaced instead of hard-coding 4096.
net.classifier[6] = nn.Linear(net.classifier[6].in_features, NUM_CLASSES)

criterion = nn.CrossEntropyLoss()

# Every parameter of the network is optimised.
parameters_to_optimize = net.parameters()
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
# Multiply the learning rate by GAMMA every STEP_SIZE epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

#### Training phase

In [None]:
# Train `net` for NUM_EPOCHS epochs, validate after every epoch, and keep a
# snapshot of the network with the best validation accuracy in `best_model`.
net = net.to(DEVICE)

# The flag has to be assigned: a bare `cudnn.benchmark` expression does nothing.
cudnn.benchmark = True
current_step = 0
# Best validation accuracy seen so far (the historical name is kept so other
# cells that may read it keep working).
min_accuracy = 0
best_model = None

for epoch in range(NUM_EPOCHS):
    # get_last_lr() reports the rate currently in use; get_lr() is only meant
    # to be called from inside scheduler.step().
    print('Starting epoch {}/{}, LR = {}'.format(epoch + 1, NUM_EPOCHS, scheduler.get_last_lr()))

    # ---- training pass ----
    net.train()  # enable dropout etc.; set once per epoch, not once per batch
    for images, labels in train_dataloader:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        optimizer.zero_grad()

        outputs = net(images)
        loss = criterion(outputs, labels)

        if current_step % LOG_FREQUENCY == 0:
            print('Step {}, Loss {}'.format(current_step, loss.item()))

        loss.backward()
        optimizer.step()
        current_step += 1

    # ---- validation pass ----
    net.train(False)
    val_loss_epoch = 0
    val_iter = 0
    val_samples = 0
    numCorr = 0
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for inputs, targets in val_dataloader:
            inputs = inputs.to(DEVICE)
            targets = targets.to(DEVICE)
            val_iter += 1
            val_samples += inputs.size(0)
            outputs = net(inputs)
            val_loss_epoch += criterion(outputs, targets).item()
            _, predicted = torch.max(outputs, 1)
            numCorr += torch.sum(predicted == targets).item()
    val_accuracy = (numCorr / val_samples)
    avg_val_loss = val_loss_epoch / val_iter
    print('Validation: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))

    # Snapshot the network. A plain `best_model = net` would only alias the
    # live model, which keeps changing in later epochs. The `is None` test
    # guarantees best_model exists even if accuracy never rises above 0.
    if best_model is None or val_accuracy > min_accuracy:
        min_accuracy = val_accuracy
        best_model = copy.deepcopy(net)
    scheduler.step()

#### Validation accuracy

In [None]:
# Accuracy of the selected model on the validation subset.
best_model = best_model.to(DEVICE)
best_model.train(False)  # evaluation mode

running_corrects = 0
# Inference only: disable autograd so no graph is built.
with torch.no_grad():
    for images, labels in tqdm(val_dataloader):
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        # Forward Pass
        outputs = best_model(images)

        # Get predictions
        _, preds = torch.max(outputs, 1)

        # Update Corrects
        running_corrects += torch.sum(preds == labels).item()

# Calculate Accuracy -- normalise by the size of the set that was actually
# iterated (validation, not test).
accuracy = running_corrects / float(len(val_dataset))

print('Validation Accuracy: {}'.format(accuracy))

#### Test accuracy

In [None]:
# Accuracy of the selected model on the held-out test split.
best_model = best_model.to(DEVICE)
best_model.train(False)  # evaluation mode

running_corrects = 0
# Inference only: disable autograd so no graph is built.
with torch.no_grad():
    for images, labels in tqdm(test_dataloader):
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        # Forward Pass
        outputs = best_model(images)

        # Get predictions
        _, preds = torch.max(outputs, 1)

        # Update Corrects
        running_corrects += torch.sum(preds == labels).item()

# Calculate Accuracy
accuracy = running_corrects / float(len(test_dataset))

print('Test Accuracy: {}'.format(accuracy))


## 3 - Transfer learning

#### Loading the pre-trained model

In [10]:
# AlexNet initialised with the pre-trained weights shipped by torchvision.
net = alexnet(pretrained=True)
# Replace the final classifier layer with a freshly initialised
# NUM_CLASSES-way head. The input width is read from the layer being replaced
# instead of hard-coding 4096.
net.classifier[6] = nn.Linear(net.classifier[6].in_features, NUM_CLASSES)

criterion = nn.CrossEntropyLoss()

# LR, MOMENTUM, WEIGHT_DECAY, STEP_SIZE and GAMMA are read here, at
# construction time; re-run this cell after changing any of them.
parameters_to_optimize = net.parameters()
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

#### Change the mean and std to the ImageNet ones

In [11]:
# Same resize / centre-crop pipeline as before, but normalised with the
# ImageNet per-channel mean and std so the inputs match what the pre-trained
# weights expect.
# These names are only picked up by datasets constructed after this cell, so
# the data preparation cell has to be re-run for the change to take effect.
train_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

#### Hyperparameters: edit this cell to run experiments

In [21]:
# Overrides for the transfer-learning experiments.
# These are plain module-level constants: the dataloaders, optimizer and
# scheduler only see the new values if their cells are re-run afterwards.

# Training schedule
NUM_EPOCHS = 30
BATCH_SIZE = 128

# Optimiser / StepLR settings
LR = 0.01
STEP_SIZE = 20
GAMMA = 0.05

#### Freeze convolutional layers

In [14]:
conv_layers = [0, 3, 6, 8, 10]
for layer in conv_layers:
    net.features[layer].weight.requires_grad = False

#### Freeze fully connected layers

In [15]:
fc_layers = [1, 4, 6]
for layer in fc_layers:
    net.classifier[layer].weight.requires_grad = False

#### Unfreeze all layers

In [None]:
conv_layers = [0, 3, 6, 8, 10]
fc_layers = [1, 4, 6]
for layer in conv_layers:
    net.features[layer].weight.requires_grad = True
for layer in fc_layers:
    net.classifier[layer].weight.requires_grad = True

## 4 - Data augmentation

#### Run experiments on transformations with this cell

In [18]:
# Training-time augmentation. Only train_transform is changed; evaluation
# keeps its deterministic pipeline. The new pipeline is only used by datasets
# constructed after this cell, so re-run the data preparation cell.
train_transform = transforms.Compose([transforms.Resize(256),
                                      # Random rotation in [-20, +20] degrees.
                                      transforms.RandomRotation((-20, +20)),
                                      # ColorJitter() with its default arguments (all 0) leaves
                                      # the image untouched, so explicit strengths are required
                                      # for it to augment anything. Tune these per experiment.
                                      transforms.ColorJitter(brightness=0.2, contrast=0.2,
                                                             saturation=0.2, hue=0.05),
                                      transforms.RandomHorizontalFlip(p=0.5),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
                                      ])

## 5 - Beyond AlexNet

#### ResNet34 implementation

In [6]:
# ResNet-34 with pre-trained weights and a new NUM_CLASSES-way head.
net = resnet34(pretrained=True)
# Read the input width from the existing layer instead of hard-coding 512.
net.fc = nn.Linear(net.fc.in_features, NUM_CLASSES)

#### VGG16 implementation

In [10]:
# VGG-16 with pre-trained weights.
net = vgg16(pretrained=True)
# Replace the final classifier layer with a NUM_CLASSES-way head, as is done
# for the AlexNet and ResNet-34 models; without this the network keeps its
# original output size.
net.classifier[6] = nn.Linear(net.classifier[6].in_features, NUM_CLASSES)