In [0]:
#!pip3 install 'torch==1.4.0'
#!pip3 install 'torchvision==0.5.0'
#!pip3 install 'Pillow-SIMD'
#!pip3 install 'tqdm'

Collecting torch==1.4.0
[?25l  Downloading https://files.pythonhosted.org/packages/24/19/4804aea17cd136f1705a5e98a00618cb8f6ccc375ad8bfa437408e09d058/torch-1.4.0-cp36-cp36m-manylinux1_x86_64.whl (753.4MB)
[K     |████████████████████████████████| 753.4MB 19kB/s 
[31mERROR: torchvision 0.6.0+cu101 has requirement torch==1.5.0, but you'll have torch 1.4.0 which is incompatible.[0m
[?25hInstalling collected packages: torch
  Found existing installation: torch 1.5.0+cu101
    Uninstalling torch-1.5.0+cu101:
      Successfully uninstalled torch-1.5.0+cu101
Successfully installed torch-1.4.0
Collecting torchvision==0.5.0
[?25l  Downloading https://files.pythonhosted.org/packages/7e/90/6141bf41f5655c78e24f40f710fdd4f8a8aff6c8b7c6f0328240f649bdbe/torchvision-0.5.0-cp36-cp36m-manylinux1_x86_64.whl (4.0MB)
[K     |████████████████████████████████| 4.0MB 8.2MB/s 
Installing collected packages: torchvision
  Found existing installation: torchvision 0.6.0+cu101
    Uninstalling torchvision-0



Cloning into 'Homework2-Caltech101'...
remote: Enumerating objects: 18, done.[K
remote: Counting objects: 100% (18/18), done.[K
remote: Compressing objects: 100% (12/12), done.[K
remote: Total 9280 (delta 10), reused 14 (delta 6), pack-reused 9262[K
Receiving objects: 100% (9280/9280), 129.49 MiB | 34.07 MiB/s, done.
Resolving deltas: 100% (18/18), done.
Checking out files: 100% (9149/9149), done.


In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet

from PIL import Image
from tqdm import tqdm

from sklearn.model_selection import train_test_split
import numpy as np

In [0]:
# Clone github repository with data
if not os.path.isdir('./Caltech101'):
	!git clone https://github.com/rm-wu/Homework2-Caltech101.git
	!mv 'Homework2-Caltech101' 'Caltech101'

from Caltech101.caltech_dataset import Caltech
from torch.utils.data.dataset import random_split

In [0]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 102 # 101 + 1: There is an extra Background class that should be removed 

BATCH_SIZE = 256    # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
					# the batch size, learning rate should change by the same factor to have comparable results

LR = 1e-3           # The initial Learning Rate
MOMENTUM = 0.9      # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5 # Regularization, you can keep this at the default

NUM_EPOCHS = 30     # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20      # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1         # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

In [0]:
# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Normalizes tensor with mean and standard deviation
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))                                    
])

In [0]:
DATA_DIR = './Caltech101'

In [0]:
#from Caltech101.caltech_dataset import Caltech#
#from torch.utils.data.dataset import random_split

# Prepare Pytorch train/test Datasets
train_dataset_ = Caltech(DATA_DIR, split='train',  transform=train_transform)
test_dataset= Caltech(DATA_DIR, split='test', transform=eval_transform)

#lengths = [int(len(train_dataset)*0.5), int(len(train_dataset)*0.5)]
#train_dataset, val_dataset = random_split(train_dataset, lengths)

train_idx, val_idx = train_test_split(np.arange(0, len(train_dataset_)), train_size=0.5,
									shuffle=True, random_state=42, stratify=train_dataset_.y)
train_dataset = Subset(train_dataset_, train_idx)
val_dataset = Subset(train_dataset_, val_idx)


#train_indexes = # split the indices for your train split
#val_indexes = # split the indices for your val split

#train_dataset = Subset(train_dataset, train_indexes)
#val_dataset = Subset(train_dataset, val_indexes)

# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

Train Dataset: 2892
Valid Dataset: 2892
Test Dataset: 2893


In [0]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [0]:
net = alexnet() # Loading AlexNet model

# AlexNet has 1000 output neurons, corresponding to the 1000 ImageNet's classes
# We need 101 outputs for Caltech-101
net.classifier[6] = nn.Linear(4096, NUM_CLASSES) # nn.Linear in pytorch is a fully connected layer
                                                 # The convolutional layer is nn.Conv2d

# We just changed the last layer of AlexNet with a new fully connected layer with 101 outputs
# It is strongly suggested to study torchvision.models.alexnet source code

In [0]:
LR = 1e-2           # The initial Learning Rate
MOMENTUM = 0.9      # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 1e-4 # Regularization, you can keep this at the default

NUM_EPOCHS = 45     # Total number of training epochs (iterations over dataset)
STEP_SIZE = 30      # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1         # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

In [0]:
# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;) 
parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet

# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

In [26]:
import copy
#import time

dataloader = {'train': train_dataloader, 'val': val_dataloader}
dataset = {'train': train_dataset, 'val': val_dataset}
path_1 = "./Caltech101/weights.pth" #TODO: for all paths


val_info = [[], []]
train_info = [[], []]


def train_model(model, criterion, optimizer, scheduler, num_epochs=30, save_weights=False, ):
    #since = time.time()
    cudnn.benchmark
    model.to(DEVICE)
    best_model_w = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs} LR = {scheduler.get_last_lr()}")
        print('=' * 50)
        
        for phase in ['train', 'val']:
            #print(phase)
            if phase == 'train':
                model.train()
            else:
                model.eval()
                
            running_loss = 0.0
            running_corrects = 0
            
            for images, labels in dataloader[phase]:
                images = images.to(DEVICE)
                labels = labels.to(DEVICE)
                
                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase=='train'):
                    outputs = model(images)
                    _, preds = torch.max(outputs.data, 1)
                    loss = criterion(outputs, labels)
                    
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        
                running_loss += loss.item() * images.size(0)
                running_corrects += torch.sum(preds == labels.data)
                
            if phase == 'train':
                scheduler.step()
            epoch_loss = running_loss / len(dataset[phase])
            epoch_acc = running_corrects.double() / len(dataset[phase])

            if phase == 'train':
                train_info[0].append(epoch_loss)
                train_info[1].append(epoch_acc)
            else:
                val_info[0].append(epoch_loss)
                val_info[1].append(epoch_acc)
            
            print(f"{phase}\tLoss: {epoch_loss:.4f}\tAcc: {epoch_acc:.4f}")
            
            if phase == 'val'and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_w = copy.deepcopy(model.state_dict())
                if save_weights:
                    torch.save(net.state_dict(), path_1)

        print()
    
    print('Best val Acc: {:4f}'.format(best_acc))
    
    # load best model weights
    model.load_state_dict(best_model_w)
    return model




model_scratch = train_model(net, criterion, optimizer, scheduler, num_epochs=NUM_EPOCHS)


Epoch 1/45 LR = [0.01]
train	Loss: 4.4988	Acc: 0.0526
val	Loss: 4.6106	Acc: 0.0920

Epoch 2/45 LR = [0.01]
train	Loss: 4.4779	Acc: 0.0906
val	Loss: 4.5826	Acc: 0.0920

Epoch 3/45 LR = [0.01]
train	Loss: 4.4419	Acc: 0.0906
val	Loss: 4.5274	Acc: 0.0920

Epoch 4/45 LR = [0.01]
train	Loss: 4.2945	Acc: 0.0899
val	Loss: 4.2486	Acc: 0.0920

Epoch 5/45 LR = [0.01]
train	Loss: 4.1391	Acc: 0.0947
val	Loss: 4.2159	Acc: 0.0920

Epoch 6/45 LR = [0.01]
train	Loss: 4.0968	Acc: 0.0864
val	Loss: 4.2095	Acc: 0.0923

Epoch 7/45 LR = [0.01]
train	Loss: 4.0751	Acc: 0.0882
val	Loss: 4.1299	Acc: 0.1058

Epoch 8/45 LR = [0.01]
train	Loss: 4.0174	Acc: 0.1252
val	Loss: 4.0822	Acc: 0.1470

Epoch 9/45 LR = [0.01]
train	Loss: 3.9523	Acc: 0.1442
val	Loss: 4.0410	Acc: 0.1971

Epoch 10/45 LR = [0.01]
train	Loss: 3.8916	Acc: 0.1805
val	Loss: 3.9226	Acc: 0.1943

Epoch 11/45 LR = [0.01]
train	Loss: 3.7981	Acc: 0.1909
val	Loss: 3.8384	Acc: 0.2123

Epoch 12/45 LR = [0.01]
train	Loss: 3.7159	Acc: 0.2106
val	Loss: 3.7216	Ac

In [0]:
torch.save(net.state_dict(), path_1)

In [0]:
len(train_dataset.dataset)

5784