In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

#
# Declare everything
# 

# Prefer the GPU when CUDA is available, otherwise stay on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Per-channel statistics handed to transforms.Normalize below
# (presumably the ImageNet values the pretrained ResNet expects - TODO confirm)
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

# Root folder; each dataset lives in its own sub-folder (`train/`, `val/`)
data_dir = '/content/drive/MyDrive/neural_network_data/hymenoptera_data'

sets = ['train', 'val']  # Names of each dataset

# One preprocessing pipeline per dataset:
#   train - random crop + random horizontal flip (augmentation)
#   val   - fixed resize + center crop
# Both end by converting to a tensor and normalizing with mean/std.
data_transforms = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]),
    val=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]),
)

# Image folder dataset for every name in `sets`, using the matching pipeline
image_datasets = {
    name: datasets.ImageFolder(
        root=os.path.join(data_dir, name),
        transform=data_transforms[name],
    )
    for name in sets
}

# Shuffled loaders that serve batches of 4 images, loaded in the main process
dataloaders = {
    name: torch.utils.data.DataLoader(
        dataset, batch_size=4, shuffle=True, num_workers=0)
    for name, dataset in image_datasets.items()
}

# Number of images in each dataset (train, val)
dataset_sizes = {name: len(dataset) for name, dataset in image_datasets.items()}

# Class labels as discovered by ImageFolder in the training folder
class_names = image_datasets['train'].classes

#
# Training loop
#

def train_model(model, criterion, optimizer, scheduler, num_epochs=25,
                loaders=None, sizes=None):
    '''
        Train `model`, evaluate it every epoch and return it loaded with
        the weights that reached the best validation accuracy.

        Params:
        model - NN model, holds layers and forward pass function
        criterion - loss function, called as criterion(output, labels)
        optimizer - optimization algorithm, updates the model params after
                    every training batch
        scheduler - learning rate scheduler, stepped once per epoch at the
                    end of the 'train' phase
        num_epochs - how many train/eval loops we should do
        loaders - optional dict: phase name -> iterable of (inputs, labels)
                  batches. Phases run in dict order. When omitted the
                  module-level `sets` / `dataloaders` are used
        sizes - optional dict: phase name -> number of samples, used to
                average loss and accuracy. When omitted the module-level
                `dataset_sizes` is used (or len(loader.dataset) of each
                loader when custom `loaders` are given)

        Returns:
        The same `model` object with the best validation weights loaded
        (the initial weights if validation accuracy never exceeded 0).
    '''

    # Resolve the data source; the defaults keep the original behaviour
    if loaders is None:
        phases = sets
        loaders = dataloaders
        if sizes is None:
            sizes = dataset_sizes
    else:
        phases = list(loaders)
        if sizes is None:
            sizes = {name: len(loader.dataset)
                     for name, loader in loaders.items()}

    # Batches must live on the same device as the model's weights.
    # Fall back to the module-level `device` for a parameterless model.
    first_param = next(model.parameters(), None)
    run_device = device if first_param is None else first_param.device

    # Track how long training took
    since = time.time()

    # We will save the best model state in case we move away from it
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch: {epoch+1}')

        # Within an epoch the phases run in order (train first, then val)
        for phase in phases:
            if phase == 'train':
                model.train()
            elif phase == 'val':
                model.eval()

            running_loss = 0.0
            # Plain int so the statistics also work when a phase yields
            # no batches, and no tensor is kept alive between batches
            running_corrects = 0

            # Use dataloader to split dataset into batches
            for inputs, labels in loaders[phase]:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)

                # Forward pass, tracking gradients only in the train phase
                with torch.set_grad_enabled(phase == 'train'):
                    output = model(inputs)
                    _, preds = torch.max(output, 1)
                    loss = criterion(output, labels)

                    # Backward pass and optimizer update
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # Statistics: undo the batch averaging of the loss so the
                # epoch loss below is a true per-sample mean
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            # At the end of the training phase update the learning rate
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / sizes[phase]
            epoch_acc = running_corrects / sizes[phase]

            print(f'Phase: {phase}, Loss: {epoch_loss}, Acc: {epoch_acc}')

            # Validation phase AND better than the current best accuracy:
            # remember the accuracy and snapshot the model `state_dict`
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training completed in {(time_elapsed // 60):.0f}m {(time_elapsed % 60):.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    # Load best model weights
    model.load_state_dict(best_model_wts)
    return model

#
# Transfer learning
#

# Strategy 1: finetune the whole ConvNet
# Start from a pretrained ResNet-18 and swap in a new fully connected layer.
# Every parameter stays trainable, so backprop tunes weights throughout
# the whole network.
finetune_model = models.resnet18(pretrained=True)

# New head: same input width as the old fc layer, 2 outputs (ants, bees)
num_features = finetune_model.fc.in_features
finetune_model.fc = nn.Linear(num_features, 2)

finetune_model.to(device)

# Loss function
criterion = nn.CrossEntropyLoss()

# Plain SGD over all parameters of the network
optimizer = optim.SGD(finetune_model.parameters(), lr=0.001)

# Step scheduler: after every 7 scheduler steps lr = lr * gamma
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

print('Training for finetuning')
finetune_model = train_model(
    finetune_model,
    criterion,
    optimizer,
    step_lr_scheduler,
    num_epochs=5,
)

print()
print()

# Strategy 2: ConvNet as a fixed feature extractor
# Freeze the pretrained weights so that only the replacement fully
# connected layer has gradients computed during backprop.
ff_model = models.resnet18(pretrained=True)

# Freeze every pretrained parameter
for param in ff_model.parameters():
    param.requires_grad_(False)

# The replacement head is created after freezing, so it stays trainable.
# Output size is 2 (ants, bees).
num_features = ff_model.fc.in_features
ff_model.fc = nn.Linear(num_features, 2)

ff_model.to(device)

# Loss function
criterion = nn.CrossEntropyLoss()

# The optimizer is handed all parameters, including the frozen ones
optimizer = optim.SGD(ff_model.parameters(), lr=0.001)

# Step scheduler: after every 7 scheduler steps lr = lr * gamma
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

print('Training for fixed feature extractor')
ff_model = train_model(
    ff_model,
    criterion,
    optimizer,
    step_lr_scheduler,
    num_epochs=5,
)



Training for finetuning
Epoch: 1
Phase: train, Loss: 0.5780056065223256, Epoch Loss: 0.6844262295081968
Phase: val, Loss: 0.4445327759957781, Epoch Loss: 0.8562091503267973

Epoch: 2
Phase: train, Loss: 0.5315434365976052, Epoch Loss: 0.7295081967213115
Phase: val, Loss: 0.3638048918029062, Epoch Loss: 0.9084967320261438

Epoch: 3
Phase: train, Loss: 0.4505011085115495, Epoch Loss: 0.7991803278688525
Phase: val, Loss: 0.2901371446699878, Epoch Loss: 0.9150326797385621

Epoch: 4
Phase: train, Loss: 0.4464163724027696, Epoch Loss: 0.7909836065573771
Phase: val, Loss: 0.25486304849581004, Epoch Loss: 0.9411764705882353

Epoch: 5
Phase: train, Loss: 0.38370965195239565, Epoch Loss: 0.8483606557377049
Phase: val, Loss: 0.22569214912995794, Epoch Loss: 0.9411764705882353

Training completed in 6m 33s
Best val Acc: 0.9412
Training for fixed feature extractor
Epoch: 1
Phase: train, Loss: 0.6215453436140155, Epoch Loss: 0.6721311475409836
Phase: val, Loss: 0.4959676240394318, Epoch Loss: 0.7712