In [1]:
from IPython.core.interactiveshell import InteractiveShell
# import seaborn as sns
# PyTorch
from torchvision import transforms, datasets, models
import torch
from torch import optim, cuda
from torch.utils.data import DataLoader, sampler
import torch.nn as nn

from scipy.spatial import distance
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

# Data science tools
import numpy as np
import pandas as pd
import os

# Image manipulations
from PIL import Image
# Useful for examining network
from torchsummary import summary
# Timing utility
from timeit import default_timer as timer

# Visualizations
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['font.size'] = 14

# Printing out all outputs
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
checkpoint_path = 'models/model2.pth'
save_file_name = 'models/model2.pt'
# Change to fit hardware
batch_size = 64
# Whether to train on a gpu
train_on_gpu = cuda.is_available()
train_on_gpu

# Image transformations
image_transforms = {
    # Train uses data augmentation
    'train': transforms.Compose([
        transforms.Resize(256),
        transforms.RandomRotation(45),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    # Test does not use augmentation
    'test':
    transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
}

True

In [3]:
# Datasets from each folder
data = {
    'train':
    datasets.ImageFolder(root=  "data/Train",      transform=image_transforms['train']),
    'valid':
    datasets.ImageFolder(root=  "data/Validation", transform=image_transforms['valid']),
    'test':
    datasets.ImageFolder(root=  "data/Test",       transform=image_transforms['test'])
}

dataloaders = {
    'train': DataLoader(data['train'], batch_size=batch_size, shuffle=True),
    'valid': DataLoader(data['valid'], batch_size=batch_size, shuffle=True),
    'test': DataLoader(data['test'], batch_size=batch_size, shuffle=True)
}

In [4]:
n_classes = len(data['train'].classes)
n_classes
trainiter = iter(dataloaders['train'])
features, labels = next(trainiter)
features.shape, labels.shape

199

(torch.Size([64, 3, 224, 224]), torch.Size([64]))

In [5]:
 ### Execute below cell only when you make new model. If you want to load a saved model, then run load_checkpoint method

In [5]:
model = models.resnet18(pretrained=True)
#res_model.to(device)
for param in model.parameters():
    param.requires_grad = False

num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 199)
model.load_state_dict(torch.load('models/model2.pt'))

if train_on_gpu:
    model = model.to('cuda')

summary(model, input_size=(3,224,224), batch_size=batch_size)

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [64, 64, 112, 112]           9,408
       BatchNorm2d-2         [64, 64, 112, 112]             128
              ReLU-3         [64, 64, 112, 112]               0
         MaxPool2d-4           [64, 64, 56, 56]               0
            Conv2d-5           [64, 64, 56, 56]          36,864
       BatchNorm2d-6           [64, 64, 56, 56]             128
              ReLU-7           [64, 64, 56, 56]               0
            Conv2d-8           [64, 64, 56, 56]          36,864
       BatchNorm2d-9           [64, 64, 56, 56]             128
             ReLU-10           [64, 64, 56, 56]               0
       BasicBlock-11           [64, 64, 56, 56]               0
           Conv2d-12           [64, 64, 56, 56]          36,864
      BatchNorm2d-13           [64, 64, 56, 56]             128
             ReLU-14           [64, 64,

In [6]:
def accuracy(output, target, topk=(1, )):
    """Compute the topk accuracy(s)"""
    if train_on_gpu:
        output = output.to('cuda')
        target = target.to('cuda')

    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Find the predicted classes and transpose
        _, pred = output.topk(k=maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()

        # Determine predictions equal to the targets
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []

        # For each k, find the percentage of correct
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size).item())
        return res

In [18]:
testiter = iter(dataloaders['test'])
# Get a batch of testing images and labels
features, targets = next(testiter)

print('Accuracy Before Training')
if train_on_gpu:
    accuracy(model(features.to('cuda')), targets, topk=(1, 5))
else:
    accuracy(model(features), targets, topk=(1, 5))

Accuracy Before Training


[65.625, 85.9375]

In [23]:
#In case you load a saved model, it will return an optimizer as well so no need to define here

# criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.00001, momentum=0.99, weight_decay=5e-4)

In [20]:
def train(model,
          criterion,optimizer,train_loader,valid_loader,save_file_name,max_epochs_stop=3,n_epochs=20,print_every=1):

    # Early stopping intialization
    epochs_no_improve = 0
    valid_loss_min = np.Inf

    valid_max_acc = 0
    history = []

    # Number of epochs already trained (if using loaded in model weights)
    try:
        print(f'Model has been trained for: {model.epochs} epochs.\n')
    except:
        model.epochs = 0
        print(f'Starting Training from Scratch.\n')

    overall_start = timer()

    # Main loop
    for epoch in range(n_epochs):

        # keep track of training and validation loss each epoch
        train_loss = 0.0
        valid_loss = 0.0

        train_acc = 0
        valid_acc = 0

        # Set to training
        model.train()
        start = timer()
        
#         if(epoch >20 and epoch <= 40):
#             optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
#         if(epoch >= 1 and epoch <= 20):
#             optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
#         if(epoch >20 and epoch <= 40):
#             optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9, weight_decay=5e-4)
#         if(epoch >40 and epoch <= 60):
#             optimizer = optim.SGD(model.parameters(), lr=0.00001, momentum=0.9, weight_decay=5e-4)

        
        # Training loop
        for ii, (data, target) in enumerate(train_loader):
            # Tensors to gpu
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()

            # Clear gradients
            optimizer.zero_grad()
            # Predicted outputs are log probabilities
            output = model(data)

            # Loss and backpropagation of gradients
            loss = criterion(output, target)
            loss.backward()

            # Update the parameters
            optimizer.step()

            # Track train loss by multiplying average loss by number of examples in batch
            train_loss += loss.item() * data.size(0)

            # Calculate accuracy by finding max log probability
            _, pred = torch.max(output, dim=1)
            correct_tensor = pred.eq(target.data.view_as(pred))
            # Need to convert correct tensor from int to float to average
            accuracy = torch.mean(correct_tensor.type(torch.FloatTensor))
            # Multiply average accuracy times the number of examples in batch
            train_acc += accuracy.item() * data.size(0)

            # Track training progress
            print(
                f'Epoch: {epoch}\t{100 * (ii + 1) / len(train_loader):.2f}% complete. {timer() - start:.2f} seconds elapsed in epoch.',
                end='\r')

        # After training loops ends, start validation
        else:
            model.epochs += 1

            # Don't need to keep track of gradients
            with torch.no_grad():
                # Set to evaluation mode
                model.eval()

                # Validation loop
                for data, target in valid_loader:
                    # Tensors to gpu
                    if train_on_gpu:
                        data, target = data.cuda(), target.cuda()

                    # Forward pass
                    output = model(data)

                    # Validation loss
                    loss = criterion(output, target)
                    # Multiply average loss times the number of examples in batch
                    valid_loss += loss.item() * data.size(0)

                    # Calculate validation accuracy
                    _, pred = torch.max(output, dim=1)
                    correct_tensor = pred.eq(target.data.view_as(pred))
                    accuracy = torch.mean(
                        correct_tensor.type(torch.FloatTensor))
                    # Multiply average accuracy times the number of examples
                    valid_acc += accuracy.item() * data.size(0)

                # Calculate average losses
                train_loss = train_loss / len(train_loader.dataset)
                valid_loss = valid_loss / len(valid_loader.dataset)

                # Calculate average accuracy
                train_acc = train_acc / len(train_loader.dataset)
                valid_acc = valid_acc / len(valid_loader.dataset)

                history.append([train_loss, valid_loss, train_acc, valid_acc])

                # Print training and validation results
                if (epoch + 1) % print_every == 0:
                    print(
                        f'\nEpoch: {epoch} \tTraining Loss: {train_loss:.4f} \tValidation Loss: {valid_loss:.4f}'
                    )
                    print(
                        f'\t\tTraining Accuracy: {100 * train_acc:.2f}%\t Validation Accuracy: {100 * valid_acc:.2f}%'
                    )

                # Save the model if validation loss decreases
                if valid_loss < valid_loss_min:
                    # Save model
                    torch.save(model.state_dict(), save_file_name)
                    # Track improvement
                    epochs_no_improve = 0
                    valid_loss_min = valid_loss
                    valid_best_acc = valid_acc
                    best_epoch = epoch

                # Otherwise increment count of epochs with no improvement
                else:
                    epochs_no_improve += 1
                    # Trigger early stopping
                    if epochs_no_improve >= max_epochs_stop:
                        print(
                            f'\nEarly Stopping! Total epochs: {epoch}. Best epoch: {best_epoch} with loss: {valid_loss_min:.2f} and acc: {100 * valid_acc:.2f}%'
                        )
                        total_time = timer() - overall_start
                        print(
                            f'{total_time:.2f} total seconds elapsed. {total_time / (epoch+1):.2f} seconds per epoch.'
                        )

                        # Load the best state dict
                        model.load_state_dict(torch.load(save_file_name))
                        # Attach the optimizer
                        model.optimizer = optimizer

                        # Format history
                        history = pd.DataFrame(
                            history,
                            columns=[
                                'train_loss', 'valid_loss', 'train_acc',
                                'valid_acc'
                            ])
                        return model, history

    # Attach the optimizer
    model.optimizer = optimizer
    # Record overall time and print out stats
    total_time = timer() - overall_start
    print(
        f'\nBest epoch: {best_epoch} with loss: {valid_loss_min:.2f} and acc: {100 * valid_acc:.2f}%'
    )
    print(
        f'{total_time:.2f} total seconds elapsed. {total_time / (epoch):.2f} seconds per epoch.'
    )
    # Format history
    history = pd.DataFrame(
        history,
        columns=['train_loss', 'valid_loss', 'train_acc', 'valid_acc'])
    return model, history

In [12]:
def save_checkpoint(model, path):
    # Basic details
    
    model.class_to_idx = data['train'].class_to_idx
    model.idx_to_class = {
    idx: class_
    for class_, idx in model.class_to_idx.items()
    }
    checkpoint = {
        'class_to_idx': model.class_to_idx,
        'idx_to_class': model.idx_to_class,
        'epochs': model.epochs,
    }

    checkpoint['state_dict'] = model.state_dict()
    
    # Add the optimizer
    checkpoint['optimizer'] = model.optimizer
    checkpoint['optimizer_state_dict'] = model.optimizer.state_dict()

    # Save the data to the path
    torch.save(checkpoint, path)

In [13]:
def load_checkpoint(path):
    # Load in checkpoint
    #checkpoint = torch.load(path)
    checkpoint = torch.load(path, map_location=torch.device('cpu'))
        
    model = models.resnet18(pretrained=True)
    # Make sure to set parameters as not trainable
    for param in model.parameters():
        param.requires_grad = False
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 199)
    
    # Load in the state dict
    model.load_state_dict(checkpoint['state_dict'])

    total_params = sum(p.numel() for p in model.parameters())
    print(f'{total_params} total parameters.')
    total_trainable_params = sum(
        p.numel() for p in model.parameters() if p.requires_grad)
    print(f'{total_trainable_params} total gradient parameters.')

    if train_on_gpu:
        model = model.to('cuda')

    # Model basics
    model.class_to_idx = checkpoint['class_to_idx']
    model.idx_to_class = checkpoint['idx_to_class']
    model.epochs = checkpoint['epochs']

    # Optimizer
    optimizer = checkpoint['optimizer']
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    return model, optimizer

In [None]:
#Here the model is automatically saved by torch.save() in train function (extension of the saved file would be .pt)
#Save_checkpoint method is for saving additional info like model classifier, num of epoch etc

model, history = train(
    model,
    criterion,
    optimizer,
    dataloaders['train'],
    dataloaders['valid'],
    save_file_name=save_file_name, #This will save as .pt file
    max_epochs_stop=25,
    n_epochs=100,
    print_every=1)

Starting Training from Scratch.

Epoch: 0	100.00% complete. 248.44 seconds elapsed in epoch.
Epoch: 0 	Training Loss: 5.1347 	Validation Loss: 4.2704
		Training Accuracy: 15.90%	 Validation Accuracy: 28.65%
Epoch: 1	100.00% complete. 85.70 seconds elapsed in epoch.
Epoch: 1 	Training Loss: 4.1549 	Validation Loss: 3.8034
		Training Accuracy: 30.17%	 Validation Accuracy: 37.43%
Epoch: 2	100.00% complete. 86.02 seconds elapsed in epoch.
Epoch: 2 	Training Loss: 3.9344 	Validation Loss: 3.4916
		Training Accuracy: 34.20%	 Validation Accuracy: 38.68%
Epoch: 3	100.00% complete. 85.52 seconds elapsed in epoch.
Epoch: 3 	Training Loss: 3.8118 	Validation Loss: 3.5497
		Training Accuracy: 36.78%	 Validation Accuracy: 40.33%
Epoch: 4	100.00% complete. 85.57 seconds elapsed in epoch.
Epoch: 4 	Training Loss: 3.5482 	Validation Loss: 3.7060
		Training Accuracy: 38.71%	 Validation Accuracy: 41.11%
Epoch: 5	100.00% complete. 85.36 seconds elapsed in epoch.
Epoch: 5 	Training Loss: 3.6089 	Validatio

In [14]:
save_checkpoint(model, path=checkpoint_path)

In [None]:
# model, optimizer = load_checkpoint('models/model1.pth') # This will save model as .pth file

In [34]:
testiter = iter(dataloaders['test'])
# Get a batch of testing images and labels
features, targets = next(testiter)

if train_on_gpu:
    accuracy(model(features.to('cuda')), targets, topk=(1, 5))
else:
    accuracy(model(features), targets, topk=(1, 5))

[68.75, 85.9375]

In [15]:
# save_checkpoint(model, path=checkpoint_path)

In [30]:
# optimizer = optim.SGD(model.parameters(), lr=0.000001, momentum=0.99, weight_decay=5e-4)

In [15]:
model, history = train(
    model,
    criterion,
    optimizer,
    dataloaders['train'],
    dataloaders['valid'],
    save_file_name=save_file_name, #This will save as .pt file
    max_epochs_stop=25,
    n_epochs=60,
    print_every=1)

Starting Training from Scratch.

Epoch: 0	100.00% complete. 86.55 seconds elapsed in epoch.
Epoch: 0 	Training Loss: 1.2726 	Validation Loss: 1.5766
		Training Accuracy: 67.80%	 Validation Accuracy: 60.62%
Epoch: 1	100.00% complete. 86.56 seconds elapsed in epoch.
Epoch: 1 	Training Loss: 1.2881 	Validation Loss: 1.5887
		Training Accuracy: 67.39%	 Validation Accuracy: 60.41%
Epoch: 2	100.00% complete. 86.44 seconds elapsed in epoch.
Epoch: 2 	Training Loss: 1.2624 	Validation Loss: 1.5740
		Training Accuracy: 67.78%	 Validation Accuracy: 60.41%
Epoch: 3	100.00% complete. 87.01 seconds elapsed in epoch.
Epoch: 3 	Training Loss: 1.3139 	Validation Loss: 1.5848
		Training Accuracy: 66.72%	 Validation Accuracy: 60.02%
Epoch: 4	100.00% complete. 86.72 seconds elapsed in epoch.
Epoch: 4 	Training Loss: 1.3068 	Validation Loss: 1.5755
		Training Accuracy: 67.07%	 Validation Accuracy: 60.45%
Epoch: 5	100.00% complete. 86.52 seconds elapsed in epoch.
Epoch: 5 	Training Loss: 1.2923 	Validation

KeyboardInterrupt: 

In [25]:
save_checkpoint(model, path=checkpoint_path)

In [24]:
model, history = train(
    model,
    criterion,
    optimizer,
    dataloaders['train'],
    dataloaders['valid'],
    save_file_name=save_file_name, #This will save as .pt file
    max_epochs_stop=25,
    n_epochs=2,
    print_every=1)

Model has been trained for: 44 epochs.

Epoch: 0	100.00% complete. 86.39 seconds elapsed in epoch.
Epoch: 0 	Training Loss: 1.2777 	Validation Loss: 1.5675
		Training Accuracy: 67.48%	 Validation Accuracy: 60.36%
Epoch: 1	100.00% complete. 86.27 seconds elapsed in epoch.
Epoch: 1 	Training Loss: 1.2365 	Validation Loss: 1.5526
		Training Accuracy: 68.44%	 Validation Accuracy: 60.75%

Best epoch: 1 with loss: 1.55 and acc: 60.75%
216.04 total seconds elapsed. 216.04 seconds per epoch.
