In [1]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import torchvision
import numpy as np
from glob import glob
import cv2

In [7]:
# Take a first look at data to establish typical image sizes etc.
train_dir = "/data/train"

# Get image file names
files = {}
files['train'] = np.array(glob("./data/train/*/*"))
files['valid'] = np.array(glob("./data/valid/*/*"))
files['test'] = np.array(glob("./data/test/*/*"))

# Get number of images in each set
for key, images in files.items():
    print("There are " + str(len(images)) + " " + key + " images")
    
# Get statistics of first 100 images in training set
height = {'max': 0, 'min': np.inf, 'sum': 0}
width = {'max': 0, 'min': np.inf, 'sum': 0}
for image in files['train'][0:20]:
    img = cv2.imread(image)
    if img.shape[0] > height['max']:
        height['max'] = img.shape[0]
    if img.shape[0] < height['min']:
        height['min'] = img.shape[0]
    if img.shape[1] > width['max']:
        width['max'] = img.shape[1]
    if img.shape[1] < width['min']:
        width['min'] = img.shape[1]
    height['sum'] += img.shape[0]
    width['sum'] += img.shape[1]
print("Image height min-mean-max = {}-{}-{}".format(height['min'], height['sum']/20, height['max']))
print("Image width min-mean-max = {}-{}-{}".format(width['min'], width['sum']/20, width['max']))

There are 150 valid images
There are 2000 train images
There are 600 test images
Image height min-mean-max = 767-1105.75-2112
Image width min-mean-max = 1022-1473.15-2816


In [2]:
# Create dataloaders
loaders = {}
train_transforms = transforms.Compose([transforms.RandomRotation(180), 
                                       transforms.Resize(224), 
                                       transforms.CenterCrop(224),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])])
train_folder = datasets.ImageFolder("./data/train", transform = train_transforms)
loaders['train'] = torch.utils.data.DataLoader(train_folder, batch_size=4, shuffle=True)

valid_transforms = transforms.Compose([transforms.Resize(224), 
                                       transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])])
valid_folder = datasets.ImageFolder("./data/valid", transform = valid_transforms)
loaders['valid'] = torch.utils.data.DataLoader(valid_folder, batch_size=4, shuffle=True)

test_transforms = transforms.Compose([transforms.Resize(224), 
                                       transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])])
test_folder = datasets.ImageFolder("./data/test", transform = test_transforms)
loaders['test'] = torch.utils.data.DataLoader(test_folder, batch_size=4, shuffle=True)

In [3]:
print(torch.cuda.memory_allocated())

0


In [4]:
# Create transfer learning network
model = torchvision.models.vgg16(pretrained=True)
model.classifier = nn.Sequential(nn.Linear(25088, 256),
                                nn.ReLU(),
                                nn.Linear(256, 3))
for param in model.features.parameters():
    param.require_grad = False

if torch.cuda.is_available():
    torch.cuda.empty_cache()
    model.cuda()

In [5]:
# Create loss fcn and optimizer
criterion = nn.modules.loss.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.classifier.parameters(), lr = 0.001)

In [6]:
# Training function
def train_model(model, epochs, loaders, criterion, optimizer, save_name):

    use_cuda = torch.cuda.is_available()
    train_count_total = len(loaders['train'].dataset)
    train_batch_size = loaders['train'].batch_size
    valid_count_total = len(loaders['valid'].dataset)
    valid_batch_size = loaders['valid'].batch_size
    valid_loss_best = np.inf
    print_every = 5

    for e in range(1, epochs+1):

        # Training step
        model.train()
        train_loss = 0
        train_count_correct = 0
        for idx, (inputs, targets) in enumerate(loaders['train']):

            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            outputs = model.forward(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            train_loss = (train_loss * idx + loss.item())/(idx + 1)
            _, pred = outputs.max(1)
            train_count_correct += np.sum(np.squeeze(pred.eq(targets.view_as(pred))).cpu().numpy())

            if idx % print_every == 0:
                print("Epoch {}/{} - {:.2f}% complete - Train loss = {}".format(e,
                                                                                epochs,
                                                                                100*idx*train_batch_size/train_count_total,
                                                                                train_loss), end='\r')
                
        del inputs, targets, outputs, loss

        # Validation step
        model.eval()
        valid_loss = 0
        valid_count_correct = 0
        for idx, (inputs, targets) in enumerate(loaders['valid']):

            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            outputs = model.forward(inputs)
            loss = criterion(outputs, targets)

            valid_loss = (valid_loss * idx + loss.item())/(idx + 1)
            _, pred = outputs.max(1)
            valid_count_correct += np.sum(np.squeeze(pred.eq(targets.view_as(pred))).cpu().numpy())


        # Print epoch summary
        print("Epoch {}/{} - ".format(e, epochs) +
              "Training: Loss = {:.4f}, Acc = {:.2f}% - ".format(train_loss, 100*train_count_correct/train_count_total) +
              "Validation: Loss = {:.4f}, Acc = {:.2f}%".format(valid_loss, 100*valid_count_correct/valid_count_total))
        print("Cuda memory allocated = {}".format(torch.cuda.memory_allocated()))

        if valid_loss < valid_loss_best:
            valid_loss_best = valid_loss
            torch.save(model.state_dict, save_name)
            print("Validation loss decreased...saving checkpoint")
        
        del inputs, targets, outputs, loss
        
    return model

In [7]:
# Run training
model = train_model(model, 5, loaders, criterion, optimizer, "checkpoint.pt")

Epoch 1/5 - Training: Loss = 2.1942, Acc = 62.90% - Validation: Loss = 1.0706, Acc = 52.00%
Cuda memory allocated = 369939968
Validation loss decreased...saving checkpoint
Epoch 2/5 - Training: Loss = 1.0918, Acc = 68.45% - Validation: Loss = 1.3209, Acc = 52.00%
Cuda memory allocated = 369677824
Validation loss decreased...saving checkpoint
Epoch 3/5 - 61.00% complete - Train loss = 2.1560652859849867

KeyboardInterrupt: 

In [11]:
print(torch.cuda.memory_allocated())

223908352


In [None]:
# Testing function
def test_model(model, loaders, criterion):

    use_cuda = torch.cuda.is_available()
    test_count_total = len(loaders['test'].dataset)
    test_batch_size = loaders['test'].batch_size
  
    model.eval()
    test_loss = 0
    test_count_correct = 0
    for idx, (inputs, targets) in enumerate(loaders['test']):

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        outputs = model.forward(inputs)
        loss = criterion(outputs, targets)

        test_loss = (test_loss * idx + loss.item())/(idx + 1)
        _, pred = outputs.max(1)
        test_count_correct += np.sum(np.squeeze(pred.eq(targets.view_as(pred))).cpu().numpy())

    # Print summary
    print("Test: Loss = {:.4f}, Acc = {:.2f}% - ".format(test_loss, 
                                                         100*test_count_correct/test_count_total))
    print("Cuda memory allocated = {}".format(torch.cuda.memory_allocated()))

    del inputs, targets, outputs, loss

In [None]:
# Run testing

# Load model from checkpoint
