This notebook trains a dog breed classification model that distinguishes 133 dog breeds, using a pretrained ResNet-50 as the feature extractor.

The dataset used below is the dog breed dataset from Udacity. The training code has been borrowed from the [PyTorch vision transfer learning tutorial](https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html).

In [None]:
!wget https://s3-us-west-1.amazonaws.com/udacity-aind/dog-project/dogImages.zip

In [None]:
!unzip dogImages.zip

Running a sanity check to verify the folders and the number of images in them.

In [None]:
# List the files of one validation class folder as a spot check of the extraction.
!ls dogImages/valid/005.Alaskan_malamute

In [None]:
%cd dogImages/valid
!ls | wc -l
%cd -

In [None]:
# Tell Pillow to load image files whose data is truncated instead of raising
# while decoding them.
# NOTE(review): presumably needed because some dataset files are truncated — confirm.
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torch.nn.functional as F
import torchvision
import torchvision.models as models
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
import os
import time
import copy

Setting the dataset, data loader and the image transforms for different sets.

In [None]:
# Per-channel mean/std applied to every split (the standard ImageNet
# statistics used with torchvision's pretrained models).
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Deterministic preprocessing shared by the two evaluation splits.
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

data_transforms = {
    # Random crop + flip are augmentation and therefore train-only.
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]),
    'valid': eval_transform,
    'test': eval_transform,
}

data_dir = '/content/dogImages/'
splits = ['train', 'valid', 'test']

# One ImageFolder per split; class labels come from the sub-folder names.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in splits
}

# Only the training split is shuffled: evaluation metrics do not depend on
# sample order, and a fixed order keeps validation/test runs reproducible.
dataloaders = {
    split: torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=32,
        shuffle=(split == 'train'),
        num_workers=2,
    )
    for split in splits
}

Loading the pretrained ResNet-50 from torch-hub, freezing every parameter whose name does not contain "bn", and replacing the last layer so that its output size matches the number of dog breeds in the dataset, 133.

In [None]:
# Fetch torchvision's ResNet-50 (with pretrained weights) through torch.hub.
pretrained_model = torch.hub.load('pytorch/vision', 'resnet50', pretrained=True)

# Freeze every parameter whose name does not contain "bn"; parameters that
# do contain it keep the requires_grad value they were loaded with.
# NOTE(review): presumably the intent is to leave only batch-norm layers
# trainable; norm layers registered under other names (e.g. inside a
# `downsample` branch) would be frozen by this check — confirm.
for param_name, weight in pretrained_model.named_parameters():
    if "bn" in param_name:
        continue
    weight.requires_grad = False

# Swap the classifier head for a fresh linear layer with 133 outputs. It is
# created after the loop above, so the loop does not freeze it.
num_ftrs = pretrained_model.fc.in_features
pretrained_model.fc = nn.Linear(in_features=num_ftrs, out_features=133)


Setting the optimizer and learning rate scheduler.

In [None]:
# Classification loss used for both the training and validation phases.
criterion = nn.CrossEntropyLoss()

# Adam over the model's parameters with a base learning rate of 1e-3.
optimizer = optim.Adam(params=pretrained_model.parameters(), lr=0.001)

# Multiply the learning rate by 0.1 after every 7 scheduler steps
# (the training loop steps the scheduler once per epoch).
exp_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)

Define the training loop.

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best-validation weights.

    Every epoch runs a 'train' phase followed by a 'valid' phase. Besides its
    arguments, the function reads three notebook-level globals that must be
    defined before it is called: ``dataloaders`` (indexed by phase name),
    ``dataset_sizes`` (sample count per phase, used to average the running
    loss and accuracy) and ``device`` (where each batch is moved).

    Args:
        model: network to optimise. It is not moved by this function, so it
            is assumed to already be on ``device``.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch, after the
            training phase.
        num_epochs: number of train + valid passes to run.

    Returns:
        The same ``model`` object with the state dict of the epoch that
        reached the highest validation accuracy loaded back into it.
    """
    since = time.time()

    # Snapshot of the best weights seen so far (starts as the initial weights).
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: the loss is weighted by the batch size so that
                # dividing by the dataset size below yields a per-sample mean
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # '.4f' (precision), matching the per-epoch metrics; the previous '4f'
    # only set a minimum field width.
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Class labels as discovered by the training ImageFolder.
class_names = image_datasets['train'].classes

# Sample counts for the two splits the training loop iterates over.
dataset_sizes = {
    split: len(image_datasets[split])
    for split in ('train', 'valid')
}

In [None]:
# Show the class list and the per-split sample counts, one per line.
print(class_names, dataset_sizes, sep='\n')

In [None]:
# Move the model to the selected device. As the last expression of the cell,
# the returned module is also displayed.
pretrained_model.to(device)

Run the training for 5 epochs.

In [None]:
# Train for 5 epochs and keep the model loaded with its best-validation weights.
pretrained_model = train_model(
    model=pretrained_model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=exp_lr_scheduler,
    num_epochs=5,
)

In [None]:
# Persist only the state dict (not the whole module object) to the working directory.
torch.save(pretrained_model.state_dict(), "./dog_breed_classification.pth")


Load the trained/saved model for sanity check and testing on the test dataset. 

In [None]:
# Rebuild the architecture; no pretrained weights are requested here because
# the checkpoint loaded below supplies every parameter.
pretrained_model = torch.hub.load('pytorch/vision', 'resnet50')

# Recreate the 133-way head, sizing it from the freshly built model rather
# than from a variable left over from an earlier cell.
pretrained_model.fc = nn.Linear(pretrained_model.fc.in_features, 133)

# map_location makes the checkpoint loadable on the current device even if it
# was saved from a different one (e.g. trained on GPU, reloaded on CPU).
state_dict = torch.load('./dog_breed_classification.pth', map_location=device)
pretrained_model.load_state_dict(state_dict)

# Inference mode for batch-norm layers, then move to the device; the last
# expression displays the module in the cell output.
pretrained_model.eval()
pretrained_model.to(device)

In [None]:
def model_test(model):
    """Print the number of correct predictions and the accuracy on the test split.

    Reads the notebook-level globals ``dataloaders`` (its 'test' entry) and
    ``device``. The model is switched to evaluation mode for the duration of
    the pass, so the result does not depend on the caller having done so, and
    its previous train/eval mode is restored afterwards.

    Args:
        model: classifier returning one score per class for each input; it is
            not moved by this function, so it is assumed to already be on
            ``device``.

    Returns:
        None. Results are printed.
    """
    was_training = model.training
    model.eval()

    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for images, labels in dataloaders['test']:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    # An empty loader yields NaN instead of a ZeroDivisionError.
    accuracy = correct / total if total else float('nan')
    print('correct: {:d}  total: {:d}'.format(correct, total))
    print('accuracy = {:f}'.format(accuracy))

In [None]:
# Evaluate the reloaded model on the held-out test split.
model_test(pretrained_model)


Running some prediction tests. 

In [None]:
def prediction(model, filename):
    """Print the predicted class name for a single image file.

    Reads the notebook-level globals ``class_names``, ``data_transforms``
    (its 'test' pipeline) and ``device``. The model's train/eval mode is left
    as the caller set it.

    Args:
        model: classifier returning one score per class; assumed to already
            be on ``device``.
        filename: path of the image to classify.

    Returns:
        None. The predicted label is printed.
    """
    # Convert to RGB so that grayscale / RGBA / palette files still produce
    # the 3 channels the Normalize step is configured for; the context
    # manager closes the file handle once the pixels have been read.
    with Image.open(filename) as img:
        batch = data_transforms['test'](img.convert('RGB')).unsqueeze(0)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        scores = model(batch.to(device))

    # .item() yields a plain int for indexing the label list.
    class_index = scores.argmax().item()
    print(class_names[class_index])
    
# Classify two validation images of the same breed as a quick smoke test.
for sample_path in (
    '/content/dogImages/valid/005.Alaskan_malamute/Alaskan_malamute_00298.jpg',
    '/content/dogImages/valid/005.Alaskan_malamute/Alaskan_malamute_00344.jpg',
):
    prediction(pretrained_model, sample_path)


Downloading the trained model.

In [None]:
# Colab-only helper: asks the browser to download the saved checkpoint.
# The google.colab package is only importable inside a Colab runtime.
from google.colab import files
files.download('./dog_breed_classification.pth') 