In [8]:
import copy
import json
import logging
import time
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import requests
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from PIL import Image
from torch.optim import lr_scheduler
from torchvision import datasets
from torchvision.models import resnet18, resnet50

warnings.filterwarnings('ignore')
%matplotlib inline

In [9]:
# Set TORCH_HOME to the relative directory "models" before the pretrained
# weights are requested (presumably so the download is cached there -- confirm
# against the torch.hub documentation).
%env TORCH_HOME=models
# ResNet-50 with pretrained weights.
# NOTE(review): `model` is not referenced by any later cell visible here.
model = resnet50(pretrained=True)

env: TORCH_HOME=models


# Input for pretrained models
---

Images must have shape (3 × H × W), where H and W are expected to be at least 224.

Images must be loaded into the range [0, 1] and then normalized using the per-channel mean and standard deviation below.

### Load Data

In [10]:
# Per-channel mean / standard deviation handed to Normalize; the same values
# are used again in `visualize_data` to undo the normalisation for display.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
normalize = transforms.Normalize(mean=channel_mean, std=channel_std)

In [12]:
# Training pipeline: random 224-pixel crop plus random horizontal flip,
# then tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# Validation pipeline: resize to 256, centre-crop to 224, then tensor
# conversion and normalisation (no random augmentation).
validation_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

# Keys double as the split sub-folder names joined onto the dataset directory.
transforming_data = {
    'train_data': train_transform,
    'validation_data': validation_transform,
}

In [13]:
# Root of the dataset, relative to the notebook's working directory.
directory_data = Path('datasets') / 'monkey_pose'

In [14]:
def _require_class_folders(split_dir):
    """Return ``split_dir`` after checking it contains at least one sub-directory.

    ``datasets.ImageFolder`` derives the class list from the sub-directories of
    its root. Checking up front lets the notebook fail with a message that
    spells out the expected layout instead of a bare "no class folder" error.

    Args:
        split_dir: ``pathlib.Path`` of one dataset split.

    Returns:
        The same ``split_dir``, unchanged.

    Raises:
        FileNotFoundError: if the directory is missing or has no sub-directory.
    """
    if not split_dir.is_dir():
        raise FileNotFoundError(f"Dataset split directory not found: {split_dir}")
    if not any(entry.is_dir() for entry in split_dir.iterdir()):
        raise FileNotFoundError(
            f"Couldn't find any class folder in {split_dir}. "
            "ImageFolder expects one sub-directory per class, e.g. "
            f"{split_dir}/<class_name>/<image file>."
        )
    return split_dir


# One ImageFolder per split, each paired with the transform of the same key.
datasets_images = {
    split: datasets.ImageFolder(_require_class_folders(directory_data / split),
                                transforming_data[split])
    for split in ['train_data', 'validation_data']
}

FileNotFoundError: Couldn't find any class folder in datasets/monkey_pose/train_data.

In [None]:
# One DataLoader per split: mini-batches of 4, reshuffled on every pass,
# loaded by 4 worker processes.
loaders_data = {
    split: torch.utils.data.DataLoader(
        datasets_images[split],
        batch_size=4,
        shuffle=True,
        num_workers=4,
    )
    for split in ['train_data', 'validation_data']
}

In [None]:
# Number of samples in each split; the training loop divides the accumulated
# loss and correct-prediction count by these values.
sizes_datasets = {
    split: len(datasets_images[split])
    for split in ['train_data', 'validation_data']
}

# Class labels as reported by the training ImageFolder.
class_names = datasets_images['train_data'].classes

In [None]:
# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

### Visualize Data *optional*

In [None]:
plt.ion()  # switch matplotlib to interactive mode


def visualize_data(input, title=None):
    """Undo the input normalisation of an image tensor and draw it.

    Args:
        input: tensor convertible with ``.numpy()`` whose three axes are
            reordered with ``transpose((1, 2, 0))`` before plotting, so the
            first axis must hold the 3 colour channels.
        title: optional title passed to ``plt.title``; skipped when ``None``.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Move the channel axis last, which is the layout `plt.imshow` draws.
    image = input.numpy().transpose((1, 2, 0))
    # Invert Normalize (x * std + mean) and clamp to the displayable range.
    image = np.clip(channel_std * image + channel_mean, 0, 1)

    plt.imshow(image)
    if title is not None:
        plt.title(title)
    # Short pause so the figure gets a chance to refresh.
    plt.pause(0.001)
# Fetch a single batch (images, class indices) from the training loader.
inputs_data, classes = next(iter(loaders_data['train_data']))
# Tile the batch into one image grid. This needs the `torchvision` package
# name itself in scope (`import torchvision`), not only its sub-modules.
out = torchvision.utils.make_grid(inputs_data)
# Draw the grid, titled with the class name of every image in the batch.
visualize_data(out, title=[class_names[x] for x in classes])

### Train Model

In [None]:
def model_training(res_model, criterion, optimizer, scheduler, number_epochs=25):
    """Train ``res_model`` and return it loaded with its best validation weights.

    Every epoch runs a training pass over ``loaders_data['train_data']``
    followed by an evaluation pass over ``loaders_data['validation_data']``.
    Gradients, optimizer steps and the scheduler step happen only in the
    training pass. After each evaluation pass the weights are snapshotted if
    the validation accuracy improved on the best value seen so far.

    Relies on the notebook-level names ``loaders_data``, ``sizes_datasets``
    and ``device``.

    Args:
        res_model: model to train; modified in place.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer bound to ``res_model``'s parameters.
        scheduler: learning-rate scheduler, stepped once per epoch.
        number_epochs: number of epochs to run.

    Returns:
        ``res_model`` with the best-scoring state dict loaded.
    """
    since = time.time()
    best_resmodel_wts = copy.deepcopy(res_model.state_dict())
    best_accuracy = 0.0

    for epoch in range(number_epochs):
        print('Epoch {}/{}'.format(epoch, number_epochs - 1))
        print('-' * 10)

        # Each epoch has a training phase followed by a validation phase.
        for phase in ['train_data', 'validation_data']:
            # Single source of truth for "are we training?" so the phase name
            # cannot drift between the checks below.
            is_training = phase == 'train_data'
            if is_training:
                res_model.train()
            else:
                res_model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in loaders_data[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Track autograd history only while training.
                with torch.set_grad_enabled(is_training):
                    outputs = res_model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # `loss` is a batch mean; weight it by the batch size so the
                # epoch figure is a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            if is_training:
                scheduler.step()

            epoch_loss = running_loss / sizes_datasets[phase]
            epoch_acc = running_corrects / sizes_datasets[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Keep a deep copy of the weights whenever validation improves.
            if not is_training and epoch_acc > best_accuracy:
                best_accuracy = epoch_acc
                best_resmodel_wts = copy.deepcopy(res_model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_accuracy))

    # Load the best model weights before handing the model back.
    res_model.load_state_dict(best_resmodel_wts)
    return res_model

### Visualize Predictions

In [None]:
def model_visualization(res_model, num_images=6):
    """Plot predictions for the first ``num_images`` validation images.

    Images are taken batch by batch from ``loaders_data['validation_data']``
    and drawn on a two-column subplot grid, each titled with its predicted
    class name. Iteration stops as soon as enough images have been drawn. The
    model is switched to eval mode for the duration of the call and its
    previous training flag is restored on exit, including on error.

    Relies on the notebook-level names ``loaders_data``, ``device``,
    ``class_names`` and ``visualize_data``.

    Args:
        res_model: model used to produce the predictions.
        num_images: maximum number of images to draw.
    """
    was_training = res_model.training
    res_model.eval()
    images_so_far = 0
    # Round up so an odd `num_images` still fits on the two-column grid.
    rows = (num_images + 1) // 2
    plt.figure()

    try:
        with torch.no_grad():
            for inputs, _ in loaders_data['validation_data']:
                inputs = inputs.to(device)
                outputs = res_model(inputs)
                _, preds = torch.max(outputs, 1)

                # Draw the images of *this* batch before fetching the next one.
                for j in range(inputs.size(0)):
                    images_so_far += 1
                    ax = plt.subplot(rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                    visualize_data(inputs.cpu().data[j])

                    if images_so_far >= num_images:
                        return
    finally:
        res_model.train(mode=was_training)

### Finetune ConvNet

In [None]:
# Start from ResNet-50 with pretrained weights. `resnet50` is imported directly
# at the top of the notebook; no `models` namespace is in scope.
finetune_model = resnet50(pretrained=True)
num_ftrs = finetune_model.fc.in_features

# Replace the final fully connected layer with one output per dataset class.
finetune_model.fc = nn.Linear(num_ftrs, len(class_names))

finetune_model = finetune_model.to(device)

criterion = nn.CrossEntropyLoss()

# Every parameter of the network is handed to the optimizer.
finetune_optim = torch.optim.SGD(finetune_model.parameters(), lr=0.001, momentum=0.9)

# Scheduler consumed by the training cell: multiply the learning rate by 0.1
# every 7 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(finetune_optim, step_size=7, gamma=0.1)

### Training and Evaluation

In [None]:
# Run 25 epochs of training; `model_training` hands back the same model after
# loading the weights it tracked as best.
finetune_model = model_training(
    res_model=finetune_model,
    criterion=criterion,
    optimizer=finetune_optim,
    scheduler=exp_lr_scheduler,
    number_epochs=25,
)

### Visualize final result

In [None]:
# Plot predictions of the fine-tuned model on validation images, using the
# function's default of num_images=6.
model_visualization(finetune_model)