In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load (or reload) the autoreload extension; mode 2 re-imports edited modules
# so changes to local helper modules are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2

# Print the timestamp, interpreter versions and platform used for this run.
%load_ext watermark
%watermark

2019-06-20T10:00:54-04:00

CPython 3.7.3
IPython 7.5.0

compiler   : GCC 8.3.0
system     : Linux
release    : 5.0.0-15-generic
machine    : x86_64
processor  : x86_64
CPU cores  : 12
interpreter: 64bit


In [2]:
import json
from pathlib import Path
import pdb

from ipywidgets import interact
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import patches, patheffects

import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

import transforms as T
from engine import train_one_epoch, evaluate
import utils

In [3]:
# Directory layout, relative to the notebook's working directory.
DATA_DIR = Path('.') / 'data'
VAL_DIR = DATA_DIR / 'valid'
TRN_DIR = DATA_DIR / 'train'
MODEL_DIR = DATA_DIR / 'models'

# Per-split tables listing one labelled image per row.
TRN_CSV = DATA_DIR / 'train.csv'
VAL_CSV = DATA_DIR / 'valid.csv'

# Column names used in the train/valid tables.
BOX = 'box'
CAT = 'category'
PATH = 'path'
CLASS = 'class'
SUB_ID = 'subject_id'

SEED = 23

# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# machine without CUDA instead of failing at the first `.to(DEVICE)`.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.backends.cudnn.benchmark = True  # Optimizes cudnn

# Position in this list is the integer class id (0 is reserved for background).
CATS = ['background', 'handwritten', 'typewritten']
CLASSES = len(CATS)

# Checkpoint holding the trained model and optimizer state.
STATE = MODEL_DIR / 'checkpoint_02.pth.tar'

In [4]:
# Read both split tables the same way: subject_id is parsed as the index and
# then moved back to an ordinary leading column.
train, valid = (
    pd.read_csv(csv_path, index_col='subject_id').reset_index()
    for csv_path in (TRN_CSV, VAL_CSV)
)

train.head()

Unnamed: 0,subject_id,category,class,box,path,original
0,2995300,typewritten,2,"[231, 446, 368, 564]",data/train/2995300.jpg,data/images/2995300.jpg
1,4128323,typewritten,2,"[156, 322, 250, 382]",data/train/4128323.jpg,data/images/4128323.jpg
2,4128517,handwritten,1,"[155, 321, 248, 382]",data/train/4128517.jpg,data/images/4128517.jpg
3,11783370,handwritten,1,"[552, 1225, 966, 1483]",data/train/11783370.jpg,data/images/11783370.jpg
4,11782469,typewritten,2,"[612, 1182, 977, 1464]",data/train/11782469.jpg,data/images/11782469.jpg


In [5]:
class LabelBabelDataset(torch.utils.data.Dataset):
    """Detection dataset that yields one image and one labelled box per row.

    Each row of the frame provides an image path, an integer class id, a
    subject id and a single box. The area computation treats a box as
    ``[x0, y0, x1, y1]`` (presumably pixel coordinates -- confirm against the
    CSV producer).
    """

    def __init__(self, df, transforms):
        """Store a parsed copy of ``df`` and the transform pipeline.

        Args:
            df: DataFrame with the BOX, PATH, CLASS and SUB_ID columns. Boxes
                may be JSON strings (as read from CSV) or already-parsed lists.
            transforms: Callable taking ``(image, target)`` and returning the
                transformed pair, or None to skip transformation.
        """
        # Build a new frame instead of assigning into `df`: the caller's frame
        # stays untouched, so constructing a dataset twice from the same frame
        # (e.g. re-running the cell) no longer fails on already-parsed boxes.
        self.df = df.assign(**{BOX: df[BOX].apply(self._parse_box)})
        self.transforms = transforms

    @staticmethod
    def _parse_box(box):
        """Decode a JSON-encoded box; pass through a box that is already parsed."""
        return json.loads(box) if isinstance(box, str) else box

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the row at position ``idx``.

        ``target`` is a dict with the keys 'boxes', 'labels', 'image_id',
        'area' and 'iscrowd', each holding a tensor describing the single box.
        """
        subject = self.df.iloc[idx]

        image = Image.open(subject[PATH]).convert('RGB')

        klass = int(subject[CLASS])
        labels = torch.full((1,), klass, dtype=torch.int64)

        # Wrap the single box in a list so the tensor has shape (1, 4).
        boxes = torch.as_tensor([subject[BOX]], dtype=torch.float32)

        # height * width of each box
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([int(subject[SUB_ID])]),
            'area': area,
            'iscrowd': torch.zeros((1,), dtype=torch.int64),
        }

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self):
        """Number of rows (images) in the dataset."""
        return len(self.df)

In [6]:
# Start from the pretrained Faster R-CNN (ResNet-50 FPN backbone) ...
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# ... and replace its box-classification head with one sized for our classes,
# keeping the input width of the head it replaces.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(
    in_channels=in_features, num_classes=CLASSES)

In [7]:
def get_transform(train):
    """Compose the image/target transforms for a dataset split.

    Tensor conversion is always applied; a horizontal flip with probability
    0.5 is added only when ``train`` is truthy.
    """
    steps = [T.ToTensor()]
    if train:
        steps += [T.RandomHorizontalFlip(0.5)]
    return T.Compose(steps)

In [8]:
# Training data gets the augmenting pipeline; validation data does not.
trn_dataset = LabelBabelDataset(train, get_transform(train=True))
val_dataset = LabelBabelDataset(valid, get_transform(train=False))

torch.manual_seed(SEED)
indices = torch.randperm(len(trn_dataset)).tolist()

# Both loaders share everything except whether the samples are shuffled.
loader_options = dict(batch_size=1, num_workers=4, collate_fn=utils.collate_fn)

trn_loader = torch.utils.data.DataLoader(
    trn_dataset, shuffle=True, **loader_options)

val_loader = torch.utils.data.DataLoader(
    val_dataset, shuffle=False, **loader_options)

In [9]:
# Move the network onto the target device before collecting its parameters.
model.to(DEVICE)

# Only parameters that still require gradients are handed to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Step decay: the learning rate is multiplied by 0.1 (a 10x decrease)
# every 3 epochs.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer, step_size=3, gamma=0.1)

In [10]:
# Training loop, kept for reference but disabled: this notebook restores
# previously trained weights from the STATE checkpoint instead of retraining.
# num_epochs = 5

# for epoch in range(num_epochs):
#     train_one_epoch(model, optimizer, trn_loader, DEVICE, epoch, print_freq=100)
#     lr_scheduler.step()
#     evaluate(model, val_loader, device=DEVICE)

In [11]:
# Checkpoint save, disabled together with the training loop. When enabled it
# writes the next epoch number plus the model and optimizer state to STATE.
# state = {
#     'epoch': epoch + 1,
#     'state_dict': model.state_dict(),
#     'optimizer' : optimizer.state_dict(),
# }

# torch.save(state, STATE)

In [12]:
# Restore the trained model weights and the optimizer state from disk.
# map_location remaps the stored tensors onto DEVICE, so a checkpoint written
# from a GPU session can still be loaded where that GPU is not present.
# NOTE(security): torch.load unpickles the file -- only load trusted checkpoints.
state = torch.load(STATE, map_location=DEVICE)
model.load_state_dict(state['state_dict'])
optimizer.load_state_dict(state['optimizer'])

In [24]:
# Put the model in evaluation mode before it is used for predictions below.
model.eval()


def show_results(idx):
    """Show one validation image with its target box and the first predicted box.

    Prints the target class and the class of the first prediction, then draws
    the target box in blue and the first predicted box in red on the image.
    Only the first prediction is used (presumably the highest-scoring one --
    confirm against the model's output ordering). If the model returns no
    detections, the predicted class is reported as 'none' and no red box is
    drawn.

    Args:
        idx: Position of the sample in ``val_dataset``.
    """
    image, target = val_dataset[idx]
    target_class = CATS[target['labels'][0].item()]

    with torch.no_grad():
        prediction = model([image.to(DEVICE)])[0]

    pred_labels = prediction['labels'].cpu()
    pred_boxes = prediction['boxes'].cpu()

    # The detector can legitimately return zero detections; indexing [0]
    # unconditionally would raise IndexError for such images.
    if len(pred_labels) > 0:
        predicted_class = CATS[pred_labels[0].item()]
    else:
        predicted_class = 'none'

    print(f'Target   (blue): {target_class}')
    print(f'Predicted (red): {predicted_class}')

    # Convert the CxHxW float tensor back to an HxWxC uint8 image for drawing.
    image = Image.fromarray(image.mul(255).permute(1, 2, 0).byte().numpy())

    draw = ImageDraw.Draw(image)

    draw.rectangle(target['boxes'][0].tolist(), width=4, outline='blue')

    if len(pred_boxes) > 0:
        draw.rectangle(pred_boxes[0].tolist(), width=4, outline='red')

    plt.figure(figsize=(10, 10))
    plt.imshow(image)


# Slider over every valid row position in the validation table.
interact(show_results, idx=(0, len(valid) - 1))
# show_results(754)

interactive(children=(IntSlider(value=607, description='idx', max=1215), Output()), _dom_classes=('widget-inte…

<function __main__.show_results(idx)>