In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

%load_ext watermark
%watermark

2019-06-20T11:01:25-04:00

CPython 3.7.3
IPython 7.5.0

compiler   : GCC 8.3.0
system     : Linux
release    : 5.0.0-15-generic
machine    : x86_64
processor  : x86_64
CPU cores  : 12
interpreter: 64bit


In [50]:
import json
from pathlib import Path
import pdb

from tqdm import tqdm
from ipywidgets import interact
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import patches, patheffects

import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

import transforms as T
from engine import train_one_epoch, evaluate
import utils

In [3]:
# Filesystem layout: everything lives under ./data.
DATA_DIR = Path('.') / 'data'
VAL_DIR = DATA_DIR / 'valid'
TRN_DIR = DATA_DIR / 'train'
MODEL_DIR = DATA_DIR / 'models'

TRN_CSV = DATA_DIR / 'train.csv'
VAL_CSV = DATA_DIR / 'valid.csv'

# Column names used in the train/valid CSV files.
BOX = 'box'
CAT = 'category'
PATH = 'path'
CLASS = 'class'
SUB_ID = 'subject_id'

SEED = 23

# Prefer the GPU, but fall back to the CPU so the notebook still runs
# (slowly) on a machine without CUDA instead of failing at the first
# `.to(DEVICE)` call.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.backends.cudnn.benchmark = True # Optimizes cudnn

# Category names indexed by integer class id; index 0 is the background class.
CATS = ['background', 'handwritten', 'typewritten']
CLASSES = len(CATS)

# Checkpoint file restored later in the notebook.
STATE = MODEL_DIR / 'checkpoint_02.pth.tar'

In [4]:
# Read both splits the same way: index on the subject id while parsing, then
# turn it back into an ordinary column with a fresh positional index.
train, valid = (
    pd.read_csv(csv_path, index_col=SUB_ID).reset_index()
    for csv_path in (TRN_CSV, VAL_CSV)
)

train.head()

Unnamed: 0,subject_id,category,class,box,path,original
0,2995300,typewritten,2,"[231, 446, 368, 564]",data/train/2995300.jpg,data/images/2995300.jpg
1,4128323,typewritten,2,"[156, 322, 250, 382]",data/train/4128323.jpg,data/images/4128323.jpg
2,4128517,handwritten,1,"[155, 321, 248, 382]",data/train/4128517.jpg,data/images/4128517.jpg
3,11783370,handwritten,1,"[552, 1225, 966, 1483]",data/train/11783370.jpg,data/images/11783370.jpg
4,11782469,typewritten,2,"[612, 1182, 977, 1464]",data/train/11782469.jpg,data/images/11782469.jpg


In [5]:
class LabelBabelDataset(torch.utils.data.Dataset):
    """Detection dataset with exactly one labelled box per image.

    Each row of the backing DataFrame supplies an image path (PATH column),
    an integer class id (CLASS), a single bounding box (BOX) and a subject
    id (SUB_ID). Items are returned as ``(image, target)`` pairs, where
    ``target`` is a dict with the keys ``boxes``, ``labels``, ``image_id``,
    ``area`` and ``iscrowd``.
    """

    def __init__(self, df, transforms):
        """Store a private copy of ``df`` with the box column parsed.

        Args:
            df: DataFrame holding the BOX, PATH, CLASS and SUB_ID columns.
                Box values may be JSON strings (as read from the CSV) or
                already-parsed lists.
            transforms: callable taking and returning ``(image, target)``,
                or None to return items untouched.
        """
        # Work on a copy so the caller's frame is not modified, and only
        # parse values that are still strings. This keeps construction
        # idempotent: building a second dataset from the same frame (e.g.
        # by re-running the cell) no longer feeds lists to json.loads.
        df = df.copy()
        df[BOX] = df[BOX].apply(
            lambda box: json.loads(box) if isinstance(box, str) else box)
        self.df = df
        self.transforms = transforms

    def __getitem__(self, idx):
        """Return the ``(image, target)`` pair for the row at position ``idx``."""
        subject = self.df.iloc[idx]

        image = Image.open(subject[PATH]).convert('RGB')

        # One label per image, shaped (1,) to line up with the single box.
        klass = int(subject[CLASS])
        labels = torch.full((1,), klass, dtype=torch.int64)

        # Wrap the single box in a list so the tensor has shape (1, 4).
        boxes = [subject[BOX]]
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        # (col 3 - col 1) * (col 2 - col 0): height * width if the boxes
        # are [x0, y0, x1, y1] - TODO confirm against the CSV producer.
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['image_id'] = torch.tensor([subject[SUB_ID]])
        target['area'] = area
        target['iscrowd'] = torch.zeros((1,), dtype=torch.int64)

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self):
        """Number of rows (images) in the dataset."""
        return self.df.shape[0]

In [6]:
# Start from torchvision's Faster R-CNN (ResNet-50 FPN backbone) with
# pretrained weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Input width of the existing classification layer in the box head.
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the box predictor with a new head sized for this notebook's
# CLASSES categories (background included).
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, CLASSES)

In [7]:
def get_transform(train):
    """Build the transform pipeline applied to each (image, target) pair.

    Args:
        train: when truthy, a random horizontal flip (probability 0.5) is
            appended after the tensor conversion.

    Returns:
        A ``T.Compose`` wrapping the selected transforms.
    """
    steps = [T.ToTensor()]
    if train:
        steps.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(steps)

In [8]:
# Wrap both splits; only the training split gets the augmenting transforms.
trn_dataset = LabelBabelDataset(train, get_transform(train=True))
val_dataset = LabelBabelDataset(valid, get_transform(train=False))

# Seed torch's global RNG. `indices` is not referenced again in the visible
# cells, but the randperm call still consumes RNG state, so removing it
# would presumably change later shuffling - leave the order as is.
torch.manual_seed(SEED)
indices = torch.randperm(len(trn_dataset)).tolist()

# One image per batch; the training loader reshuffles, the validation loader
# keeps dataset order so positions line up with rows of `valid`.
trn_loader = torch.utils.data.DataLoader(
    trn_dataset, batch_size=1, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

In [9]:
# Move the model onto the configured device before building the optimizer.
model.to(DEVICE)

# Hand the optimizer only the parameters that still require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Learning rate schedule: multiply the rate by gamma (a 10x reduction) once
# every step_size scheduler steps, i.e. every 3 epochs when stepped per epoch.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

In [10]:
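# Training loop, currently disabled: a later cell restores a saved checkpoint
# from STATE instead of retraining.
# num_epochs = 5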

# for epoch in range(num_epochs):
#     train_one_epoch(model, optimizer, trn_loader, DEVICE, epoch, print_freq=100)
#     lr_scheduler.step()
#     evaluate(model, val_loader, device=DEVICE)

In [11]:
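# Checkpoint writer, currently disabled: saves the model and optimizer state
# to STATE (needs `epoch` from the training loop).
# state = {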
#     'epoch': epoch + 1,
#     'state_dict': model.state_dict(),
#     'optimizer' : optimizer.state_dict(),
# }

# torch.save(state, STATE)

In [12]:
# Restore the trained weights and the optimizer state from the checkpoint.
# map_location remaps the stored tensors onto DEVICE, so a checkpoint that
# was written on a GPU still loads when the notebook runs on another device.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
state = torch.load(STATE, map_location=DEVICE)
model.load_state_dict(state['state_dict'])
optimizer.load_state_dict(state['optimizer'])

In [102]:
def intersetion_over_area(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1, box2: indexable sequences of four numbers read as
            ``[x0, y0, x1, y1]`` with ``x0 <= x1`` and ``y0 <= y1``.

    Returns:
        The overlap area divided by the area of the union, between 0 and 1.
        Returns 0.0 when the union has no area (both boxes degenerate).
    """
    area1 = abs(box1[2] - box1[0]) * abs(box1[3] - box1[1])
    area2 = abs(box2[2] - box2[0]) * abs(box2[3] - box2[1])

    # Clamp each side of the overlap separately. Clamping only the product
    # is wrong: boxes that are disjoint on both axes have two negative
    # extents whose product is positive, which reads as a real overlap.
    inter_w = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    inter_h = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    inter = inter_w * inter_h

    union = area1 + area2 - inter
    if union <= 0:
        # Two zero-area boxes: avoid dividing by zero.
        return 0.0
    return inter / union

In [103]:
model.eval()


def show_results(idx, figsize):
    """Print and draw the model's detections for one validation image.

    The target box is drawn in blue, the first predicted box in red and any
    further predicted boxes in yellow. The class comparison and IoU reported
    in the text use only the first prediction.

    Args:
        idx: position of the item in ``val_dataset``.
        figsize: side length, passed to ``plt.figure`` for a square figure.
    """
    image, target = val_dataset[idx]

    t_class = CATS[target['labels'][0].item()]
    t_box = target['boxes'][0].numpy()

    with torch.no_grad():
        prediction = model([image.to(DEVICE)])

    boxes = prediction[0]['boxes'].cpu().numpy()
    labels = prediction[0]['labels'].cpu().numpy()

    if len(boxes):
        p_class = CATS[int(labels[0])]
        iou = round(intersetion_over_area(t_box, boxes[0]), 2)
    else:
        # The model returned no detections: report that instead of
        # raising IndexError on the empty prediction tensors.
        p_class = 'no detection'
        iou = 0.0

    class_flag = '' if t_class == p_class else 'NOT EQUAL'
    box_flag = '' if iou >= 0.8 else 'AREA MISMATCH'

    print(f'{target["image_id"][0].item()}')
    print(f'Target   (blue): {t_class}')
    print(f'Predicted (red): {p_class}  {class_flag}')
    print(f'IoU:             {iou:0.2}         {box_flag}')

    # Convert the CHW float tensor back into an HWC uint8 image for drawing.
    image = Image.fromarray(image.mul(255).permute(1, 2, 0).byte().numpy())

    draw = ImageDraw.Draw(image)

    # Boxes are handed to Pillow as plain lists of numbers.
    draw.rectangle(t_box.tolist(), width=4, outline='blue')

    # Draw in reverse so the first prediction is painted last, on top.
    for rank, box in reversed(list(enumerate(boxes))):
        color = 'red' if rank == 0 else 'yellow'
        draw.rectangle(box.tolist(), width=4, outline=color)

    plt.figure(figsize=(figsize, figsize))
    plt.imshow(image)


# Browse the validation set interactively: the (min, max) tuple for `idx`
# becomes an integer slider over every row of `valid`, and the list for
# `figsize` becomes a dropdown of figure sizes.
interact(
    show_results,
    idx=(0, valid.shape[0] - 1),
    figsize=([5, 8, 10, 12, 15, 20, 25]))
# show_results(0)

# Author's notes on validation indices (the labels are not explained here):
# Sideways: 518-541  748-754
# Interesting: 763,1122
# Better: 1182,1213

interactive(children=(IntSlider(value=607, description='idx', max=1215), Dropdown(description='figsize', optio…

<function __main__.show_results(idx, figsize)>

In [56]:
model.eval()


def bin_results():
    """Run the model over the validation set and tally the outcomes.

    Only the first prediction for each image is compared with the target.

    Returns:
        A dict with:
            'eq'     - count of images whose predicted class matches.
            '1->2'   - (index, image_id) pairs: target class 1, other predicted.
            '2->1'   - (index, image_id) pairs: remaining class mismatches.
            'missed' - (index, image_id) pairs where the model returned no
                       detection at all; these are also listed under '<80'.
            '>=90'   - count of images with IoU of at least 0.9.
            '<90'    - count of images with IoU in [0.8, 0.9).
            '<80'    - (index, image_id) pairs with IoU below 0.8.
            'n'      - number of images in the validation set.
    """
    results = {
        'eq': 0,
        '1->2': [],
        '2->1': [],
        'missed': [],
        '>=90': 0,
        '<90': 0,
        '<80': [],
        'n': len(val_dataset),
    }

    # Wrap the dataset itself (not enumerate) so tqdm can read its length
    # and show a total and an ETA.
    for i, (image, target) in enumerate(tqdm(val_dataset)):

        image_id = target['image_id'][0].item()

        t_class = target['labels'][0].item()
        t_box = target['boxes'][0].numpy()

        with torch.no_grad():
            prediction = model([image.to(DEVICE)])[0]

        if len(prediction['boxes']) == 0:
            # No detections: indexing [0] would raise IndexError. Record
            # the image as missed and as a box mismatch, then move on.
            results['missed'].append((i, image_id))
            results['<80'].append((i, image_id))
            continue

        p_class = prediction['labels'][0].item()
        p_box = prediction['boxes'][0].cpu().numpy()

        if t_class == p_class:
            results['eq'] += 1
        elif t_class == 1:
            results['1->2'].append((i, image_id))
        else:
            results['2->1'].append((i, image_id))

        iou = round(intersetion_over_area(t_box, p_box), 2)

        if iou < 0.8:
            results['<80'].append((i, image_id))
        elif iou < 0.9:
            results['<90'] += 1
        else:
            results['>=90'] += 1

    return results


# Compute the tallies here rather than relying on a `results` variable left
# over in the kernel, so the report survives Restart & Run All.
results = bin_results()

total = results["n"]


def _percent(count):
    """Return `count` as a percentage of all items, rounded to 2 places."""
    # Guard against an empty validation set instead of dividing by zero.
    return round(count / total * 100.0, 2) if total else 0.0


print(f'There are {total} items')
print()

print(f'There were {results["eq"]} ({_percent(results["eq"])}%) '
      'correctly identified classes')
print()

# Class confusions, listed as (dataset index, image id) pairs.
for heading, key in (
        ('Handwritten -> Typewritten', '1->2'),
        ('Typewritten -> Handwritten', '2->1')):
    print(f'{heading} errors ({_percent(len(results[key]))}%):')
    for result in results[key]:
        print(result)
    print()

print(f'Intersection over union >= 90%: {results[">=90"]} '
      f'({_percent(results[">=90"])}%)')
print()

print(f'Intersection over union 80-90%: {results["<90"]} '
      f'({_percent(results["<90"])}%)')
print()

print(f'Intersection over union < 80%: {len(results["<80"])} '
      f'({_percent(len(results["<80"]))}%)')
for result in results["<80"]:
    print(result)

There are 1216 items

There were 1133 (93.17%) correctly identified classes

Handwritten -> Typewritten errors (4.93%):
(6, 2995232)
(54, 2995528)
(64, 2995577)
(103, 4128276)
(110, 4128309)
(117, 4128343)
(149, 4128476)
(150, 4128490)
(152, 4128495)
(153, 4128501)
(168, 4128577)
(170, 4128594)
(189, 4128718)
(218, 4128891)
(279, 10667640)
(281, 10667646)
(382, 10668130)
(422, 11779826)
(443, 11779934)
(447, 11779943)
(449, 11779945)
(453, 11779971)
(458, 11779986)
(469, 11780060)
(477, 11780091)
(522, 11780315)
(540, 11780399)
(580, 11780657)
(613, 11780818)
(643, 11780960)
(691, 11781208)
(701, 11781263)
(757, 11781525)
(777, 11781624)
(789, 11781683)
(815, 11781762)
(817, 11781766)
(824, 11781792)
(877, 11782044)
(891, 11782100)
(894, 11782122)
(905, 11782164)
(925, 11782306)
(927, 11782329)
(941, 11782373)
(942, 11782375)
(943, 11782380)
(947, 11782398)
(953, 11782433)
(966, 11782507)
(979, 11782571)
(1033, 11782825)
(1054, 11782952)
(1060, 11782989)
(1065, 11783015)
(1115, 1178326