In [0]:
import re

import os
from os import listdir
from os.path import isfile, join

import torch
from torch.utils.data import Dataset

import torchvision
from torchvision import transforms
from torchvision import models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

import numpy as np

from PIL import Image

import xml.etree.ElementTree as ET

In [16]:
!unzip -q "drive/My Drive/data/train_data.zip"
train_folder = 'train_data/'
print('Number of files in the train folder', len(listdir(train_folder)))

!unzip -q "drive/My Drive/data/annotation.zip"
annotation_folder = 'annotation/'
print('Number of files in the annotation folder', len(listdir(annotation_folder)))

replace train_data/kinder_31.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: N
Number of files in the train folder 267
replace annotation/kinder_19.xml? [y]es, [n]o, [A]ll, [N]one, [r]ename: N
Number of files in the annotation folder 268
Cloning into 'vision'...
remote: Enumerating objects: 2, done.[K
remote: Counting objects: 100% (2/2), done.[K
remote: Compressing objects: 100% (2/2), done.[K
remote: Total 7142 (delta 1), reused 1 (delta 0), pack-reused 7140[K
Receiving objects: 100% (7142/7142), 9.86 MiB | 6.89 MiB/s, done.
Resolving deltas: 100% (4834/4834), done.
fatal: not a git repository (or any of the parent directories): .git
cp: cannot stat 'references/detection/utils.py': No such file or directory
cp: cannot stat 'references/detection/transforms.py': No such file or directory
cp: cannot stat 'references/detection/coco_eval.py': No such file or directory
cp: cannot stat 'references/detection/engine.py': No such file or directory
cp: cannot stat 'references/detection/coco_util

In [18]:
%%shell

git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.3.0

cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

HEAD is now at be37608 version check against PyTorch's CUDA version




In [0]:
device = torch.device("cuda:0")

In [0]:
image_pattern = re.compile(r'^kinder_(?P<num>\d*)\.jpg$')
annotation_pattern = re.compile(r'^kinder_(?P<num>\d*)\.xml$')

class KinderDetectionDataset(Dataset):
  
  def __init__(self, image_folder, annotation_folder, transform=None):
    self.image_folder = image_folder
    self.annotation_folder = annotation_folder
    self.transform = transform
    self.image_names = self.__get_file_names__(image_folder, image_pattern)
    self.annotation_names = self.__get_file_names__(annotation_folder, annotation_pattern)

    assert len(self.image_names) == len(self.annotation_names)
    assert self.image_names.keys() == self.annotation_names.keys()

    self.keys = sorted(list(self.image_names.keys()))
  
  def __len__(self):
    return len(self.keys)

  def __getitem__(self, index): 
    key = self.keys[index]

    image_name = self.image_names[key]
    annotation_name = self.annotation_names[key]

    image_path = os.path.join(self.image_folder, image_name)
    annotation_path = os.path.join(self.annotation_folder, annotation_name)

    img = Image.open(image_path).convert("RGB")
    target = self.__get_target__(annotation_path, index)

    if self.transform:
      img, target = self.transform(img, target)

    return img, target

  def __get_target__(self, annotation_path, index):
    target = {}
    boxes = []
    root = ET.parse(annotation_path).getroot()
    for bndbox in root.findall('object/bndbox'):
      xmin = int(bndbox[0].text)
      ymin = int(bndbox[1].text)
      xmax = int(bndbox[2].text)
      ymax = int(bndbox[3].text)
      boxes.append((xmin, ymin, xmax, ymax))

    boxes = torch.as_tensor(boxes, dtype=torch.float32)
    num_objs = len(boxes)
    labels = torch.ones((num_objs), dtype=torch.int64)
    img_id = torch.tensor([index])
    area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
    iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

    target["boxes"] = boxes
    target["labels"] = labels
    target["image_id"] = img_id
    target["area"] = area
    target["iscrowd"] = iscrowd
    return target

  def __get_file_names__(self, folder, file_name_pattern):
    file_names = {}
    for f in listdir(folder):
      match = file_name_pattern.match(f)
      if not match:
        continue
      
      num = match.group('num')
      if num == '':
        continue
      
      file_names[int(num)] = f

    return file_names

In [0]:
from engine import train_one_epoch, evaluate
import utils
import transforms

tfs = transforms.Compose([
                          transforms.ToTensor(),                       
                       ])

num_classes = 2
batch_size = 4

dataset = KinderDetectionDataset(train_folder, annotation_folder, transform=tfs)

validation_size = .3

data_size = len(dataset)
split = int(np.floor(validation_size * data_size))
indices = list(range(data_size))
np.random.shuffle(indices)

train_indices, val_indices = indices[:split], indices[split:]

train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_indices)
val_sampler = torch.utils.data.sampler.SubsetRandomSampler(val_indices)

train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, 
                                           sampler=train_sampler, collate_fn=utils.collate_fn)
val_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, 
                                         sampler=val_sampler, collate_fn=utils.collate_fn)

In [0]:
# def train_one_epoch(model, optimizer, data_loader, epoch):
#     model.train()

#     losses_sum = 0
#     for i_step, (images, targets) in enumerate(data_loader):
#         images = list(image.to(device) for image in images)
#         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

#         loss_dict = model(images, targets)

#         losses = sum(loss for loss in loss_dict.values())
#         losses_sum += losses

#         optimizer.zero_grad()
#         losses.backward()
#         optimizer.step()

#         if lr_scheduler is not None:
#             lr_scheduler.step()

#     train_loss = losses_sum / i_step
#     print('Epoch num: %f, Loss: %f' % (epoch, train_loss))


# def evaluate(model, data_loader):
#     model.eval()
#     res = {}
#     for image, targets in data_loader:
#         image = list(img.to(device) for img in image)
#         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

#         output = model(image)

#         res[targets["image_id"]] = output
    
#     return res

In [0]:
model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

In [40]:
num_epochs = 10

for epoch in range(num_epochs):
  train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=10)
  lr_scheduler.step()
  evaluate(model, val_loader, device=device)

Epoch: [0]  [ 0/20]  eta: 0:00:41  lr: 0.000268  loss: 0.0221 (0.0221)  loss_classifier: 0.0115 (0.0115)  loss_box_reg: 0.0085 (0.0085)  loss_objectness: 0.0006 (0.0006)  loss_rpn_box_reg: 0.0016 (0.0016)  time: 2.0555  data: 1.2194  max mem: 9827
Epoch: [0]  [10/20]  eta: 0:00:20  lr: 0.002897  loss: 0.0273 (0.0285)  loss_classifier: 0.0175 (0.0183)  loss_box_reg: 0.0069 (0.0080)  loss_objectness: 0.0003 (0.0006)  loss_rpn_box_reg: 0.0013 (0.0017)  time: 2.0445  data: 1.2423  max mem: 9827
Epoch: [0]  [19/20]  eta: 0:00:02  lr: 0.005000  loss: 0.0264 (0.0270)  loss_classifier: 0.0166 (0.0169)  loss_box_reg: 0.0067 (0.0072)  loss_objectness: 0.0003 (0.0010)  loss_rpn_box_reg: 0.0016 (0.0020)  time: 2.0242  data: 1.2231  max mem: 9827
Epoch: [0] Total time: 0:00:40 (2.0244 s / it)
creating index...
index created!
Test:  [ 0/47]  eta: 0:01:17  model_time: 0.3218 (0.3218)  evaluator_time: 0.0115 (0.0115)  time: 1.6535  data: 1.2340  max mem: 9827
Test:  [46/47]  eta: 0:00:01  model_time: 

In [50]:
for i_step, (images, targets) in enumerate(val_loader):
  images = list(image.to(device) for image in images)
  output = model(images)
  display(targets)
  display(output)
  break

({'area': tensor([683790.]),
  'boxes': tensor([[ 911., 1640., 1601., 2631.]]),
  'image_id': tensor([12]),
  'iscrowd': tensor([0]),
  'labels': tensor([1])},
 {'area': tensor([721876.]),
  'boxes': tensor([[1871., 1321., 2875., 2040.]]),
  'image_id': tensor([33]),
  'iscrowd': tensor([0]),
  'labels': tensor([1])},
 {'area': tensor([283833.]),
  'boxes': tensor([[1359., 1888., 2030., 2311.]]),
  'image_id': tensor([9]),
  'iscrowd': tensor([0]),
  'labels': tensor([1])},
 {'area': tensor([297141.]),
  'boxes': tensor([[1.0450e+03, 1.0000e+00, 1.7860e+03, 4.0200e+02]]),
  'image_id': tensor([174]),
  'iscrowd': tensor([0]),
  'labels': tensor([1])})

[{'boxes': tensor([[ 908.1832, 1626.5590, 1603.8842, 2575.4077]], device='cuda:0',
         grad_fn=<StackBackward>),
  'labels': tensor([1], device='cuda:0'),
  'scores': tensor([0.9994], device='cuda:0', grad_fn=<IndexBackward>)},
 {'boxes': tensor([[1878.2114, 1306.0675, 2874.2874, 2057.9871]], device='cuda:0',
         grad_fn=<StackBackward>),
  'labels': tensor([1], device='cuda:0'),
  'scores': tensor([0.9997], device='cuda:0', grad_fn=<IndexBackward>)},
 {'boxes': tensor([[1363.9120, 1871.8942, 2002.0679, 2318.9912]], device='cuda:0',
         grad_fn=<StackBackward>),
  'labels': tensor([1], device='cuda:0'),
  'scores': tensor([0.9979], device='cuda:0', grad_fn=<IndexBackward>)},
 {'boxes': tensor([[1066.0767,    5.7807, 1808.6274,  419.4587]], device='cuda:0',
         grad_fn=<StackBackward>),
  'labels': tensor([1], device='cuda:0'),
  'scores': tensor([0.9947], device='cuda:0', grad_fn=<IndexBackward>)}]