In [1]:
import os.path

import cv2
import numpy as np
import requests
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn

from net import *
from Dataset import *
from engine import train_one_epoch, evaluate
import utils

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


## Install pycocotools

In [2]:
def get_data(path):
    """Read a text file and return its lines.

    Args:
        path: Path of the text file to read.

    Returns:
        list[str]: Every line of the file in order, with line terminators
        preserved. An empty file yields an empty list.
    """
    # The context manager guarantees the handle is closed, even if reading
    # raises; the previous implementation never closed the file.
    with open(path, 'r') as file:
        return file.readlines()

In [3]:
# Load the line listings for the training and the test split.
train_lines, test_lines = [
    get_data(listing) for listing in ('train/files.txt', 'test/files.txt')
]

In [4]:
from sklearn.model_selection import train_test_split

# A "stack" id is the part before the first '_' of the first
# whitespace-separated token on each line. The split is made over stack ids,
# not over individual lines.
#
# The unique ids are sorted before splitting: iteration order of a set of
# strings can differ between interpreter runs (string hash randomisation), so
# without sorting the fixed random_state would not yield a reproducible split.
stacks = sorted({d.split()[0].split('_')[0] for d in train_lines})
train_stacks, val_stacks = train_test_split(stacks, random_state=1)

# Sets give constant-time membership tests when lines are assigned to a split.
train_stacks = set(train_stacks)
val_stacks = set(val_stacks)

In [5]:
# Route every training-listing line to the train or the validation subset,
# depending on whether its stack id was assigned to the training stacks.
train_data, val_data = [], []
for line in train_lines:
    stack_id = line.split()[0].split('_')[0]
    bucket = train_data if stack_id in train_stacks else val_data
    bucket.append(line)

In [6]:
# Build the three datasets in order: train, validation, test.
# The validation lines are taken from the training listing, so that dataset
# is also constructed with 'train' as its second argument.
ds_train, ds_val, ds_test = [
    OktoberfestDataset(lines, split)
    for lines, split in (
        (train_data, 'train'),
        (val_data, 'train'),
        (test_lines, 'test'),
    )
]

In [7]:
# Options common to all three loaders.
_loader_kwargs = dict(num_workers=1, collate_fn=utils.collate_fn)

# Only the training loader shuffles; validation and test keep dataset order.
dataloader_train = torch.utils.data.DataLoader(
    ds_train,
    batch_size=9,
    shuffle=True,
    **_loader_kwargs,
)
dataloader_val = torch.utils.data.DataLoader(
    ds_val,
    batch_size=11,
    shuffle=False,
    **_loader_kwargs,
)
dataloader_test = torch.utils.data.DataLoader(
    ds_test,
    batch_size=43,
    shuffle=False,
    **_loader_kwargs,
)

In [8]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Load a Faster R-CNN (ResNet-50 FPN backbone) pre-trained on COCO.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 16  # 15 classes + background

# Number of input features expected by the existing box classifier head.
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the pre-trained head with a new one sized for num_classes outputs.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# Move the model to the selected device. As the last expression of the cell,
# this also displays the model's repr.
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [9]:
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)


In [10]:
for epoch in range(10):
    # One pass over the training loader; progress is logged every 30 iterations.
    train_one_epoch(model, optimizer, dataloader_train, device, epoch, print_freq=30)
    # Advance the learning-rate schedule once per epoch.
    lr_scheduler.step()
    # Evaluate on the validation loader after each epoch.
    evaluate(model, dataloader_val, device=device)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch: [0]  [ 0/93]  eta: 0:05:32  lr: 0.000059  loss: 3.2895 (3.2895)  loss_classifier: 3.0594 (3.0594)  loss_box_reg: 0.0367 (0.0367)  loss_objectness: 0.1758 (0.1758)  loss_rpn_box_reg: 0.0176 (0.0176)  time: 3.5792  data: 0.9906  max mem: 10467
Epoch: [0]  [30/93]  eta: 0:02:50  lr: 0.001688  loss: 0.7739 (1.1667)  loss_classifier: 0.3972 (0.8449)  loss_box_reg: 0.2107 (0.1963)  loss_objectness: 0.0463 (0.1135)  loss_rpn_box_reg: 0.0089 (0.0120)  time: 2.7183  data: 0.0682  max mem: 10731
Epoch: [0]  [60/93]  eta: 0:01:30  lr: 0.003317  loss: 0.6604 (0.9210)  loss_classifier: 0.3311 (0.5960)  loss_box_reg: 0.3024 (0.2495)  loss_objectness: 0.0098 (0.0668)  loss_rpn_box_reg: 0.0050 (0.0088)  time: 2.7395  data: 0.0700  max mem: 10731
Epoch: [0]  [90/93]  eta: 0:00:08  lr: 0.004946  loss: 0.6238 (0.8330)  loss_classifier: 0.3198 (0.5110)  loss_box_reg: 0.2700 (0.2645)  loss_objectness: 0.0092 (0.0495)  loss_rpn_box_reg: 0.0040 (0.0080)  time: 2.7421  data: 0.0675  max mem: 10731
Epoc

KeyboardInterrupt: ignored

In [77]:
# Persist only the model's state_dict (not the whole module object) to 'faster.pt'.
torch.save(model.state_dict(), 'faster.pt')

In [43]:
# Fetch a single (image, target) pair from the test dataset (index 8) for a manual check.
img, target = ds_test[8]

In [46]:
# Switch to evaluation mode before running the model on the single sample.
model.eval()
# Inference needs no gradients: no_grad() stops autograd from recording the
# forward pass, which saves memory and time.
with torch.no_grad():
    ex_out = model([img.to(device)])

In [45]:
# Display the ground-truth target dict of the sampled test item for inspection.
target

{'area': tensor([4.8336e+04, 1.1200e+04, 1.2480e+04, 1.2992e+04, 1.1232e+04, 2.7200e+04,
         1.0000e-02, 1.0000e-02, 1.0000e-02, 1.0000e-02]),
 'boxes': tensor([[9.6000e+02, 4.5600e+02, 1.1880e+03, 6.6800e+02],
         [5.9600e+02, 4.4400e+02, 6.9600e+02, 5.5600e+02],
         [6.6000e+02, 3.6400e+02, 7.6400e+02, 4.8400e+02],
         [7.9200e+02, 5.0400e+02, 9.0800e+02, 6.1600e+02],
         [7.4400e+02, 5.9200e+02, 8.4800e+02, 7.0000e+02],
         [7.6400e+02, 3.6800e+02, 9.6400e+02, 5.0400e+02],
         [0.0000e+00, 0.0000e+00, 1.0000e-01, 1.0000e-01],
         [0.0000e+00, 0.0000e+00, 1.0000e-01, 1.0000e-01],
         [0.0000e+00, 0.0000e+00, 1.0000e-01, 1.0000e-01],
         [0.0000e+00, 0.0000e+00, 1.0000e-01, 1.0000e-01]]),
 'image_id': tensor([8]),
 'iscrowd': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 'labels': tensor([2, 5, 1, 1, 1, 4, 0, 0, 0, 0]),
 'num_objs': tensor(6)}

In [76]:
from collections import Counter

# Label-count recall on the test loader at a fixed score threshold of 0.5.
# Only class labels are compared per image; box positions are not checked.
total = 0  # ground-truth objects seen so far (sum of each target's 'num_objs')
rec = 0    # ground-truth objects with no matching predicted label (misses)

# Evaluation mode is set once up front, not once per batch.
model.eval()
# no_grad() keeps autograd from recording the forward passes during inference.
with torch.no_grad():
    for imgs, targets in dataloader_test:
        for img, target in zip(imgs, targets):
            # .item() keeps `total` a plain Python int.
            total += target['num_objs'].item()
            img = img.to(device)
            out = model([img])
            true_classes = Counter(target['labels'].cpu().tolist())
            # Label 0 is excluded from the ground truth (the sample target shown
            # earlier pads its label list with zeros beyond 'num_objs').
            del true_classes[0]
            confident = out[0]['scores'] > 0.5
            pred_classes = Counter(out[0]['labels'][confident].cpu().tolist())
            # Counter subtraction keeps only positive counts, i.e. the number of
            # ground-truth instances per class left without a predicted label.
            rec += sum((true_classes - pred_classes).values())

# Guard against an empty loader / no annotated objects.
recall = 1 - rec / total if total else float('nan')
print('recall', recall)

recall 0.851063829787234
