In [1]:
import glob
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd 
import json
import numpy
import torch
import os
%matplotlib inline

In [2]:
ls data/zai2019_hackaton_train/annotations/

ninedash_keypoints_train.json


In [3]:
ls data/zai2019_hackaton_train/images/train/*.jpg | head

data/zai2019_hackaton_train/images/train/000.jpg
data/zai2019_hackaton_train/images/train/001.jpg
data/zai2019_hackaton_train/images/train/002.jpg
data/zai2019_hackaton_train/images/train/003.jpg
data/zai2019_hackaton_train/images/train/004.jpg
data/zai2019_hackaton_train/images/train/005.jpg
data/zai2019_hackaton_train/images/train/006.jpg
data/zai2019_hackaton_train/images/train/007.jpg
data/zai2019_hackaton_train/images/train/008.jpg
data/zai2019_hackaton_train/images/train/009.jpg


In [4]:
with open('data/zai2019_hackaton_train/annotations/ninedash_keypoints_train.json', 'r') as f:
    data = json.load(f)

In [5]:
file_names = [img['file_name'] for img in data['images']]

In [6]:
file_id = [d['id'] for d in data['images']]
file_id_names = {k: v for k, v in zip(file_id, file_names)}

In [7]:
annotations = data['annotations']

In [8]:
map_annotations = dict()
for anno in annotations:
    id = file_id_names[anno['image_id']]
    if id in map_annotations:
        map_annotations[id].append(anno)
    else:
        map_annotations[id] = [anno]

In [9]:
class MyDataset(object):
    def __init__(self, root, transforms, file_names, file_annotations):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = []
        self.file_names = file_names
        self.file_annotations = file_annotations

    def __getitem__(self, idx):
        # load images ad masks
        img_path = os.path.join(self.root, self.file_names[idx])
        img = Image.open(img_path).convert("RGB")
        
        # TODO: load mask here 
        boxes = []
        num_objs = 0
        if self.file_names[idx] in self.file_annotations:
            objs = self.file_annotations[self.file_names[idx]]
            print(self.file_names[idx])
            # get bounding box coordinates for each mask
            num_objs = len(self.file_annotations[self.file_names[idx]])
            print(num_objs)
            
            for i in objs:
                bbox  = i['bbox']
                xmin = np.min(bbox[0])
                xmax = np.max(bbox[0] + bbox[2])
                ymin = np.min(bbox[1])
                ymax = np.max(bbox[1] + bbox[3])
                boxes.append([xmin, ymin, xmax, ymax])

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class

        image_id = torch.tensor([idx])
        if len(boxes) > 0:
            area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        else:
            area = torch.as_tensor([], dtype=torch.float32)
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = 0
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.file_names)
    

In [23]:
root = 'data/zai2019_hackaton_train/images/train/'
ds = MyDataset(root, get_transform(False), file_names, map_annotations)

In [24]:
import numpy as np
ds[2295]

(tensor([[[0.7608, 0.7647, 0.7843,  ..., 0.8941, 0.8980, 0.8980],
          [0.9059, 0.9098, 0.9137,  ..., 0.9216, 0.9216, 0.9216],
          [0.8824, 0.8784, 0.8745,  ..., 0.9412, 0.9412, 0.9373],
          ...,
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
 
         [[0.7569, 0.7608, 0.7804,  ..., 0.8902, 0.8941, 0.8941],
          [0.9020, 0.9059, 0.9098,  ..., 0.9176, 0.9176, 0.9176],
          [0.8745, 0.8745, 0.8706,  ..., 0.9373, 0.9373, 0.9333],
          ...,
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],
 
         [[0.7882, 0.7922, 0.8039,  ..., 0.9098, 0.9137, 0.9137],
          [0.9333, 0.9373, 0.9333,  ..., 0.9412, 0.9412, 0.9412],
          [0.9176, 0.9059, 0.9020,  ...,

In [12]:
len(map_annotations['580.jpg'])

8

In [13]:
len(ds)

2590

In [14]:
ds.file_names[500]

'501.jpg'

In [15]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# load a model pre-trained pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 2  # 1 class (person) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


In [16]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# load a model pre-trained pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 2  # 1 class (person) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


In [17]:
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# load a pre-trained model for classification and return
# only the features
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
# FasterRCNN needs to know the number of
# output channels in a backbone. For mobilenet_v2, it's 1280
# so we need to add it here
backbone.out_channels = 1280

# let's make the RPN generate 5 x 3 anchors per spatial
# location, with 5 different sizes and 3 different aspect
# ratios. We have a Tuple[Tuple[int]] because each feature
# map could potentially have different sizes and
# aspect ratios
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

# let's define what are the feature maps that we will
# use to perform the region of interest cropping, as well as
# the size of the crop after rescaling.
# if your backbone returns a Tensor, featmap_names is expected to
# be [0]. More generally, the backbone should return an
# OrderedDict[Tensor], and in featmap_names you can choose which
# feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)

# put the pieces together inside a FasterRCNN model
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)

In [18]:
import transforms as T

def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

In [20]:
# # %%shell

# # Download TorchVision repo to use some files from
# # references/detection
# git clone https://github.com/pytorch/vision.git
# cd vision
# git checkout v0.3.0

# cp references/detection/utils.py ../
# cp references/detection/transforms.py ../
# cp references/detection/coco_eval.py ../
# cp references/detection/engine.py ../
# cp references/detection/coco_utils.py ../

In [21]:
from torchvision import utils
from engine  import train_one_epoch, evaluate

from utils import collate_fn

def train(model):
    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # our dataset has two classes only - background and person
    num_classes = 2
    # use our dataset and defined transformations
    dataset = MyDataset(root, get_transform(train=False), file_names, map_annotations)
    dataset_test = MyDataset(root, get_transform(train=False), file_names, map_annotations)

    # split the dataset in train and test set
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-50])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=2, shuffle=True, num_workers=4, collate_fn=collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False, num_workers=4, collate_fn=collate_fn)

    # get the model using our helper function
#     model = get_model_instance_segmentation(num_classes)

    # move model to the right device
    model.to(device)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)
    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    # let's train it for 10 epochs
    num_epochs = 10

    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)

    print("That's it!")

In [22]:
train(model)

267.jpg
348.jpg
1
1
386.jpg
1
219.jpg
1


AttributeError: 'int' object has no attribute 'to'

In [28]:
'2295.jpg' in file_names

True

In [30]:
'2295.jpg' in map_annotations

False

In [None]:
map_annotations

In [60]:
torch.__version__

'1.1.0'