In [2]:
import os

import numpy as np
import pandas as pd
from PIL import Image, ImageDraw


import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

import torchvision
from torchvision import datasets, models, transforms
from torchvision.models.detection.retinanet import RetinaNet
from torchvision.models.detection.faster_rcnn import FasterRCNN
import  torchvision.transforms.functional as F

from functions import *
from functions_torch import *

In [3]:
# Hyper-parameters shared by the data pipeline, the detector and the optimizer.
params = {
    'target_size': (800, 1333),  # forwarded to get_transform and used as (min_size, max_size) of the model
    'batch_size': 4,             # batch size of the training DataLoader
    'lr': 0.005,                 # learning rate handed to SGD
}

In [4]:
class SDDDataSET(torch.utils.data.Dataset):
    """Single-class detection dataset backed by a header-less annotation CSV.

    The annotation file ``<root>/<typeOfDS>_annotations_pedestrian.csv`` is read
    with the columns ``image, x0, y0, x1, y1, class`` (one row per bounding box).
    One sample corresponds to one distinct value of the ``image`` column, in
    order of first appearance in the file.

    Args:
        typeOfDS: prefix of the annotation file name (the notebook uses
            ``'train'``, ``'val'`` and ``'test'``).
        transforms: optional callable invoked as ``transforms(img, target)``
            that must return the transformed ``(img, target)`` pair.
        root: directory holding the annotation CSV and against which image
            paths from the CSV are resolved. ``None`` keeps the original
            hard-coded location (``DEFAULT_ROOT``).
    """

    DEFAULT_ROOT = '/app/host/lacmus/dataset/sdd-lacmus-version/'
    BOX_COLUMNS = ['x0', 'y0', 'x1', 'y1']

    def __init__(self, typeOfDS, transforms=None, root=None):
        self.typeOfDS = typeOfDS
        self.root = self.DEFAULT_ROOT if root is None else root
        self.labels = pd.read_csv(
            os.path.join(self.root, '%s_annotations_pedestrian.csv' % typeOfDS),
            header=None,
            names=['image', 'x0', 'y0', 'x1', 'y1', 'class'])
        self.transforms = transforms
        # Cache the distinct image names once instead of recomputing them on
        # every __len__/__getitem__ call. unique() keeps first-appearance
        # order; dropna() keeps the length equal to the former nunique().
        self.image_names = self.labels.image.dropna().unique()

    def __len__(self):
        """Return the number of distinct images listed in the annotation file."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and build its detection target.

        Returns:
            ``(img, target)`` where ``img`` is the RGB PIL image (or whatever
            ``transforms`` turns it into) and ``target`` is a dict with
            ``"boxes"``, a float32 tensor of shape ``(N, 4)`` holding
            ``x0, y0, x1, y1`` per row, and ``"labels"``, an int64 tensor of
            ``N`` ones.
        """
        # load the image and the annotation rows that belong to it
        img_name = self.image_names[idx]
        img_rows = self.labels[self.labels.image == img_name]
        # convert("RGB") gives a three-channel image whatever mode the file
        # was stored in (grayscale, palette, RGBA, ...).
        img = Image.open(os.path.join(self.root, img_name)).convert("RGB")

        # Bounding box coordinates. Rows with a missing coordinate (e.g. an
        # image listed without any annotation) are dropped so they cannot turn
        # into NaN boxes; reshape keeps the (N, 4) layout even when N == 0.
        coords = img_rows[self.BOX_COLUMNS].dropna().to_numpy(dtype='float32')
        boxes = torch.as_tensor(coords, dtype=torch.float32).reshape(-1, 4)

        target = {
            "boxes": boxes,
            # there is only one class
            "labels": torch.ones((boxes.shape[0],), dtype=torch.int64),
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target


In [5]:
# Uncomment to test dataset implementation
# im_idx = 10

# dataset = SDDDataSET('test',get_transform(train=True)) 
# (image,target) = dataset[im_idx] 
# im = F.to_pil_image(image)
# draw = ImageDraw.Draw(im)

# for bb in target['boxes']:
#     draw.line([(bb[0], bb[1]), (bb[0], bb[3]), (bb[2], bb[3]),
#                (bb[2], bb[1]), (bb[0], bb[1])], width=4, fill=(255, 0, 0))

# im.show()

In [6]:
# Training and validation samples come from separate annotation files
# ('train' / 'val'); only the training pipeline is built with train=True.
dataset_train = SDDDataSET(
    'train',
    transforms=get_transform(train=True, target_size=params['target_size']),
)
dataset_val = SDDDataSET(
    'val',
    transforms=get_transform(train=False, target_size=params['target_size']),
)

# Seed torch's global RNG before the loaders are created.
torch.manual_seed(1)

# Training loader: shuffled mini-batches of params['batch_size'] images.
data_loader = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=params['batch_size'],
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn,
)

# Validation loader: one image per batch, in file order.
data_loader_val = torch.utils.data.DataLoader(
    dataset_val,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn,
)




In [7]:
# # Alternative detector: RetinaNet (ResNet-50 FPN); pretrained=False, so only the backbone starts from pretrained weights
# model = torchvision.models.detection.retinanet_resnet50_fpn(pretrained=False, num_classes=2, pretrained_backbone=True,
#                                                             min_size=params['target_size'][0], max_size = params['target_size'][1] )


In [8]:
# Faster R-CNN detector with a ResNet-50 FPN backbone. pretrained=False means
# the detection heads start untrained; only the backbone loads pretrained
# weights (pretrained_backbone=True). The dataset labels every box as class 1,
# hence num_classes=2 (presumably background + pedestrian -- confirm against
# the torchvision docs for the installed version).
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    pretrained=False,
    pretrained_backbone=True,
    num_classes=2,
    min_size=params['target_size'][0],
    max_size=params['target_size'][1],
)

In [9]:

# Pick the GPU when one is visible, otherwise fall back to the CPU, and move
# the model there before the optimizer captures its parameters.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = model.to(device)

# SGD with momentum and weight decay; learning rate comes from params['lr'].
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=params['lr'],  # lr 0.001 -> 0.005
    momentum=0.9,
    weight_decay=0.0005,
)

# Multiply the learning rate by 0.1 every 3 scheduler steps.
# NOTE: this variable shares its name with the `lr_scheduler` module imported
# at the top of the notebook and shadows it from here on.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [10]:
# Ten epochs: train, advance the LR schedule, score the validation loader at
# two IoU thresholds, then write a per-epoch checkpoint.
for epoch in range(10):
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=250)
    print ("Train done, evaluating.")

    # update the learning rate
    lr_scheduler.step()

    # run inference on the validation loader and report the metric
    inference_res = evaluate(model, data_loader_val)
    print('Inference done, computing mAp : ')
    for iou_threshold in (0.5, 0.6):
        print(evaluate_res(inference_res, iou_threshold=iou_threshold, score_threshold=0.05))
    print('Epoch Done')

    checkpoint_path = '/app/host/lacmus/weights/resnet50_FRCNN_SDD_epoch_%i.pth'%epoch
    torch.save(model.state_dict(), checkpoint_path)

Epoch: [0]  [  0/639]  eta: 0:27:17  lr: 0.000013  loss: 1.4677 (1.4677)  loss_classifier: 0.7104 (0.7104)  loss_box_reg: 0.0046 (0.0046)  loss_objectness: 0.6910 (0.6910)  loss_rpn_box_reg: 0.0618 (0.0618)  time: 2.5625  data: 1.2208  max mem: 4917
Epoch: [0]  [250/639]  eta: 0:06:16  lr: 0.001970  loss: 0.9690 (0.8856)  loss_classifier: 0.3126 (0.2825)  loss_box_reg: 0.4919 (0.2481)  loss_objectness: 0.1027 (0.2970)  loss_rpn_box_reg: 0.0497 (0.0581)  time: 0.9793  data: 0.0168  max mem: 5185
Epoch: [0]  [500/639]  eta: 0:02:15  lr: 0.003927  loss: 0.9112 (0.9261)  loss_classifier: 0.2973 (0.2998)  loss_box_reg: 0.5188 (0.3820)  loss_objectness: 0.0601 (0.1925)  loss_rpn_box_reg: 0.0312 (0.0517)  time: 0.9794  data: 0.0183  max mem: 5185
Epoch: [0]  [638/639]  eta: 0:00:00  lr: 0.005000  loss: 0.9726 (0.9412)  loss_classifier: 0.3180 (0.3053)  loss_box_reg: 0.5437 (0.4182)  loss_objectness: 0.0649 (0.1664)  loss_rpn_box_reg: 0.0412 (0.0513)  time: 0.9769  data: 0.0175  max mem: 5185


In [10]:
# # uncomment to test evaluation model and show detections

# cpu_device = torch.device("cpu")
# model.eval()
# for images, targets in data_loader:
#     g_images = list(img.to(device) for img in images)

#     if torch.cuda.is_available():
#         torch.cuda.synchronize()
#     outputs = model(g_images)

#     outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
#     res = targets, outputs
#     break

# im = F.to_pil_image(images[0])
# targets
# # im = to_pil_image(dataset[10][0])
# draw = ImageDraw.Draw(im)

# for bb in outputs[0]['boxes'][:10]:
#     draw.line([(bb[0], bb[1]), (bb[0], bb[3]), (bb[2], bb[3]),
#                (bb[2], bb[1]), (bb[0], bb[1])], width=4, fill=(255, 0, 0))

# for bb in targets[0]['boxes'][:10]:
#     draw.line([(bb[0], bb[1]), (bb[0], bb[3]), (bb[2], bb[3]),
#                (bb[2], bb[1]), (bb[0], bb[1])], width=4, fill=(0,255, 0))
# im.show()

# # # This suggests test set is far from perfect