In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import torch
from pathlib import Path

import xml.etree.ElementTree as ET

In [2]:
images_path = Path('./Datasets/images')
anno_path = Path('./Datasets/annotations')

In [3]:
def filelist(root, file_type):
    """Returns a fully-qualified list of filenames under root directory"""
    return [os.path.join(directory_path, f) for directory_path, directory_name, 
            files in os.walk(root) for f in files if f.endswith(file_type)]

def generate_train_df (anno_path):
    annotations = filelist(anno_path, '.xml')
    anno_list = []
    for anno_path in annotations:
        root = ET.parse(anno_path).getroot()
        anno = {}
        anno['filename'] = Path(str(images_path) + '/'+ root.find("./filename").text)
        anno['bb_info'] = [int(root.find("./object/bndbox/xmin").text),int(root.find("./object/bndbox/ymin").text),int(root.find("./object/bndbox/xmax").text),int(root.find("./object/bndbox/ymax").text)]
        anno['class'] = root.find("./object/name").text
        anno_list.append(anno)
    return pd.DataFrame(anno_list)


In [4]:
df_train = generate_train_df(anno_path)

In [5]:
df_train

Unnamed: 0,filename,bb_info,class
0,Datasets/images/road712.png,"[98, 140, 139, 182]",speedlimit
1,Datasets/images/road706.png,"[136, 92, 177, 135]",speedlimit
2,Datasets/images/road289.png,"[61, 140, 146, 227]",stop
3,Datasets/images/road538.png,"[115, 169, 149, 205]",speedlimit
4,Datasets/images/road510.png,"[89, 201, 133, 245]",speedlimit
...,...,...,...
872,Datasets/images/road535.png,"[100, 254, 180, 334]",speedlimit
873,Datasets/images/road284.png,"[111, 133, 165, 187]",speedlimit
874,Datasets/images/road290.png,"[105, 157, 171, 224]",speedlimit
875,Datasets/images/road723.png,"[115, 185, 160, 230]",speedlimit


In [6]:
#label encode target
class_dict = {'speedlimit': 0, 'stop': 1, 'crosswalk': 2, 'trafficlight': 3}
df_train['class'] = df_train['class'].apply(lambda x:  class_dict[x])

In [7]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [8]:
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.ColorJitter(brightness=0.6, contrast=0.6, saturation=0.6, hue=0.6, p=0.4),
    A.Resize(height=300, width=447, p=1.0),
    ToTensorV2()
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=[]))

In [9]:
y_transform = A.Compose([
    A.Resize(height=300, width=447, p=1.0),
    ToTensorV2()
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=[]))

In [10]:
X = df_train[['filename', 'bb_info']]
Y = df_train['class']

In [11]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

In [12]:
from torch.utils.data import Dataset, DataLoader

class RoadDataset(Dataset):
    def __init__(self, paths, bb, y, transforms=False):
        self.transforms = transforms
        self.paths = paths.values
        self.bb = bb.values
        self.y = y.values
    def __len__(self):
        return len(self.paths)
    
    def __getitem__(self, idx):
        path = self.paths[idx]
        y_class = self.y[idx]
        bb_info = self.bb[idx]
        
        image = cv2.imread(str(path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        
        if self.transforms:

            tf_result = self.transforms(image=image , bboxes=[bb_info], class_labels=y_class)
            x = tf_result['image']
            y_bb = tf_result['bboxes'][0]
            y_bb = torch.tensor(y_bb, dtype=torch.float32)
            x = torch.tensor(x, dtype=torch.float32)

        
        
        return x, y_class, y_bb

In [13]:
train_ds = RoadDataset(X_train['filename'],X_train['bb_info'] ,y_train, transforms=transform)
valid_ds = RoadDataset(X_val['filename'],X_val['bb_info'],y_val,transforms=y_transform)

In [14]:
batch_size = 11
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=batch_size)

In [43]:
import torch

import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
import torchvision

import tqdm

In [31]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor


def get_model_instance_segmentation(num_classes):
    # COCO 에서 미리 학습된 인스턴스 분할 모델을 읽어옵니다
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

    num_classes = num_classes
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

In [32]:
model = get_model_instance_segmentation(num_classes=4)

In [126]:
iterator = tqdm.tqdm(train_dl)

for images, y_class, y_bb in iterator :
    d = {}
    targets = []
    
    for i in range(len(images)):
        d['boxes'] = y_bb[[i]]
        d['labels'] = y_class[[i]]
        targets.append(d)
    output = model(images, targets)
    print(output)

  2%|▏         | 1/64 [01:30<1:34:37, 90.12s/it]

{'loss_classifier': tensor(172.5298, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0., grad_fn=<DivBackward0>), 'loss_objectness': tensor(86.1844, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.4941, grad_fn=<DivBackward0>)}


  3%|▎         | 2/64 [02:48<1:26:06, 83.33s/it]

{'loss_classifier': tensor(174.4635, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(4.1543, grad_fn=<DivBackward0>), 'loss_objectness': tensor(66.2407, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(3.1223, grad_fn=<DivBackward0>)}


  5%|▍         | 3/64 [04:05<1:21:28, 80.13s/it]

{'loss_classifier': tensor(165.3902, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0., grad_fn=<DivBackward0>), 'loss_objectness': tensor(78.2968, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.4444, grad_fn=<DivBackward0>)}


  6%|▋         | 4/64 [05:25<1:20:04, 80.08s/it]

{'loss_classifier': tensor(164.4387, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0., grad_fn=<DivBackward0>), 'loss_objectness': tensor(76.9515, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.3512, grad_fn=<DivBackward0>)}


  8%|▊         | 5/64 [06:51<1:21:08, 82.52s/it]

{'loss_classifier': tensor(193.7045, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0.3633, grad_fn=<DivBackward0>), 'loss_objectness': tensor(90.6930, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.1242, grad_fn=<DivBackward0>)}


  9%|▉         | 6/64 [08:15<1:20:02, 82.81s/it]

{'loss_classifier': tensor(166.6313, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0., grad_fn=<DivBackward0>), 'loss_objectness': tensor(70.9572, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.2494, grad_fn=<DivBackward0>)}


 11%|█         | 7/64 [09:41<1:19:36, 83.80s/it]

{'loss_classifier': tensor(165.9057, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0.5558, grad_fn=<DivBackward0>), 'loss_objectness': tensor(76.5231, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.2033, grad_fn=<DivBackward0>)}


 12%|█▎        | 8/64 [11:06<1:18:46, 84.41s/it]

{'loss_classifier': tensor(193.2180, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0., grad_fn=<DivBackward0>), 'loss_objectness': tensor(99.2584, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(2.4652, grad_fn=<DivBackward0>)}


 14%|█▍        | 9/64 [12:28<1:16:36, 83.58s/it]

{'loss_classifier': tensor(164.2964, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0., grad_fn=<DivBackward0>), 'loss_objectness': tensor(79.5439, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.9624, grad_fn=<DivBackward0>)}


 16%|█▌        | 10/64 [13:49<1:14:21, 82.62s/it]

{'loss_classifier': tensor(192.4595, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0., grad_fn=<DivBackward0>), 'loss_objectness': tensor(81.5475, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.5489, grad_fn=<DivBackward0>)}


 17%|█▋        | 11/64 [15:05<1:11:26, 80.88s/it]

{'loss_classifier': tensor(140.1245, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0., grad_fn=<DivBackward0>), 'loss_objectness': tensor(66.7225, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.2413, grad_fn=<DivBackward0>)}


 19%|█▉        | 12/64 [16:24<1:09:23, 80.06s/it]

{'loss_classifier': tensor(178.7942, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0., grad_fn=<DivBackward0>), 'loss_objectness': tensor(94.5397, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.8721, grad_fn=<DivBackward0>)}


 20%|██        | 13/64 [17:49<1:09:26, 81.71s/it]

{'loss_classifier': tensor(158.4449, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0., grad_fn=<DivBackward0>), 'loss_objectness': tensor(74.0840, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.6502, grad_fn=<DivBackward0>)}


 22%|██▏       | 14/64 [19:15<1:09:07, 82.94s/it]

{'loss_classifier': tensor(184.8655, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0., grad_fn=<DivBackward0>), 'loss_objectness': tensor(81.8620, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.2532, grad_fn=<DivBackward0>)}


 23%|██▎       | 15/64 [20:37<1:07:25, 82.57s/it]

{'loss_classifier': tensor(175.9402, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0., grad_fn=<DivBackward0>), 'loss_objectness': tensor(89.3878, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(0.3868, grad_fn=<DivBackward0>)}


 23%|██▎       | 15/64 [21:48<1:11:13, 87.21s/it]


KeyboardInterrupt: 