In [1]:
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
from tqdm import tqdm

import torch
from torch.utils.data import Dataset, DataLoader

# Load the training annotations; later cells read the image / xmin / ymin /
# xmax / ymax columns from this table.
box = pd.read_csv(
    '../input/car-object-detection/data/train_solution_bounding_boxes (1).csv'
)
# Bare last expression so the notebook renders the table.
box

In [2]:
# Preview: draw the first annotation row on its training frame.
# OpenCV loads images as BGR, so convert to RGB before handing off to matplotlib.
sample = cv2.cvtColor(
    cv2.imread('../input/car-object-detection/data/training_images/vid_4_1000.jpg'),
    cv2.COLOR_BGR2RGB,
)
point = box.iloc[0]
# Two opposite corners of the box, truncated to integer pixel coordinates.
pt1, pt2 = (
    (int(point['xmin']), int(point['ymax'])),
    (int(point['xmax']), int(point['ymin'])),
)
cv2.rectangle(sample, pt1, pt2, color=(255, 0, 0), thickness=2)
plt.imshow(sample)

In [3]:
# Preview: draw the third annotation row (index 2) on its training frame.
# OpenCV loads images as BGR, so convert to RGB before handing off to matplotlib.
sample = cv2.cvtColor(
    cv2.imread('../input/car-object-detection/data/training_images/vid_4_10040.jpg'),
    cv2.COLOR_BGR2RGB,
)
point = box.iloc[2]
# Two opposite corners of the box, truncated to integer pixel coordinates.
pt1, pt2 = (
    (int(point['xmin']), int(point['ymax'])),
    (int(point['xmax']), int(point['ymin'])),
)
cv2.rectangle(sample, pt1, pt2, color=(255, 0, 0), thickness=2)
plt.imshow(sample)

In [4]:
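# To train the model, each dataset target must provide the bounding boxes, the image ID, and the box area.

# Target fields
# image_id : ID of the image that the annotation belongs to.
# area : area of the box; kept so that the IoU between predicted and ground-truth boxes is easy to compute later.
# iscrowd : whether objects too small to label individually were annotated together as one crowd box
#           (not populated by CarDataset below). Detection labeling has a few such criteria to watch out for,
#           e.g. objects that are hidden or occluded.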


class CarDataset(Dataset):
    """Detection dataset that yields one image together with all of its boxes.

    Args:
        df: Annotation table with an ``image`` column (file name) and
            ``xmin`` / ``ymin`` / ``xmax`` / ``ymax`` columns.
        image_dir: Directory that contains the files named in ``df["image"]``.
        transforms: Optional callable invoked as
            ``transforms(image=..., boxes=..., labels=...)``. It must return a
            mapping with ``"image"`` and ``"boxes"`` entries.

    Each item is a tuple ``(image, target, image_id)`` where ``image`` is a
    float32 tensor in channel-first layout scaled to [0, 1], ``target`` is a
    dict with ``boxes``, ``labels``, ``image_id`` and ``area`` tensors, and
    ``image_id`` is the file name taken from ``df["image"]``.
    """

    def __init__(self, df, image_dir, transforms = None):
        super().__init__()

        # One dataset item per distinct image, in order of first appearance.
        self.image_ids = df["image"].unique()
        self.df = df
        self.image_dir = image_dir
        self.transforms = transforms

    def __len__(self):
        """Return the number of distinct images in the annotation table."""
        return self.image_ids.shape[0]

    @staticmethod
    def _box_area(boxes):
        """Return height * width per row of ``[xmin, ymin, xmax, ymax]`` boxes."""
        return (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

    def __getitem__(self, idx : int):
        """Load image ``idx`` and build its detection target.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_id = self.image_ids[idx]
        records = self.df[self.df["image"] == image_id]

        image_path = f"{self.image_dir}/{image_id}"
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None;
            # fail here with a clear message instead of inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        # HWC -> CHW
        image = torch.tensor(image).permute(2, 0, 1)

        box_values = records[["xmin", "ymin", "xmax", "ymax"]].values

        area = torch.as_tensor(self._box_area(box_values), dtype = torch.float32)

        # Every annotated box gets class index 1.
        labels = torch.ones((records.shape[0],), dtype = torch.int64)

        target = {}
        # Force float32: pandas hands back float64 (or int64) values, while the
        # image tensor and the detector weights are float32.
        target["boxes"] = torch.as_tensor(box_values, dtype = torch.float32).reshape(-1, 4)
        target["labels"] = labels
        target["image_id"] = torch.tensor([idx])
        target["area"] = area

        if self.transforms:
            sample = self.transforms(image = image, boxes = target["boxes"], labels = labels)
            image = sample["image"]
            new_boxes = sample["boxes"]
            if len(new_boxes) > 0:
                # Sequence of per-box coordinates -> (N, C) tensor.
                stacked = torch.stack(tuple(map(torch.tensor, zip(*new_boxes)))).permute(1, 0)
                target["boxes"] = stacked.to(torch.float32)
            else:
                # The transform removed every box; keep a well-formed empty tensor.
                target["boxes"] = torch.zeros((0, 4), dtype = torch.float32)
            # Keep area in sync with the (possibly resized) boxes.
            target["area"] = self._box_area(target["boxes"])

        return image, target, image_id

In [5]:
def collate_fn(batch):
    """Regroup a batch of per-sample tuples into one tuple per field.

    ``[(a1, b1), (a2, b2)]`` becomes ``((a1, a2), (b1, b2))``; nothing is
    stacked, so the individual items keep their own shapes.
    """
    per_field = zip(*batch)
    return tuple(per_field)

dir_train = "../input/car-object-detection/data/training_images"
train_ds = CarDataset(box, dir_train)

# Shuffle the training data: without it every epoch sees the same batches in
# annotation-file order. collate_fn keeps the samples as tuples instead of
# stacking them, because each image carries its own number of boxes.
train_dl = DataLoader(
    train_ds,
    batch_size=8,
    shuffle=True,
    num_workers=2,
    collate_fn=collate_fn,
)

In [6]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection import fasterrcnn_resnet50_fpn

# Start from torchvision's Faster R-CNN (ResNet-50 + FPN) with pretrained weights.
model = fasterrcnn_resnet50_fpn(pretrained=True)

In [8]:
# Swap the pretrained box-classification head for a freshly initialised one
# sized for this task; its input width must match the existing head's.
pretrained_head = model.roi_heads.box_predictor
in_features = pretrained_head.cls_score.in_features
num_classes = 2  # presumably background + car (the dataset labels every box as 1) - confirm
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
print(model)

In [9]:
# Move the network to the selected device, then optimise only the parameters
# that are still trainable.
model.to(device)
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.Adam(params, lr=5e-4, weight_decay=5e-4)

In [10]:
# Fine-tune the detector. Called with targets, the model returns a dict of
# loss terms, which are summed into a single scalar for backpropagation.
model.train()

num_epochs = 5
for epoch in range(num_epochs):
    for i, batch in enumerate(train_dl):
        images, targets, image_ids = batch

        optimizer.zero_grad()

        # Move every image and every target tensor onto the training device.
        images = [image.to(device) for image in images]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]

        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        losses.backward()
        optimizer.step()

        # Report on every 10th batch of the epoch.
        if i % 10 == 9:
            print(f'Epoch {epoch+1} - Total: {losses:.4f}, Regression: {loss_dict["loss_box_reg"]:.4f}, Classifier: {loss_dict["loss_classifier"]:.4f}')

In [18]:
# Run the fine-tuned detector on a single test image.
test_image_path = "../input/car-object-detection/data/testing_images/vid_5_26640.jpg"
score_threshold = 0.5  # keep only detections scored above this value

images = cv2.imread(test_image_path, cv2.IMREAD_COLOR)
if images is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError(f"Could not read image: {test_image_path}")
images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB).astype(np.float32)
images /= 255.0
sample = images  # RGB float image in [0, 1], kept for plotting in the next cell
images = torch.tensor(images)  # copies, so `sample` stays independent of the tensor
images = images.permute(2,0,1)  # HWC -> CHW
images = torch.unsqueeze(images, 0)  # add the batch dimension
images = images.to(device)
model.eval()
cpu_device = torch.device("cpu")

# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    outputs = model(images)
outputs = [{k : v.to(cpu_device) for k, v in t.items()} for t in outputs]
mask = outputs[0]['scores'] > score_threshold
# Truncate to integer pixel coordinates for drawing.
boxes = outputs[0]["boxes"][mask].numpy().astype(np.int32)

In [19]:
# Draw on a copy so that re-running this cell always starts from the clean image.
annotated = sample.copy()

# The loop variable is deliberately NOT called `box`: that name holds the
# annotations DataFrame loaded in the first cell, and rebinding it here would
# break any earlier cell that is run again afterwards.
for det in boxes:
    # Plain Python ints: some OpenCV builds reject numpy integer scalars as points.
    x1, y1, x2, y2 = (int(v) for v in det)
    # `sample` is a float image scaled to [0, 1], so full red is 1.0 rather than
    # an 8-bit value that matplotlib would have to clip.
    cv2.rectangle(annotated, (x1, y1), (x2, y2), (1.0, 0.0, 0.0), 3)

plt.imshow(annotated)

In [20]:
# Show the raw predictions for the test image (a list of per-image dicts that
# include the "boxes" and "scores" entries used above).
outputs