### 악천후 상황 속 자율주행을 위한 객체 탐지

<br>

[악천후 상황 속 자율주행을 위한 객체 탐지](https://www.aihub.or.kr/aihubdata/data/view.do?currMenu=115&topMenu=100&aihubDataSe=realm&dataSetSn=71626)
<br>[화훼 종류 분류](https://aifactory.space/task/2641/overview)

<br>

In [1]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection import fasterrcnn_resnet50_fpn, fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_Weights
from torchvision.transforms import functional as F
from PIL import Image
import json
import tqdm
import torchvision
import sys
import numpy as np

In [None]:
!git clone https://github.com/pytorch/vision.git

In [None]:
pip install pycocotools

In [2]:
from vision.references.detection.engine import train_one_epoch, evaluate

In [3]:
classes_of_interest = ["ambulance", "bicycle", "bus", "motorcycle", "pedestrian", "schoolBus", "trafficLight", "trafficSign", "truck", "vehicle"]
# Class name -> integer label, numbered from 0 in the order listed above.
# NOTE(review): torchvision detection models reserve label 0 for background, so
# "ambulance" (label 0) is presumably treated as background during training.
# Moving to 1-based labels has to be done together with the label -> name lookup
# used at inference time and requires retraining -- confirm before changing.
class_name_to_label = {class_name: idx for idx, class_name in enumerate(classes_of_interest)}


class CustomDataset(Dataset):
    """Detection dataset backed by a pickled ``.npy`` image dict and a JSON annotation file.

    The ``.npy`` file must hold a single object whose ``.item()`` is a mapping
    from image id to an image array. The JSON file maps each image id to a list
    of annotation dicts, each with a ``"bbox"`` and a ``"class"`` entry.
    """

    def __init__(self, npy_path, annotation_file):
        """Load every image and annotation into memory.

        Args:
            npy_path: Path to the ``.npy`` file holding the image dict.
            annotation_file: Path to the JSON annotation file.
        """
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(annotation_file, 'r', encoding='utf-8') as f:
            self.annotations = json.load(f)
        # SECURITY: allow_pickle=True executes arbitrary pickled code on load.
        # Only use this with trusted dataset files.
        self.images_dict = np.load(npy_path, allow_pickle=True).item()
        self.image_ids = list(self.images_dict.keys())

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_ids)

    @staticmethod
    def _encode_annotations(annos):
        """Convert a list of annotation dicts into ``(boxes, labels)`` tensors.

        ``boxes`` is always float32 with shape ``(N, 4)`` -- including
        ``(0, 4)`` when ``annos`` is empty, which is the shape torchvision
        detection models require for images without objects. ``labels`` is
        int64 with shape ``(N,)``.

        Raises:
            KeyError: If an annotation's class is not in ``class_name_to_label``.
        """
        # assumes every "bbox" holds exactly 4 numbers, presumably
        # [x1, y1, x2, y2] as Faster R-CNN expects -- TODO confirm
        boxes = torch.as_tensor(
            [anno["bbox"] for anno in annos], dtype=torch.float32
        ).reshape(-1, 4)
        labels = torch.tensor(
            [class_name_to_label[anno["class"]] for anno in annos],
            dtype=torch.int64,
        )
        return boxes, labels

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the sample at position ``idx``.

        ``image`` is the RGB image converted with ``to_tensor``. ``target`` is
        a dict with ``"boxes"``, ``"labels"`` and ``"image_id"`` (a one-element
        tensor holding ``idx``).

        Raises:
            KeyError: If the image id has no entry in the annotation file.
        """
        image_id = self.image_ids[idx]
        image = Image.fromarray(self.images_dict[image_id]).convert("RGB")
        image = F.to_tensor(image)

        boxes, labels = self._encode_annotations(self.annotations[image_id])
        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
        }
        return image, target

def collate_fn(batch):
    """Split a batch of samples into a list of images and a list of targets.

    The samples are kept in plain lists instead of being stacked into one
    tensor; element 0 of each sample goes to the first list and element 1 to
    the second.
    """
    images, targets = [], []
    for sample in batch:
        images.append(sample[0])
        targets.append(sample[1])
    return images, targets

def get_model(num_classes: int):
    """Build a COCO-pretrained Faster R-CNN (ResNet-50 FPN) with a new box head.

    Args:
        num_classes: Number of outputs of the replacement box predictor.

    Returns:
        The detection model with ``roi_heads.box_predictor`` replaced by a
        fresh ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    # NOTE: a fasterrcnn_resnet50_fpn_v2 variant was tried here previously and
    # left disabled; the v1 architecture below is the one in use.
    detector = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1)

    # Reuse the input width of the pretrained classification layer so the new
    # predictor plugs into the existing ROI head.
    pretrained_head = detector.roi_heads.box_predictor
    feature_dim = pretrained_head.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(feature_dim, num_classes)
    return detector

In [4]:
def main():
    """Fine-tune Faster R-CNN on the training data, saving a checkpoint per epoch.

    Loads ``data/train_x.npy`` and ``data/train_y.json``, holds out 50 randomly
    chosen samples, and trains on the rest for 10 epochs on ``'cuda'`` using
    SGD with a StepLR schedule. ``weights_<epoch>.pth`` is written after every
    epoch.

    Raises:
        ValueError: If the dataset has no more samples than the hold-out size,
            so no training data would remain.
    """
    train_npy_path = 'data/train_x.npy'
    label_path = 'data/train_y.json'
    holdout_size = 50
    device = 'cuda'

    full_dataset = CustomDataset(train_npy_path, label_path)
    if len(full_dataset) <= holdout_size:
        raise ValueError(
            f"Need more than {holdout_size} samples to split off a hold-out set, "
            f"got {len(full_dataset)}"
        )

    # Both subsets must index into the *full* dataset: the permutation holds
    # positions in full_dataset, not in the already-reduced training subset.
    indices = torch.randperm(len(full_dataset)).tolist()
    dataset = torch.utils.data.Subset(full_dataset, indices[:-holdout_size])
    dataset_test = torch.utils.data.Subset(full_dataset, indices[-holdout_size:])

    data_loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=collate_fn, num_workers=2)
    # The hold-out loader wraps the hold-out subset and keeps a fixed order.
    data_loader_test = DataLoader(dataset_test, batch_size=8, shuffle=False, collate_fn=collate_fn, num_workers=2)

    # One extra output on top of the named classes.
    num_classes = len(classes_of_interest) + 1
    model = get_model(num_classes)
    model.to(device)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

    # Multiply the learning rate by 0.1 every 3 epochs.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    num_epochs = 10

    for epoch in range(num_epochs):
        # Train for one epoch. print_freq equals the number of batches, so
        # progress is logged only at the start and end of each epoch.
        # NOTE(review): upstream torchvision `train_one_epoch` returns only the
        # metric logger; unpacking two values presumably relies on a locally
        # modified vision/references/detection/engine.py -- confirm.
        metric_logger, model = train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=len(data_loader))
        # Update the learning rate.
        lr_scheduler.step()
        # Evaluation on the hold-out set is currently disabled; re-enable with:
        # evaluate(model, data_loader_test, device=device)
        weight_save_path = f'weights_{epoch}.pth'
        torch.save(model.state_dict(), weight_save_path)

main()

  1%|          | 1/82 [00:02<03:04,  2.28s/it]

Epoch: [0]  [ 0/82]  eta: 0:03:06  lr: 0.000067  loss: 4.0141 (4.0141)  loss_classifier: 2.3879 (2.3879)  loss_box_reg: 0.5220 (0.5220)  loss_objectness: 0.5812 (0.5812)  loss_rpn_box_reg: 0.5230 (0.5230)  time: 2.2799  data: 0.6885


100%|██████████| 82/82 [00:48<00:00,  2.01it/s]

Epoch: [0]  [81/82]  eta: 0:00:00  lr: 0.005000  loss: 1.1867 (1.6699)  loss_classifier: 0.3939 (0.6815)  loss_box_reg: 0.4534 (0.5039)  loss_objectness: 0.0813 (0.1638)  loss_rpn_box_reg: 0.2620 (0.3208)  time: 0.5606  data: 0.0500


100%|██████████| 82/82 [00:48<00:00,  1.68it/s]


Epoch: [0] Total time: 0:00:48 (0.5962 s / it)


  1%|          | 1/82 [00:01<01:35,  1.18s/it]

Epoch: [1]  [ 0/82]  eta: 0:01:36  lr: 0.005000  loss: 1.1380 (1.1380)  loss_classifier: 0.4011 (0.4011)  loss_box_reg: 0.5271 (0.5271)  loss_objectness: 0.0581 (0.0581)  loss_rpn_box_reg: 0.1516 (0.1516)  time: 1.1760  data: 0.6356


100%|██████████| 82/82 [00:47<00:00,  2.17it/s]

Epoch: [1]  [81/82]  eta: 0:00:00  lr: 0.005000  loss: 0.8859 (0.9744)  loss_classifier: 0.2479 (0.2793)  loss_box_reg: 0.3632 (0.3741)  loss_objectness: 0.0563 (0.0643)  loss_rpn_box_reg: 0.2290 (0.2567)  time: 0.5569  data: 0.0511


100%|██████████| 82/82 [00:47<00:00,  1.72it/s]


Epoch: [1] Total time: 0:00:47 (0.5820 s / it)


  1%|          | 1/82 [00:01<01:36,  1.20s/it]

Epoch: [2]  [ 0/82]  eta: 0:01:38  lr: 0.005000  loss: 0.8406 (0.8406)  loss_classifier: 0.2642 (0.2642)  loss_box_reg: 0.3351 (0.3351)  loss_objectness: 0.0509 (0.0509)  loss_rpn_box_reg: 0.1904 (0.1904)  time: 1.1968  data: 0.6570


100%|██████████| 82/82 [00:47<00:00,  2.19it/s]

Epoch: [2]  [81/82]  eta: 0:00:00  lr: 0.005000  loss: 0.7895 (0.8009)  loss_classifier: 0.2160 (0.2236)  loss_box_reg: 0.3009 (0.3112)  loss_objectness: 0.0403 (0.0444)  loss_rpn_box_reg: 0.2262 (0.2218)  time: 0.5557  data: 0.0479


100%|██████████| 82/82 [00:47<00:00,  1.71it/s]


Epoch: [2] Total time: 0:00:47 (0.5837 s / it)


  1%|          | 1/82 [00:01<01:37,  1.21s/it]

Epoch: [3]  [ 0/82]  eta: 0:01:38  lr: 0.000500  loss: 0.9123 (0.9123)  loss_classifier: 0.2754 (0.2754)  loss_box_reg: 0.3400 (0.3400)  loss_objectness: 0.0290 (0.0290)  loss_rpn_box_reg: 0.2679 (0.2679)  time: 1.2067  data: 0.6666


100%|██████████| 82/82 [00:47<00:00,  2.21it/s]

Epoch: [3]  [81/82]  eta: 0:00:00  lr: 0.000500  loss: 0.6987 (0.6944)  loss_classifier: 0.1858 (0.1952)  loss_box_reg: 0.2711 (0.2762)  loss_objectness: 0.0322 (0.0335)  loss_rpn_box_reg: 0.1895 (0.1895)  time: 0.5564  data: 0.0483


100%|██████████| 82/82 [00:48<00:00,  1.71it/s]


Epoch: [3] Total time: 0:00:48 (0.5858 s / it)


  1%|          | 1/82 [00:01<01:40,  1.25s/it]

Epoch: [4]  [ 0/82]  eta: 0:01:42  lr: 0.000500  loss: 0.5020 (0.5020)  loss_classifier: 0.1299 (0.1299)  loss_box_reg: 0.1847 (0.1847)  loss_objectness: 0.0240 (0.0240)  loss_rpn_box_reg: 0.1634 (0.1634)  time: 1.2454  data: 0.7106


100%|██████████| 82/82 [00:47<00:00,  2.19it/s]

Epoch: [4]  [81/82]  eta: 0:00:00  lr: 0.000500  loss: 0.6701 (0.6687)  loss_classifier: 0.1876 (0.1886)  loss_box_reg: 0.2619 (0.2634)  loss_objectness: 0.0283 (0.0321)  loss_rpn_box_reg: 0.1637 (0.1846)  time: 0.5613  data: 0.0526


100%|██████████| 82/82 [00:48<00:00,  1.70it/s]


Epoch: [4] Total time: 0:00:48 (0.5870 s / it)


  1%|          | 1/82 [00:01<01:39,  1.23s/it]

Epoch: [5]  [ 0/82]  eta: 0:01:40  lr: 0.000500  loss: 0.6548 (0.6548)  loss_classifier: 0.1756 (0.1756)  loss_box_reg: 0.2375 (0.2375)  loss_objectness: 0.0411 (0.0411)  loss_rpn_box_reg: 0.2006 (0.2006)  time: 1.2258  data: 0.6791


100%|██████████| 82/82 [00:47<00:00,  2.19it/s]

Epoch: [5]  [81/82]  eta: 0:00:00  lr: 0.000500  loss: 0.6436 (0.6524)  loss_classifier: 0.1776 (0.1841)  loss_box_reg: 0.2660 (0.2605)  loss_objectness: 0.0292 (0.0299)  loss_rpn_box_reg: 0.1874 (0.1780)  time: 0.5573  data: 0.0475


100%|██████████| 82/82 [00:48<00:00,  1.71it/s]


Epoch: [5] Total time: 0:00:48 (0.5857 s / it)


  1%|          | 1/82 [00:01<01:39,  1.23s/it]

Epoch: [6]  [ 0/82]  eta: 0:01:40  lr: 0.000050  loss: 0.5082 (0.5082)  loss_classifier: 0.1309 (0.1309)  loss_box_reg: 0.2238 (0.2238)  loss_objectness: 0.0203 (0.0203)  loss_rpn_box_reg: 0.1332 (0.1332)  time: 1.2281  data: 0.6835


100%|██████████| 82/82 [00:47<00:00,  2.21it/s]

Epoch: [6]  [81/82]  eta: 0:00:00  lr: 0.000050  loss: 0.6435 (0.6420)  loss_classifier: 0.1860 (0.1801)  loss_box_reg: 0.2527 (0.2547)  loss_objectness: 0.0295 (0.0296)  loss_rpn_box_reg: 0.1415 (0.1776)  time: 0.5586  data: 0.0485


100%|██████████| 82/82 [00:48<00:00,  1.70it/s]


Epoch: [6] Total time: 0:00:48 (0.5871 s / it)


  1%|          | 1/82 [00:01<01:39,  1.23s/it]

Epoch: [7]  [ 0/82]  eta: 0:01:40  lr: 0.000050  loss: 0.6592 (0.6592)  loss_classifier: 0.1696 (0.1696)  loss_box_reg: 0.2526 (0.2526)  loss_objectness: 0.0227 (0.0227)  loss_rpn_box_reg: 0.2143 (0.2143)  time: 1.2246  data: 0.6795


100%|██████████| 82/82 [00:48<00:00,  2.16it/s]

Epoch: [7]  [81/82]  eta: 0:00:00  lr: 0.000050  loss: 0.6052 (0.6351)  loss_classifier: 0.1812 (0.1798)  loss_box_reg: 0.2470 (0.2536)  loss_objectness: 0.0292 (0.0284)  loss_rpn_box_reg: 0.1387 (0.1733)  time: 0.5626  data: 0.0531


100%|██████████| 82/82 [00:48<00:00,  1.70it/s]


Epoch: [7] Total time: 0:00:48 (0.5882 s / it)


  1%|          | 1/82 [00:01<01:37,  1.20s/it]

Epoch: [8]  [ 0/82]  eta: 0:01:38  lr: 0.000050  loss: 0.6466 (0.6466)  loss_classifier: 0.2153 (0.2153)  loss_box_reg: 0.2744 (0.2744)  loss_objectness: 0.0283 (0.0283)  loss_rpn_box_reg: 0.1287 (0.1287)  time: 1.1983  data: 0.6572


100%|██████████| 82/82 [00:48<00:00,  2.14it/s]

Epoch: [8]  [81/82]  eta: 0:00:00  lr: 0.000050  loss: 0.5813 (0.6348)  loss_classifier: 0.1721 (0.1790)  loss_box_reg: 0.2432 (0.2531)  loss_objectness: 0.0280 (0.0295)  loss_rpn_box_reg: 0.1331 (0.1732)  time: 0.5598  data: 0.0501


100%|██████████| 82/82 [00:48<00:00,  1.70it/s]


Epoch: [8] Total time: 0:00:48 (0.5892 s / it)


  1%|          | 1/82 [00:01<01:36,  1.19s/it]

Epoch: [9]  [ 0/82]  eta: 0:01:37  lr: 0.000005  loss: 0.5909 (0.5909)  loss_classifier: 0.1776 (0.1776)  loss_box_reg: 0.2498 (0.2498)  loss_objectness: 0.0241 (0.0241)  loss_rpn_box_reg: 0.1394 (0.1394)  time: 1.1878  data: 0.6482


 49%|████▉     | 40/82 [00:24<00:25,  1.64it/s]


KeyboardInterrupt: 

### Model Inference

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from torchvision.transforms import functional as F
from PIL import Image
import json
import torchvision
import sys
import numpy as np

In [6]:
# Class names reported in the predictions; a predicted label is used directly
# as an index into this list.
classes_of_interest = [
    "ambulance",
    "bicycle",
    "bus",
    "motorcycle",
    "pedestrian",
    "schoolBus",
    "trafficLight",
    "trafficSign",
    "truck",
    "vehicle",
]

class CustomDataset(Dataset):
    """Inference dataset that serves images from a pickled ``.npy`` dict.

    The file must hold a single object whose ``.item()`` is a mapping from
    image id to an image array.
    """

    def __init__(self, npy_path):
        """Load the whole image dict from ``npy_path`` into memory."""
        # SECURITY: allow_pickle=True unpickles arbitrary objects; only load
        # files from a trusted source.
        archive = np.load(npy_path, allow_pickle=True)
        self.images_dict = archive.item()
        self.image_ids = [*self.images_dict.keys()]

    def __len__(self):
        """Return how many images are available."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return ``(image_tensor, image_id)`` for the sample at position ``idx``."""
        key = self.image_ids[idx]
        rgb_image = Image.fromarray(self.images_dict[key]).convert("RGB")
        return F.to_tensor(rgb_image), key

def get_model(num_classes: int):
    """Create the Faster R-CNN (ResNet-50 FPN) architecture used for inference.

    The model starts from the COCO_V1 weights and has its box predictor
    replaced by a new ``FastRCNNPredictor`` with ``num_classes`` outputs.

    Args:
        num_classes: Number of outputs of the replacement box predictor.

    Returns:
        The assembled detection model.
    """
    net = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1)
    roi_heads = net.roi_heads
    # Size the new head from the pretrained classifier's input width.
    roi_heads.box_predictor = FastRCNNPredictor(
        roi_heads.box_predictor.cls_score.in_features,
        num_classes,
    )
    return net

def predict(model, dataloader, device, class_names=None):
    """Run the detector over ``dataloader`` and collect detections per image.

    Args:
        model: Detection model. It is switched to eval mode and called as
            ``model(images)`` with a list of image tensors; it must return one
            dict per image holding ``'boxes'``, ``'scores'`` and ``'labels'``
            tensors.
        dataloader: Iterable yielding ``(images, image_ids)`` batches.
        device: Device the images are moved to before the forward pass.
        class_names: Optional sequence mapping an integer label to its class
            name. Defaults to the module-level ``classes_of_interest``.

    Returns:
        Dict mapping each image id to a list of
        ``{"class": str, "bbox": list, "score": float}`` detections.
    """
    if class_names is None:
        class_names = classes_of_interest
    num_names = len(class_names)

    model.eval()
    predictions = {}
    with torch.no_grad():
        for images, image_ids in dataloader:
            images = [img.to(device) for img in images]
            outputs = model(images)
            for image_id, output in zip(image_ids, outputs):
                boxes = output['boxes'].cpu().tolist()
                scores = output['scores'].cpu().tolist()
                labels = output['labels'].cpu().tolist()
                # The model is built with one more output than there are class
                # names, so a label can fall outside `class_names`; such
                # detections are dropped instead of raising IndexError.
                # NOTE(review): labels index `class_names` directly, matching
                # the 0-based labels used for training. torchvision reserves
                # label 0 for background, so this mapping presumably needs a
                # -1 shift if training is ever moved to 1-based labels.
                predictions[image_id] = [
                    {
                        "class": class_names[label],
                        "bbox": box,
                        "score": score
                    }
                    for label, box, score in zip(labels, boxes, scores)
                    if 0 <= label < num_names
                ]
    return predictions

def _collate_batch(batch):
    """Regroup ``(image, image_id)`` samples into parallel tuples without stacking.

    Keeping images unstacked lets a batch hold images of different sizes, and
    image ids keep their original Python type so they stay valid JSON keys.
    """
    images, image_ids = zip(*batch)
    return images, image_ids


def main():
    """Run inference on the test images and write the detections to ``submit.json``.

    Builds the model, loads the fine-tuned weights from ``weights_4.pth``,
    predicts on every image in ``data/test_x.npy`` and dumps the resulting
    ``image_id -> detections`` dict as indented JSON.
    """
    model_weights = 'weights_4.pth'
    test_npy_path = 'data/test_x.npy'
    output = 'submit.json'
    # Use the GPU when present, otherwise fall back to the CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    num_classes = len(classes_of_interest) + 1
    model = get_model(num_classes)
    # map_location lets a checkpoint saved from a GPU run load on any machine;
    # the model is moved to the target device afterwards.
    model.load_state_dict(torch.load(model_weights, map_location='cpu'))
    model = model.to(device)

    test_dataset = CustomDataset(test_npy_path)
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=32,
        shuffle=False,
        num_workers=2,
        collate_fn=_collate_batch,
    )

    pred_dict = predict(model, test_dataloader, device)

    with open(output, 'w') as f:
        json.dump(pred_dict, f, indent=4)

main()

### Reference

[TORCHVISION 객체 검출 미세조정(FINETUNING) 튜토리얼](https://tutorials.pytorch.kr/intermediate/torchvision_tutorial.html)