# faster_rcnn

In [2]:
import torch

if torch.cuda.is_available():    
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: NVIDIA L4


In [4]:
import os
import random
import shutil
import time
import gc
import copy

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

from glob import glob
from tqdm import tqdm

import xml.etree.ElementTree as ET
import csv
from collections import defaultdict

from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms, datasets, models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

import warnings
warnings.filterwarnings('ignore')

In [5]:
# 디렉토리 지정
all_xml_dir = '/home/sogaksa123/AIFFEL_THON/data/dataset4_0331/with_normal_224/xml_files'

# 원본 이미지
dir_train = "/home/sogaksa123/AIFFEL_THON/data/dataset4_0331/model/Yolov8/dataset/Yolo8_224/Training/data/train/images/"
dir_val = "/home/sogaksa123/AIFFEL_THON/data/dataset4_0331/model/Yolov8/dataset/Yolo8_224/Training/data/val/images/"
dir_test = "/home/sogaksa123/AIFFEL_THON/data/dataset4_0331/model/Yolov8/dataset/Yolo8_224/Training/data/test/images/"

# xml 저장 디렉토리
dir_train_xml = "/home/sogaksa123/AIFFEL_THON/data/dataset4_0331/model/Yolov8/dataset/Yolo8_224/Training/data/train/xmls/"
dir_val_xml = "/home/sogaksa123/AIFFEL_THON/data/dataset4_0331/model/Yolov8/dataset/Yolo8_224/Training/data/val/xmls/"
dir_test_xml = "/home/sogaksa123/AIFFEL_THON/data/dataset4_0331/model/Yolov8/dataset/Yolo8_224/Training/data/test/xmls/"

# csv 저장 디렉토리
csv_train = '/home/sogaksa123/AIFFEL_THON/ex/csv_train'
csv_val = '/home/sogaksa123/AIFFEL_THON/ex/csv_val'
csv_test = '/home/sogaksa123/AIFFEL_THON/ex/csv_test'

In [6]:
# csv로 df 만들어줌
box_train = pd.read_csv(csv_train)
box_val = pd.read_csv(csv_val)
box_test = pd.read_csv(csv_test)

In [7]:
class Dataset(Dataset):
    '''train_ds = Dataset(box, dir_train)'''
    def __init__(self, df, image_dir, transforms=None):
        super().__init__()
        
        self.image_ids = df["Image_Name"].unique() # all image filenames(겹치지 않게. 2978개, list array)
        self.df = df
        self.image_dir = image_dir # dir to image files
        self.transforms = transforms

    def __getitem__(self, idx: int):
        '''
        image_id: filename dataframe. 2978개
        records: 하나의 이미지에 포함된 박스들의 df
        image: filename(image_id)이미지 불러옴
        '''
        image_id = self.image_ids[idx]
        records = self.df[self.df["Image_Name"] == image_id] # 하나의 이미지에 포함된 박스들을 records라는 df로 
        
        image = cv2.imread(os.path.join(self.image_dir, image_id), cv2.IMREAD_COLOR)
        heights, widths = image.shape[:2]
        # torch에서 사용할 수 있게 바꿔줌
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0 # normalization
        image = torch.tensor(image) # ndarray to tensor
        image = image.permute(2,0,1) # 차원 순서 변경
        
        # 박스 좌표값들만 array로 반환
        boxes = records[["X_min", "Y_min", "X_max", "Y_max"]].values
        # 박스들 전체 면적을 tensor로 (box3개 -> tesnsor[3,]
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        '''   height = ymax - ymin           width = xmax - xmin      '''
        area = torch.as_tensor(area, dtype=torch.float32)
        
        masks = []
        for box in boxes:
            # 박스 크기에 맞는 ndarray 생성
            mask = np.zeros([int(heights), int(widths)], np.uint8)
            # 박스값 할당
            masks.append(cv2.rectangle(mask, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), 1, -1))
        # array to tensor
        masks = torch.tensor(masks, dtype=torch.uint8)

        label_ls = []
        for c in records['Label']:
            if c == 'crackles':
                classes = 0
            elif c == 'wheezes':
                classes = 1
            elif c == 'normal':
                classes = 2
            label_ls.append(classes)
        labels = torch.tensor(label_ls, dtype=torch.int64)

        target = {}
        target["boxes"] = torch.tensor(boxes)
        target["labels"] = labels
        target['masks'] = masks
        target["image_id"] = torch.tensor([idx])
        target["area"] = area

        num_objs = len(label_ls) 
        target["iscrowd"] = torch.zeros((num_objs,), dtype=torch.int64)

        # augmentation(지금은 None)
        if self.transforms:
            sample = {"image": image, "boxes": target["boxes"], "labels": labels}
            sample = self.transforms(**sample)
            image = sample["image"]
            target["boxes"] = torch.stack(tuple(map(torch.tensor, zip(*sample["boxes"])))).permute(1, 0)

        return image, target

    def __len__(self):
        return self.image_ids.shape[0]

In [8]:
# load

train_ds = Dataset(box_train, dir_train)
val_ds = Dataset(box_val, dir_val)
test_ds = Dataset(box_test, dir_test)

In [9]:
def collate_fn(batch):
    return tuple(zip(*batch))

train_dl = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=4, collate_fn=collate_fn)
val_dl = DataLoader(val_ds, batch_size=16, shuffle=False, num_workers=4, collate_fn=collate_fn)
test_dl = DataLoader(test_ds, batch_size=16, shuffle=False, num_workers=4, collate_fn=collate_fn)

## model

In [11]:
def get_model_instance_segmentation(num_classes):
  
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

In [12]:
model = get_model_instance_segmentation(4)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

### parameter

In [13]:
num_epochs = 30
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

## train

In [14]:
print(device)

cuda


In [16]:
print('----------------------train start--------------------------')
for epoch in range(num_epochs):
    start = time.time()
    model.train()
    i = 0    
    epoch_loss = 0
    for imgs, annotations in train_dl:
        i += 1
        imgs = list(img.to(device) for img in imgs)
        annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
        loss_dict = model(imgs, annotations) 
        losses = sum(loss for loss in loss_dict.values())        

        optimizer.zero_grad()
        losses.backward()
        optimizer.step() 
        epoch_loss += losses
    print(f'epoch : {epoch+1}, Loss : {epoch_loss}, time : {time.time() - start}')

----------------------train start--------------------------
epoch : 1, Loss : 54.321231842041016, time : 206.76458048820496
epoch : 2, Loss : 37.4193115234375, time : 204.99448108673096
epoch : 3, Loss : 33.554840087890625, time : 204.97400736808777
epoch : 4, Loss : 32.03372573852539, time : 204.95419478416443
epoch : 5, Loss : 30.220813751220703, time : 205.11805772781372
epoch : 6, Loss : 28.233070373535156, time : 204.92293787002563
epoch : 7, Loss : 26.77448844909668, time : 204.62311434745789
epoch : 8, Loss : 26.179807662963867, time : 204.6527214050293
epoch : 9, Loss : 24.72422981262207, time : 204.5974633693695
epoch : 10, Loss : 23.199947357177734, time : 204.73705387115479
epoch : 11, Loss : 22.408634185791016, time : 204.79533863067627
epoch : 12, Loss : 21.66169548034668, time : 204.7125153541565
epoch : 13, Loss : 20.837278366088867, time : 204.99058747291565
epoch : 14, Loss : 20.633699417114258, time : 204.91739130020142
epoch : 15, Loss : 19.22830581665039, time : 205

In [17]:
# weight save
torch.save(model.state_dict(),f'model_{num_epochs}.pt')
# weight load
model.load_state_dict(torch.load(f'model_{num_epochs}.pt'))

<All keys matched successfully>

## prediction

In [18]:
def make_prediction(model, img, threshold):
    model.eval()
    preds = model(img)
    for id in range(len(preds)) :
        idx_list = []

        for idx, score in enumerate(preds[id]['scores']) :
            if score > threshold : 
                idx_list.append(idx)

        preds[id]['boxes'] = preds[id]['boxes'][idx_list]
        preds[id]['labels'] = preds[id]['labels'][idx_list]
        preds[id]['scores'] = preds[id]['scores'][idx_list]

    return preds

In [20]:
with torch.no_grad(): 
    # 테스트셋 배치사이즈= 2
    for imgs, annotations in test_dl:
        imgs = list(img.to(device) for img in imgs)

        pred = make_prediction(model, imgs, 0.5)
        print(pred)
        break


[{'boxes': tensor([[ 47.4355,  54.7546, 155.9595, 219.0713],
        [  0.8857,  53.7017,  54.2913, 218.4235],
        [154.6310,  57.1863, 189.6456, 218.2519]], device='cuda:0'), 'labels': tensor([2, 2, 2], device='cuda:0'), 'scores': tensor([0.9636, 0.9215, 0.8861], device='cuda:0')}, {'boxes': tensor([[116.1536,  55.9697, 220.0185, 217.8556],
        [  3.4093,  56.1787, 124.5192, 218.2382]], device='cuda:0'), 'labels': tensor([2, 2], device='cuda:0'), 'scores': tensor([0.8441, 0.8394], device='cuda:0')}, {'boxes': tensor([[  0.6995,  55.8153,  74.9493, 219.1944],
        [161.4565,  54.0115, 224.0000, 219.3746],
        [ 72.8456,  58.9890, 161.4222, 217.7514],
        [  0.6279,  57.0063,  30.7992, 219.3555]], device='cuda:0'), 'labels': tensor([2, 2, 2, 2], device='cuda:0'), 'scores': tensor([0.9826, 0.9476, 0.9414, 0.7894], device='cuda:0')}, {'boxes': tensor([[  0.0000,  55.7511,  54.5792, 218.7144],
        [ 84.2990,  54.4633, 161.4490, 217.3032],
        [159.7381,  53.4695,

In [None]:
# _idx = 1
# print("Target : ", annotations[_idx]['labels'])
# plot_image_from_output(imgs[_idx], annotations[_idx])
# print("Prediction : ", pred[_idx]['labels'])
# plot_image_from_output(imgs[_idx], pred[_idx])

## testset evaluation

In [21]:
from tqdm import tqdm

labels = []
preds_adj_all = []
annot_all = []

for im, annot in tqdm(test_dl, position = 0, leave = True):
    im = list(img.to(device) for img in im)
    #annot = [{k: v.to(device) for k, v in t.items()} for t in annot]

    for t in annot:
        labels += t['labels']

    with torch.no_grad():
        preds_adj = make_prediction(model, im, 0.5)
        preds_adj = [{k: v.to(torch.device('cpu')) for k, v in t.items()} for t in preds_adj]
        preds_adj_all.append(preds_adj)
        annot_all.append(annot)

100%|██████████| 28/28 [00:23<00:00,  1.21it/s]


In [23]:
!git clone https://github.com/Pseudo-Lab/Tutorial-Book-Utils

%cd Tutorial-Book-Utils/
import utils_ObjectDetection as utils

Cloning into 'Tutorial-Book-Utils'...
remote: Enumerating objects: 45, done.[K
remote: Counting objects: 100% (45/45), done.[K
remote: Compressing objects: 100% (39/39), done.[K
remote: Total 45 (delta 18), reused 17 (delta 5), pack-reused 0[K
Unpacking objects: 100% (45/45), 11.60 KiB | 1.45 MiB/s, done.
/home/sogaksa123/AIFFEL_THON/ex/Tutorial-Book-Utils


In [24]:
sample_metrics = []
for batch_i in range(len(preds_adj_all)):
    sample_metrics += utils.get_batch_statistics(preds_adj_all[batch_i], annot_all[batch_i], iou_threshold=0.5) 

true_positives, pred_scores, pred_labels = [torch.cat(x, 0) for x in list(zip(*sample_metrics))]  # 배치가 전부 합쳐짐
precision, recall, AP, f1, ap_class = utils.ap_per_class(true_positives, pred_scores, pred_labels, torch.tensor(labels))
mAP = torch.mean(AP)
print(f'mAP : {mAP}')
print(f'AP : {AP}')

mAP : 0.31641158718916523
AP : tensor([0.0000, 0.3408, 0.6084], dtype=torch.float64)
