In [None]:
import torch

# Select the compute device: the CUDA device when torch reports one, otherwise the CPU.
if not torch.cuda.is_available():
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

In [None]:
import os
import numpy as np
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
from PIL import Image
import torchvision
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchinfo import summary
import time
import cv2
from datetime import datetime

In [None]:
# Class names; a label id read from an annotation file is used as an index into this list.
classes = [
    "Baton", "Pliers", "Hammer",
    "Powerbank", "Scissors", "Wrench",
    "Gun", "Bullet", "Sprayer",
    "Handcuffs", "Knife", "Lighter",
]

def draw_bounding_boxes(image_path, label_path):
    """Display an image with its annotated boxes and class names drawn on top.

    Args:
        image_path: Path of the image file, loaded with ``cv2.imread``.
        label_path: Path of a text annotation file with one object per line,
            five whitespace-separated numbers read as
            ``label x_min y_min w h``. ``label`` indexes the module-level
            ``classes`` list; the other values are truncated to int and drawn
            as a rectangle anchored at ``(x_min, y_min)`` with size ``w x h``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load ``image_path``.
        ValueError: If a non-blank line does not hold exactly five numbers.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    with open(label_path, 'r') as file:
        lines = file.readlines()

    fig, ax = plt.subplots()
    ax.imshow(image)

    for line in lines:
        # split() with no argument tolerates repeated spaces/tabs; blank lines
        # (e.g. a trailing newline at the end of the file) are skipped.
        fields = line.split()
        if not fields:
            continue
        label, x_min, y_min, w, h = map(float, fields)
        label = int(label)
        x_min, y_min, w, h = int(x_min), int(y_min), int(w), int(h)

        rect = patches.Rectangle((x_min, y_min), w, h, linewidth=2, edgecolor='red', facecolor='none')
        ax.add_patch(rect)

        # Draw on the explicit axes instead of relying on pyplot's "current axes".
        ax.text(x_min, y_min, classes[label], color='green', fontsize=12, backgroundcolor='white')

    plt.show()

# Example: visualise the annotations of one training image.
image_path = "../pidray/train/images/xray_00101.png"
label_path = "../pidray/train/labels/xray_00101.txt"

draw_bounding_boxes(image_path=image_path, label_path=label_path)

In [None]:
def generate_boxes(label_path):
    """Read an annotation file and return its boxes as corner coordinates.

    Each non-blank line must hold five whitespace-separated numbers, read as
    ``label x_min y_min w h``. The label is ignored here.

    Args:
        label_path: Path of the annotation text file.

    Returns:
        A list with one ``[x_min, y_min, x_min + w, y_min + h]`` entry (floats)
        per annotated object; an empty list when the file has no objects.

    Raises:
        ValueError: If a non-blank line does not hold exactly five numbers.
    """
    boxes = []
    with open(label_path, 'r') as file:
        for line in file:
            # split() without a separator copes with repeated spaces, tabs and
            # trailing whitespace; blank lines are skipped instead of crashing.
            fields = line.split()
            if not fields:
                continue
            _label, x_min, y_min, w, h = map(float, fields)
            boxes.append([x_min, y_min, x_min + w, y_min + h])

    return boxes


def generate_labels(label_path):
    """Read an annotation file and return the class label of every object.

    Each non-blank line must hold five whitespace-separated numbers, read as
    ``label x_min y_min w h``; only the label is kept.

    Args:
        label_path: Path of the annotation text file.

    Returns:
        A list of int labels in file order; empty when the file has no objects.

    Raises:
        ValueError: If a non-blank line does not hold exactly five numbers.
    """
    labels = []
    with open(label_path, 'r') as file:
        for line in file:
            # Tolerate repeated separators and skip blank lines (for example a
            # trailing empty line at the end of the file).
            fields = line.split()
            if not fields:
                continue
            label, _x_min, _y_min, _w, _h = map(float, fields)
            labels.append(int(label))

    return labels

def check_ratio(new_size, old_size):
    """Return the per-axis scale factors that map ``old_size`` onto ``new_size``.

    Both arguments are indexed at positions 0 and 1; the result is
    ``(new_size[0] / old_size[0], new_size[1] / old_size[1])``, which the
    callers in this notebook use as (height ratio, width ratio).
    """
    new_h, new_w = new_size[0], new_size[1]
    old_h, old_w = old_size[0], old_size[1]
    return (new_h / old_h, new_w / old_w)

# Quick sanity check of the helpers on sample files.
print(generate_boxes(label_path))
print(generate_labels(label_path))
print(
    check_ratio(
        (800, 800),
        old_size=cv2.imread('../pidray/train/images/xray_17232.png').shape[:2],
    )
)

In [None]:
# Target image size (height, width) for the resize transform.
new_size = (800, 800)

class PidrayDataset(torch.utils.data.Dataset):
    """Map-style detection dataset pairing each image with its boxes and labels.

    Images are ``.png`` files in ``image_dir``; the annotation of ``name.png``
    is read from ``name.txt`` in ``annotation_dir`` through the module-level
    ``generate_boxes`` / ``generate_labels`` helpers.
    """

    def __init__(self, transforms, image_dir, annotation_dir):
        """Store the configuration and index the image files.

        Args:
            transforms: Callable applied to the RGB PIL image, or a falsy
                value to return the PIL image untouched.
            image_dir: Directory holding the ``.png`` images.
            annotation_dir: Directory holding the matching ``.txt`` label files.
        """
        self.transforms = transforms
        self.image_dir = image_dir
        self.annotation_dir = annotation_dir
        # os.listdir returns entries in arbitrary order and may include
        # non-image entries; sort for a reproducible index and keep only the
        # .png files the label lookup below is able to pair.
        self.img_names = sorted(
            name for name in os.listdir(image_dir)
            if name.lower().endswith('.png')
        )

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the sample at position ``idx``.

        ``target`` is a dict with ``'labels'`` (int64 tensor of shape ``(N,)``)
        and ``'boxes'`` (float32 tensor of shape ``(N, 4)`` holding
        ``[x_min, y_min, x_max, y_max]``), rescaled to the size of the returned
        image. ``N`` may be 0 for an image without annotations.
        """
        img_name = self.img_names[idx]
        img_path = os.path.join(self.image_dir, img_name)
        # splitext swaps only the real extension, never a '.png' inside the stem.
        label_path = os.path.join(self.annotation_dir, os.path.splitext(img_name)[0] + '.txt')

        image = Image.open(img_path).convert("RGB")
        old_w, old_h = image.size  # PIL reports (width, height)
        if self.transforms:
            image = self.transforms(image)

        # Derive the scale from the image actually returned, so boxes stay
        # aligned whether or not the transform resized it (and without decoding
        # the file a second time just to learn its shape).
        if isinstance(image, torch.Tensor):
            new_h, new_w = image.shape[-2:]
        elif isinstance(image, Image.Image):
            new_w, new_h = image.size
        else:
            # Unknown image container: leave the boxes in original pixel units.
            new_w, new_h = old_w, old_h
        w_ratio = new_w / old_w
        h_ratio = new_h / old_h

        # NOTE(review): torchvision detection models reserve label 0 for the
        # background, while labels are passed through exactly as read from the
        # file. If the label files are 0-based (as indexing `classes` with them
        # suggests), class 0 would be treated as background - confirm.
        labels = generate_labels(label_path)
        boxes = generate_boxes(label_path)

        # reshape(-1, 4) keeps an empty annotation list as a (0, 4) array so the
        # scaling below still broadcasts.
        boxes = np.array(boxes, dtype=np.float64).reshape(-1, 4)
        boxes = boxes * np.array([w_ratio, h_ratio, w_ratio, h_ratio])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {}
        target['labels'] = labels
        target['boxes'] = boxes

        return image, target

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.img_names)

# Preprocessing pipeline: Compose runs the listed steps in order -
# ToTensor converts the image to a torch tensor, Resize scales it to new_size.
trans = transforms.Compose([transforms.ToTensor(), transforms.Resize(new_size)])

def collate_fn(batch):
    """Collate ``(image, target)`` samples into one image tensor and a target list.

    Images are stacked along a new leading dimension with ``torch.stack``;
    each target dict is shallow-copied and the copies are returned as a list,
    in sample order.
    """
    image_seq, target_seq = zip(*batch)
    return torch.stack(image_seq), [dict(target.items()) for target in target_seq]

In [None]:
from random import shuffle

from sympy import false, true


# Datasets: training reads the 'baton' split, evaluation reads the 'easy' split.
train_dataset = PidrayDataset(trans, '../pidray/baton/images','../pidray/baton/labels')
test_dataset = PidrayDataset(trans, '../pidray/easy/images','../pidray/easy/labels')

train_batch_size = 12
test_batch_size = 12
# `shuffle` takes Python's built-in booleans. The sympy `true`/`false`
# singletons imported above only happened to work through their truthiness.
train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=train_batch_size, collate_fn = collate_fn, shuffle=True)
test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=test_batch_size, collate_fn = collate_fn, shuffle=False)


# 데이터 차원 확인하는 코드
# for images, targets in train_data_loader:
#     print("images: ", images.shape)
#     for target in targets:
#         print("labels: ", target['labels'].shape)
#         print("boxes: ", target['boxes'].shape)
#     print()

In [None]:
def get_model_instance_segmentation(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a fresh box head.

    Loads torchvision's ``fasterrcnn_resnet50_fpn`` with ``weights="DEFAULT"``
    and replaces its box predictor with a ``FastRCNNPredictor`` that has
    ``num_classes + 1`` outputs (the extra slot is presumably the background
    class - confirm against the label encoding).

    Args:
        num_classes: Number of object classes, not counting the extra slot.

    Returns:
        The modified detection model.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes+1)
    return detector

In [None]:
# Class names indexed by label id (repeated from the earlier cell).
classes = [
    "Baton", "Pliers", "Hammer", "Powerbank",
    "Scissors", "Wrench", "Gun", "Bullet",
    "Sprayer", "Handcuffs", "Knife", "Lighter",
]
num_classes = len(classes)

# Build the detector and move it to the GPU when one is available.
model = get_model_instance_segmentation(num_classes)

device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"Using {device} device")
model.to(device)

In [None]:
num_epochs = 10
# Optimise only the parameters that still require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.0001,
    momentum=0.9,
    weight_decay=0.0005,
)

In [None]:
print('----------------------train start--------------------------')
print(f"학습 시작 시각 : {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
train_start = time.time()

# Checkpoints are written to ./model after every epoch; create the folder up
# front so the first torch.save does not fail on a fresh checkout.
os.makedirs('./model', exist_ok=True)

# NOTE(review): the epoch index starts at 3, which looks like a resumed run,
# but no earlier checkpoint is loaded in the visible cells - confirm.
for epoch in range(3,num_epochs):
    epoch_start = time.time()
    model.train()
    epoch_loss = 0.0
    # Each batch is a list of image tensors plus one target dict per image.
    for batch_num, (imgs, annotations) in enumerate(train_data_loader):
        imgs = [img.to(device) for img in imgs]
        annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
        loss_dict = model(imgs, annotations)
        losses = sum(loss for loss in loss_dict.values())    # total loss of this batch

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Accumulate a plain Python float: summing the loss tensor itself would
        # keep every batch's autograd history referenced for the whole epoch.
        batch_loss = losses.item()
        epoch_loss += batch_loss    # running loss of this epoch

        # Log the first five batches, then every 250th batch.
        if batch_num < 5 or batch_num % 250 == 0:
            print(f'epoch : {epoch+1}, batch_num:{batch_num+1}, Loss : {batch_loss}, time : {time.time() - epoch_start}, current_time : {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')

    # torch.save(model.state_dict(), PATH) stores only the learned parameters.
    torch.save(model.state_dict(),f'./model/model_{epoch}.pt')
    print(f'epoch : {epoch+1}, Loss : {epoch_loss}, time : {time.time() - epoch_start}')

print(f"학습 완료 시간 : {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"총 소요시간 : {time.time()-train_start}")

In [None]:
# torch.save(model.state_dict(),f'/model/model_{x}.pt')

모델 테스트

In [None]:
# NOTE(review): with the visible settings (range(3, num_epochs), num_epochs = 10)
# the training cell writes model_3.pt .. model_9.pt, so model_11.pt has to come
# from an earlier run - confirm which checkpoint is intended.
checkpoint_path = f'./model/model_{11}.pt'
# torch.load deserialises the file first; load_state_dict expects the resulting
# dict, not a path. map_location lets a checkpoint saved on a GPU be restored on
# whichever device is in use now (including CPU-only machines).
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.to(device)

In [None]:
def make_prediction(model, img, threshold):
    """Run the detector and keep only detections scoring above ``threshold``.

    The model is switched to eval mode and called without gradient tracking.

    Args:
        model: Detection model; ``model(img)`` must return one dict per image
            with ``'boxes'``, ``'labels'`` and ``'scores'`` tensors.
        img: Input passed to the model unchanged.
        threshold: Detections with ``score > threshold`` are kept.

    Returns:
        The list of prediction dicts returned by the model, with ``'boxes'``,
        ``'labels'`` and ``'scores'`` filtered in place.
    """
    model.eval()
    with torch.no_grad():
        preds = model(img)

    for pred in preds:
        # One vectorised comparison instead of a Python loop over every score.
        keep = pred['scores'] > threshold
        pred['boxes'] = pred['boxes'][keep]
        pred['labels'] = pred['labels'][keep]
        pred['scores'] = pred['scores'][keep]

    return preds

In [None]:
# Class names indexed by label id (repeated from the earlier cells).
classes = [
    "Baton", "Pliers", "Hammer", "Powerbank",
    "Scissors", "Wrench", "Gun", "Bullet",
    "Sprayer", "Handcuffs", "Knife", "Lighter",
]

# Plot an image tensor together with its boxes and class names.
def plot_image_from_output(img, annotation):
    """Show ``img`` with the boxes and labels of ``annotation`` drawn on top.

    Args:
        img: Image tensor; it is moved to the CPU and permuted with
            ``(1, 2, 0)`` before display (assumes a channel-first layout -
            confirm).
        annotation: Dict with ``'boxes'`` (rows unpacked as
            ``xmin, ymin, xmax, ymax``) and ``'labels'`` (integer class ids)
            tensors of matching length.
    """
    img = img.cpu().permute(1, 2, 0).numpy()

    fig, ax = plt.subplots(1)
    ax.imshow(img)

    for idx in range(len(annotation["boxes"])):
        xmin, ymin, xmax, ymax = annotation["boxes"][idx].cpu().numpy()
        label_id = annotation['labels'][idx].item()
        # The model head has one more output than len(classes), so an id outside
        # the list is possible; show the raw id instead of raising IndexError.
        label = classes[label_id] if 0 <= label_id < len(classes) else str(label_id)
        # Pick the colour from the numeric id. Comparing the class-name string
        # with 1 / 2 never matched, so every box used to be drawn in orange.
        color = 'r' if label_id == 1 else 'g' if label_id == 2 else 'orange'

        rect = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, linewidth=1, edgecolor=color, facecolor='none')
        ax.add_patch(rect)
        ax.text(xmin, ymin, label, color='white', backgroundcolor='red')

    plt.show()

In [None]:
# Run the detector on the first batch yielded by the training loader and keep
# detections scoring above 0.7.
with torch.no_grad():
    for imgs, annotations in train_data_loader:
        imgs = [img.to(device) for img in imgs]
        pred = make_prediction(model, imgs, 0.7)
        print(pred)
        break

In [None]:
# Show ground truth and prediction side by side for every image of the batch.
# Iterate over the actual batch length: a hard-coded 12 raises IndexError as
# soon as the batch is smaller (last batch, or a different batch size).
for _idx in range(len(imgs)):
    print("Target : ", annotations[_idx]['labels'])
    plot_image_from_output(imgs[_idx], annotations[_idx])
    print("Prediction : ", pred[_idx]['labels'], "score : ",pred[_idx]['scores'] )
    plot_image_from_output(imgs[_idx], pred[_idx])
    print()

모델 정확도 분석

In [None]:
from tqdm import tqdm

labels = []          # ground-truth class ids of every test object, as plain ints
preds_adj_all = []   # per batch: list of filtered prediction dicts (on CPU)
annot_all = []       # per batch: tuple/list of target dicts as yielded by the loader

for im, annot in tqdm(test_data_loader, position = 0, leave = True):
    im = [img.to(device) for img in im]
    # Targets stay on the CPU; only the images are moved to the device.

    for t in annot:
        # tolist() yields Python ints; `labels += tensor` would instead append
        # one 0-d tensor per element.
        labels.extend(t['labels'].tolist())

    with torch.no_grad():
        preds_adj = make_prediction(model, im, 0.7)
    preds_adj = [{k: v.to(torch.device('cpu')) for k, v in t.items()} for t in preds_adj]
    preds_adj_all.append(preds_adj)
    annot_all.append(annot)

In [None]:
import utils_ObjectDetection as utils

In [None]:
# Per-batch detection statistics at an IoU threshold of 0.7.
sample_metrics = []
for batch_preds, batch_annots in zip(preds_adj_all, annot_all):
    sample_metrics += utils.get_batch_statistics(batch_preds, batch_annots, iou_threshold=0.7)

# Concatenate the per-image statistics of all batches into three flat tensors.
true_positives, pred_scores, pred_labels = [torch.cat(x, 0) for x in list(zip(*sample_metrics))]
precision, recall, AP, f1, ap_class = utils.ap_per_class(true_positives, pred_scores, pred_labels, torch.tensor(labels))
mAP = torch.mean(AP)

print(f'mAP : {mAP}')
# Assumes utils.ap_per_class returns AP aligned entry-by-entry with ap_class
# (the class ids that were actually evaluated) - TODO confirm against
# utils_ObjectDetection. Indexing AP by position over all classes mislabels the
# values, or raises IndexError, when some classes are missing from the test set.
for class_id, class_ap in zip(ap_class, AP):
    class_id = int(class_id)
    class_name = classes[class_id] if 0 <= class_id < len(classes) else str(class_id)
    print(f"{class_name} AP: {class_ap}")

print(f'AP : {AP}')

모델 구조 확인

In [None]:
# Print the layer-by-layer summary for a batch of 12 three-channel 330x330 inputs.
summary(model, input_size=(12, 3, 330, 330))