In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
import torchvision
import torchvision.transforms as transforms
import time
from PIL import Image
import numpy as np
import cv2
import matplotlib.pyplot as plt
from datetime import datetime

class PIDrayDataset(Dataset):
    """Detection dataset pairing ``.png`` images with YOLO-style label files.

    Every ``<name>.png`` found in ``images_dir`` is matched with
    ``<name>.txt`` in ``labels_dir``. Each non-blank label line holds five
    whitespace-separated numbers, ``class x_center y_center width height``,
    where the four geometry values are fractions of the image size.

    Args:
        images_dir: Directory scanned for ``.png`` files.
        labels_dir: Directory containing the matching ``.txt`` label files.
        transform: Optional callable applied to the loaded RGB PIL image.
        label_offset: Integer added to every class id read from disk. The
            default of 0 keeps the ids unchanged.
            NOTE(review): torchvision detection models treat label 0 as
            background; if the label files use 0-indexed class ids,
            ``label_offset=1`` is probably required -- confirm against the data.
    """

    def __init__(self, images_dir, labels_dir, transform=None, label_offset=0):
        self.images_dir = images_dir
        self.labels_dir = labels_dir
        self.transform = transform
        self.label_offset = label_offset
        # os.listdir returns entries in arbitrary order; sort so that a given
        # index always refers to the same file across runs and machines.
        self.image_files = sorted(
            f for f in os.listdir(images_dir) if f.endswith('.png')
        )

    def __len__(self):
        """Return the number of ``.png`` images found in ``images_dir``."""
        return len(self.image_files)

    def _load_annotations(self, label_path, img_width, img_height):
        """Parse one label file into absolute-pixel boxes and class labels.

        Args:
            label_path: Path of the label text file.
            img_width: Image width in pixels, used to scale x and width.
            img_height: Image height in pixels, used to scale y and height.

        Returns:
            ``(boxes, labels)`` where ``boxes`` is a float32 tensor of shape
            ``(N, 4)`` holding ``[x_min, y_min, x_max, y_max]`` rows and
            ``labels`` is an int64 tensor of shape ``(N,)``. ``N`` is 0 when
            the file contains no annotations.

        Raises:
            ValueError: If a non-blank line does not hold exactly five numbers.
        """
        boxes = []
        labels = []
        with open(label_path, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank or whitespace-only lines carry no annotation.
                    continue
                label, x, y, w, h = map(float, fields)
                labels.append(int(label) + self.label_offset)

                # Convert relative coordinates to absolute pixel coordinates.
                x_center = x * img_width
                y_center = y * img_height
                box_width = w * img_width
                box_height = h * img_height

                # Convert centre/size form to corner form.
                boxes.append([
                    x_center - box_width / 2,
                    y_center - box_height / 2,
                    x_center + box_width / 2,
                    y_center + box_height / 2,
                ])

        # reshape(-1, 4) keeps the (N, 4) layout even when N == 0; a bare
        # as_tensor([]) would produce shape (0,) instead.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        return boxes, labels

    def __getitem__(self, idx):
        """Load the image at ``idx`` together with its detection target.

        Returns:
            ``(image, target)`` where ``image`` is the RGB image (after
            ``transform`` if one was given) and ``target`` is a dict with the
            keys ``'boxes'`` and ``'labels'``.
        """
        img_name = self.image_files[idx]
        img_path = os.path.join(self.images_dir, img_name)
        # Swap only the extension; str.replace would also rewrite a '.png'
        # that happens to occur earlier in the file name.
        stem, _ = os.path.splitext(img_name)
        label_path = os.path.join(self.labels_dir, stem + '.txt')

        # Load the image; the context manager closes the file handle promptly.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")

        # Boxes are scaled with the original image size, before any transform.
        boxes, labels = self._load_annotations(label_path, image.width, image.height)

        target = {}
        target['boxes'] = boxes
        target['labels'] = labels

        if self.transform:
            image = self.transform(image)

        return image, target

# Preprocessing pipeline: a single ToTensor step applied to each loaded image.
transform = transforms.Compose([transforms.ToTensor()])

In [2]:
# Build the datasets.
# The dataset root can be overridden through the PIDRAY_ROOT environment
# variable; when it is unset the original hard-coded location is used.
root = os.environ.get(
    'PIDRAY_ROOT',
    'E:/GitHub/Python_Study/기계학습 심화/final_exam_x-ray_cnn/pidray',
)
train_images_dir = root+'/train/images'
train_labels_dir = root+'/train/labels'
test_images_dir = root+'/test/images'
test_labels_dir = root+'/test/labels'

train_data = PIDrayDataset(train_images_dir, train_labels_dir, transform=transform)
test_data = PIDrayDataset(test_images_dir, test_labels_dir, transform=transform)


def detection_collate(batch):
    """Regroup a list of ``(image, target)`` samples into ``(images, targets)``.

    The samples are transposed into tuples rather than stacked into one
    tensor, so each image keeps its own target dict.
    """
    return tuple(zip(*batch))


batch_size = 4
# Build the data loaders; both share the same collate function.
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True, collate_fn=detection_collate)
test_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size, shuffle=False, collate_fn=detection_collate)

In [3]:
# Select the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device} device")

# Load the pretrained Faster R-CNN model.
# Author's note: weights='DEFAULT' or weights='COCO_V1' is the intended
# spelling of this argument.
detection_models = torchvision.models.detection
model = detection_models.fasterrcnn_resnet50_fpn(pretrained=True)

Using cuda device




In [4]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from datetime import datetime
import time

# Replace the classifier head so its output size matches this dataset.
num_classes = 13  # number of classes including background (12 classes + background)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(
    in_channels=in_features,
    num_classes=num_classes,
)

# Move the model to the selected device. Left as the final bare expression so
# the notebook displays the model structure.
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [5]:
# Optimizer setup.
# Only parameters that require gradients are handed to the optimizer.
params = [p for p in model.parameters() if p.requires_grad]
# Alternative kept for reference:
# optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

optimizer = optim.Adam(params, lr=0.0001)
# NOTE: the training loop in this notebook sums the losses returned by the
# model itself and does not call this criterion.
criterion = nn.CrossEntropyLoss()

In [7]:
# Training loop: one pass over train_loader per epoch, a checkpoint, then a
# rough box-count check over test_loader.
num_epochs = 10
for epoch in range(num_epochs):
    epoch_start_time = time.time()  # wall-clock start of this epoch
    model.train()
    current_loss = 0.0

    for batch_num, (images, targets) in enumerate(train_loader):
        start_time = time.time()
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Called with targets, the model returns a dict of losses; their sum
        # is the quantity that gets optimized.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # Backward propagation
        optimizer.zero_grad()   # reset the accumulated gradients
        losses.backward()
        optimizer.step()

        # Progress report every 500 batches.
        if batch_num % 500 == 0:
            current_time = time.time()
            batch_time = current_time - start_time
            total_time = current_time - epoch_start_time
            lr = optimizer.param_groups[0]['lr']
            current_loss = losses.item()
            print('epoch: %d, batch_num: %d, lr: %.4f, current_loss: %.3f, batch_time: %.3f, total_time: %.3f, ' % (
                epoch, batch_num, lr, current_loss, batch_time, total_time), "current time: ", datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    # Save the per-epoch checkpoint BEFORE evaluating, so that a failure or an
    # interruption during the evaluation pass does not lose the epoch's training.
    torch.save(model.state_dict(), 'faster_rcnn_pidray_'+str(epoch)+'.pth')

    model.eval()  # evaluation mode: dropout must not be applied at test time
    total_samples = 0
    correct_samples = 0
    with torch.no_grad():
        for images, targets in test_loader:
            images = [image.to(device) for image in images]
            # Targets stay where they are: only their box counts are read.
            outputs = model(images)

            # Simple accuracy calculation (Note: This is not a standard way to evaluate object detection models)
            for output, target in zip(outputs, targets):
                pred_boxes = output['boxes']
                true_boxes = target['boxes']

                # Just a dummy example to show how you might compare boxes
                # This doesn't represent real object detection evaluation
                if len(pred_boxes) == len(true_boxes):
                    correct_samples += 1
                total_samples += 1

    # Guard against an empty test loader instead of dividing by zero.
    if total_samples > 0:
        accuracy = 100 * float(correct_samples) / float(total_samples)
        print('Accuracy: %.3f' % accuracy)
    else:
        print('Accuracy: n/a (test_loader yielded no samples)')

# Save the final model weights
torch.save(model.state_dict(), 'faster_rcnn_pidray.pth')

  return F.conv2d(input, weight, bias, self.stride,


epoch: 0, batch_num: 0, lr: 0.0001, current_loss: 0.447, batch_time: 1.319, total_time: 1.572,  current time:  2024-05-21 01:14:41
epoch: 0, batch_num: 500, lr: 0.0001, current_loss: 0.122, batch_time: 6.192, total_time: 3592.395,  current time:  2024-05-21 02:14:33
epoch: 0, batch_num: 1000, lr: 0.0001, current_loss: 0.347, batch_time: 5.785, total_time: 7280.838,  current time:  2024-05-21 03:16:02
epoch: 0, batch_num: 1500, lr: 0.0001, current_loss: 0.218, batch_time: 11.623, total_time: 10844.396,  current time:  2024-05-21 04:15:29
epoch: 0, batch_num: 2000, lr: 0.0001, current_loss: 0.146, batch_time: 3.189, total_time: 14381.025,  current time:  2024-05-21 05:14:25
epoch: 0, batch_num: 2500, lr: 0.0001, current_loss: 0.257, batch_time: 8.307, total_time: 18153.596,  current time:  2024-05-21 06:17:22
epoch: 0, batch_num: 3000, lr: 0.0001, current_loss: 0.142, batch_time: 9.128, total_time: 22066.783,  current time:  2024-05-21 07:22:29
epoch: 0, batch_num: 3500, lr: 0.0001, curr

ValueError: not enough values to unpack (expected 3, got 2)

In [8]:
# Persist the current model weights to disk.
torch.save(obj=model.state_dict(), f='faster_rcnn_pidray.pth')

In [12]:
import torch

# Rough evaluation on a limited number of test batches.
# At most max_eval_batches batches are processed, i.e. up to
# max_eval_batches * (loader batch size) images.
max_eval_batches = 100

model.eval()  # evaluation mode: dropout must not be applied at test time
total_samples = 0
correct_samples = 0
with torch.no_grad():
    for batch_num, (images, targets) in enumerate(test_loader):
        if batch_num >= max_eval_batches:
            break

        images = [image.to(device) for image in images]
        # Targets stay where they are: only their box counts are read.
        outputs = model(images)

        # Simple accuracy calculation (Note: This is not a standard way to evaluate object detection models)
        for output, target in zip(outputs, targets):
            pred_boxes = output['boxes']
            true_boxes = target['boxes']

            # Just a dummy example to show how you might compare boxes
            # This doesn't represent real object detection evaluation
            if len(pred_boxes) == len(true_boxes):
                correct_samples += 1
            total_samples += 1

# Guard against an empty loader instead of dividing by zero.
if total_samples > 0:
    accuracy = 100 * float(correct_samples) / float(total_samples)
    print('Accuracy: %.3f' % accuracy)
else:
    print('Accuracy: n/a (no test samples were evaluated)')


KeyboardInterrupt: 