In [1]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os
from PIL import Image
from pycocotools.coco import COCO
import numpy as np

In [2]:
data_dir = '../../../dataset/COCO2017'
img_dir = os.path.join(data_dir, 'val2017')
ann_file = os.path.join(data_dir, 'annotations/instances_val2017.json')

In [3]:
def collate_fn(batch):
    return tuple(zip(*batch))

In [4]:
class CustomCocoDetection(datasets.CocoDetection):
    def __init__(self, root, annFile, transform=None):
        super(CustomCocoDetection, self).__init__(root, annFile, transform)
        self.coco = COCO(annFile)
        self.ids = list(self.coco.imgs.keys())

    def __getitem__(self, index):
        img_id = self.ids[index]
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        anns = self.coco.loadAnns(ann_ids)
        path = self.coco.loadImgs(img_id)[0]['file_name']

        img = Image.open(os.path.join(self.root, path)).convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        boxes = []
        labels = []

        for ann in anns:
            bbox = np.array(ann['bbox'])
            bbox[2:4] = bbox[0:2] + bbox[2:4]
            
            max_val, min_val = bbox.max(), bbox.min()
            bbox = ( bbox - min_val ) / ( max_val - min_val )
            
            boxes.append(bbox)
            labels.append(ann['category_id'])
        
        if len(boxes) > 0:
            boxes = np.vstack(boxes)    
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)
        else:
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)
            
        target = {'boxes': boxes, 'labels': labels}

        return img, target

In [5]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((32,32))
])

In [6]:
valid_dataset = CustomCocoDetection(img_dir, ann_file, transform=transform)

loading annotations into memory...
Done (t=0.59s)
creating index...
index created!
loading annotations into memory...
Done (t=0.53s)
creating index...
index created!


In [7]:
valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=True, collate_fn=collate_fn)

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = fasterrcnn_resnet50_fpn(pretrained=True)

num_classes = 91  # COCO 데이터셋의 클래스 수 (배경 포함)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

model = model.to(device)



In [9]:
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [10]:
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    
    for batch_idx, (images, targets) in enumerate(valid_loader):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()

        running_loss += losses.item()

        if( batch_idx + 1 % 100 == 0 ):
            print(f'Epoch [{epoch + 1}/{num_epochs}], Batch [{batch_idx + 1}/{len(valid_loader)}], Loss: {losses.item():.4f}')

    lr_scheduler.step()
    epoch_loss = running_loss / len(valid_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}")

    """
    model.eval()
    with torch.no_grad():
        for images, targets in valid_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            outputs = model(images)
            # 검증 결과 처리 (필요 시)
    """

#torch.save(model.state_dict(), 'fasterrcnn_resnet50_coco.pth')

  return F.conv2d(input, weight, bias, self.stride,


Epoch 1/5, Loss: 0.6527
Epoch 2/5, Loss: 0.5935
Epoch 3/5, Loss: 0.5824
Epoch 4/5, Loss: 0.5666
Epoch 5/5, Loss: 0.5680


In [11]:
print("")


