In [None]:
import os
import json
import torch
import numpy as np
import torchvision.datasets as dset
import torchvision.transforms as T
from torchvision.transforms import v2
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from pycocotools.coco import COCO
from transform import transform_COCO
from PIL import Image
import matplotlib
from matplotlib.patches import Rectangle


class COCO_Dataset(Dataset):
    """COCO train2017 dataset that yields ``(image, boxes, labels)`` samples.

    On construction the annotation file is opened with ``pycocotools`` and the
    image ids are split into two lists:

    * ``self.image_ids``    - ids with at least one ``iscrowd=False`` annotation
    * ``self.no_anno_list`` - ids without any such annotation

    NOTE(review): ``load_classes``, ``load_image`` and ``load_annotations`` are
    called below but are not defined in the visible part of this class -
    confirm they are provided elsewhere before using it.
    """

    def __init__(self, split='TRAIN'):
        """Open the annotation file and index the annotated images.

        Args:
            split: Stored as ``self.split`` and forwarded to ``transform_COCO``
                by ``__getitem__``.
        """
        super().__init__()
        self.rootDataset = os.path.join('/', 'home', 'dblab', 'coco')
        self.trainAnnoPath = os.path.join(self.rootDataset, 'annotations', 'instances_train2017.json')
        self.traincategoryPath = os.path.join(self.rootDataset, 'annotations', 'categories.json')
        self.trainImagePath = os.path.join(self.rootDataset, 'train2017')
        self.coco = COCO(self.trainAnnoPath)

        whole_image_ids = self.coco.getImgIds()  # original length of train2017 is 118287

        self.image_ids = []

        # ids of images that have no (non-crowd) annotation
        self.no_anno_list = []

        # The emptiness check has to run once per image id; it must stay
        # inside the loop, otherwise only the last id is ever classified.
        for idx in whole_image_ids:
            annotations_ids = self.coco.getAnnIds(imgIds=idx, iscrowd=False)
            if annotations_ids:
                self.image_ids.append(idx)
            else:
                self.no_anno_list.append(idx)

        self.load_classes()  # read class information
        self.split = split

    def __getitem__(self, idx):
        """Load sample ``idx`` and run it through ``transform_COCO``.

        The first four columns of the array returned by ``load_annotations``
        are used as box coordinates and the fifth column as the class label.

        Returns:
            Tuple ``(image, boxes, labels)`` as produced by ``transform_COCO``;
            the fourth value it returns is discarded.
        """
        # load_image also returns the image size, which is not needed here.
        image, _size = self.load_image(idx)

        annotation = self.load_annotations(idx)

        boxes = torch.FloatTensor(annotation[:, :4])
        labels = torch.LongTensor(annotation[:, 4])

        # data augmentation
        image, boxes, labels, _segmentations = transform_COCO(image, boxes, labels, self.split)

        return image, boxes, labels



In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
import json
import os
from tqdm import tqdm

# Path configuration
data_dir = os.path.join('/', 'home', 'dblab', 'coco')
train_dir = os.path.join(data_dir, 'train2017')
val_dir = os.path.join(data_dir, 'val2017')
annotations_path = os.path.join(data_dir, 'annotations', 'instances_train2017.json')

# Load the annotation file (its 'images', 'annotations' and 'categories'
# sections are used below)
with open(annotations_path, 'r') as f:
    annotations = json.load(f)

# Map category id -> category name
categories = {category['id']: category['name'] for category in annotations['categories']}

# Group the category ids by image id in a single pass over the annotations.
# Scanning the full annotation list once per image would cost
# O(images * annotations); this is O(images + annotations) and keeps the
# labels of each image in annotation-file order.
labels_by_image_id = {}
for annotation in annotations['annotations']:
    labels_by_image_id.setdefault(annotation['image_id'], []).append(annotation['category_id'])

# Collect every image path together with the labels that belong to it
image_paths = []
image_labels = []

for image_info in tqdm(annotations['images']):
    image_id = image_info['id']
    image_path = os.path.join(train_dir, image_info['file_name'])
    image_paths.append(image_path)

    # Images without any annotation get an empty label list.
    image_labels.append(labels_by_image_id.get(image_id, []))

# Dataset class definition
class COCODataset(Dataset):
    """Pairs image file paths with their label lists.

    Indexing opens the image file as RGB, runs the optional ``transform`` on
    it and returns ``(image, labels)``.
    """

    def __init__(self, image_paths, image_labels, transform=None):
        """Keep references to the paths, the labels and the optional transform."""
        self.transform = transform
        self.image_labels = image_labels
        self.image_paths = image_paths

    def __len__(self):
        """Return how many image paths the dataset holds."""
        paths = self.image_paths
        return len(paths)

    def __getitem__(self, idx):
        """Return ``(image, labels)`` for position ``idx``."""
        picture = Image.open(self.image_paths[idx]).convert('RGB')
        target = self.image_labels[idx]

        # Without a (truthy) transform the RGB image is handed back untouched.
        if not self.transform:
            return picture, target

        return self.transform(picture), target

# Preprocessing: resize to 224x224, convert to a tensor and normalize
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# The dataset yields raw category ids from the annotation file. Map them to
# contiguous indices in [0, num_classes) so they can address the output units.
num_classes = len(categories)
category_to_index = {category_id: index for index, category_id in enumerate(sorted(categories))}


def collate_multi_hot(batch):
    """Stack the images and encode each label list as a multi-hot row.

    An image can carry any number of category ids (including none), so the
    default collate function cannot batch the label lists. Each sample becomes
    a float row of length ``num_classes`` with 1.0 at every category that is
    present in the image.

    Args:
        batch: List of ``(image_tensor, category_id_list)`` samples.

    Returns:
        Tuple ``(images, targets)`` with shapes ``(B, C, H, W)`` and
        ``(B, num_classes)``.
    """
    images = torch.stack([image for image, _ in batch])
    targets = torch.zeros(len(batch), num_classes)
    for row, (_, category_ids) in enumerate(batch):
        for category_id in category_ids:
            targets[row, category_to_index[category_id]] = 1.0
    return images, targets


# Build the dataset and the data loader
dataset = COCODataset(image_paths, image_labels, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_multi_hot)

# Load the pretrained ResNet50
model = models.resnet50(pretrained=True)

# Replace the final layer so it emits one logit per category
model.fc = torch.nn.Linear(model.fc.in_features, num_classes)

# Move the model to the training device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Optimizer and loss. Several categories can be present in one image, so
# every class is treated as an independent binary decision.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
criterion = torch.nn.BCEWithLogitsLoss()

# Train for 5 epochs as an example
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)  # (B, num_classes) multi-hot targets

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Reports the loss of the last batch of the epoch.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}')

# The fine-tuned network is a multi-label image classifier: it scores which
# categories appear in an image but does not predict bounding boxes.


In [None]:
import torch
import torchvision
from torchvision import transforms
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torch.utils.data import DataLoader
from torchvision.datasets import CocoDetection
from pycocotools.coco import COCO

# Paths of the image folders and annotation files
train_images_dir = '/home/dblab/coco/train2017'
val_images_dir = '/home/dblab/coco/val2017'
train_annotation_path = '/home/dblab/coco/annotations/instances_train2017.json'
val_annotation_path = '/home/dblab/coco/annotations/instances_val2017.json'

# Load the COCO annotation indexes
train_coco = COCO(train_annotation_path)
val_coco = COCO(val_annotation_path)

# Fetch the category list
categories = train_coco.loadCats(train_coco.getCatIds())
print(categories)
categories_names = [category['name'] for category in categories]

# Map every category id to a contiguous 1-based label.
# NOTE(review): this mapping is not applied below. The targets keep the
# original category ids, presumably matching the label space of the
# pretrained detection head - confirm before swapping in a smaller head.
category_dict = {category['id']: i + 1 for i, category in enumerate(categories)}

# Image transform
data_transform = transforms.Compose([
    transforms.ToTensor(),
])


def to_detection_target(coco_annotations):
    """Convert CocoDetection's annotation list into a Faster R-CNN target.

    ``CocoDetection`` returns a list of annotation dicts per image, while the
    detection model expects a dict with ``boxes`` in ``[x1, y1, x2, y2]`` form
    and integer ``labels``. COCO stores boxes as ``[x, y, width, height]``.

    Boxes with non-positive width or height are dropped because the model
    rejects degenerate boxes. An image with no usable annotation yields empty
    tensors of shape ``(0, 4)`` and ``(0,)``.
    """
    boxes = []
    labels = []
    for ann in coco_annotations:
        x, y, width, height = ann['bbox']
        if width <= 0 or height <= 0:
            continue
        boxes.append([x, y, x + width, y + height])
        labels.append(ann['category_id'])
    return {
        'boxes': torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
        'labels': torch.tensor(labels, dtype=torch.int64),
    }


def collate_detection(batch):
    """Regroup samples into ``(images, targets)`` tuples without stacking.

    Images differ in size and every image has its own number of boxes, so the
    default collate function cannot stack them.
    """
    return tuple(zip(*batch))


# COCO datasets
train_dataset = CocoDetection(root=train_images_dir, annFile=train_annotation_path,
                              transform=data_transform, target_transform=to_detection_target)
val_dataset = CocoDetection(root=val_images_dir, annFile=val_annotation_path,
                            transform=data_transform, target_transform=to_detection_target)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_detection)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_detection)

# Initialise the Faster R-CNN model
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Move the model to the GPU when one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Optimizer and learning-rate schedule
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
# Not used by the loops below: the detection model returns its own losses.
criterion = torch.nn.CrossEntropyLoss()

# Training
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    for images, targets in train_loader:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    # Validation. torchvision detection models return the loss dict only in
    # training mode (eval mode returns detections), so the model stays in
    # train mode here and gradients are disabled instead.
    # NOTE(review): assumes no layer updates running statistics in train mode
    # (the pretrained backbone is expected to use frozen batch norm) - confirm.
    val_loss = 0.0
    with torch.no_grad():
        for images, targets in val_loader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())
            val_loss += losses.item()
    model.eval()

    lr_scheduler.step()
    print(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss/len(val_loader)}")


In [None]:
import os
import math
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import new_datasets.transforms as T
from torch.utils.data import DataLoader
from matplotlib.patches import Rectangle
from new_datasets.coco_utils import ConvertCocoPolysToMask
from util.label_info import coco_color_array, coco_label_list


class COCODatasetV1(torchvision.datasets.CocoDetection):
    """``CocoDetection`` variant that parses targets and batches by padding.

    Each sample goes through ``ConvertCocoPolysToMask``, then through the
    optional ``transforms`` callable, and can optionally be plotted.
    ``collate_fn`` zero-pads the images of a batch to a common size.
    """

    def __init__(self, img_folder, ann_file, transforms, visualization=False):
        """Set up the dataset.

        Args:
            img_folder: Directory with the images.
            ann_file: Path of the COCO annotation json.
            transforms: Callable ``(img, target) -> (img, target)`` or ``None``.
            visualization: When true, every fetched sample is plotted.
        """
        super().__init__(img_folder, ann_file)
        # Only image ids that have an entry in imgToAnns are kept
        # (117266 according to the original note).
        self.ids = list(sorted(self.coco.imgToAnns.keys()))
        self._parses = ConvertCocoPolysToMask()
        self._transforms = transforms
        self._visualization = visualization
        if self._visualization:
            self.coco_color = coco_color_array
            self.coco_label = coco_label_list

    def __getitem__(self, idx):
        """Return the parsed and transformed ``(img, target)`` for ``idx``."""
        img, target = super().__getitem__(idx)
        image_id = self.ids[idx]
        target = {'image_id': image_id, 'annotations': target}
        img, target = self._parses(img, target)
        if self._transforms is not None:
            img, target = self._transforms(img, target)
        if self._visualization:
            self.visualize(img, target)
        return img, target

    def collate_fn(self, batch):
        """Turn a list of samples into ``[padded_image_batch, targets]``."""
        batch = list(zip(*batch))
        batch[0] = self.batched_tensor_from_tensor_list(batch[0])
        return batch

    def max_by_axis(self, the_list):
        """Return the element-wise maximum over a list of equal-length lists.

        The result is a new list; the input lists are left unmodified.
        """
        maxes = list(the_list[0])  # copy so the caller's first list is not mutated
        for sublist in the_list[1:]:
            for index, item in enumerate(sublist):
                maxes[index] = max(maxes[index], item)
        return maxes

    def batched_tensor_from_tensor_list(self, images, size_divisible=32):
        """Zero-pad ``images`` to one shape and stack them into a batch.

        The last two dimensions of the common shape are rounded up to a
        multiple of ``size_divisible``. Each image is copied into the
        top-left corner of its slot. No ONNX-tracing path is implemented.

        Args:
            images: Sequence of 3-D tensors (indexed as ``shape[0..2]``).
            size_divisible: Stride the padded size has to be a multiple of.

        Returns:
            Tensor of shape ``[len(images)] + padded_shape``.
        """
        max_size = self.max_by_axis([list(img.shape) for img in images])
        stride = float(size_divisible)
        max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)
        max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride)

        batch_shape = [len(images)] + max_size
        batched_imgs = images[0].new_full(batch_shape, 0)
        for padded, img in zip(batched_imgs, images):
            padded[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
        return batched_imgs

    def visualize(self, img, target):
        """Plot ``img`` with the boxes and labels of ``target`` drawn on top.

        If the last step of the transform pipeline is ``T.Normalize`` the
        normalization is undone first. ``img`` must provide ``permute``.
        Requires the dataset to be built with ``visualization=True`` so that
        ``coco_color`` and ``coco_label`` exist.
        """
        # assumes img is channels-first (C, H, W); imshow wants H, W, C
        img_np = np.array(img.permute(1, 2, 0), np.float32)

        # The pipeline may be None or a callable without a ``transforms`` list.
        pipeline = getattr(self._transforms, 'transforms', None)
        last_step = pipeline[-1] if pipeline else None

        # De-Normalize
        if isinstance(last_step, T.Normalize):
            img_np *= np.array(last_step.std)
            img_np += np.array(last_step.mean)
        img_np = np.clip(img_np, 0, 1)

        # visualize img
        plt.figure('input')
        plt.imshow(img_np)

        # visualize target
        boxes = target['boxes']
        labels = target['labels']
        print('num objects : {}'.format(len(boxes)))

        for box, label in zip(boxes, labels):
            # plain Python numbers for matplotlib and for table lookups
            x1, y1, x2, y2 = (float(value) for value in box)
            label = int(label)
            color = self.coco_color[label]

            # labels
            plt.text(x=x1 - 5,
                     y=y1 - 5,
                     s=str(self.coco_label[label]),
                     bbox=dict(boxstyle='round4',
                               facecolor=color,
                               alpha=0.9))

            # boxes
            plt.gca().add_patch(Rectangle(xy=(x1, y1),
                                          width=x2 - x1,
                                          height=y2 - y1,
                                          linewidth=1,
                                          edgecolor=color,
                                          facecolor='none'))

        plt.show()
        return


if __name__ == '__main__':
    # Smoke test: build the dataset, inspect one sample, then draw a few
    # padded batches through a DataLoader.
    image_set = "train"
    root = "/home/dblab/coco"

    ann_file = os.path.join(root, 'annotations', f'instances_{image_set}2017.json')
    img_folder = os.path.join(root, f'{image_set}2017')

    transforms = T.Compose([
        T.RandomHorizontalFlip(),
        T.RandomResize([800], max_size=1333),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    dataset = COCODatasetV1(img_folder, ann_file, transforms)

    # Look at the first sample.
    img, target = dataset[0]
    print("the shape of imgs :", img.size())
    print("target['boxes'] :", target['boxes'])
    print("target keys :", target.keys())
    print("len: ", len(dataset))

    # Random batches of two samples; an incomplete final batch is dropped.
    train_sampler = torch.utils.data.RandomSampler(dataset)
    train_batch_sampler = torch.utils.data.BatchSampler(
        train_sampler, batch_size=2, drop_last=True)
    data_loader = DataLoader(
        dataset,
        batch_sampler=train_batch_sampler,
        num_workers=4,
        collate_fn=dataset.collate_fn,
    )

    # Print the shape of the first five batches, e.g.
    #   torch.Size([2, 3, 800, 1152])
    #   torch.Size([2, 3, 800, 1088])
    #   torch.Size([2, 3, 1056, 1216])
    #   torch.Size([2, 3, 800, 1088])
    for i, (img, target) in enumerate(data_loader):
        print(img.shape)
        if i >= 4:
            break