### 이미지에서 객체 라벨 인식

In [4]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from torchvision.transforms import functional as F
from PIL import Image
import json
import torchvision
import sys
import numpy as np
import tqdm

# Class names the detector is trained on; a name's position in this list is
# the integer label assigned to it.
# NOTE(review): torchvision detection models conventionally reserve label 0
# for background, yet this mapping assigns 0 to "object" -- confirm whether
# labels should start at 1 (label decoding at inference time must agree).
classes_of_interest = ["object", "target"]
class_name_to_label = dict(zip(classes_of_interest, range(len(classes_of_interest))))


class CustomDataset(Dataset):
    """Detection training set backed by a pickled image dict and a JSON label file.

    Args:
        npy_path: Path to a ``.npy`` file holding a pickled dict that maps
            image ids to image arrays.
        annotation_file: Path to a JSON file mapping each image id to a list
            of annotations, each carrying a ``"bbox"`` and a ``"class"`` entry.
    """

    def __init__(self, npy_path: str, annotation_file: str) -> None:
        with open(annotation_file, 'r') as f:
            self.annotations = json.load(f)
        # NOTE: allow_pickle=True can execute arbitrary code while unpickling;
        # only load files from a trusted source.
        self.images_dict = np.load(npy_path, allow_pickle=True).item()
        self.image_ids = list(self.images_dict.keys())

    def __len__(self) -> int:
        return len(self.image_ids)

    @staticmethod
    def _boxes_to_tensor(boxes):
        """Convert a list of boxes to a float32 tensor, keeping an empty list 2-D.

        ``torch.as_tensor([])`` has shape ``(0,)``; detection targets need
        ``(0, 4)`` so that an image without annotations is still a valid
        sample. Non-empty input is left untouched so malformed boxes are
        still reported downstream instead of being silently reshaped.
        """
        box_tensor = torch.as_tensor(boxes, dtype=torch.float32)
        if box_tensor.numel() == 0:
            box_tensor = box_tensor.reshape(0, 4)
        return box_tensor

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the sample at position ``idx``.

        ``image`` is the RGB image converted with ``to_tensor``; ``target`` is
        a dict with ``"boxes"`` (float32), ``"labels"`` (int64) and
        ``"image_id"`` (a one-element tensor holding ``idx``).

        Raises:
            KeyError: If the image id has no entry in the annotation file or
                an annotation names a class missing from
                ``class_name_to_label``.
        """
        image_id = self.image_ids[idx]
        image = Image.fromarray(self.images_dict[image_id]).convert("RGB")
        annos = self.annotations[image_id]

        # NOTE(review): boxes are passed through unchanged; torchvision
        # expects [x1, y1, x2, y2] -- confirm the annotation format.
        boxes = self._boxes_to_tensor([anno["bbox"] for anno in annos])
        labels = torch.tensor(
            [class_name_to_label[anno["class"]] for anno in annos],
            dtype=torch.int64,
        )

        image = F.to_tensor(image)
        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx])
        }
        return image, target

def collate_fn(batch):
    """Split a batch of ``(image, target)`` samples into two parallel lists.

    The samples are not stacked into tensors; the images and the targets are
    returned as plain Python lists in batch order.
    """
    images, targets = [], []
    for sample in batch:
        images.append(sample[0])
        targets.append(sample[1])
    return images, targets

def get_model(num_classes: int):
    """Build a Faster R-CNN (ResNet-50 FPN) with a fresh ``num_classes`` box head.

    The network is created with the COCO_V1 weights, then its box predictor is
    replaced by a new ``FastRCNNPredictor`` sized for ``num_classes`` outputs.
    """
    detector = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1)
    # The replacement head must accept the same feature width as the old one.
    head_inputs = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_inputs, num_classes)
    return detector
def main(num_epochs: int, batch_size: int) -> None:
    """Fine-tune the detector on the training data and save the final weights.

    Args:
        num_epochs: Number of passes over the training data; must be >= 1.
        batch_size: Number of images per optimisation step.

    Raises:
        ValueError: If ``num_epochs`` is less than 1 or the training set is
            empty (previously these surfaced as a NameError after the
            dataset and model had already been loaded).
    """
    if num_epochs < 1:
        raise ValueError(f"num_epochs must be at least 1, got {num_epochs}")

    train_npy_path = 'data/train_image.npy'
    label_path = 'data/train_label.json'
    weight_save_path = 'data/weights_00.pth'

    dataset = CustomDataset(train_npy_path, label_path)
    if len(dataset) == 0:
        raise ValueError(f"no training images found in {train_npy_path}")
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn, num_workers=2)

    # One output more than the listed classes (presumably the background
    # class torchvision detectors expect -- confirm).
    num_classes = len(classes_of_interest) + 1
    # Use the GPU when one is visible, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = get_model(num_classes)
    model.to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.008, momentum=0.9, weight_decay=0.0005)

    for epoch in range(1, num_epochs + 1):
        model.train()
        for images, targets in tqdm.tqdm(data_loader):
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            # In training mode the model returns a dict of loss terms.
            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
        # This is the loss of the epoch's last batch, not an epoch average.
        print(f"Epoch {epoch}/{num_epochs}, Loss: {round(losses.item(), 4)}")

    # Only the weights after the final epoch are saved; the '00' placeholder
    # in the file name becomes the number of epochs trained.
    torch.save(model.state_dict(), weight_save_path.replace('00', str(num_epochs)))

In [5]:
import os

# Order GPUs by PCI bus id and expose only device "1" to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1",
})

# Training run configuration.
num_epochs = 10
batch_size = 16
main(num_epochs, batch_size)

100%|██████████| 19/19 [00:20<00:00,  1.08s/it]


Epoch 1/10, Loss: 0.1147


100%|██████████| 19/19 [00:20<00:00,  1.08s/it]


Epoch 2/10, Loss: 0.093


100%|██████████| 19/19 [00:20<00:00,  1.09s/it]


Epoch 3/10, Loss: 0.0376


100%|██████████| 19/19 [00:21<00:00,  1.12s/it]


Epoch 4/10, Loss: 0.0692


100%|██████████| 19/19 [00:20<00:00,  1.08s/it]


Epoch 5/10, Loss: 0.0297


100%|██████████| 19/19 [00:20<00:00,  1.10s/it]


Epoch 6/10, Loss: 0.0421


100%|██████████| 19/19 [00:20<00:00,  1.09s/it]


Epoch 7/10, Loss: 0.0385


100%|██████████| 19/19 [00:21<00:00,  1.11s/it]


Epoch 8/10, Loss: 0.0399


100%|██████████| 19/19 [00:20<00:00,  1.09s/it]


Epoch 9/10, Loss: 0.0277


100%|██████████| 19/19 [00:20<00:00,  1.08s/it]


Epoch 10/10, Loss: 0.0527


In [21]:
import torch
import torch.nn as nn
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from torchvision.transforms import functional as F
from PIL import Image
import json
import torchvision
import sys
import numpy as np

# Class names used to turn predicted integer labels back into strings.
classes_of_interest = ["object", "target"]

# Every test image is resized to 1080x1920 (height, width) before inference.
# NOTE(review): predicted boxes are therefore in resized-image coordinates --
# confirm the test images already have this size or that boxes are rescaled.
transform = T.Compose([T.Resize((1080, 1920))])

class CustomDataset(Dataset):
    """Inference dataset yielding ``(image_tensor, image_id)`` pairs.

    Images come from a ``.npy`` file holding a pickled dict that maps image
    ids to image arrays. An optional ``transform`` is applied to the tensor
    produced by ``to_tensor``.
    """

    def __init__(self, npy_path, transform=None):
        self.transform = transform
        # NOTE: allow_pickle=True can execute arbitrary code while unpickling;
        # only load files from a trusted source.
        loaded = np.load(npy_path, allow_pickle=True)
        self.images_dict = loaded.item()
        self.image_ids = list(self.images_dict.keys())

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        pil_image = Image.fromarray(self.images_dict[image_id]).convert("RGB")
        tensor = F.to_tensor(pil_image)
        if self.transform is None:
            return tensor, image_id
        return self.transform(tensor), image_id

def get_model(num_classes: int):
    """Create the Faster R-CNN (ResNet-50 FPN) detector with a ``num_classes`` box head.

    Starts from the COCO_V1 weights and swaps the box predictor for a new
    ``FastRCNNPredictor`` with ``num_classes`` outputs.
    """
    net = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1)
    # Reuse the feature width of the existing classification layer.
    feature_dim = net.roi_heads.box_predictor.cls_score.in_features
    net.roi_heads.box_predictor = FastRCNNPredictor(feature_dim, num_classes)
    return net

def predict(model, dataloader, device):
    """Run the detector over ``dataloader`` and collect detections per image id.

    Args:
        model: Detection model. It is switched to eval mode and called with a
            list of image tensors; it must return one dict per image with
            ``"boxes"``, ``"scores"`` and ``"labels"`` tensors.
        dataloader: Iterable of ``(images, image_ids)`` batches.
        device: Device each image is moved to before the forward pass.

    Returns:
        Dict mapping each image id to a list of
        ``{"class": str, "bbox": list, "score": float}`` detections.
    """
    model.eval()
    predictions = {}
    with torch.no_grad():
        for images, image_ids in dataloader:
            outputs = model([img.to(device) for img in images])
            for image_id, output in zip(image_ids, outputs):
                # Default collation turns numeric ids into tensors, which hash
                # by identity and cannot be JSON keys; unwrap to a scalar.
                if isinstance(image_id, torch.Tensor):
                    image_id = image_id.item()
                boxes = output['boxes'].cpu().tolist()
                scores = output['scores'].cpu().tolist()
                labels = output['labels'].cpu().tolist()
                # NOTE(review): the label is used directly as a list index. A
                # label equal to len(classes_of_interest) raises IndexError --
                # confirm this matches the label mapping used in training.
                predictions[image_id] = [
                    {
                        "class": classes_of_interest[label],
                        "bbox": box,
                        "score": score
                    }
                    for label, box, score in zip(labels, boxes, scores)
                ]
    return predictions

def main(batch_size: int) -> None:
    """Load the trained detector, run it on the test images and save the result.

    Reads weights from ``data/weights_10.pth`` and images from
    ``data/test_image.npy``, then writes the predictions returned by
    ``predict`` to ``data/submit.json``.

    Args:
        batch_size: Number of images per inference batch.
    """
    model_weights = 'data/weights_10.pth'
    test_npy_path = 'data/test_image.npy'
    output = 'data/submit.json'

    # Use the GPU when one is visible, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    num_classes = len(classes_of_interest) + 1
    model = get_model(num_classes)
    # map_location lets weights saved from a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(model_weights, map_location=device))
    model = model.to(device)

    test_dataset = CustomDataset(test_npy_path, transform=transform)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    pred_dict = predict(model, test_dataloader, device)

    with open(output, 'w') as f:
        json.dump(pred_dict, f, indent=4)

In [22]:
# Run inference over the test set, 16 images per batch.
batch_size = 16
main(batch_size)

