In [1]:
import os

import torch
import torch.nn.functional as F
import numpy as np

from tqdm.notebook import tqdm
import albumentations as A
from PIL import Image

from models import get_model

https://tutorials.pytorch.kr/beginner/saving_loading_models.html#checkpoint
https://huggingface.co/docs/transformers/ko/tasks/object_detection
https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/image_processing_detr.py#L1773
https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/image_processing_detr.py#L323

In [2]:
def get_file_extension(filename):
    _, extension = os.path.splitext(filename)
    return extension.lstrip(".")

In [3]:
result_path = "../Result/detect"
test_db_path = "../data/detection/test"
test_res_path = "%s/predictions/v1" % (result_path)

root_dir = "/workspace/traffic_light/data/detection/test/"
model_ckpt = "/workspace/traffic_light/output/facebook/detr-resnet-50/v2/best.pth"

In [17]:
if not os.path.exists(test_res_path):
    os.makedirs(test_res_path)

In [5]:
img_paths = os.path.join(root_dir, "images")
img_files = [img for img in os.listdir(img_paths) if ".jpg" in img]
img_files.sort()

In [6]:
model_name = "facebook/detr-resnet-50"
model = get_model(model_name, "cpu").to("cpu")
model.load_state_dict(torch.load(model_ckpt)["model_state_dict"])

model.eval()

HuggingfaceDetrModel(
  (model): DetrModel(
    (backbone): DetrConvModel(
      (conv_encoder): DetrConvEncoder(
        (model): FeatureListNet(
          (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
          (bn1): DetrFrozenBatchNorm2d()
          (act1): ReLU(inplace=True)
          (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
          (layer1): Sequential(
            (0): Bottleneck(
              (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (bn1): DetrFrozenBatchNorm2d()
              (act1): ReLU(inplace=True)
              (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (bn2): DetrFrozenBatchNorm2d()
              (drop_block): Identity()
              (act2): ReLU(inplace=True)
              (aa): Identity()
              (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        

In [7]:
from transformers import AutoImageProcessor

image_processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-101")


In [8]:
test_augments_for_huggingface = A.Compose(
    [
        A.Resize(height=800, width=800, p=1.0),
    ]
)


In [9]:
# img_path1 = os.path.join(img_paths, img_files[0])
# image = Image.open(img_path1)
# width, height = image.size
# print(width, height)
# image = np.array(image)
# image = test_augments_for_huggingface(image=image)['image']

# input = image_processor(images = image)
# pixel_values = torch.tensor(input['pixel_values'])
# target = {'pixel_mask': torch.tensor(input['pixel_mask'])}
# # print(np.expand_dims(pixel_values,0).shape)
# # outputs = model(pixel_values, target)
# outputs = model(pixel_values, target)  # pixel_value, pixel_mask
# logits = outputs["logits"]
# pred_boxes = outputs["pred_boxes"]
# prob = F.softmax(logits, -1)

# scores, labels = prob[..., :-1].max(-1)  # cx, cy, w, h : [0,1]

In [10]:
# scale_fct = torch.tensor([width, height, width, height])
# boxes = pred_boxes * scale_fct

In [11]:
# threshold = 0.5
# results = []
# for s, l, b in zip(scores, labels, boxes):
#     score = s[s > threshold]
#     label = l[s > threshold]
#     box = b[s > threshold]
#     results.append({"scores": score, "labels": label, "boxes": box})

In [12]:
# threshold = 0.5
# score = scores[scores > threshold]
# bbbox = boxes[scores > threshold]

In [20]:
threshold = 0.15

with torch.no_grad():
    for img_file in tqdm(img_files):
        img_ext = get_file_extension(img_file)
        txt_filename = img_file.replace(img_ext, "txt")
        txt_filename = os.path.join(img_paths, txt_filename)
        txt_filename = txt_filename.replace(
            "data/detection/test/images", "Result/detect/predictions/v1"
        )

        img_path = os.path.join(img_paths, img_file)
        image = Image.open(img_path)
        width, height = image.size

        image = np.array(image)
        image = test_augments_for_huggingface(image=image)["image"]

        input = image_processor(images=image)
        pixel_values = torch.tensor(input["pixel_values"])
        target = {"pixel_mask": torch.tensor(input["pixel_mask"])}

        outputs = model(pixel_values, target)  # pixel_value, pixel_mask
        logits = outputs["logits"]
        pred_boxes = outputs["pred_boxes"]
        prob = F.softmax(logits, -1)
        scores, labels = prob[..., :-1].max(-1)  # cx, cy, w, h : [0,1]

        scale_fct = torch.tensor([width, height, width, height])
        boxes = pred_boxes * scale_fct

        # apply threshold
        mask = scores > threshold
        scores = scores[mask]
        labels = labels[mask]
        boxes = boxes[mask]

        num_obj = len(scores)
        print(txt_filename)
        with open(txt_filename, "w") as f:
            for score, label, box in zip(scores, labels, boxes):
                f.write(
                    "%d %lf %lf %lf %lf %lf\n"
                    % (label, box[0], box[1], box[2], box[3], score)
                )
            f.write("helloworld")

        if len(boxes) == 0:
            print(f"'{img_file}' has no boxes")

  0%|          | 0/13505 [00:00<?, ?it/s]

/workspace/traffic_light/Result/detect/predictions/v1/10000000.txt
/workspace/traffic_light/Result/detect/predictions/v1/10000001.txt
/workspace/traffic_light/Result/detect/predictions/v1/10000002.txt
/workspace/traffic_light/Result/detect/predictions/v1/10000003.txt
/workspace/traffic_light/Result/detect/predictions/v1/10000004.txt
/workspace/traffic_light/Result/detect/predictions/v1/10000005.txt
/workspace/traffic_light/Result/detect/predictions/v1/10000006.txt
/workspace/traffic_light/Result/detect/predictions/v1/10000007.txt
/workspace/traffic_light/Result/detect/predictions/v1/10000008.txt


KeyboardInterrupt: 

In [13]:
!pip install --upgrade jupyter
# main(args.model_ckpt)  # 절대경로로 변경할 것.

Collecting jupyter
  Downloading jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting notebook (from jupyter)
  Downloading notebook-7.2.2-py3-none-any.whl.metadata (10 kB)
Collecting jupyter-console (from jupyter)
  Downloading jupyter_console-6.6.3-py3-none-any.whl.metadata (5.8 kB)
Collecting nbconvert (from jupyter)
  Downloading nbconvert-7.16.4-py3-none-any.whl.metadata (8.5 kB)
Collecting jupyterlab (from jupyter)
  Downloading jupyterlab-4.2.5-py3-none-any.whl.metadata (16 kB)
Collecting async-lru>=1.0.0 (from jupyterlab->jupyter)
  Downloading async_lru-2.0.4-py3-none-any.whl.metadata (4.5 kB)
Collecting httpx>=0.25.0 (from jupyterlab->jupyter)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jupyter-lsp>=2.0.0 (from jupyterlab->jupyter)
  Downloading jupyter_lsp-2.2.5-py3-none-any.whl.metadata (1.8 kB)
Collecting jupyter-server<3,>=2.4.0 (from jupyterlab->jupyter)
  Downloading jupyter_server-2.14.2-py3-none-any.whl.metadata (8.4 kB)
Collect