# 고양이, 개 객체 탐지 모델 (데이터셋 직접 만들어보기)

Roboflow의 API 키는 외부에 공개되면 안 되므로, 코드에 직접 적지 않고 `.env` 파일에 저장한 뒤 불러와서 사용하자.

`uv add python-dotenv`

In [62]:
from dotenv import load_dotenv
import os

# Pull the variables defined in the local .env file into the process
# environment so the Roboflow key never has to be written in the notebook.
load_dotenv()

# Resolves to None when the variable is not defined.
ROBOFLOW_API_KEY = os.getenv("ROBOFLOW_API_KEY")

# 1. 데이터셋 불러오기 (roboflow)

In [63]:
from roboflow import Roboflow

# Authenticate against Roboflow, then resolve workspace -> project -> version
# and download that version exported in the "yolov11" format.
rf = Roboflow(api_key=ROBOFLOW_API_KEY)
project = (
    rf.workspace("hayley-kim-6vkvh")
    .project("yolo-cat-dog-ydlel")
)
version = project.version(1)
dataset = version.download("yolov11")

loading Roboflow workspace...
loading Roboflow project...


# 2. 모델 불러오기

In [64]:
# Raw strings keep every backslash of these Windows paths literal. In a plain
# string, sequences such as "\P" or "\s" are invalid escapes that only survive
# by accident (Python emits a warning for them), and a folder name starting
# with "t" or "n" would silently become a tab or a newline.
data_yaml_path = r"C:\Potenup\DeepLearning-YOLO-Study\src\YOLO-Cat-Dog-1\data.yaml"
model_path = r"C:\Potenup\DeepLearning-YOLO-Study\models\yolo11n.pt"

In [65]:
from ultralytics import YOLO

# Build the detector from the pretrained checkpoint at model_path.
model = YOLO(model=model_path)
# Bare expression: the notebook renders the model's layer summary.
model

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C3k2(
        (cv1): Conv(
          (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(48, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_

# 3. 학습하기

In [67]:
# Fine-tune the loaded checkpoint on the dataset described by data.yaml:
# 100 epochs at 640-pixel input size, with saving explicitly enabled.
results = model.train(
    data=data_yaml_path,
    epochs=100,
    imgsz=640,
    save=True,
)

Ultralytics 8.3.191  Python-3.10.18 torch-2.8.0+cpu CPU (Intel Core(TM) Ultra 9 185H)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:\Potenup\DeepLearning-YOLO-Study\src\YOLO-Cat-Dog-1\data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=C:\Potenup\DeepLearning-YOLO-Study\models\yolo11n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train3, nbs=64, nms=False, opset=None, optimize=False, optim

- all (전체)   
전반적인 성능은 mAP50이 0.635, mAP50-95가 0.391

- cats (고양이)   
고양이 탐지 성능이 매우 좋음   
Recall (1), mAP50 (0.913), mAP50-95 (0.556) 높은 값

- dogs (개)   
개 탐지 성능이 고양이에 비해 매우 낮음   
특히 mAP50 (0.358)과 mAP50-95 (0.225)가 상대적으로 낮아,   
개를 탐지하는 데 어려움이 있었음을 알 수 있다.

고양이의 결과가 극단적으로 좋고,   
재현율에 비해 정밀도(고양이라고 예측한 것 중 실제로 고양이인 비율)가 극단적으로 낮은 것으로 볼 때,

모델이 대부분의 객체를 고양이로 예측하여 고양이에 대한 재현율만 높게 나온 것으로 추측된다.

# 4. 평가하기

## 1) best 모델 불러오기

In [58]:
# Load the best checkpoint written by a finished training run.
# A raw string keeps the Windows backslashes literal; the previous literal
# mixed "\\" with invalid escapes such as "\d" and "\w", which only worked by
# accident and trigger a Python warning.
# NOTE(review): the training log shown in this notebook reports name=train3,
# while this loads from runs\detect\train - confirm this is the intended run.
load_model = YOLO(r"C:\Potenup\DeepLearning-YOLO-Study\runs\detect\train\weights\best.pt")

## 2) 평가하기

In [59]:
# Evaluate the loaded weights on the validation data referenced by data.yaml.
# conf (not passed here, so the library default applies): minimum confidence
#   a prediction needs in order to be kept as a detection.
# iou: IoU (Intersection over Union) threshold used by non-maximum
#   suppression to decide when overlapping predicted boxes are duplicates.
val_results = load_model.val(
    data=data_yaml_path,
    imgsz=640,
    iou=0.6,
)

Ultralytics 8.3.191  Python-3.10.18 torch-2.8.0+cpu CPU (Intel Core(TM) Ultra 9 185H)
YOLO11n summary (fused): 100 layers, 2,582,542 parameters, 0 gradients, 6.3 GFLOPs
val: Fast image access  (ping: 0.0±0.0 ms, read: 427.5±159.1 MB/s, size: 45.3 KB)
val: Scanning C:\Potenup\DeepLearning-YOLO-Study\src\YOLO-Cat-Dog-1\valid\labels.cache... 6 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 6/6  0.0s
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 1/1 2.7it/s 0.4s
                   all          6          7    0.00353      0.875      0.641      0.397
                  cats          3          3    0.00435          1      0.913      0.556
                  dogs          4          4     0.0027       0.75       0.37      0.238
Speed: 2.5ms preprocess, 47.3ms inference, 0.0ms loss, 9.6ms postprocess per image
Results saved to C:\Potenup\DeepLearning-YOLO-Study\runs\detect\val2


# 5. 사용하기

## 1) 이미지 배치 사이즈로 준비하기(선택)

In [38]:
from PIL import Image

def resize_with_padding(img, target_size, padding_color=(0, 0, 0)):
    """Letterbox *img* onto a new RGB canvas of exactly ``target_size``.

    The image is scaled uniformly so that it fits inside the target while
    keeping its aspect ratio, then pasted centred on a canvas filled with
    ``padding_color``.

    Args:
        img: Source PIL image; it is not modified.
        target_size: ``(width, height)`` of the returned image, in pixels.
        padding_color: Fill colour for the area not covered by the image.

    Returns:
        A new RGB image whose size equals ``target_size``.

    Raises:
        ValueError: If either target dimension is smaller than 1.
    """
    target_width, target_height = target_size
    if target_width < 1 or target_height < 1:
        raise ValueError(f"target_size must be positive, got {target_size!r}")

    original_width, original_height = img.size

    # 1. Uniform scale factor: the tighter of the two axes wins so the whole
    #    image fits inside the target.
    scale = min(target_width / original_width, target_height / original_height)
    # int() truncates, so a very elongated image could end up with a 0-pixel
    # side, which resize() rejects; keep at least one pixel per side.
    new_width = max(1, int(original_width * scale))
    new_height = max(1, int(original_height * scale))
    resized_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

    # 2. Centre the resized image on the padded canvas.
    padded_img = Image.new('RGB', (target_width, target_height), padding_color)
    paste_x = (target_width - new_width) // 2
    paste_y = (target_height - new_height) // 2
    padded_img.paste(resized_img, (paste_x, paste_y))

    return padded_img

def create_image_grids_from_list(image_paths, max_width, max_height, rows, cols, padding=0, bg_color=(0, 0, 0)):
    """Tile images into one or more fixed-size ``rows`` x ``cols`` grid images.

    Images are placed left to right, top to bottom. Every ``rows * cols``
    images produce one grid; the last grid may be only partially filled.

    Args:
        image_paths: Paths of the images to place, in order.
        max_width: Width of every returned grid image, in pixels.
        max_height: Height of every returned grid image, in pixels.
        rows: Number of cell rows per grid.
        cols: Number of cell columns per grid.
        padding: Gap in pixels between neighbouring cells.
        bg_color: Colour of the canvas, the gaps and the letterbox area
            inside each cell.

    Returns:
        A list of RGB grid images, empty when ``image_paths`` is empty.

    Raises:
        ValueError: If ``rows``/``cols`` are smaller than 1 or the padding
            leaves no room for the cells.
    """
    if rows < 1 or cols < 1:
        raise ValueError(f"rows and cols must be >= 1, got rows={rows}, cols={cols}")

    # Reserve the gaps first, then split what is left between the cells.
    # Sizing cells as max_width / cols while also offsetting them by
    # `padding` pushed the last column/row past the canvas edge, where the
    # overflow was silently clipped.
    cell_width = (max_width - padding * (cols - 1)) // cols
    cell_height = (max_height - padding * (rows - 1)) // rows
    if cell_width < 1 or cell_height < 1:
        raise ValueError("padding leaves no room for the grid cells")

    cells_per_grid = rows * cols
    paths = list(image_paths)

    result_grids = []
    for start in range(0, len(paths), cells_per_grid):
        grid_img = Image.new('RGB', (max_width, max_height), bg_color)

        for index, path in enumerate(paths[start:start + cells_per_grid]):
            # Open one file at a time and close it as soon as the cell has
            # been rendered, instead of holding every image at once.
            with Image.open(path) as source:
                # Letterbox with bg_color so the cell blends into the canvas
                # instead of always being padded with white.
                cell = resize_with_padding(
                    source.convert('RGB'),
                    (cell_width, cell_height),
                    padding_color=bg_color,
                )

            row, col = divmod(index, cols)
            x_pos = col * (cell_width + padding)
            y_pos = row * (cell_height + padding)
            grid_img.paste(cell, (x_pos, y_pos))

        result_grids.append(grid_img)

    return result_grids


import os

# Raw strings keep the Windows backslashes literal; the previous literals
# relied on invalid escapes such as "\c", "\d" and "\i" being passed through.
base_path = r"C:\Potenup\DeepLearning-YOLO-Study\images"
image_files = [
    os.path.join(base_path, name)
    for name in ("cat_dog_test.jpeg", "cat_dog_test_2.jpeg", "cat_test.jpg", "dog_test.jpeg")
]

# Tile the four test images into 640x640 grids of 2x2 cells on white.
combined_grids = create_image_grids_from_list(image_files, max_height=640, max_width=640, rows=2, cols=2, padding=10, bg_color=(255, 255, 255))

batch_image_base_path = r"C:\Potenup\DeepLearning-YOLO-Study\images\train"
# save() does not create missing folders, so make sure the target exists.
os.makedirs(batch_image_base_path, exist_ok=True)

# Write each grid as train_batch_1.jpg, train_batch_2.jpg, ...
# An empty list simply results in no files being written.
for i, grid_img in enumerate(combined_grids, start=1):
    grid_img.save(os.path.join(batch_image_base_path, f"train_batch_{i}.jpg"))

In [None]:
# Run inference on the prepared grid image and save the annotated output.
# A raw string keeps the Windows backslashes literal instead of relying on
# invalid escape sequences such as "\P" and "\i".
# NOTE(review): the grid-building cell in this notebook saves files named
# train_batch_<n>.jpg, while this reads batch_1.jpg - confirm the file name.
results = load_model.predict([r"C:\Potenup\DeepLearning-YOLO-Study\images\train\batch_1.jpg"], save=True)


0: 640x640 (no detections), 68.4ms
Speed: 2.9ms preprocess, 68.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)
Results saved to C:\Potenup\DeepLearning-YOLO-Study\runs\detect\predict4


In [61]:
# Show the class-index -> class-name mapping attached to the first result.
results[0].names

{0: 'cats', 1: 'dogs'}

In [57]:
# For every prediction result, dump the whole Boxes container first and then
# the class tensor of each individual detection.
for prediction in results:
    detections = prediction.boxes
    print(detections)
    for detection in detections:
        print(detection.cls)

ultralytics.engine.results.Boxes object with attributes:

cls: tensor([])
conf: tensor([])
data: tensor([], size=(0, 6))
id: None
is_track: False
orig_shape: (640, 640)
shape: torch.Size([0, 6])
xywh: tensor([], size=(0, 4))
xywhn: tensor([], size=(0, 4))
xyxy: tensor([], size=(0, 4))
xyxyn: tensor([], size=(0, 4))


모델 사용 결과, 탐지가 되지 않는다.

기존 YOLO를 그대로 사용했을 때는 구분이 되지만, 학습 후에는 오히려 탐지가 되지 않는 것으로 보아

학습 데이터의 품질 문제가 클 것으로 예상된다(학습으로 인해 오히려 성능이 저하됨).

----

# 고양이-개 객체 탐지 모델 (얼굴만 크롭한 이미지)

In [None]:
from roboflow import Roboflow

# Same download flow as for the first dataset, this time for the project
# that holds the face-cropped images.
rf = Roboflow(api_key=ROBOFLOW_API_KEY)
project = (
    rf.workspace("hayley-kim-6vkvh")
    .project("yolo-cat-dog-2-d9bxa")
)
version = project.version(1)
dataset = version.download("yolov11")

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in YOLO-Cat-Dog-2-1 to yolov11:: 100%|██████████| 1162/1162 [00:01<00:00, 940.06it/s]





Extracting Dataset Version Zip to YOLO-Cat-Dog-2-1 in yolov11:: 100%|██████████| 70/70 [00:00<00:00, 869.74it/s]


In [48]:
from ultralytics import YOLO

# Rebuild the detector from the pretrained checkpoint at model_path so the
# second experiment does not reuse the model object trained above.
model = YOLO(model=model_path)
# Bare expression: the notebook renders the model's layer summary.
model

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C3k2(
        (cv1): Conv(
          (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(48, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_

In [49]:
# Dataset description of the second (face-cropped) dataset. A raw string keeps
# the Windows backslashes literal instead of relying on invalid escape
# sequences such as "\P", "\s" and "\d" being passed through unchanged.
data_yaml_path_2 = r"C:\Potenup\DeepLearning-YOLO-Study\src\YOLO-Cat-Dog-2-1\data.yaml"

In [52]:
# Train on the second dataset: 100 epochs at 640-pixel input size.
results = model.train(
    data=data_yaml_path_2,
    epochs=100,
    imgsz=640,
)

Ultralytics 8.3.191  Python-3.10.18 torch-2.8.0+cpu CPU (Intel Core(TM) Ultra 9 185H)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:\Potenup\DeepLearning-YOLO-Study\src\YOLO-Cat-Dog-2-1\data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=C:\Potenup\DeepLearning-YOLO-Study\models\yolo11n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train15, nbs=64, nms=False, opset=None, optimize=False, op

KeyboardInterrupt: 