In [None]:
# Mount Google Drive at /content/drive; the dataset archive is read from it and
# Config.OUTPUT_DIR (checkpoints, results, crops) is written to it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import json
import math
import glob
import time
from datetime import datetime
from typing import List, Tuple, Dict, Any

import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [None]:
# Extract the bee dataset archive from Google Drive into the current working directory.
# NOTE(review): Config.DATA_ROOT points at /content/data (images/ and jsons/ below it) —
# confirm the archive unpacks to that layout.
!unzip /content/drive/MyDrive/yolo/Bee.zip

In [None]:
# =========================
# Config
# =========================
class Config:
    """Central settings for data locations, model geometry, training and inference.

    All values are class attributes; instantiating the class additionally
    creates the output directory tree on disk.
    """

    # --- Dataset locations ---
    DATA_ROOT = "/content/data"
    IMAGES_DIR = os.path.join(DATA_ROOT, "images")
    JSONS_DIR = os.path.join(DATA_ROOT, "jsons")

    # --- Output locations (on the mounted Drive) ---
    OUTPUT_DIR = "/content/drive/MyDrive/yolo/output"
    CHECKPOINT_DIR = os.path.join(OUTPUT_DIR, "checkpoints")
    RESULTS_DIR = os.path.join(OUTPUT_DIR, "results")
    CROPS_DIR = os.path.join(OUTPUT_DIR, "crops")

    # --- Detector geometry ---
    INPUT_SIZE = 416
    GRID_SIZE = 13
    # Anchor (width, height) pairs in input pixels; consumers slice the first
    # NUM_ANCHORS entries.
    ANCHORS = [
        (10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
        (59, 119), (116, 90), (156, 198), (373, 326)
    ]
    NUM_CLASSES = 1
    NUM_ANCHORS = 5

    # --- Classification stage ---
    CLASSIFICATION_INPUT_SIZE = 224
    CLASSIFICATION_NUM_CLASSES = 2
    CLASSIFICATION_CLASSES = ['non_bee', 'bee']

    # --- Optimisation ---
    BATCH_SIZE_GPU = 8
    BATCH_SIZE_CPU = 64
    BASE_LR = 4e-5       # base learning rate reached at the end of warm-up
    WARMUP_EPOCHS = 5    # number of warm-up epochs
    NUM_EPOCHS = 100
    MOMENTUM = 0.9
    WEIGHT_DECAY = 1e-4

    # --- Augmentation switches ---
    AUGMENTATION = True
    AUGMENTATION_PROB = 0.5
    HORIZONTAL_FLIP = True
    VERTICAL_FLIP = False
    ROTATION_RANGE = 15
    BRIGHTNESS_RANGE = 0.8
    CONTRAST_RANGE = 0.8

    # --- Runtime ---
    DEVICE = "cuda"
    SAVE_INTERVAL = 10

    # --- Detection post-processing ---
    CONFIDENCE_THRESHOLD = 0.3
    NMS_THRESHOLD = 0.45
    MIN_BOX_SIZE = 6

    CLASSIFICATION_CONFIDENCE_THRESHOLD = 0.7

    def __init__(self):
        """Create every output directory if it does not exist yet."""
        for directory in (self.OUTPUT_DIR, self.CHECKPOINT_DIR,
                          self.RESULTS_DIR, self.CROPS_DIR):
            os.makedirs(directory, exist_ok=True)


In [None]:
# =========================
# Utils
# =========================
def load_json_annotations(json_path: str) -> Dict[str, Any]:
    """Read a UTF-8 encoded JSON annotation file and return its parsed content."""
    with open(json_path, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed


def convert_to_yolo_format(annotation_data: Dict[str, Any], img_width: int, img_height: int) -> List[List[float]]:
    """Convert corner-style annotations into normalised YOLO rows.

    Each entry of ``annotation_data['ANNOTATION_INFO']`` (missing key -> no
    entries) must provide ``XTL``, ``YTL``, ``XBR`` and ``YBR``. The result holds
    one ``[class_id, center_x, center_y, width, height]`` row per entry, with
    coordinates divided by the image size and ``class_id`` always 0.
    """
    def _to_row(entry: Dict[str, Any]) -> List[float]:
        left, top = entry['XTL'], entry['YTL']
        right, bottom = entry['XBR'], entry['YBR']
        return [
            0,
            (left + right) / 2.0 / img_width,
            (top + bottom) / 2.0 / img_height,
            (right - left) / img_width,
            (bottom - top) / img_height,
        ]

    return [_to_row(entry) for entry in annotation_data.get('ANNOTATION_INFO', [])]


def preprocess_image(image_path: str, target_size: int = 416) -> Tuple[np.ndarray, float, Tuple[int, int]]:
    """Load an image, letterbox it to a square and normalise it.

    The file is read through ``np.fromfile`` + ``cv2.imdecode`` so that
    non-ASCII paths work. The image is scaled to fit ``target_size`` while
    keeping its aspect ratio, centred on a black canvas, converted to RGB,
    scaled to [0, 1] and standardised with the ImageNet mean/std.

    Returns:
        ``(image, scale, (x_offset, y_offset))`` where ``image`` is a float32
        HWC array and ``scale``/offsets describe the letterbox transform.

    Raises:
        ValueError: if the file cannot be decoded as an image.
    """
    raw_bytes = np.fromfile(image_path, dtype=np.uint8)
    bgr = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)
    if bgr is None:
        raise ValueError(f"이미지를 로드할 수 없습니다: {image_path}")

    src_h, src_w = bgr.shape[:2]
    scale = min(target_size / src_w, target_size / src_h)
    fit_w, fit_h = int(src_w * scale), int(src_h * scale)
    shrunk = cv2.resize(bgr, (fit_w, fit_h))

    # Centre the resized image on a black square canvas.
    y_offset = (target_size - fit_h) // 2
    x_offset = (target_size - fit_w) // 2
    canvas = np.zeros((target_size, target_size, 3), dtype=np.uint8)
    canvas[y_offset:y_offset + fit_h, x_offset:x_offset + fit_w] = shrunk

    # BGR -> RGB, [0, 1] scaling, then ImageNet standardisation.
    rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    imagenet_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    imagenet_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    normalised = (rgb - imagenet_mean) / imagenet_std

    return normalised, scale, (x_offset, y_offset)


def calculate_iou(box1, box2):
    """Intersection-over-union of two ``(x1, y1, x2, y2)`` boxes.

    Returns 0 when the union area is zero (e.g. two degenerate boxes).
    """
    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # Overlap extents, floored at zero for disjoint boxes.
    overlap_w = max(0, min(right_a, right_b) - max(left_a, left_b))
    overlap_h = max(0, min(bottom_a, bottom_b) - max(top_a, top_b))
    intersection = overlap_w * overlap_h

    area_a = (right_a - left_a) * (bottom_a - top_a)
    area_b = (right_b - left_b) * (bottom_b - top_b)
    union = area_a + area_b - intersection

    # Guard against division by zero.
    return intersection / union if union != 0 else 0


def non_max_suppression(boxes: List[List[float]], iou_threshold: float = 0.5) -> List[List[float]]:
    """Greedy non-maximum suppression.

    ``boxes`` are ``[x1, y1, x2, y2, score, ...]`` rows. Boxes are visited in
    descending score order; each kept box removes every remaining box whose
    IoU with it exceeds ``iou_threshold``. The input list is not modified.
    """
    if not boxes:
        return []

    remaining = sorted(boxes, key=lambda row: row[4], reverse=True)
    survivors = []
    while remaining:
        top, rest = remaining[0], remaining[1:]
        survivors.append(top)
        remaining = [
            candidate for candidate in rest
            if not calculate_iou(top[:4], candidate[:4]) > iou_threshold
        ]
    return survivors


def save_detection_result(image: np.ndarray, output_path: str):
    """Write an RGB image to ``output_path`` (non-ASCII paths supported).

    The image is encoded in memory and written with ``ndarray.tofile`` so that
    paths OpenCV's own writer cannot handle still work. The codec follows the
    extension of ``output_path`` (.jpg/.jpeg/.png/.bmp); any other or missing
    extension falls back to JPEG. Previously every file was JPEG-encoded, so a
    ``.png`` output actually contained JPEG data.

    Args:
        image: RGB image array.
        output_path: Destination file path.
    """
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    extension = os.path.splitext(output_path)[1].lower()
    if extension not in ('.jpg', '.jpeg', '.png', '.bmp'):
        extension = '.jpg'

    success, encoded_image = cv2.imencode(extension, image_bgr)
    if success:
        encoded_image.tofile(output_path)
    else:
        # Best-effort fallback to OpenCV's direct writer.
        cv2.imwrite(output_path, image_bgr)
def detect_bees(model: "YOLOv2", image_path: str, config: Config,
                confidence_threshold: float = 0.3, nms_threshold: float = 0.45) -> List[List[float]]:
    """Detect bees in one image and return NMS-filtered boxes in original-image pixels.

    The raw network output is decoded the same way ``YOLOLoss`` interprets it
    during training: centre offsets go through a sigmoid, and width/height are
    ``exp(clamp(t, -6, 6)) * anchor``. (The previous version used the raw
    logits directly as offsets and sizes.)

    The ``model`` annotation is a string because ``YOLOv2`` is defined in a
    later cell; an unquoted name fails at definition time on a fresh kernel.

    NOTE(review): ``preprocess_image`` letterboxes and applies ImageNet
    normalisation, while ``BeeDataset``'s non-augmented path only resizes and
    scales to [0, 1] — confirm which preprocessing the checkpoint was trained with.

    Args:
        model: Detector returning a ``(B, H, W, A, 5 + C)`` tensor.
        image_path: Image file to analyse.
        config: Provides INPUT_SIZE, ANCHORS, NUM_ANCHORS and DEVICE.
        confidence_threshold: Minimum objectness and minimum
            objectness * class probability for a box to be kept.
        nms_threshold: IoU threshold passed to ``non_max_suppression``.

    Returns:
        List of ``[x1, y1, x2, y2, confidence, class_id]``; coordinates are
        ints floored at 0.
    """
    processed_image, scale, (x_offset, y_offset) = preprocess_image(image_path, config.INPUT_SIZE)
    input_tensor = torch.from_numpy(processed_image).unsqueeze(0).permute(0, 3, 1, 2).float()

    device = torch.device(config.DEVICE if torch.cuda.is_available() else "cpu")
    input_tensor = input_tensor.to(device)
    model = model.to(device)

    model.eval()
    with torch.no_grad():
        raw = model(input_tensor).squeeze(0).float().cpu()  # (H, W, A, 5 + C)

    grid_h, grid_w = raw.shape[0], raw.shape[1]
    objectness = torch.sigmoid(raw[..., 4])
    class_conf, class_ids = torch.sigmoid(raw[..., 5:]).max(dim=-1)
    scores = objectness * class_conf

    # Same two-stage filter as before: objectness first, then combined score.
    keep = (objectness > confidence_threshold) & (scores > confidence_threshold)
    rows, cols, anchor_ids = torch.nonzero(keep, as_tuple=True)
    if rows.numel() == 0:
        return []

    selected = raw[rows, cols, anchor_ids]
    anchor_wh = torch.tensor(config.ANCHORS[:config.NUM_ANCHORS], dtype=torch.float32)[anchor_ids]
    input_size = float(config.INPUT_SIZE)

    # Box centre and size in network-input pixels.
    cx = (torch.sigmoid(selected[:, 0]) + cols.float()) / grid_w * input_size
    cy = (torch.sigmoid(selected[:, 1]) + rows.float()) / grid_h * input_size
    bw = torch.exp(selected[:, 2].clamp(-6.0, 6.0)) * anchor_wh[:, 0]
    bh = torch.exp(selected[:, 3].clamp(-6.0, 6.0)) * anchor_wh[:, 1]

    # Undo the letterbox (padding offset, then scale) and floor at 0.
    corners = torch.stack([
        (cx - bw / 2 - x_offset) / scale,
        (cy - bh / 2 - y_offset) / scale,
        (cx + bw / 2 - x_offset) / scale,
        (cy + bh / 2 - y_offset) / scale,
    ], dim=1).clamp(min=0).tolist()
    kept_scores = scores[rows, cols, anchor_ids].tolist()
    kept_classes = class_ids[rows, cols, anchor_ids].tolist()

    boxes = [
        [int(x1), int(y1), int(x2), int(y2), score, class_id]
        for (x1, y1, x2, y2), score, class_id in zip(corners, kept_scores, kept_classes)
    ]
    return non_max_suppression(boxes, nms_threshold)

# ------------------------------
# 바운딩 박스 그리기
# ------------------------------
def draw_bounding_boxes(image: np.ndarray, boxes: list) -> np.ndarray:
    """Draw green rectangles and ``class:confidence`` labels onto ``image``.

    ``boxes`` rows are ``[x1, y1, x2, y2, confidence, class_id]``. The image is
    modified in place and also returned.
    """
    green = (0, 255, 0)
    for x1, y1, x2, y2, conf, class_id in boxes:
        cv2.rectangle(image, (x1, y1), (x2, y2), green, 2)
        # Keep the label inside the image when the box touches the top edge.
        text_origin = (x1, max(0, y1 - 5))
        cv2.putText(image, f"{class_id}:{conf:.2f}", text_origin,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 1)
    return image


In [None]:
# =========================
# Dataset
# =========================
class BeeDataset(Dataset):
    """Image/annotation pairs for the bee detector, yielding YOLO grid targets.

    Every image in ``config.IMAGES_DIR`` with a supported extension is paired
    with ``<stem>.json`` in ``config.JSONS_DIR`` (with ``TS_`` replaced by
    ``TL_`` in the name); images without a JSON file are skipped.

    Each item is ``(image, target)``: ``image`` is a CHW float tensor and
    ``target`` has shape ``(GRID_SIZE, GRID_SIZE, NUM_ANCHORS, 5 + NUM_CLASSES)``
    with ``[cell_x, cell_y, w, h, objectness, one-hot class]`` per assigned anchor.
    """

    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, config: "Config", transform=None, is_training=True):
        """Index the dataset and build the albumentations pipeline.

        Args:
            config: Provides IMAGES_DIR, JSONS_DIR, INPUT_SIZE, GRID_SIZE,
                ANCHORS, NUM_ANCHORS, NUM_CLASSES and AUGMENTATION.
            transform: Only its truthiness is used; when truthy,
                ``self.aug_transform`` is applied in ``__getitem__``.
            is_training: Enables the augmenting pipeline when
                ``config.AUGMENTATION`` is also set.
        """
        self.config = config
        self.transform = transform
        self.is_training = is_training

        # Paired image / annotation paths.
        self.image_files = []
        self.json_files = []

        # Sorted so that indices are stable across runs (os.listdir order is arbitrary).
        for filename in sorted(os.listdir(config.IMAGES_DIR)):
            stem, extension = os.path.splitext(filename)
            if extension.lower() not in self.IMAGE_EXTENSIONS:
                continue
            # Swap only the real extension; chained str.replace also rewrote
            # '.jpg'/'.png' occurring inside the stem.
            json_filename = (stem + '.json').replace('TS_', 'TL_')
            json_path = os.path.join(config.JSONS_DIR, json_filename)
            if os.path.exists(json_path):
                self.image_files.append(os.path.join(config.IMAGES_DIR, filename))
                self.json_files.append(json_path)

        print(f"데이터셋 로드 완료: {len(self.image_files)} 개의 이미지")

        # Augmenting pipeline for training, plain resize + normalise otherwise.
        if is_training and config.AUGMENTATION:
            self.aug_transform = A.Compose([
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.1),
                A.RandomBrightnessContrast(p=0.3),
                A.HueSaturationValue(p=0.3),
                A.RandomGamma(p=0.3),
                A.GaussNoise(p=0.2),
                A.Blur(p=0.1),
                A.Resize(config.INPUT_SIZE, config.INPUT_SIZE),
                A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2(),
            ], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))
        else:
            self.aug_transform = A.Compose([
                A.Resize(config.INPUT_SIZE, config.INPUT_SIZE),
                A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2(),
            ], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

    def __len__(self):
        """Number of image/annotation pairs."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image_tensor, target_tensor)`` for sample ``idx``."""
        image_path = self.image_files[idx]
        json_path = self.json_files[idx]
        input_size = self.config.INPUT_SIZE

        # Load the image; an unreadable file is replaced by a black image of
        # the configured input size (previously hard-coded to 416).
        try:
            image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)
            if image is None:
                image = np.zeros((input_size, input_size, 3), dtype=np.uint8)
            else:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        except Exception as e:
            print(f"이미지 로드 오류: {image_path}, {e}")
            image = np.zeros((input_size, input_size, 3), dtype=np.uint8)

        # Load annotations; box normalisation uses the size recorded in the JSON.
        annotation_data = load_json_annotations(json_path)
        img_width = annotation_data['IMAGE']['WIDTH']
        img_height = annotation_data['IMAGE']['HEIGHT']

        yolo_annotations = convert_to_yolo_format(annotation_data, img_width, img_height)

        bboxes, class_labels = [], []
        for annotation in yolo_annotations:
            class_id, cx, cy, w, h = annotation
            # Clip each component into the valid normalised range.
            cx = np.clip(cx, 0.0, 1.0)
            cy = np.clip(cy, 0.0, 1.0)
            w = np.clip(w, 0.01, 1.0)
            h = np.clip(h, 0.01, 1.0)
            bboxes.append([cx, cy, w, h])
            class_labels.append(class_id)

        # NOTE(review): the albumentations pipeline (augmentation + ImageNet
        # normalisation) only runs when a truthy ``transform`` was passed to
        # __init__; create_data_loaders passes none, so the plain branch below
        # is what training currently uses. Confirm this is intended.
        if self.transform:
            augmented = self.aug_transform(image=image, bboxes=bboxes, class_labels=class_labels)
            image = augmented['image']
            bboxes = augmented['bboxes']
            class_labels = augmented['class_labels']
        else:
            image = cv2.resize(image, (input_size, input_size))
            image = image.astype(np.float32) / 255.0
            image = np.transpose(image, (2, 0, 1))
            image = torch.from_numpy(image).float()

        target = self._create_dynamic_anchor_target(bboxes, class_labels)
        return image, target

    def _create_dynamic_anchor_target(self, bboxes, class_labels):
        """Build the grid target tensor from normalised ``[cx, cy, w, h]`` boxes.

        Each box is assigned to the grid cell containing its centre and to the
        anchor whose shape has the highest IoU with the box (both aligned at a
        common corner). The earlier ratio-product score gave 1.0 to any anchor
        with the same aspect ratio regardless of size, so size played no role.
        """
        grid = self.config.GRID_SIZE
        input_size = self.config.INPUT_SIZE
        anchors = self.config.ANCHORS[:self.config.NUM_ANCHORS]
        target = torch.zeros((grid, grid, self.config.NUM_ANCHORS, 5 + self.config.NUM_CLASSES))

        for bbox, cls in zip(bboxes, class_labels):
            cx, cy, w, h = (float(value) for value in bbox)

            # A centre exactly on the right/bottom border (cx or cy == 1.0)
            # would index cell ``grid``; clamp so the object is not dropped.
            grid_x = min(max(int(cx * grid), 0), grid - 1)
            grid_y = min(max(int(cy * grid), 0), grid - 1)
            grid_cx = cx * grid - grid_x
            grid_cy = cy * grid - grid_y

            best_anchor, best_iou = 0, 0.0
            for i, (aw, ah) in enumerate(anchors):
                aw_n, ah_n = aw / input_size, ah / input_size
                intersection = min(w, aw_n) * min(h, ah_n)
                union = w * h + aw_n * ah_n - intersection
                iou = intersection / union if union > 0 else 0.0
                if iou > best_iou:
                    best_iou = iou
                    best_anchor = i

            target[grid_y, grid_x, best_anchor, 0] = grid_cx
            target[grid_y, grid_x, best_anchor, 1] = grid_cy
            target[grid_y, grid_x, best_anchor, 2] = w
            target[grid_y, grid_x, best_anchor, 3] = h
            target[grid_y, grid_x, best_anchor, 4] = 1.0  # objectness
            # int(): labels may come back from the transform as non-int numbers.
            target[grid_y, grid_x, best_anchor, 5 + int(cls)] = 1.0

        return target



def create_data_loaders(config: Config, train_ratio: float = 0.8, seed: int = 42):
    """Build the training and validation ``DataLoader``s from one ``BeeDataset``.

    The train/validation split is drawn from a generator seeded with ``seed``.
    An unseeded split changes on every call, so resuming training from a
    checkpoint would move earlier validation images into the training set.
    For a fixed dataset size the split indices are now reproducible; which
    files they refer to follows the dataset's own file order.

    Args:
        config: Uses ``BATCH_SIZE`` when present; otherwise falls back to
            ``BATCH_SIZE_GPU`` or ``BATCH_SIZE_CPU`` depending on CUDA availability.
        train_ratio: Fraction of samples assigned to the training split.
        seed: Seed for the split generator.

    Returns:
        ``(train_loader, val_loader)``.

    Raises:
        ValueError: if no image/annotation pairs were found.
    """
    full_dataset = BeeDataset(config)
    total_size = len(full_dataset)
    if total_size == 0:
        raise ValueError(
            f"No image/annotation pairs found under {config.IMAGES_DIR} and {config.JSONS_DIR}"
        )

    train_size = int(total_size * train_ratio)
    val_size = total_size - train_size
    split_generator = torch.Generator().manual_seed(seed)
    train_dataset, val_dataset = torch.utils.data.random_split(
        full_dataset, [train_size, val_size], generator=split_generator
    )

    pin = torch.cuda.is_available()
    fallback_batch = config.BATCH_SIZE_GPU if pin else config.BATCH_SIZE_CPU
    batch_size = getattr(config, 'BATCH_SIZE', fallback_batch)

    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, num_workers=2, pin_memory=pin)
    val_loader = DataLoader(val_dataset, batch_size=batch_size,
                            shuffle=False, num_workers=2, pin_memory=pin)
    return train_loader, val_loader



In [None]:
# =========================
# Model
# =========================
class ConvBlock(nn.Module):
    """Conv2d -> optional BatchNorm2d -> LeakyReLU(0.1).

    The convolution carries a bias only when batch normalisation is disabled.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3,
                 stride: int = 1, padding: int = 1, batch_norm: bool = True):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=not batch_norm,
        )
        if batch_norm:
            self.bn = nn.BatchNorm2d(out_channels)
        else:
            self.bn = None
        self.leaky_relu = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Apply convolution, batch norm (if enabled) and the activation."""
        features = self.conv(x)
        normalised = features if self.bn is None else self.bn(features)
        return self.leaky_relu(normalised)


class Darknet19(nn.Module):
    """Darknet-19 style backbone returning two feature maps.

    ``forward`` returns ``(route_1, route_2)``: ``route_1`` is the 512-channel
    output of ``conv13`` (after four 2x2 poolings) and ``route_2`` is the
    1024-channel output of ``conv18`` (after the fifth pooling).
    """

    # Layer plan: a tuple is (attribute name, in channels, out channels, kernel
    # size) for a ConvBlock; a bare string names a 2x2 stride-2 max-pool.
    _STAGE_1 = (
        ('conv1', 3, 32, 3), 'pool1',
        ('conv2', 32, 64, 3), 'pool2',
        ('conv3', 64, 128, 3), ('conv4', 128, 64, 1), ('conv5', 64, 128, 3), 'pool3',
        ('conv6', 128, 256, 3), ('conv7', 256, 128, 1), ('conv8', 128, 256, 3), 'pool4',
        ('conv9', 256, 512, 3), ('conv10', 512, 256, 1), ('conv11', 256, 512, 3),
        ('conv12', 512, 256, 1), ('conv13', 256, 512, 3),
    )
    _STAGE_2 = (
        'pool5',
        ('conv14', 512, 1024, 3), ('conv15', 1024, 512, 1), ('conv16', 512, 1024, 3),
        ('conv17', 1024, 512, 1), ('conv18', 512, 1024, 3),
    )

    def __init__(self):
        super().__init__()
        # Register layers under their original attribute names, in the original order.
        for entry in self._STAGE_1 + self._STAGE_2:
            if isinstance(entry, str):
                setattr(self, entry, nn.MaxPool2d(2, 2))
                continue
            name, in_ch, out_ch, kernel = entry
            if kernel == 1:
                layer = ConvBlock(in_ch, out_ch, kernel_size=1, padding=0)
            else:
                layer = ConvBlock(in_ch, out_ch)
            setattr(self, name, layer)

    def _run(self, x, plan):
        """Feed ``x`` through the layers named in ``plan`` in order."""
        for entry in plan:
            layer_name = entry if isinstance(entry, str) else entry[0]
            x = getattr(self, layer_name)(x)
        return x

    def forward(self, x):
        """Return the intermediate (``route_1``) and final (``route_2``) feature maps."""
        route_1 = self._run(x, self._STAGE_1)
        route_2 = self._run(route_1, self._STAGE_2)
        return route_1, route_2


class YOLOv2(nn.Module):
    """YOLOv2-style detector: Darknet19 backbone, passthrough branch and 1x1 head.

    ``forward`` returns a tensor of shape ``(B, H, W, num_anchors, 5 + num_classes)``
    holding raw (un-activated) predictions.
    """

    def __init__(self, config: Config, num_classes: int = 1, num_anchors: int = 5):
        super().__init__()
        self.config = config  # kept so callers can reach the settings from the model
        self.num_classes = num_classes
        self.num_anchors = num_anchors

        self.backbone = Darknet19()
        self.conv19 = ConvBlock(1024, 1024, kernel_size=3, padding=1)
        self.conv20 = ConvBlock(1024, 1024, kernel_size=3, padding=1)
        # Stride-2 convolution halves route_1's resolution to match route_2.
        self.route1_resize = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.1),
        )
        self.conv21 = ConvBlock(512, 64, kernel_size=1, padding=0)
        self.conv22 = nn.Conv2d(1024 + 64, num_anchors * (5 + num_classes), kernel_size=1)

    def forward(self, x):
        """Compute raw per-cell, per-anchor predictions for a batch of images."""
        route_1, route_2 = self.backbone(x)

        deep = self.conv20(self.conv19(route_2))
        passthrough = self.conv21(self.route1_resize(route_1))
        fused = torch.cat([deep, passthrough], dim=1)

        # Detection head: (B, A * (5 + C), H, W).
        head = self.conv22(fused)

        batch, _, grid_h, grid_w = head.shape
        head = head.view(batch, self.num_anchors, 5 + self.num_classes, grid_h, grid_w)
        # Reorder to (B, H, W, A, 5 + C).
        return head.permute(0, 3, 4, 1, 2).contiguous()

class YOLOLoss(nn.Module):
    """Sum-reduced YOLO loss (coordinates, objectness, class), averaged over the batch.

    Predictions and targets share the shape ``(B, H, W, A, 5 + C)``.
    Predictions are raw network outputs; they are decoded here as
    ``sigmoid(x)``, ``sigmoid(y)``, ``exp(clamp(w, -6, 6)) * anchor_w / input_size``
    and likewise for ``h``. Targets hold cell-relative centres, normalised
    width/height, objectness (0 or 1) and one-hot classes.

    Args:
        num_classes: Number of class channels ``C``.
        num_anchors: Number of anchors ``A``.
        anchors: ``(width, height)`` pairs in input pixels; only the first
            ``num_anchors`` are used. ``None`` selects five built-in anchors.
        input_size: Network input side length used to normalise the anchors.
        lambda_coord: Weight of the coordinate term.
        lambda_noobj: Weight of the no-object confidence term.
        lambda_class: Weight of the class term.
    """

    def __init__(self, num_classes: int = 1, num_anchors: int = 5,
                 anchors: Tuple[Tuple[int, int], ...] = None, input_size: int = 416,
                 lambda_coord: float = 10.0, lambda_noobj: float = 1.0, lambda_class: float = 2.0):
        super(YOLOLoss, self).__init__()
        self.num_classes = num_classes
        self.num_anchors = num_anchors
        self.input_size = input_size
        if anchors is None:
            # tuple() accepts a single iterable, so the previous
            # tuple((10, 13), (16, 30), ...) call raised TypeError.
            anchors = ((10, 13), (16, 30), (33, 23), (30, 61), (62, 45))
        used_anchors = anchors[:num_anchors]
        self.register_buffer(
            'anchor_ws',
            torch.tensor([a[0] for a in used_anchors], dtype=torch.float32).view(1, 1, 1, num_anchors))
        self.register_buffer(
            'anchor_hs',
            torch.tensor([a[1] for a in used_anchors], dtype=torch.float32).view(1, 1, 1, num_anchors))
        self.lambda_coord = lambda_coord
        self.lambda_noobj = lambda_noobj
        self.lambda_class = lambda_class
        self.mse_loss = nn.MSELoss(reduction='sum')
        self.bce_logits = nn.BCEWithLogitsLoss(reduction='sum')

    def forward(self, predictions: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        """Return the total loss divided by the batch size."""
        pred_x = predictions[..., 0]
        pred_y = predictions[..., 1]
        pred_w = predictions[..., 2]
        pred_h = predictions[..., 3]
        pred_conf = predictions[..., 4]
        pred_cls = predictions[..., 5:]

        target_x = targets[..., 0]
        target_y = targets[..., 1]
        target_w = targets[..., 2]
        target_h = targets[..., 3]
        target_conf = targets[..., 4]
        target_cls = targets[..., 5:]

        obj_mask = target_conf > 0
        noobj_mask = target_conf == 0

        pred_x_sig = torch.sigmoid(pred_x)
        pred_y_sig = torch.sigmoid(pred_y)
        # Clamp before exp() so the decoded size cannot overflow.
        pred_w_clamped = torch.clamp(pred_w, min=-6.0, max=6.0)
        pred_h_clamped = torch.clamp(pred_h, min=-6.0, max=6.0)

        # Buffers follow the module, but the loss may not have been moved; align explicitly.
        anchor_ws = self.anchor_ws.to(predictions.device)
        anchor_hs = self.anchor_hs.to(predictions.device)
        pred_w_dec = torch.exp(pred_w_clamped) * (anchor_ws / float(self.input_size))
        pred_h_dec = torch.exp(pred_h_clamped) * (anchor_hs / float(self.input_size))

        # Keep target sizes within [one input pixel, full image].
        eps = 1.0 / float(self.input_size)
        target_w_safe = torch.clamp(target_w, min=eps, max=1.0)
        target_h_safe = torch.clamp(target_h, min=eps, max=1.0)

        coord_loss = self.lambda_coord * (
            self.mse_loss(pred_x_sig[obj_mask], target_x[obj_mask]) +
            self.mse_loss(pred_y_sig[obj_mask], target_y[obj_mask]) +
            self.mse_loss(pred_w_dec[obj_mask], target_w_safe[obj_mask]) +
            self.mse_loss(pred_h_dec[obj_mask], target_h_safe[obj_mask])
        )

        conf_loss_obj = self.bce_logits(pred_conf[obj_mask], target_conf[obj_mask])
        conf_loss_noobj = self.lambda_noobj * self.bce_logits(pred_conf[noobj_mask], target_conf[noobj_mask])
        class_loss = self.lambda_class * self.bce_logits(pred_cls[obj_mask], target_cls[obj_mask])

        total_loss = coord_loss + conf_loss_obj + conf_loss_noobj + class_loss
        batch_size = predictions.size(0)
        return total_loss / max(1, batch_size)

In [None]:
# =========================
# Training
# =========================
def _find_latest_checkpoint(checkpoint_dir: str):
    """Return the checkpoint to resume from in ``checkpoint_dir``, or ``None``.

    ``last_checkpoint.pth`` (written every epoch) is preferred; otherwise the
    most recently modified ``*.pth`` file is used.
    """
    last_path = os.path.join(checkpoint_dir, 'last_checkpoint.pth')
    if os.path.exists(last_path):
        return last_path
    candidates = glob.glob(os.path.join(checkpoint_dir, '*.pth'))
    if not candidates:
        return None
    return max(candidates, key=os.path.getmtime)


def _checkpoint_payload(epoch, model, optimizer, scheduler, loss, best_val_loss, config):
    """Assemble the dictionary stored in every checkpoint file."""
    return {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'loss': loss,
        'best_val_loss': best_val_loss,
        'config': config,
    }


def train_model(config: Config, resume_path: str = None):
    """Train the YOLOv2 bee detector and return the trained model.

    Per epoch: linear LR warm-up for the first ``WARMUP_EPOCHS`` epochs, one
    training pass (non-finite losses are skipped, gradients clipped to norm
    1.0), one validation pass, a ``ReduceLROnPlateau`` step on the validation
    loss, TensorBoard logging and checkpointing (``last_checkpoint.pth`` every
    epoch, ``best_model.pth`` on improvement, ``model_epoch_N.pth`` every
    ``SAVE_INTERVAL`` epochs).

    Side effect: sets ``config.BATCH_SIZE`` from the GPU/CPU batch size.

    Args:
        config: Training configuration.
        resume_path: Checkpoint to resume from, ``'auto'`` to pick the latest
            one in ``config.CHECKPOINT_DIR``, or ``None`` to start fresh. A
            missing file is ignored.
    """
    print("YOLOv2 벌 감지 모델 학습을 시작합니다...")
    device = torch.device(config.DEVICE if torch.cuda.is_available() else "cpu")
    print(f"사용 디바이스: {device}")

    if torch.cuda.is_available():
        config.BATCH_SIZE = config.BATCH_SIZE_GPU
    else:
        config.BATCH_SIZE = config.BATCH_SIZE_CPU
    print(f"배치 크기: {config.BATCH_SIZE}")

    train_loader, val_loader = create_data_loaders(config)

    model = YOLOv2(config=config, num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS).to(device)

    criterion = YOLOLoss(
        num_classes=config.NUM_CLASSES,
        num_anchors=config.NUM_ANCHORS,
        anchors=tuple(config.ANCHORS[:config.NUM_ANCHORS]),
        input_size=config.INPUT_SIZE,
        lambda_noobj=0.5
    ).to(device)

    optimizer = optim.Adam(model.parameters(), lr=config.BASE_LR, weight_decay=config.WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, min_lr=1e-6)

    best_val_loss = float('inf')
    start_epoch = 0

    # Resume from a checkpoint if requested.
    if resume_path is not None:
        if resume_path == 'auto':
            resume_candidate = _find_latest_checkpoint(config.CHECKPOINT_DIR)
            if resume_candidate is not None:
                resume_path = resume_candidate
        if resume_path and os.path.exists(resume_path):
            print(f"재개 체크포인트 로드: {resume_path}")
            # weights_only=False: the checkpoint also stores the Config object.
            # Only load checkpoints produced by this notebook.
            ckpt = torch.load(resume_path, map_location=device, weights_only=False)
            model.load_state_dict(ckpt['model_state_dict'])
            if 'optimizer_state_dict' in ckpt:
                optimizer.load_state_dict(ckpt['optimizer_state_dict'])
            if 'scheduler_state_dict' in ckpt and ckpt['scheduler_state_dict'] is not None:
                try:
                    scheduler.load_state_dict(ckpt['scheduler_state_dict'])
                except Exception as exc:
                    # Best effort: continue with a fresh scheduler, but say so.
                    print(f"경고: 스케줄러 상태를 복원하지 못했습니다: {exc}")
            # 'loss' in last_checkpoint.pth is that epoch's validation loss, not
            # the best one; prefer the dedicated field so a worse model cannot
            # overwrite best_model.pth after resuming.
            if 'best_val_loss' in ckpt:
                best_val_loss = ckpt['best_val_loss']
            elif 'loss' in ckpt:
                best_val_loss = ckpt['loss']
            if 'epoch' in ckpt:
                start_epoch = ckpt['epoch'] + 1
            print(f"재개 시작 에포크: {start_epoch}, best_val_loss: {best_val_loss:.4f}")

    log_dir = os.path.join(config.OUTPUT_DIR, "logs", datetime.now().strftime("%Y%m%d_%H%M%S"))
    writer = SummaryWriter(log_dir)

    # Continue the batch-step axis where a resumed run would be.
    train_global_step = start_epoch * len(train_loader)
    try:
        for epoch in range(start_epoch, config.NUM_EPOCHS):
            # Linear warm-up. After warm-up the optimizer keeps whatever LR
            # ReduceLROnPlateau last set, so nothing needs to be re-applied.
            if epoch < config.WARMUP_EPOCHS:
                lr = config.BASE_LR * (epoch + 1) / config.WARMUP_EPOCHS
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

            print(f"\nEpoch {epoch+1}/{config.NUM_EPOCHS}, LR: {optimizer.param_groups[0]['lr']:.6f}")

            # --- Training ---
            model.train()
            train_loss = 0.0
            train_batches = 0

            for images, targets in train_loader:
                images = images.to(device)
                targets = targets.to(device)

                optimizer.zero_grad()
                predictions = model(images)
                loss = criterion(predictions, targets)
                if not torch.isfinite(loss):
                    print("비유한(loss) 발생: 배치 스킵")
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()

                train_loss += loss.item()
                train_batches += 1
                writer.add_scalar('Loss/Train_Batch', loss.item(), train_global_step)
                train_global_step += 1

            avg_train_loss = train_loss / max(1, train_batches)

            # --- Validation ---
            model.eval()
            val_loss = 0.0
            val_batches = 0
            with torch.no_grad():
                for images, targets in val_loader:
                    images = images.to(device)
                    targets = targets.to(device)
                    predictions = model(images)
                    loss = criterion(predictions, targets)
                    if not torch.isfinite(loss):
                        continue
                    val_loss += loss.item()
                    val_batches += 1

            avg_val_loss = val_loss / max(1, val_batches)
            scheduler.step(avg_val_loss)

            writer.add_scalar('Loss/Train', avg_train_loss, epoch)
            writer.add_scalar('Loss/Validation', avg_val_loss, epoch)
            writer.add_scalar('Learning_Rate', optimizer.param_groups[0]['lr'], epoch)
            print(f"Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

            # --- Checkpointing ---
            is_best = avg_val_loss < best_val_loss
            if is_best:
                best_val_loss = avg_val_loss

            torch.save(
                _checkpoint_payload(epoch, model, optimizer, scheduler, avg_val_loss, best_val_loss, config),
                os.path.join(config.CHECKPOINT_DIR, 'last_checkpoint.pth'))

            if is_best:
                torch.save(
                    _checkpoint_payload(epoch, model, optimizer, scheduler, best_val_loss, best_val_loss, config),
                    os.path.join(config.CHECKPOINT_DIR, 'best_model.pth'))
                print(f"새로운 최고 모델 저장: {best_val_loss:.4f}")

            if (epoch + 1) % config.SAVE_INTERVAL == 0:
                torch.save(
                    _checkpoint_payload(epoch, model, optimizer, scheduler, avg_val_loss, best_val_loss, config),
                    os.path.join(config.CHECKPOINT_DIR, f'model_epoch_{epoch+1}.pth'))
    finally:
        # Flush and close TensorBoard logs even if training is interrupted.
        writer.close()

    print("학습 완료!")
    return model


def main():
    """Create the configuration and run training, resuming from the latest checkpoint."""
    cfg = Config()
    print("\n모델 학습을 시작합니다...")
    train_model(cfg, resume_path='auto')
    for message in ("모델 학습이 완료되었습니다!",
                    f"결과는 {cfg.OUTPUT_DIR} 폴더에 저장되었습니다."):
        print(message)


main()

In [None]:
# -----------------------------
# Array 기반 preprocess (patch용)
# -----------------------------

def preprocess_image_from_array(image: np.ndarray, input_size: int):
    """Letterbox an HWC image array to ``input_size`` and scale it to [0, 1].

    The image is resized to fit while keeping its aspect ratio and centred on
    a black square canvas. No colour conversion or mean/std normalisation is
    applied.

    Returns:
        ``(image, scale, (x_offset, y_offset))`` with ``image`` as float32.
    """
    src_h, src_w, _channels = image.shape
    scale = min(input_size / src_w, input_size / src_h)
    fit_w = int(src_w * scale)
    fit_h = int(src_h * scale)
    shrunk = cv2.resize(image, (fit_w, fit_h))

    letterbox = np.zeros((input_size, input_size, 3), dtype=np.uint8)
    x_offset = (input_size - fit_w) // 2
    y_offset = (input_size - fit_h) // 2
    letterbox[y_offset:y_offset + fit_h, x_offset:x_offset + fit_w, :] = shrunk

    scaled = (letterbox / 255.0).astype(np.float32)
    return scaled, scale, (x_offset, y_offset)

# -----------------------------
# Patch 단위 이미지 감지
# -----------------------------
def detect_bees_patch(model: YOLOv2, image_path: str, config: Config,
                      patch_size: int = 416, stride: int = 416,
                      confidence_threshold: float = 0.3,
                      nms_threshold: float = 0.45) -> List[List[float]]:
    """Detect bees by tiling the image into patches and merging the results with NMS.

    Changes from the previous version:
      * Patches are converted BGR -> RGB before inference, matching
        ``BeeDataset``, which feeds RGB images during training.
      * Predictions are decoded as ``YOLOLoss`` defines them (sigmoid for the
        centre offsets, ``exp(clamp(t, -6, 6)) * anchor`` for width/height)
        instead of using the raw logits.

    Args:
        model: Detector returning a ``(B, H, W, A, 5 + C)`` tensor.
        image_path: Image file to analyse.
        config: Provides INPUT_SIZE, ANCHORS, NUM_ANCHORS and DEVICE.
        patch_size: Side length of each tile in original-image pixels.
        stride: Step between tile origins.
        confidence_threshold: Minimum objectness and minimum
            objectness * class probability for a box to be kept.
        nms_threshold: IoU threshold passed to ``non_max_suppression``.

    Returns:
        List of ``[x1, y1, x2, y2, confidence, class_id]`` in original-image
        pixels, clipped to the image bounds.

    Raises:
        ValueError: if the image cannot be decoded.
    """
    image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError(f"이미지를 로드할 수 없습니다: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    orig_h, orig_w = image.shape[:2]

    device = torch.device(config.DEVICE if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    anchors = torch.tensor(config.ANCHORS[:config.NUM_ANCHORS], dtype=torch.float32)
    input_size = float(config.INPUT_SIZE)
    all_boxes = []

    for y0 in range(0, orig_h, stride):
        for x0 in range(0, orig_w, stride):
            y1 = min(y0 + patch_size, orig_h)
            x1 = min(x0 + patch_size, orig_w)
            patch = image[y0:y1, x0:x1].copy()

            processed_patch, scale, (x_offset, y_offset) = preprocess_image_from_array(patch, config.INPUT_SIZE)
            input_tensor = torch.from_numpy(processed_patch).unsqueeze(0).permute(0, 3, 1, 2).float().to(device)

            with torch.no_grad():
                raw = model(input_tensor).squeeze(0).float().cpu()  # (H, W, A, 5 + C)

            grid_h, grid_w = raw.shape[0], raw.shape[1]
            objectness = torch.sigmoid(raw[..., 4])
            class_conf, class_ids = torch.sigmoid(raw[..., 5:]).max(dim=-1)
            scores = objectness * class_conf
            keep = (objectness > confidence_threshold) & (scores > confidence_threshold)
            rows, cols, anchor_ids = torch.nonzero(keep, as_tuple=True)
            if rows.numel() == 0:
                continue

            selected = raw[rows, cols, anchor_ids]
            anchor_wh = anchors[anchor_ids]

            # Box centre and size in network-input pixels.
            cx = (torch.sigmoid(selected[:, 0]) + cols.float()) / grid_w * input_size
            cy = (torch.sigmoid(selected[:, 1]) + rows.float()) / grid_h * input_size
            bw = torch.exp(selected[:, 2].clamp(-6.0, 6.0)) * anchor_wh[:, 0]
            bh = torch.exp(selected[:, 3].clamp(-6.0, 6.0)) * anchor_wh[:, 1]

            # Undo the letterbox, then shift by the patch origin.
            left = ((cx - bw / 2 - x_offset) / scale + x0).clamp(0, orig_w).tolist()
            top = ((cy - bh / 2 - y_offset) / scale + y0).clamp(0, orig_h).tolist()
            right = ((cx + bw / 2 - x_offset) / scale + x0).clamp(0, orig_w).tolist()
            bottom = ((cy + bh / 2 - y_offset) / scale + y0).clamp(0, orig_h).tolist()
            kept_scores = scores[rows, cols, anchor_ids].tolist()
            kept_classes = class_ids[rows, cols, anchor_ids].tolist()

            for bx1, by1, bx2, by2, score, class_id in zip(left, top, right, bottom, kept_scores, kept_classes):
                all_boxes.append([int(bx1), int(by1), int(bx2), int(by2), score, class_id])

    if all_boxes:
        all_boxes = non_max_suppression(all_boxes, nms_threshold)

    return all_boxes

# -----------------------------
# 디렉토리 이미지 처리
# -----------------------------
def process_images(model: YOLOv2, config: Config, input_dir: str, output_dir: str,
                   confidence_threshold: float = 0.3, patch_size: int = 416, stride: int = 416):
    """Run patch-based bee detection on every image in a directory and save annotated copies.

    Args:
        model: Detector forwarded unchanged to ``detect_bees_patch``.
        config: Configuration object forwarded unchanged to ``detect_bees_patch``.
        input_dir: Directory scanned (non-recursively) for .jpg/.jpeg/.png/.bmp files.
        output_dir: Directory (created if missing) that receives one
            ``detected_<original file name>`` image per successfully processed input.
        confidence_threshold: Forwarded to ``detect_bees_patch``.
        patch_size: Forwarded to ``detect_bees_patch``.
        stride: Forwarded to ``detect_bees_patch``.

    Returns:
        None. Progress and per-image results are printed.

    Raises:
        OSError: If ``input_dir`` cannot be listed or ``output_dir`` cannot be created.
            Failures on individual images are reported and skipped instead of raised,
            so one bad file does not abort the whole batch.
    """
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir() yields entries in arbitrary order; sort so that the processing
    # order (and therefore the printed log) is reproducible between runs.
    image_files = sorted(
        os.path.join(input_dir, name)
        for name in os.listdir(input_dir)
        if name.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp'))
    )
    total = len(image_files)

    print(f"총 {total}장 처리 시작...")

    for i, image_path in enumerate(image_files):
        file_name = os.path.basename(image_path)
        print(f"[{i+1}/{total}] 처리 중: {file_name}")
        try:
            # Validate that the file decodes BEFORE running the expensive detector,
            # so unreadable files are reported as such and cost no inference time.
            # NOTE(review): np.fromfile + cv2.imdecode (instead of cv2.imread) is
            # presumably used to cope with non-ASCII paths; confirm before changing.
            original_image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)
            if original_image is None:
                print(f"  이미지를 불러올 수 없음: {image_path}")
                continue

            boxes = detect_bees_patch(model, image_path, config,
                                      confidence_threshold=confidence_threshold,
                                      patch_size=patch_size, stride=stride)

            # imdecode returns BGR; convert to RGB before drawing and saving.
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

            # With no detections the unmodified image is still written out.
            result_image = draw_bounding_boxes(original_image, boxes) if boxes else original_image
            output_path = os.path.join(output_dir, f"detected_{file_name}")
            save_detection_result(result_image, output_path)

            print(f"  감지된 벌 개수: {len(boxes)}")
        except Exception as e:
            # Deliberate best-effort: report the failure and move on to the next image.
            print(f"  오류 발생: {e}")

    print(f"감지 완료! 결과는 {output_dir}에 저장되었습니다.")

In [None]:
# -----------------------------
# 메인 실행
# -----------------------------
def main(model_path: str = None,
         input_dir: str = '/content/data/images',
         output_dir: str = '/content/drive/MyDrive/yolo/output/results/test3',
         confidence: float = 0.3,
         patch_size: int = 416,
         stride: int = 416):
    """Load a trained YOLOv2 checkpoint and run detection over a directory of images.

    Args:
        model_path: Checkpoint file to load. When None, ``best_model.pth`` inside
            ``Config.CHECKPOINT_DIR`` is used.
        input_dir: Directory of images handed to ``process_images``.
        output_dir: Directory where ``process_images`` writes its results.
        confidence: Confidence threshold handed to ``process_images``.
        patch_size: Patch size handed to ``process_images``.
        stride: Patch stride handed to ``process_images``.

    Returns:
        None. If the checkpoint file does not exist, a message is printed and the
        function returns without running detection.
    """
    config = Config()

    # Resolve the checkpoint location, defaulting to the "best" checkpoint.
    weights_path = model_path
    if weights_path is None:
        weights_path = os.path.join(config.CHECKPOINT_DIR, 'best_model.pth')

    if not os.path.exists(weights_path):
        print(f"모델 파일 없음: {weights_path}")
        return

    # Use the configured device only when CUDA is actually available.
    device_name = config.DEVICE if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)
    checkpoint = torch.load(weights_path, map_location=device)

    # Rebuild the network and restore the weights stored under 'model_state_dict'.
    model = YOLOv2(num_classes=config.NUM_CLASSES,
                   num_anchors=config.NUM_ANCHORS,
                   config=config)
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)
    model.eval()

    print(f"모델 로드 완료: {weights_path}")
    # Training metadata is optional; report it only when both fields are present.
    if all(key in checkpoint for key in ('epoch', 'loss')):
        print(f"Epoch: {checkpoint['epoch']}, Loss: {checkpoint['loss']:.4f}")

    process_images(
        model,
        config,
        input_dir,
        output_dir,
        confidence_threshold=confidence,
        patch_size=patch_size,
        stride=stride,
    )

# -----------------------------
# 실행
# -----------------------------
if __name__ == "__main__":
    # Settings for this inference run, collected in one place so they are easy to tweak.
    # The confidence value is far below Config.CONFIDENCE_THRESHOLD (0.3), so this run
    # keeps almost every candidate box.
    run_options = dict(
        input_dir='/content/data/images',
        output_dir='/content/drive/MyDrive/yolo/output/results/test3',
        confidence=0.01,
        patch_size=416,
        stride=416,
    )
    main(**run_options)