In [7]:
from ultralytics import YOLO
import torch.nn as nn
import torch
from PIL import Image
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader
import os
from tqdm import tqdm


# 사전학습된 YOLOv8 불러오기
model = YOLO('yolov8n.pt')  # n/s/m/l/x 중 선택 가능

# 입력 Conv 레이어 수정 (원래 Conv2d(in_channels=3))
first_layer = model.model.model[0]

# 기존 파라미터 가져오기
weight = first_layer.conv.weight  # [out_channels, in_channels=3, k, k]

# 4채널로 확장 (새 채널은 평균으로 초기화)
new_weight = torch.cat(
    [weight, weight[:, :1, :, :]], dim=1
) / 2  # 간단한 초기화 전략

# 새로운 Conv 정의
new_conv = nn.Conv2d(4, first_layer.conv.out_channels, kernel_size=first_layer.conv.kernel_size,
                     stride=first_layer.conv.stride, padding=first_layer.conv.padding, bias=first_layer.conv.bias is not None)
new_conv.weight = nn.Parameter(new_weight)
if first_layer.conv.bias is not None:
    new_conv.bias = first_layer.conv.bias

# 레이어 교체
first_layer.conv = new_conv

In [8]:
# 모든 레이어 freeze
for param in model.parameters():
    param.requires_grad = False

# 첫 번째 conv 레이어만 학습하도록 설정
for param in model.model.model[0].parameters():
    param.requires_grad = True


In [9]:
# 경로 지정
base_dir = r"C:\Users\KSH\Documents\Final_project\Custum_YOLOv8\ultralytics-main\datasets\thermal"
rgb_dir = os.path.join(base_dir, "images", "rgb")
thermal_dir = os.path.join(base_dir, "images", "thermal")
label_dir = os.path.join(base_dir, "labels")

# 전처리 정의
transform = T.Compose([
    T.Resize((640, 640)),
    T.ToTensor(),
])

# 4채널 이미지 생성 함수
def load_4ch_image(rgb_path, thermal_path):
    rgb = Image.open(rgb_path).convert('RGB')
    thermal = Image.open(thermal_path).convert('L')
    rgb_tensor = transform(rgb)
    thermal_tensor = transform(thermal)
    return torch.cat([rgb_tensor, thermal_tensor], dim=0)  # (4, H, W)

# YOLO 라벨 로딩 함수
def load_label_txt(label_path):
    with open(label_path, 'r') as f:
        line = f.readline().strip()
        if not line:
            return torch.zeros(5)  # 빈 라벨 처리 (예외 방지)
        return torch.tensor(list(map(float, line.split())), dtype=torch.float32)

# 커스텀 데이터셋
class CustomDataset(Dataset):
    def __init__(self, rgb_dir, thermal_dir, label_dir):
        self.rgb_dir = rgb_dir
        self.thermal_dir = thermal_dir
        self.label_dir = label_dir
        self.image_ids = [f[:-4] for f in os.listdir(rgb_dir) if f.endswith(".jpg")]

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        rgb_path = os.path.join(self.rgb_dir, f"{image_id}.jpg")
        thermal_path = os.path.join(self.thermal_dir, f"{image_id}.jpg")
        label_path = os.path.join(self.label_dir, f"{image_id}.txt")

        img = load_4ch_image(rgb_path, thermal_path)
        label = load_label_txt(label_path)
        return img, label

In [4]:
dataset = CustomDataset(rgb_dir, thermal_dir, label_dir)
loader = DataLoader(dataset, batch_size=8, shuffle=True)

# 확인
for imgs, labels in loader:
    print("이미지 shape:", imgs.shape)   # [B, 4, 640, 640]
    print("라벨 shape:", labels.shape)   # [B, 5]
    break


이미지 shape: torch.Size([8, 4, 640, 640])
라벨 shape: torch.Size([8, 5])


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.train()

pt_model = model.model

pt_model.to(device)
pt_model.train()

optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, pt_model.parameters()), lr=1e-4)

num_epochs = 10
for epoch in range(num_epochs):
    total_loss = 0
    pbar = tqdm(loader, desc=f"Epoch {epoch+1}/{num_epochs}")
    for imgs, targets in pbar:
        imgs = imgs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        # PyTorch 모델 직접 호출
        preds = pt_model(imgs)

        # 커스텀 손실 계산 함수 필요 (아래는 예시)
        loss = compute_custom_yolo_loss(preds, targets)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        pbar.set_postfix({'loss': loss.item()})

    print(f"[Epoch {epoch+1}] Average Loss: {total_loss / len(loader):.4f}")

[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=coco.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train57, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pose=12.0, pretrained=True, profile=False, project=None, rect=False, resume=False, retina_masks=False, save