In [1]:
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
from torchvision.models.segmentation import deeplabv3_resnet50
from sklearn.model_selection import train_test_split
import segmentation_models_pytorch as smp

# YOLO 데이터셋 경로
yolo_data_dir = './data/YOLO/images'

# U-Net 모델 정의
class UNetModel(nn.Module):
    def __init__(self, num_classes):
        super(UNetModel, self).__init__()
        self.model = smp.Unet(
            encoder_name="resnet34",
            encoder_weights="imagenet",
            in_channels=3,
            classes=num_classes,
        )

    def forward(self, x):
        return self.model(x)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# YOLO 데이터 로드 및 학습/검증 분할
def load_yolo_data(data_dir):
    images = []
    labels = []
    for filename in os.listdir(data_dir):
        if filename.endswith('.jpg'):
            img = cv2.imread(os.path.join(data_dir, filename))
            images.append(img)
            
            # 추후에 YOLO 객체 탐지 결과에서 라벨 정보 추출
            label = [0] * 80 # COCO 데이터셋의 클래스 개수
            labels.append(label)
    
    x_train, x_val, y_train, y_val = train_test_split(np.array(images), np.array(labels), test_size=0.2, random_state=42)
    return x_train, x_val, y_train, y_val

# YOLO 데이터 로드
x_train, x_val, y_train, y_val = load_yolo_data(yolo_data_dir)

In [3]:
# 성능 지표 계산 함수
def calculate_iou(outputs, labels):
    # 출력 채널 수와 라벨 채널 수가 일치하도록 변환
    outputs = torch.nn.functional.softmax(outputs, dim=1)
    labels = torch.from_numpy(labels).long()
    
    # IoU 계산
    intersection = (outputs * labels).sum()
    union = (outputs + labels).sum() - intersection
    iou = intersection / union
    return iou.item()

In [4]:
def calculate_fwiou(outputs, labels):
    # 출력 채널 수와 라벨 채널 수가 일치하도록 변환
    outputs = torch.nn.functional.softmax(outputs, dim=1)
    labels = torch.from_numpy(labels).long()
    
    # fwIoU 계산
    intersection = (outputs * labels).sum(axis=(1, 2))
    union = (outputs + labels).sum(axis=(1, 2)) - intersection
    class_iou = intersection / union
    class_weights = labels.sum(axis=(1, 2)) / labels.sum()
    fwiou = (class_iou * class_weights).sum()
    return fwiou.item()

In [5]:
def calculate_f1(outputs, labels):
    # 출력 채널 수와 라벨 채널 수가 일치하도록 변환
    outputs = torch.nn.functional.softmax(outputs, dim=1)
    labels = torch.from_numpy(labels).long()
    
    # F1 스코어 계산
    pred = outputs.argmax(dim=1)
    true_positive = ((pred == labels) & (labels == 1)).sum().item()
    false_positive = ((pred == 1) & (labels == 0)).sum().item()
    false_negative = ((pred == 0) & (labels == 1)).sum().item()
    precision = true_positive / (true_positive + false_positive + 1e-9)
    recall = true_positive / (true_positive + false_negative + 1e-9)
    f1 = 2 * (precision * recall) / (precision + recall + 1e-9)
    return f1

In [6]:
# U-Net 모델 학습 및 검증
model = UNetModel(num_classes=30)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
num_epochs = 30
best_f1_score = 0

for epoch in range(num_epochs):
    # 학습 모드
    model.train()
    train_loss = 0
    train_iou = 0
    train_fwiou = 0
    train_f1 = 0
    for i in range(len(x_train)):
        # 이미지와 레이블을 U-Net 모델에 입력
        outputs = model(torch.from_numpy(x_train[i]).unsqueeze(0).permute(0, 3, 1, 2).float())
        loss = criterion(outputs, torch.from_numpy(y_train[i]).unsqueeze(0).long())
        
        # 학습 진행
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        
        # 학습 데이터 성능 지표 계산
        train_iou += calculate_iou(outputs, y_train[i])
        train_fwiou += calculate_fwiou(outputs, y_train[i])
        train_f1 += calculate_f1(outputs, y_train[i])
    
    train_loss /= len(x_train)
    train_iou /= len(x_train)
    train_fwiou /= len(x_train)
    train_f1 /= len(x_train)
    
    # 검증 모드
    model.eval()
    val_loss = 0
    val_iou = 0
    val_fwiou = 0
    val_f1 = 0
    with torch.no_grad():
        for i in range(len(x_val)):
            # 이미지와 레이블을 U-Net 모델에 입력
            outputs = model(torch.from_numpy(x_val[i]).unsqueeze(0).permute(0, 3, 1, 2).float())
            loss = criterion(outputs, torch.from_numpy(y_val[i]).unsqueeze(0).long())
            
            val_loss += loss.item()
            
            # 검증 데이터 성능 지표 계산
            val_iou += calculate_iou(outputs, y_val[i])
            val_fwiou += calculate_fwiou(outputs, y_val[i])
            val_f1 += calculate_f1(outputs, y_val[i])
    
    val_loss /= len(x_val)
    val_iou /= len(x_val)
    val_fwiou /= len(x_val)
    val_f1 /= len(x_val)
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train IoU: {train_iou:.4f}, Train fwIoU: {train_fwiou:.4f}, Train F1: {train_f1:.4f}, Val Loss: {val_loss:.4f}, Val IoU: {val_iou:.4f}, Val fwIoU: {val_fwiou:.4f}, Val F1: {val_f1:.4f}')
    
    # 가장 높은 F1 스코어를 가진 모델 저장
    if val_f1 > best_f1_score:
        best_f1_score = val_f1
        torch.save(model.state_dict(), './best_model.pth')

RuntimeError: only batches of spatial targets supported (3D tensors) but got targets of dimension: 2