## 코드 실행 전 기존 json 파일이 포함된 labels 폴더 이름을 labels_json 으로 변경 후 코드실행

1. requirements.txt 패키지 설치

In [None]:
import os
import subprocess
import sys

requirements_file = 'requirements.txt'

result = subprocess.run(['pip', 'install', '-r', requirements_file], capture_output=True, text=True)

print(result.stdout)
if result.returncode != 0:
    print("에러 :")
    print(result.stderr)
    
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118



2. 라이브러리 호출 

In [None]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
import cv2
import json
import numpy as np
from tqdm import tqdm
from PIL import Image
import ultralytics
from ultralytics import YOLO
from albumentations import (Compose, RandomBrightnessContrast, Rotate, HorizontalFlip, VerticalFlip, Blur, RandomScale)
import matplotlib.pyplot as plt
from IPython.display import clear_output  
import time

# Clear whatever output this cell has produced so far.
clear_output()
# Run the Ultralytics environment check (presumably prints a version/hardware
# summary — confirm against the Ultralytics docs).
ultralytics.checks()

print(f"CUDA is available: {torch.cuda.is_available()}")
# Torch device handle: "cuda" when CUDA is available, otherwise "cpu".
# NOTE(review): the training/validation cells visible below pass device=0
# directly rather than using this value.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


3. YOLO 형식의 어노테이션으로 변환


##기존 json 파일이 포함된 labels 폴더 이름을 labels_json 으로 변경 후 코드실행##

In [None]:
## Before running: rename the existing labels folder (the one containing the JSON files) to labels_json.

# Maps each annotation class name to its integer class id. It is used when
# writing the YOLO label files and is passed to create_mask() as the
# name -> mask-value table.
# NOTE: the prediction cell further down rebinds `class_map` to an
# int -> int table, so this cell must be re-run before re-running the
# conversion or ground-truth mask cells.
class_map = {
    "road": 0, "sidewalk": 1, "road roughness": 2, "road boundaries": 3, "crosswalks": 4, 
    "lane": 5, "road color guide": 6, "road marking": 7, "parking": 8, "traffic sign": 9, 
    "traffic light": 10, "pole/structural object": 11, "building": 12, "tunnel": 13, 
    "bridge": 14, "pedestrian": 15, "vehicle": 16, "bicycle": 17, "motorcycle": 18, 
    "personal mobility": 19, "dynamic": 20, "vegetation": 21, "sky": 22, "static": 23
}

def normalize_polygon(polygon, image_width, image_height):
    """Scale polygon vertices by the image dimensions.

    Args:
        polygon: Iterable of points; index 0 of each point is x, index 1 is y.
        image_width: Divisor applied to every x value.
        image_height: Divisor applied to every y value.

    Returns:
        A flat list ``[x0, y0, x1, y1, ...]`` of the divided coordinates.
    """
    return [
        scaled
        for vertex in polygon
        for scaled in (vertex[0] / image_width, vertex[1] / image_height)
    ]

def convert_annotation(json_file, output_file, image_size, image_path):
    """Convert one JSON annotation file into a YOLO segmentation label file.

    Every usable annotation becomes one line ``<class_id> x0 y0 x1 y1 ...``
    whose coordinates are divided by the width/height of the image found at
    ``image_path``.

    Args:
        json_file: Path of the JSON label file; polygons are read from its
            ``"Annotation"`` list (``data[0]`` is a flat x/y coordinate list).
        output_file: Path of the ``.txt`` label file to write.
        image_size: Unused; kept so existing callers keep working. The size
            is taken from the image itself.
        image_path: Path of the image the annotations belong to.

    Returns:
        None. Nothing is written when the image is missing or unreadable.
    """
    if not os.path.exists(image_path):
        print(f"Image file not found: {image_path}. Skipping...")
        return

    # cv2.imread does not raise on a corrupt/unsupported file; it returns None.
    img = cv2.imread(image_path)
    if img is None:
        print(f"Image file could not be read: {image_path}. Skipping...")
        return
    img_height, img_width = img.shape[:2]

    with open(json_file, 'r') as f:
        annotations = json.load(f).get('Annotation', [])

    label_lines = []
    for anno in annotations:
        rings = anno.get('data')
        if not rings:
            continue

        coords = rings[0]
        # An empty list would crash min()/max(); an odd count cannot form
        # complete (x, y) pairs.
        if not coords or len(coords) % 2 != 0:
            continue

        class_id = class_map.get(anno.get('class_name'), -1)
        if class_id == -1:
            continue

        x_coords = coords[0::2]
        y_coords = coords[1::2]
        x_min, x_max = min(x_coords), max(x_coords)
        y_min, y_max = min(y_coords), max(y_coords)

        # Drop polygons whose extent collapses to a line or a point.
        if x_max <= x_min or y_max <= y_min:
            print(f"Skipping {json_file}: x_min={x_min}, x_max={x_max}, y_min={y_min}, y_max={y_max}")
            continue

        normalized_polygon = normalize_polygon(
            list(zip(x_coords, y_coords)), img_width, img_height
        )
        coordinates_str = " ".join(f"{coord:.6f}" for coord in normalized_polygon)
        label_lines.append(f"{class_id} {coordinates_str}\n")

    # Write once in 'w' mode: the previous append mode duplicated every label
    # each time the conversion cell was re-run. An image without usable
    # annotations gets an empty label file.
    with open(output_file, 'w') as f_out:
        f_out.writelines(label_lines)


def process_directory(json_dir, output_dir, image_dir, image_size=(1920, 1200)):
    """Convert every ``.json`` label in ``json_dir`` to a YOLO ``.txt`` label.

    Args:
        json_dir: Directory scanned for ``.json`` files.
        output_dir: Directory for the ``.txt`` files; created when missing.
        image_dir: Directory expected to hold ``<name>.jpg`` for each label.
        image_size: Forwarded unchanged to ``convert_annotation``.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for entry in os.listdir(json_dir):
        # Guard clause: anything that is not a JSON label is ignored.
        if not entry.endswith('.json'):
            continue

        stem, _ = os.path.splitext(entry)
        label_path = os.path.join(output_dir, stem + ".txt")
        picture_path = os.path.join(image_dir, stem + ".jpg")
        source_path = os.path.join(json_dir, entry)
        convert_annotation(source_path, label_path, image_size, picture_path)

# Size tuple handed to process_directory() for both splits.
image_size = (1920, 1200)

## Before running: rename the existing labels folder (the one containing the JSON files) to labels_json.
# Validation split: JSON labels in labels_json -> YOLO .txt labels in labels.
val_json_dir = 'data/validation/labels_json'
val_image_dir = 'data/validation/images'
val_output_dir = 'data/validation/labels'
process_directory(val_json_dir, val_output_dir, val_image_dir, image_size=image_size)

# Training split: same conversion with the training directories.
train_json_dir = 'data/training/labels_json'
train_image_dir = 'data/training/images'
train_output_dir = 'data/training/labels'
process_directory(train_json_dir, train_output_dir, train_image_dir, image_size=image_size)


4. test mask 파일 생성

##기존 json 파일이 포함된 labels 폴더 이름을 labels_json 으로 변경 후 코드실행##

In [9]:
def create_mask(image_size, annotations, class_labels):
    """Rasterize polygon annotations into a single-channel class-id mask.

    Args:
        image_size: ``(width, height)`` of the mask to create.
        annotations: Iterable of dicts with ``"class_name"`` and ``"data"``;
            ``data[0]`` is a flat ``[x0, y0, x1, y1, ...]`` coordinate list.
        class_labels: Mapping from class name to the value painted into the
            mask.

    Returns:
        A PIL image built from a ``uint8`` array of shape ``(height, width)``.
        Pixels not covered by any polygon stay 0, and where polygons overlap
        the later annotation overwrites the earlier one.

    Note:
        A class mapped to 0 is painted with the same value as the untouched
        background, so it cannot be told apart from it in the result.
    """
    width, height = image_size
    mask = np.zeros((height, width), dtype=np.uint8)

    for annotation in annotations:
        class_name = annotation.get('class_name')
        if class_name not in class_labels:
            continue

        # Skip malformed entries instead of crashing, mirroring the checks
        # done when converting the same JSON files to YOLO labels.
        rings = annotation.get('data')
        if not rings:
            continue
        coords = rings[0]
        if not coords or len(coords) % 2 != 0:
            continue

        # fillPoly needs integer (x, y) vertex pairs.
        points = np.array(coords).reshape((-1, 2)).astype(np.int32)
        cv2.fillPoly(mask, [points], class_labels[class_name])

    return Image.fromarray(mask)

def process_annotations(image_dir, label_dir, mask_dir):
    """Create a ground-truth mask PNG for every JSON label in ``label_dir``.

    Each JSON file must provide ``"image_name"`` and ``"image_size"``; its
    ``"Annotation"`` list (empty when absent) is rasterized with
    ``create_mask`` using the module-level ``class_map`` and saved as
    ``<image stem>_mask.png`` inside ``mask_dir``.

    Args:
        image_dir: Unused; kept so existing callers keep working.
        label_dir: Directory scanned for ``.json`` label files.
        mask_dir: Output directory; created when missing.
    """
    os.makedirs(mask_dir, exist_ok=True)

    for label_file in sorted(os.listdir(label_dir)):
        if not label_file.endswith('.json'):
            continue

        with open(os.path.join(label_dir, label_file)) as f:
            data = json.load(f)

        image_name = data["image_name"]
        image_size = tuple(data["image_size"])
        annotations = data.get("Annotation", [])

        mask = create_mask(image_size, annotations, class_map)

        # Replace only the real file extension. str.replace('.jpg', ...)
        # rewrote every occurrence in the name and left names with another
        # extension or case untouched, so the mask could end up saved under
        # a non-PNG extension.
        mask_name = os.path.splitext(image_name)[0] + '_mask.png'
        mask.save(os.path.join(mask_dir, mask_name))

# Test-split locations: images, JSON labels (the renamed labels folder) and
# the directory that receives the generated ground-truth masks.
image_dir = 'data/test/images'
label_dir = 'data/test/labels_json'
mask_dir = 'data/test/masks'

# Build one mask PNG per JSON label file.
process_annotations(image_dir, label_dir, mask_dir)


5. 하이퍼파라미터 설정 및 모델 학습

## 새로 학습을 진행하면 결과가 새 폴더(예: runs/segment/train2)에 저장되므로, 6·8·9·10번 셀의 모델 경로를 runs/segment/train2/weights/best.pt 와 같이 변경해야 함

In [8]:
# Training settings forwarded as keyword arguments to model.train() below.
hyperparameters = {
    # Learning-rate / optimizer schedule settings.
    'lr0': 0.01,
    'lrf': 0.01,
    'momentum': 0.937,
    'weight_decay': 0.0005,
    'warmup_epochs': 3.0,
    'warmup_momentum': 0.8,
    'warmup_bias_lr': 0.1,
    # Loss-related weights.
    # NOTE(review): 'box' is 0.05 here — confirm this is intentional; the
    # Ultralytics YOLOv8 default appears to be much larger (verify in the
    # training-configuration docs).
    # NOTE(review): 'pose' and 'kobj' presumably only matter for pose
    # models — confirm.
    'box': 0.05,
    'cls': 0.5,
    'dfl': 1.5,
    'pose': 12.0,
    'kobj': 1.0,
    'label_smoothing': 0.0,
    'nbs': 64,
    # Augmentation settings.
    'hsv_h': 0.015,
    'hsv_s': 0.7,
    'hsv_v': 0.4,
    'degrees': 0.0,
    'translate': 0.1,
    'scale': 0.5,
    'shear': 0.0,
    'perspective': 0.0,
    'flipud': 0.0,
    'fliplr': 0.5,
    'bgr': 0.0,
    'mosaic': 1.0,
    'mixup': 0.0,
    'copy_paste': 0.0,
    'auto_augment': 'randaugment',
    'erasing': 0.4,
    'crop_fraction': 1.0
}

# Dataset description file and the pretrained segmentation checkpoint to start from.
yaml_path = 'data.yaml'
model = YOLO('yolov8l-seg.pt')
# Train for 50 epochs with SGD at batch size 8 and image size 640, on device 0.
model.train(data=yaml_path, epochs=50, batch=8, imgsz=640, device=0, augment=True, optimizer='SGD', **hyperparameters)

6. 예측된 마스크 파일 생성

In [None]:
# Generate predicted class-id masks with the trained model.
# NOTE: this rebinds the notebook-global `class_map` (class name -> id in the
# conversion cell) to a predicted-class-id -> mask-value table. Re-run the
# cell that defines the name-based map before re-running the label conversion
# or ground-truth mask cells.
class_map = {
    0: 0,  # road
    1: 1,  # sidewalk
    2: 2,  # road roughness
    3: 3,  # road boundaries
    4: 4,  # crosswalks
    5: 5,  # lane
    6: 6,  # road color guide
    7: 7,  # road marking
    8: 8,  # parking
    9: 9,  # traffic sign
    10: 10,  # traffic light
    11: 11,  # pole/structural object
    12: 12,  # building
    13: 13,  # tunnel
    14: 14,  # bridge
    15: 15,  # pedestrian
    16: 16,  # vehicle
    17: 17,  # bicycle
    18: 18,  # motorcycle
    19: 19,  # personal mobility
    20: 20,  # dynamic
    21: 21,  # vegetation
    22: 22,  # sky
    23: 23   # static
}

# Path of the currently trained model.
model = YOLO(r"runs/segment/train/weights/best.pt")

output_dir = r"predicted_masks"
os.makedirs(output_dir, exist_ok=True)

image_dir = r"data/test/images"
# Sorted so the processing order (and the log) is reproducible.
image_paths = [os.path.join(image_dir, img) for img in sorted(os.listdir(image_dir)) if img.endswith('.jpg')]

for img_path in image_paths:
    img = cv2.imread(img_path)
    # cv2.imread returns None (no exception) for an unreadable file.
    if img is None:
        print(f"Could not read image: {img_path}. Skipping...")
        continue
    height, width = img.shape[:2]

    # retina_masks=True asks Ultralytics for masks at the original image
    # size; by default they come back at the inference resolution, and
    # stretching those straight to (width, height) is slightly misaligned.
    results = model.predict(img, save=False, retina_masks=True)

    combined_mask = np.zeros((height, width), dtype=np.uint8)

    predicted_masks = results[0].masks if results else None
    if predicted_masks is not None:
        predicted_classes = results[0].boxes.cls
        for i, mask in enumerate(predicted_masks.data):
            mask_np = mask.cpu().numpy().astype(np.uint8)

            # Safety net: no-op when the mask already has the image size.
            resized_mask = cv2.resize(mask_np, (width, height), interpolation=cv2.INTER_NEAREST)

            class_id = int(predicted_classes[i].item())
            shade_value = class_map[class_id]

            # Later instances overwrite earlier ones where they overlap.
            combined_mask[resized_mask > 0.5] = shade_value
    else:
        print(f"No masks found for image: {img_path}")

    unique_values = np.unique(combined_mask)
    print(f"Unique values in combined mask: {unique_values}")

    # Save a mask for every readable image, including all-background ones,
    # so each test image has a prediction and missed images are not silently
    # left out of the evaluation.
    mask_name = os.path.splitext(os.path.basename(img_path))[0] + '_combined_mask.png'
    mask_path = os.path.join(output_dir, mask_name)
    cv2.imwrite(mask_path, combined_mask)
    print(f"Saved combined mask: {mask_path}")


7. mIoU 계산

In [None]:
#로그가 너무 길다면 mIoU 최종 print 함수 제외하고 삭제 가능

def calculate_iou(pred_mask, true_mask):
    """Return the intersection-over-union of two masks.

    Both inputs are combined element-wise with logical AND / OR, the two
    element counts are printed, and their ratio is returned. When the union
    is empty the result is defined as 1.0.
    """
    overlap_count = np.sum(np.logical_and(pred_mask, true_mask))
    combined_count = np.sum(np.logical_or(pred_mask, true_mask))

    print(f"Intersection sum: {overlap_count}")
    print(f"Union sum: {combined_count}")

    # Two empty masks count as a perfect match.
    return overlap_count / combined_count if combined_count != 0 else 1.0

def load_mask(mask_path):
    """Read ``mask_path`` with OpenCV in grayscale mode.

    Returns whatever ``cv2.imread`` returns for the path.
    """
    return cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

def visualize_masks(pred_mask, true_mask):
    """Show the predicted and the true mask side by side in grayscale."""
    panels = (('Predicted Mask', pred_mask), ('True Mask', true_mask))
    for position, (caption, panel_mask) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(panel_mask, cmap='gray')
        plt.title(caption)

    plt.show()

def _mask_image_key(file_name):
    """Return the image stem a mask file belongs to (mask suffix removed)."""
    stem = os.path.splitext(file_name)[0]
    for suffix in ('_combined_mask', '_mask'):
        if stem.endswith(suffix):
            return stem[:-len(suffix)]
    return stem


def _pair_mask_files(pred_mask_dir, true_mask_dir):
    """Return (pred_path, true_path) pairs that refer to the same image."""
    pred_files = sorted(f for f in os.listdir(pred_mask_dir) if f.endswith('.png'))
    true_files = sorted(f for f in os.listdir(true_mask_dir) if f.endswith('.png'))

    true_by_key = {_mask_image_key(f): f for f in true_files}
    matched = []
    unmatched = []
    for pred_file in pred_files:
        true_file = true_by_key.get(_mask_image_key(pred_file))
        if true_file is None:
            unmatched.append(pred_file)
        else:
            matched.append((pred_file, true_file))

    if not matched:
        # Unknown naming scheme: keep the previous positional pairing, but on
        # sorted listings so it is at least deterministic.
        matched = list(zip(pred_files, true_files))
    else:
        for pred_file in unmatched:
            print(f"No ground-truth mask found for {pred_file}. Skipping...")

    return [
        (os.path.join(pred_mask_dir, pred_file), os.path.join(true_mask_dir, true_file))
        for pred_file, true_file in matched
    ]


def calculate_miou(pred_mask_dir, true_mask_dir, mask_size=(1920, 1200)):
    """Average the per-image foreground IoU between predicted and true masks.

    Masks are paired by image name (``<stem>_combined_mask.png`` with
    ``<stem>_mask.png``). Zipping two unsorted ``os.listdir`` results could
    compare masks of different images, especially when the two directories
    do not hold the same number of files.

    Each mask is binarized with ``> 0`` before the IoU is computed, so the
    result is a foreground/background IoU averaged over images, not a
    per-class mean.

    Args:
        pred_mask_dir: Directory with the predicted ``.png`` masks.
        true_mask_dir: Directory with the ground-truth ``.png`` masks.
        mask_size: ``(width, height)`` both masks are resized to when their
            shapes differ.

    Returns:
        Mean IoU over all evaluated pairs, or ``nan`` when none could be
        evaluated.
    """
    pairs = _pair_mask_files(pred_mask_dir, true_mask_dir)

    ious = []
    for pred_mask_path, true_mask_path in tqdm(pairs, total=len(pairs), desc="Calculating mIoU"):
        pred_mask = load_mask(pred_mask_path)
        true_mask = load_mask(true_mask_path)

        # An unreadable file comes back as None; skip it instead of crashing.
        if pred_mask is None or true_mask is None:
            print(f"Could not read mask pair: {pred_mask_path}, {true_mask_path}. Skipping...")
            continue

        if pred_mask.shape != true_mask.shape:
            pred_mask = cv2.resize(pred_mask, mask_size, interpolation=cv2.INTER_NEAREST)
            true_mask = cv2.resize(true_mask, mask_size, interpolation=cv2.INTER_NEAREST)

        print(f"Comparing: {pred_mask_path} with {true_mask_path}")
        print(f"Pred mask shape: {pred_mask.shape}, True mask shape: {true_mask.shape}")
        print(f"Pred mask unique values: {np.unique(pred_mask)}")
        print(f"True mask unique values: {np.unique(true_mask)}")

        visualize_masks(pred_mask, true_mask)

        pred_mask_binary = pred_mask > 0
        true_mask_binary = true_mask > 0

        print(f"Binary pred mask unique values: {np.unique(pred_mask_binary)}")
        print(f"Binary true mask unique values: {np.unique(true_mask_binary)}")

        iou = calculate_iou(pred_mask_binary, true_mask_binary)
        print(f"Calculated IoU: {iou}")
        ious.append(iou)

    if not ious:
        print("No mask pairs were evaluated.")
        return float('nan')
    return np.mean(ious)

# Directory with the predicted masks and directory with the ground-truth masks.
pred_mask_dir = r"predicted_masks"
true_mask_dir = r"data/test/masks"

# Score the mask pairs and report the mean IoU with four decimals.
miou = calculate_miou(pred_mask_dir, true_mask_dir)
print(f"mIoU: {miou:.4f}")


8. 성능 지표 출력

In [None]:
# Path of the currently trained model.
model = YOLO('runs/segment/train/weights/best.pt')

val_data = 'data.yaml'

# Validate on the dataset described by data.yaml at image size 640 on device 0.
results = model.val(data=val_data, imgsz=640, device=0)

# Bounding-box metrics.
box_metrics = results.box
print(f"Box mAP@0.5: {box_metrics.map50:.4f}")
print(f"Box mAP@0.5:0.95: {box_metrics.map:.4f}")
print(f"Box Precision: {box_metrics.p.mean():.4f}")
print(f"Box Recall: {box_metrics.r.mean():.4f}")

# Segmentation-mask metrics.
seg_metrics = results.seg
print(f"Segmentation mAP@0.5: {seg_metrics.map50:.4f}")
print(f"Segmentation mAP@0.5:0.95: {seg_metrics.map:.4f}")
print(f"Segmentation Precision: {seg_metrics.p.mean():.4f}")
print(f"Segmentation Recall: {seg_metrics.r.mean():.4f}")


9. 세그멘테이션 결과 시각화

In [None]:
# Path of the currently trained model.
# Forward slashes are used because they work on Windows as well as on
# Linux/macOS, whereas the previous backslash paths only resolved on Windows.
model_path = "runs/segment/train/weights/best.pt"
model = YOLO(model_path)

test_image_path = "data/test/images/N_SFL_230703_015_FC_246.jpg"

image = cv2.imread(test_image_path)
# cv2.imread returns None for a missing/unreadable file; fail with a clear message.
if image is None:
    raise FileNotFoundError(f"Could not read test image: {test_image_path}")
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
height, width, _ = image.shape

# RGB colour per class id, painted onto the RGB copy of the image.
class_colors = {
    0: [255, 0, 0],   # road
    1: [0, 255, 0],   # sidewalk
    2: [0, 0, 255],   # road roughness
    3: [255, 255, 0], # road boundaries
    4: [255, 0, 255], # crosswalks
    5: [0, 255, 255], # lane
    6: [128, 0, 0],   # road color guide
    7: [128, 128, 0], # road marking
    8: [0, 128, 0],   # parking
    9: [128, 0, 128], # traffic sign
    10: [0, 128, 128],# traffic light
    11: [0, 0, 128],  # pole/structural object
    12: [128, 128, 128], # building
    13: [64, 0, 0],   # tunnel
    14: [64, 64, 0],  # bridge
    15: [0, 64, 0],   # pedestrian
    16: [64, 0, 64],  # vehicle
    17: [0, 64, 64],  # bicycle
    18: [192, 0, 0],  # motorcycle
    19: [192, 192, 0],# personal mobility
    20: [0, 192, 0],  # dynamic
    21: [192, 0, 192],# vegetation
    22: [0, 192, 192],# sky
    23: [0, 0, 192]   # static
}

# Ultralytics treats NumPy inputs as OpenCV-style BGR images, so predict on
# the BGR array (as the mask-generation cell does) and keep the RGB copy only
# for drawing. retina_masks=True returns masks at the original image size.
results = model.predict(source=image, retina_masks=True)

# `masks` is None when nothing was detected; fall back to an empty sequence.
masks = results[0].masks.data if results[0].masks is not None else []
boxes = results[0].boxes.data
classes = results[0].boxes.cls

overlay_image = image_rgb.copy()
for mask_tensor, class_tensor in zip(masks, classes):
    mask = mask_tensor.cpu().numpy()
    class_id = int(class_tensor)
    color = class_colors[class_id]

    # Safety net: no-op when the mask already has the image size.
    resized_mask = cv2.resize(mask, (width, height), interpolation=cv2.INTER_NEAREST)

    overlay_image[resized_mask > 0.5] = color

output_dir = "visual"
os.makedirs(output_dir, exist_ok=True)

output_image_path = os.path.join(output_dir, "output_overlay.png")
plt.figure(figsize=(10, 10))
plt.imshow(overlay_image)
plt.axis('off')
plt.savefig(output_image_path, bbox_inches='tight', pad_inches=0)
plt.show()


10. 평균 추론 시간 계산

In [None]:

# Forward slashes are used because they work on Windows as well as on
# Linux/macOS, whereas the previous backslash paths only resolved on Windows.
model_path = "runs/segment/train/weights/best.pt"
test_images_folder = "data/test/images"

model = YOLO(model_path)

image_files = [f for f in os.listdir(test_images_folder) if f.endswith('.jpg')]

times = []
warmed_up = False

for image_file in tqdm(image_files, desc="Processing images", disable=True):
    image_path = os.path.join(test_images_folder, image_file)
    image = cv2.imread(image_path)
    # cv2.imread returns None (no exception) for an unreadable file.
    if image is None:
        print(f"Could not read image: {image_path}. Skipping...")
        continue

    # The first predict() call also pays for one-time model setup, so run it
    # once untimed to keep that cost out of the average.
    if not warmed_up:
        model.predict(source=image, verbose=False)
        warmed_up = True

    # The BGR array from OpenCV is passed as-is, the channel order
    # Ultralytics expects for NumPy inputs. perf_counter is the monotonic
    # high-resolution clock intended for measuring intervals.
    start_time = time.perf_counter()
    results = model.predict(source=image, verbose=False)
    end_time = time.perf_counter()

    times.append(end_time - start_time)

if times:
    average_time = np.mean(times)
    print(f"Average inference time over {len(times)} images: {average_time:.4f} seconds")
else:
    average_time = float('nan')
    print(f"No readable .jpg images found in {test_images_folder}")
