In [3]:
import os
import json
import logging
from pathlib import Path
import cv2
import numpy as np
from tqdm.auto import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

# ─── 설정 ────────────────────────────────────────────────────
image_root = Path(r"C:\Users\hyunj\Downloads\deep_test\gray")
image_subfolders = ["C_Frontback_D02", "C_Frontback_G01"]

ann_root = Path(r"C:\Users\hyunj\Downloads\deep_test\annotation")

output_root = Path(r"C:\Users\hyunj\Downloads\deep_test\mask")
output_root.mkdir(parents=True, exist_ok=True)

# 로그 파일 설정
log_file = output_root / "mask_generation.log"
logging.basicConfig(
    filename=log_file,
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s"
)

# 모폴로지 커널 (크랙 평균 폭에 맞춰 조정 가능)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

# ─── 마스크 생성 함수 ─────────────────────────────────────────
def make_mask(pair):
    img_path, ann_path = pair
    try:
        # 1) 이미지 로드
        img = cv2.imread(str(img_path))
        if img is None:
            raise ValueError("이미지 로드 실패")
        h, w = img.shape[:2]

        # 2) 어노테이션 파싱
        with open(ann_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # 3) 빈 마스크 초기화
        mask = np.zeros((h, w), dtype=np.uint8)

        # 4) 중심선 그리기 (thickness=10)
        for obj in data.get("annotations", []):
            for poly in obj.get("polyline", []):
                pts = np.array(poly, dtype=np.float32).reshape(-1, 2)
                pts = np.round(pts).astype(np.int32)
                cv2.polylines(mask, [pts], isClosed=False, color=255, thickness=10)

        # 5) 모폴로지 팽창으로 폭 확대
        mask = cv2.dilate(mask, kernel, iterations=1)

        # 6) 마스크 저장 (원본 하위 폴더 구조 유지)
        out_dir = output_root / img_path.parent.name
        out_dir.mkdir(parents=True, exist_ok=True)
        out_path = out_dir / f"{img_path.stem}_mask.png"
        cv2.imwrite(str(out_path), mask)

        return True, str(img_path)
    except Exception as e:
        logging.error(f"{img_path} 실패: {e}")
        return False, str(img_path)

# ─── 입력 쌍 수집 ────────────────────────────────────────────
pairs = []
for sub in image_subfolders:
    img_dir = image_root / sub
    ann_dir = ann_root / sub
    for img_path in img_dir.glob("*.webp"):
        ann_path = ann_dir / f"{img_path.stem}_PLINE.json"
        if ann_path.exists():
            pairs.append((img_path, ann_path))
        else:
            logging.warning(f"어노테이션 없음: {img_path}")

# ─── 멀티스레드 병렬 처리 ────────────────────────────────────
workers = min(len(pairs), os.cpu_count() or 1)
success_count = 0

with ThreadPoolExecutor(max_workers=workers) as executor:
    futures = [executor.submit(make_mask, p) for p in pairs]
    for future in tqdm(as_completed(futures),
                       total=len(futures),
                       desc="Generating Masks"):
        ok, _ = future.result()
        if ok:
            success_count += 1

print(f"완료: {success_count}/{len(pairs)} masks 생성")
print(f"로그 파일: {log_file}")


Generating Masks:   0%|          | 0/17851 [00:00<?, ?it/s]

완료: 17851/17851 masks 생성
로그 파일: C:\Users\hyunj\Downloads\deep_test\mask\mask_generation.log


In [5]:
import cv2
import os
import numpy as np

# ─── 설정 ────────────────────────────────────────────────────
mask_root = r"C:\Users\hyunj\Downloads\deep_test\mask"
image_subfolders = ["C_Frontback_D02", "C_Frontback_G01"]

output_txt_root = r"C:\Users\hyunj\Downloads\deep_test\txt"
os.makedirs(output_txt_root, exist_ok=True)

# ─── 모든 서브폴더 순회 ─────────────────────────────────────────
for sub in image_subfolders:
    mask_folder = os.path.join(mask_root, sub)
    output_txt_folder = os.path.join(output_txt_root, sub)
    os.makedirs(output_txt_folder, exist_ok=True)

    if not os.path.isdir(mask_folder):
        print(f"경고: 해당 폴더가 없습니다: {mask_folder}")
        continue

    for filename in os.listdir(mask_folder):
        if not filename.lower().endswith(".png"):
            continue

        mask_path = os.path.join(mask_folder, filename)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            print(f"이미지 로드 실패: {mask_path}")
            continue

        height, width = mask.shape[:2]

        # 윤곽선 찾기 (CHAIN_APPROX_NONE으로 더 세밀하게)
        contours, _ = cv2.findContours(
            mask,
            cv2.RETR_EXTERNAL,
            cv2.CHAIN_APPROX_NONE
        )

        txt_lines = []
        for contour in contours:
            if len(contour) < 3:
                continue  # 폴리곤은 최소 3점 필요

            # 정규화된 좌표 생성
            coords = []
            for pt in contour:
                x, y = pt[0]
                coords.append(f"{x/width:.6f} {y/height:.6f}")

            # YOLO Seg 파일 포맷
            class_id = 0
            txt_lines.append(f"{class_id} " + " ".join(coords))

        # TXT 저장
        txt_filename = filename.rsplit(".", 1)[0] + ".txt"
        txt_path = os.path.join(output_txt_folder, txt_filename)
        with open(txt_path, "w") as f:
            f.write("\n".join(txt_lines))

print("YOLOv8 Segmentation용 .txt 파일 생성 완료!")


YOLOv8 Segmentation용 .txt 파일 생성 완료!


In [9]:
import cv2
import os
import numpy as np

BASE = Path(r"C:\Users\hyunj\Downloads\deep_test")
# ─── 설정 ────────────────────────────────────────────────────
mask_root = r"C:\Users\hyunj\Downloads\deep_test\gray"
image_subfolders = ["C_Frontback_D02", "C_Frontback_G01"]

output_txt_root = r"C:\Users\hyunj\Downloads\deep_test\txt_approx"
os.makedirs(output_txt_root, exist_ok=True)

mlruns_dir = BASE / "mlruns"
mlruns_dir.mkdir(exist_ok=True)
os.environ["MLFLOW_TRACKING_URI"] = f"file:///{mlruns_dir.as_posix()}"


# ─── 모든 서브폴더 순회 ─────────────────────────────────────────
for sub in image_subfolders:
    mask_folder = os.path.join(mask_root, sub)
    output_txt_folder = os.path.join(output_txt_root, sub)
    os.makedirs(output_txt_folder, exist_ok=True)

    if not os.path.isdir(mask_folder):
        print(f"경고: 해당 폴더가 없습니다: {mask_folder}")
        continue

    for filename in os.listdir(mask_folder):
        if not filename.lower().endswith(".png"):
            continue

        mask_path = os.path.join(mask_folder, filename)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            print(f"이미지 로드 실패: {mask_path}")
            continue

        height, width = mask.shape[:2]

        # 윤곽선 찾기 (모든 점을 추출)
        contours, _ = cv2.findContours(
            mask,
            cv2.RETR_EXTERNAL,
            cv2.CHAIN_APPROX_NONE
        )

        txt_lines = []
        for contour in contours:
            if len(contour) < 3:
                continue  # 폴리곤은 최소 3점 필요

            # approxPolyDP로 단순화
            epsilon = 0.005 * cv2.arcLength(contour, True)  # 이 값을 조절하며 점 개수 조절 가능
            simplified = cv2.approxPolyDP(contour, epsilon, True)
            if len(simplified) < 3:
                continue

            # 정규화된 좌표 생성
            coords = []
            for pt in simplified:
                x, y = pt[0]
                coords.append(f"{x/width:.6f} {y/height:.6f}")

            # YOLO Seg 포맷: class_id + 좌표들
            class_id = 0
            txt_lines.append(f"{class_id} " + " ".join(coords))

        # TXT 저장
        txt_filename = os.path.splitext(filename)[0] + ".txt"
        txt_path = os.path.join(output_txt_folder, txt_filename)
        with open(txt_path, "w") as f:
            f.write("\n".join(txt_lines))

print("YOLOv8 Segmentation용 approxPolyDP 기반 .txt 생성 완료!")


YOLOv8 Segmentation용 approxPolyDP 기반 .txt 생성 완료!


In [13]:
import os
import shutil
from pathlib import Path
from sklearn.model_selection import train_test_split
from ultralytics import YOLO
import yaml

# ─── 1) 경로 설정 ──────────────────────────────────────────────
BASE         = Path(r"C:\Users\hyunj\Downloads\deep_test")
images_root  = BASE / "gray"    # 원본 이미지(.webp 혹은 .png)
labels_root  = BASE / "txt"     # YOLO Seg .txt (서브폴더별로 _mask.txt 로 저장됨)
subfolders   = ["C_Frontback_D02", "C_Frontback_G01"]

mlruns_dir = BASE / "mlruns"
mlruns_dir.mkdir(exist_ok=True)
os.environ["MLFLOW_TRACKING_URI"] = f"file:///{mlruns_dir.as_posix()}"

# ─── 2) 분할된 데이터 저장 구조 ─────────────────────────────────
out_base     = BASE / "dataset"
train_images = out_base / "images" / "train"
val_images   = out_base / "images" / "val"
train_labels = out_base / "labels" / "train"
val_labels   = out_base / "labels" / "val"
for d in (train_images, val_images, train_labels, val_labels):
    d.mkdir(parents=True, exist_ok=True)

# ─── 3) train/val 분할 & 복사 ──────────────────────────────────
for sub in subfolders:
    img_dir = images_root / sub
    lbl_dir = labels_root / sub

    if not img_dir.exists():
        print(f"[경고] 이미지 폴더가 없습니다: {img_dir}")
        continue
    if not lbl_dir.exists():
        print(f"[경고] 라벨 폴더가 없습니다: {lbl_dir}")
        continue

    # 이미지 목록(.webp, .png)
    imgs = list(img_dir.glob("*.webp")) + list(img_dir.glob("*.png"))
    train_imgs, val_imgs = train_test_split(imgs, test_size=0.2, random_state=42)

    for split, img_list in [("train", train_imgs), ("val", val_imgs)]:
        for img_path in img_list:
            # 1) 이미지 복사
            dst_img_dir = (train_images if split=="train" else val_images) / sub
            dst_img_dir.mkdir(parents=True, exist_ok=True)
            shutil.copy(img_path, dst_img_dir / img_path.name)

            # 2) 라벨(.txt) 복사
            #   (1) img_path.stem + ".txt"
            #   (2) img_path.stem + "_mask.txt"  두 가지 경우 모두 확인
            candidates = [
                lbl_dir / f"{img_path.stem}.txt",
                lbl_dir / f"{img_path.stem}_mask.txt"
            ]
            found = False
            for lbl_path in candidates:
                if lbl_path.exists():
                    dst_lbl_dir = (train_labels if split=="train" else val_labels) / sub
                    dst_lbl_dir.mkdir(parents=True, exist_ok=True)
                    shutil.copy(lbl_path, dst_lbl_dir / lbl_path.name)
                    found = True
                    break
            if not found:
                print(f"[경고] 라벨 파일을 찾을 수 없음: {candidates}")

print("✔️ 데이터 분할 및 복사 완료")

# ─── 4) data.yaml 생성 ─────────────────────────────────────────
data_yaml = {
    "train": str(train_images.parent),  # dataset/images
    "val":   str(val_images.parent),
    "nc": 1,
    "names": ["crack"]
}
with open(BASE / "data.yaml", "w") as f:
    yaml.dump(data_yaml, f, sort_keys=False)
print("✔️ data.yaml 작성 완료")

# ─── 5) YOLOv8 세그멘테이션 학습 ───────────────────────────────
model = YOLO("yolov8n-seg.pt")
model.train(
    data=str(BASE / "data.yaml"),
    epochs=50,
    imgsz=640,
    batch=8,
    name="crack_segmentation",
    val=False,            # ← 검증 생략
    log_mlflow=False      # (이전 MLflow 에러 방지를 위해 여전히 꺼둘 수 있습니다)
)


✔️ 데이터 분할 및 복사 완료
✔️ data.yaml 작성 완료
New https://pypi.org/project/ultralytics/8.3.145 available  Update with 'pip install -U ultralytics'


SyntaxError: '[31m[1mlog_mlflow[0m' is not a valid YOLO argument. 

    Arguments received: ['yolo', '--f=c:\\Users\\hyunj\\AppData\\Roaming\\jupyter\\runtime\\kernel-v3978bf20d1db1cc37962144e1869a925fc8e57e51.json']. Ultralytics 'yolo' commands use the following syntax:

        yolo TASK MODE ARGS

        Where   TASK (optional) is one of frozenset({'obb', 'classify', 'pose', 'detect', 'segment'})
                MODE (required) is one of frozenset({'val', 'train', 'benchmark', 'track', 'predict', 'export'})
                ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults.
                    See all ARGS at https://docs.ultralytics.com/usage/cfg or with 'yolo cfg'

    1. Train a detection model for 10 epochs with an initial learning_rate of 0.01
        yolo train data=coco8.yaml model=yolo11n.pt epochs=10 lr0=0.01

    2. Predict a YouTube video using a pretrained segmentation model at image size 320:
        yolo predict model=yolo11n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320

    3. Val a pretrained detection model at batch-size 1 and image size 640:
        yolo val model=yolo11n.pt data=coco8.yaml batch=1 imgsz=640

    4. Export a YOLO11n classification model to ONNX format at image size 224 by 128 (no TASK required)
        yolo export model=yolo11n-cls.pt format=onnx imgsz=224,128

    5. Ultralytics solutions usage
        yolo solutions count or in ['crop', 'blur', 'workout', 'heatmap', 'isegment', 'visioneye', 'speed', 'queue', 'analytics', 'inference', 'trackzone'] source="path/to/video.mp4"

    6. Run special commands:
        yolo help
        yolo checks
        yolo version
        yolo settings
        yolo copy-cfg
        yolo cfg
        yolo solutions help

    Docs: https://docs.ultralytics.com
    Solutions: https://docs.ultralytics.com/solutions/
    Community: https://community.ultralytics.com
    GitHub: https://github.com/ultralytics/ultralytics
     (<string>)

In [4]:
import os
from ultralytics import YOLO

# 0) MLflow Tracking URI를 올바른 파일 URI로 설정
#    - "file:///" + 드라이브+경로
mlruns = r"C:\Users\hyunj\Downloads\deep_test\mlruns"
os.makedirs(mlruns, exist_ok=True)
os.environ["MLFLOW_TRACKING_URI"] = f"file:///{mlruns.replace(os.sep, '/')}"

# 1) 모델 로드
model = YOLO("yolov8n-seg.pt")

# 2) 학습 실행
model.train(
    data="C:/Users/hyunj/Downloads/deep_test/data.yaml",
    epochs=50,
    imgsz=640,
    batch=8,
    name="crack_segmentation",
    val=True
)

print("학습이 완료되었습니다.")


New https://pypi.org/project/ultralytics/8.3.145 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.137  Python-3.9.21 torch-2.7.0+cu128 CPU (11th Gen Intel Core(TM) i7-11700 2.50GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:/Users/hyunj/Downloads/deep_test/data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n-seg.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=crac

[34m[1mtrain: [0mScanning C:\Users\hyunj\Downloads\deep_test\dataset\labels\train\C_Frontback_D02.cache... 0 images, 14265 backgrounds, 15 corrupt: 100%|██████████| 14280/14280 [00:00<?, ?it/s]

[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_0898_20201231_111555_E_CH0_Seoul_Sun_Frontback_Day_88937.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_2899_20201109_093348_N_CH2_Seoul_Sun_Frontback_Day_04037.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_4471_20201230_122847_E_CH0_Seoul_Sun_Frontback_Day_83952.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_5502_20201224_104609_N_CH1_Seoul_Sun_Frontback_Sunrise_40987.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_6015_20201109


[34m[1mval: [0mScanning C:\Users\hyunj\Downloads\deep_test\dataset\labels\val\C_Frontback_D02.cache... 0 images, 3567 backgrounds, 4 corrupt: 100%|██████████| 3571/3571 [00:00<?, ?it/s]

[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V2F_HY_3674_20201230_150636_N_CH0_Seoul_Sun_Frontback_Day_74627.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V2F_HY_8030_20201231_111555_E_CH0_Seoul_Sun_Frontback_Day_86195.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V3F_HY_1181_20160212_023749_E_CH1_Seoul_Sun_Frontback_Sunset_86619.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V3F_HY_1347_20160212_022303_N_CH0_Seoul_Sun_Frontback_Day_76884.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
Plotting labels to e:\runs\segment\crack_segmentation7\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'l


2025/05/26 18:19:50 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.


[34m[1mMLflow: [0mlogging run_id(6ba2cb696d5c4b209b958933177f5143) to file:///C:/Users/hyunj/Downloads/deep_test/mlruns
[34m[1mMLflow: [0mdisable with 'yolo settings mlflow=False'
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1me:\runs\segment\crack_segmentation7[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       1/50         0G          0          0      4.426          0          0        640: 100%|██████████| 1784/1784 [1:27:40<00:00,  2.95s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 223/223 [05:27<00:00,  1.47s/it]


RuntimeError: torch.cat(): expected a non-empty list of Tensors

In [5]:
from pathlib import Path

base_out    = Path(r"C:\Users\hyunj\Downloads\deep_test")
image_root  = base_out / "gray"
seg_txt     = base_out / "txt"
subfolders  = ["C_Frontback_D02", "C_Frontback_G01"]

for sub in subfolders:
    img_dir = image_root / sub
    txt_dir = seg_txt    / sub
    print(f"\n[subfolder: {sub}]")
    print(" Images exist:", img_dir.exists(), "->", len(list(img_dir.iterdir())) if img_dir.exists() else 0)
    print("  *.webp files:", len(list(img_dir.glob("*.webp"))))
    print("  *.png   files:", len(list(img_dir.glob("*.png"))))
    print("Labels exist:", txt_dir.exists(), "->", len(list(txt_dir.iterdir())) if txt_dir.exists() else 0)
    print("  *.txt   files:", len(list(txt_dir.glob("*.txt"))))



[subfolder: C_Frontback_D02]
 Images exist: True -> 8531
  *.webp files: 8531
  *.png   files: 0
Labels exist: True -> 8531
  *.txt   files: 8531

[subfolder: C_Frontback_G01]
 Images exist: True -> 9320
  *.webp files: 9320
  *.png   files: 0
Labels exist: True -> 9320
  *.txt   files: 9320


In [6]:
import os
import random
import shutil
from pathlib import Path
import yaml
from ultralytics import YOLO

# ─── 0) 설정 ────────────────────────────────────────────
BASE        = Path(r"C:\Users\hyunj\Downloads\deep_test")
IMAGE_ROOT  = BASE / "gray"
LABEL_ROOT  = BASE / "txt"    # 세그멘테이션 .txt 라벨 폴더
SUBFOLDERS  = ["C_Frontback_D02", "C_Frontback_G01"]

# ─── 1) 출력(train/val) 폴더 준비 ──────────────────────────
train_img_out = BASE / "dataset" / "images" / "train"
train_lbl_out = BASE / "dataset" / "labels" / "train"
val_img_out   = BASE / "dataset" / "images" / "val"
val_lbl_out   = BASE / "dataset" / "labels" / "val"
for p in (train_img_out, train_lbl_out, val_img_out, val_lbl_out):
    p.mkdir(parents=True, exist_ok=True)

# ─── 2) (이미지, 라벨) 파일 쌍 수집 ────────────────────────
pairs = []
for sub in SUBFOLDERS:
    img_dir = IMAGE_ROOT / sub
    lbl_dir = LABEL_ROOT  / sub

    # 이미지 확장자 모두
    img_files = list(img_dir.glob("*.webp")) + list(img_dir.glob("*.png"))
    for img_path in img_files:
        stem = img_path.stem
        # 라벨 후보 두 가지
        candidates = [
            lbl_dir / f"{stem}.txt",
            lbl_dir / f"{stem}_mask.txt"
        ]
        # 존재하는 첫 번째 라벨 사용
        txt_path = next((p for p in candidates if p.exists()), None)
        if txt_path:
            pairs.append((img_path, txt_path))
        else:
            print(f"[누락] 라벨 없음: {img_path.name} -> {candidates}")

print(f"✅ 전체 파일 쌍: {len(pairs)}")

# ─── 3) train/val 분할 (80/20) ─────────────────────────────
random.seed(30)
random.shuffle(pairs)
split_idx   = int(len(pairs) * 0.8)
train_pairs = pairs[:split_idx]
val_pairs   = pairs[split_idx:]
print(f"train: {len(train_pairs)} 쌍, val: {len(val_pairs)} 쌍")

# ─── 4) 파일 복사 ─────────────────────────────────────────
def copy_pairs(pairs, img_out, lbl_out):
    for img_path, txt_path in pairs:
        # sub폴더명_원본이름 으로 파일명 충돌 방지
        new_img = f"{img_path.parent.name}_{img_path.name}"
        new_lbl = f"{txt_path.parent.name}_{txt_path.name}"
        shutil.copy2(img_path, img_out / new_img)
        shutil.copy2(txt_path, lbl_out / new_lbl)

copy_pairs(train_pairs, train_img_out, train_lbl_out)
copy_pairs(val_pairs,   val_img_out,   val_lbl_out)
print("✅ train/val 파일 복사 완료")

# ─── 5) data.yaml 생성 ────────────────────────────────────
data_yaml = {
    "train": str((BASE/"dataset"/"images"/"train").as_posix()),
    "val":   str((BASE/"dataset"/"images"/"val").as_posix()),
    "nc":    1,
    "names": ["crack"]
}
yaml_path = BASE / "crack_segmentation2.yaml"
with open(yaml_path, "w") as f:
    yaml.dump(data_yaml, f, sort_keys=False)
print(f"✅ data.yaml 생성: {yaml_path}")

# ─── 6) YOLOv8n-seg 학습 실행 ─────────────────────────────
# (로컬에 GPU 없으면 device="cpu" 로 변경하세요)
model = YOLO("yolov8n-seg.pt")
model.train(
    data=str(yaml_path).replace("\\", "/"),
    imgsz=640,
    epochs=50,
    batch=8,
    device="cpu",                    # GPU가 없으면 "cpu" 로
    name="crack_seg2",
    project=str((BASE/"yolo_results").as_posix())
)
print("✅ 학습 완료")


✅ 전체 파일 쌍: 17851
train: 14280 쌍, val: 3571 쌍
✅ train/val 파일 복사 완료
✅ data.yaml 생성: C:\Users\hyunj\Downloads\deep_test\crack_segmentation2.yaml
New https://pypi.org/project/ultralytics/8.3.145 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.137  Python-3.9.21 torch-2.7.0+cu128 CPU (11th Gen Intel Core(TM) i7-11700 2.50GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:/Users/hyunj/Downloads/deep_test/crack_segmentation2.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_wid

[34m[1mtrain: [0mScanning C:\Users\hyunj\Downloads\deep_test\dataset\labels\train\C_Frontback_D02... 0 images, 28529 backgrounds, 31 corrupt: 100%|██████████| 28560/28560 [00:52<00:00, 545.98it/s]

[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_0898_20201231_111555_E_CH0_Seoul_Sun_Frontback_Day_88937.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_2899_20201109_093348_N_CH2_Seoul_Sun_Frontback_Day_04037.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_4471_20201230_122847_E_CH0_Seoul_Sun_Frontback_Day_83952.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_5502_20201224_104609_N_CH1_Seoul_Sun_Frontback_Sunrise_40987.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_6015_20201109




[34m[1mtrain: [0mNew cache created: C:\Users\hyunj\Downloads\deep_test\dataset\labels\train\C_Frontback_D02.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))




[34m[1mval: [0mFast image access  (ping: 0.20.1 ms, read: 21.35.3 MB/s, size: 149.2 KB)


[34m[1mval: [0mScanning C:\Users\hyunj\Downloads\deep_test\dataset\labels\val\C_Frontback_D02... 0 images, 7135 backgrounds, 7 corrupt: 100%|██████████| 7142/7142 [00:12<00:00, 584.85it/s]

[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V2F_HY_3674_20201230_150636_N_CH0_Seoul_Sun_Frontback_Day_74627.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V2F_HY_8030_20201231_111555_E_CH0_Seoul_Sun_Frontback_Day_86195.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V3F_HY_1181_20160212_023749_E_CH1_Seoul_Sun_Frontback_Sunset_86619.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V3F_HY_1347_20160212_022303_N_CH0_Seoul_Sun_Frontback_Day_76884.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01_V2F_HY_6428_20201109_093048_E_CH0_Seoul_S




[34m[1mval: [0mNew cache created: C:\Users\hyunj\Downloads\deep_test\dataset\labels\val\C_Frontback_D02.cache




Plotting labels to C:\Users\hyunj\Downloads\deep_test\yolo_results\crack_seg2\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 66 weight(decay=0.0), 77 weight(decay=0.0005), 76 bias(decay=0.0)


UnsupportedModelRegistryStoreURIException:  Model registry functionality is unavailable; got unsupported URI 'e:\runs\mlflow' for model registry data storage. Supported URI schemes are: ['', 'file', 'databricks', 'databricks-uc', 'uc', 'http', 'https', 'postgresql', 'mysql', 'sqlite', 'mssql']. See https://www.mlflow.org/docs/latest/tracking.html#storage for how to run an MLflow server against one of the supported backend storage locations.

In [13]:
import os
from ultralytics import YOLO

# 0) MLflow 로깅 & 레지스트리 전부 로컬 파일로 리다이렉트
mlruns = r"C:\Users\hyunj\Downloads\deep_test\mlruns"
os.makedirs(mlruns, exist_ok=True)
file_uri = f"file:///{mlruns.replace(os.sep, '/')}"
os.environ["MLFLOW_TRACKING_URI"] = file_uri
os.environ["MLFLOW_REGISTRY_URI"] = file_uri

# 1) 모델 로드
model = YOLO("yolov8n-seg.pt")

# 2) (Optional) 모든 콜백 제거 — 여기까진 이제 에러 없을 겁니다
model.callbacks = []

# 3) 학습 실행
model.train(
    data="C:/Users/hyunj/Downloads/deep_test/crack_segmentation2.yaml",
    imgsz=640,
    epochs=50,
    batch=8,
    device="cpu",  
    name="crack_seg2",
    project="C:/Users/hyunj/Downloads/deep_test/yolo_results"
)


New https://pypi.org/project/ultralytics/8.3.145 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.137  Python-3.9.21 torch-2.7.0+cu128 CPU (11th Gen Intel Core(TM) i7-11700 2.50GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:/Users/hyunj/Downloads/deep_test/crack_segmentation2.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n-seg.pt, momentum=0.937, mosaic=1.0, multi_scale=F

[34m[1mtrain: [0mScanning C:\Users\hyunj\Downloads\deep_test\dataset\labels\train\C_Frontback_D02.cache... 0 images, 28529 backgrounds, 31 corrupt: 100%|██████████| 28560/28560 [00:00<?, ?it/s]

[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_0898_20201231_111555_E_CH0_Seoul_Sun_Frontback_Day_88937.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_2899_20201109_093348_N_CH2_Seoul_Sun_Frontback_Day_04037.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_4471_20201230_122847_E_CH0_Seoul_Sun_Frontback_Day_83952.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_5502_20201224_104609_N_CH1_Seoul_Sun_Frontback_Sunrise_40987.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_6015_20201109




[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 1078.0279.4 MB/s, size: 149.2 KB)


[34m[1mval: [0mScanning C:\Users\hyunj\Downloads\deep_test\dataset\labels\val\C_Frontback_D02.cache... 0 images, 7135 backgrounds, 7 corrupt: 100%|██████████| 7142/7142 [00:00<?, ?it/s]

[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V2F_HY_3674_20201230_150636_N_CH0_Seoul_Sun_Frontback_Day_74627.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V2F_HY_8030_20201231_111555_E_CH0_Seoul_Sun_Frontback_Day_86195.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V3F_HY_1181_20160212_023749_E_CH1_Seoul_Sun_Frontback_Sunset_86619.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V3F_HY_1347_20160212_022303_N_CH0_Seoul_Sun_Frontback_Day_76884.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01_V2F_HY_6428_20201109_093048_E_CH0_Seoul_S


2025/05/26 20:23:28 INFO mlflow.tracking.fluent: Experiment with name 'C:/Users/hyunj/Downloads/deep_test/yolo_results' does not exist. Creating a new experiment.
2025/05/26 20:23:28 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.


[34m[1mMLflow: [0mlogging run_id(b35d8c6972534cf297d56c2f720fea3d) to file:///C:/Users/hyunj/Downloads/deep_test/mlruns
[34m[1mMLflow: [0mdisable with 'yolo settings mlflow=False'
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mC:\Users\hyunj\Downloads\deep_test\yolo_results\crack_seg24[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       1/50         0G          0          0      3.121          0          0        640: 100%|██████████| 3567/3567 [4:29:26<00:00,  4.53s/it]  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 446/446 [09:58<00:00,  1.34s/it]


RuntimeError: torch.cat(): expected a non-empty list of Tensors

In [None]:
from ultralytics import YOLO

model = YOLO("yolov8n-seg.pt")
model.train(
    data="C:/Users/hyunj/Downloads/deep_test/crack_segmentation2.yaml",
    imgsz=640,
    epochs=50,
    batch=8,
    device="cpu",  
    name="crack_seg2",
    project="C:/Users/hyunj/Downloads/deep_test/yolo_results",
    val=False   # ← 검증 스킵
)


New https://pypi.org/project/ultralytics/8.3.145 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.137  Python-3.9.21 torch-2.7.0+cu128 CPU (11th Gen Intel Core(TM) i7-11700 2.50GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:/Users/hyunj/Downloads/deep_test/crack_segmentation2.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n-seg.pt, momentum=0.937, mosaic=1.0, multi_scale=F

[34m[1mtrain: [0mScanning C:\Users\hyunj\Downloads\deep_test\dataset\labels\train\C_Frontback_D02.cache... 0 images, 28529 backgrounds, 31 corrupt: 100%|██████████| 28560/28560 [00:00<?, ?it/s]

[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_0898_20201231_111555_E_CH0_Seoul_Sun_Frontback_Day_88937.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_2899_20201109_093348_N_CH2_Seoul_Sun_Frontback_Day_04037.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_4471_20201230_122847_E_CH0_Seoul_Sun_Frontback_Day_83952.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_5502_20201224_104609_N_CH1_Seoul_Sun_Frontback_Sunrise_40987.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mtrain: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\train\C_Frontback_G01\V2F_HY_6015_20201109




[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 588.4125.6 MB/s, size: 149.2 KB)


[34m[1mval: [0mScanning C:\Users\hyunj\Downloads\deep_test\dataset\labels\val\C_Frontback_D02.cache... 0 images, 7135 backgrounds, 7 corrupt: 100%|██████████| 7142/7142 [00:00<?, ?it/s]

[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V2F_HY_3674_20201230_150636_N_CH0_Seoul_Sun_Frontback_Day_74627.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V2F_HY_8030_20201231_111555_E_CH0_Seoul_Sun_Frontback_Day_86195.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V3F_HY_1181_20160212_023749_E_CH1_Seoul_Sun_Frontback_Sunset_86619.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01\V3F_HY_1347_20160212_022303_N_CH0_Seoul_Sun_Frontback_Day_76884.webp: ignoring corrupt image/label: image size (1, 1) <10 pixels
[34m[1mval: [0mC:\Users\hyunj\Downloads\deep_test\dataset\images\val\C_Frontback_G01_V2F_HY_6428_20201109_093048_E_CH0_Seoul_S




Plotting labels to C:\Users\hyunj\Downloads\deep_test\yolo_results\crack_seg28\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 66 weight(decay=0.0), 77 weight(decay=0.0005), 76 bias(decay=0.0)


2025/05/27 01:49:09 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.


[34m[1mMLflow: [0mlogging run_id(b35d8c6972534cf297d56c2f720fea3d) to file:///C:/Users/hyunj/Downloads/deep_test/mlruns
[34m[1mMLflow: [0mdisable with 'yolo settings mlflow=False'
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mC:\Users\hyunj\Downloads\deep_test\yolo_results\crack_seg28[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       1/50         0G          0          0      3.121          0          0        640: 100%|██████████| 3567/3567 [2:35:37<00:00,  2.62s/it]  



      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       2/50         0G          0          0  3.837e-08          0          0        640: 100%|██████████| 3567/3567 [2:32:07<00:00,  2.56s/it]  



      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       3/50         0G          0          0          0          0          0        640: 100%|██████████| 3567/3567 [2:28:34<00:00,  2.50s/it]  


      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size



       4/50         0G          0          0          0          0          0        640: 100%|██████████| 3567/3567 [2:28:01<00:00,  2.49s/it]  



      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       5/50         0G          0          0          0          0          0        640: 100%|██████████| 3567/3567 [2:38:41<00:00,  2.67s/it]  



      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       6/50         0G          0          0          0          0          0        640: 100%|██████████| 3567/3567 [3:04:08<00:00,  3.10s/it]  



      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       7/50         0G          0          0          0          0          0        640: 100%|██████████| 3567/3567 [2:56:07<00:00,  2.96s/it]  



      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       8/50         0G          0          0          0          0          0        640:  83%|████████▎ | 2962/3567 [2:22:25<29:15,  2.90s/it]  

In [None]:
import os

os.chdir(r"C:\Users\hyunj\Downloads\deep_test")

cmd = (
    "yolo train "
    "model=yolov8n-seg.pt "
    "data=data.yaml "
    "epochs=50 "
    "imgsz=640 "
    "batch=8 "
    "name=crack_segmentation"
)
os.system(cmd)
