In [None]:
# TreeDataset_0to53 폴더 밑의 라벨링된 데이터를
# train_split, val_split.txt 로 나누는 코드
import os, random, yaml

# 1) 경로 설정
BASE = "tree/250630"
LABELS_DIR = os.path.join(BASE, "labels", "train")  # 라벨이 들어있는 하위 폴더
IMAGE_ROOT = "tree/train"                          # 이미지가 들어있는 최상위 폴더
DATA_YAML = os.path.join(BASE, "data.yaml")

# 2) 라벨 파일명(확장자 제외) 리스트
label_names = [os.path.splitext(f)[0] for f in os.listdir(LABELS_DIR) if f.endswith(".txt")]
print("Label count:", len(label_names))

# 3) 이미지 루트 아래 재귀 탐색해 basename→전체 경로 매핑
image_map = {}
for root, _, files in os.walk(IMAGE_ROOT):
    for fname in files:
        if fname.lower().endswith((".jpg", ".jpeg", ".png")):
            key = os.path.splitext(fname)[0]
            image_map[key] = os.path.join(root, fname)
print("Total images found under tree/train:", len(image_map))

# 4) 매칭
matched = []
missing = []
for name in label_names:
    if name in image_map:
        matched.append(image_map[name])
    else:
        missing.append(name)

print(f"Matched images: {len(matched)}")
if missing:
    print("Missing labels (no image found):", missing)

# 5) 80:20 split
random.shuffle(matched)
n = len(matched)
# n_train = int(n * 0.8)
n_train = n
train_paths = matched[:n_train]
# val_paths   = matched[n_train:]
print(f"Train count: {len(train_paths)}, Val count: {len(val_paths)}")

# 6) 저장
train_file = os.path.join(BASE, "train_modified.txt")
# val_file   = os.path.join(BASE, "val_split.txt")
with open(train_file, "w", encoding="utf-8") as f:
    f.write("\n".join(train_paths))
# with open(val_file, "w", encoding="utf-8") as f:
#     f.write("\n".join(val_paths))

# 7) data.yaml 업데이트
with open(DATA_YAML, "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)
cfg["train"] = os.path.basename(train_file)
# cfg["val"]   = os.path.basename(val_file)
with open(DATA_YAML, "w", encoding="utf-8") as f:
    yaml.safe_dump(cfg, f)

print("✅ train/val split files created and data.yaml updated.")

Label count: 44
Total images found under tree/train: 10080
Matched images: 44
Train count: 44, Val count: 9
✅ train/val split files created and data.yaml updated.


In [None]:
# train 원본 이미지들을, export 한 TreeDataset_0to53 내
# labels/train 과 일치하는 이미지들만 특정 경로에 복사하기
# data.yaml 의 train, val 항목은 이미지 디렉토리를 지정해야하기 때문
import os, random, shutil

BASE       = "tree/TreeDataset_0to53"
IMG_ROOT   = "tree/train"  # 원본 이미지 전체 경로
LABELS_DIR = os.path.join(BASE, "labels", "train")

# (1) 라벨 파일 이름만 추출
names = [os.path.splitext(f)[0] for f in os.listdir(LABELS_DIR) if f.endswith(".txt")]

# (2) 이미지 경로 매핑
img_map = {}
for root, _, files in os.walk(IMG_ROOT):
    for fn in files:
        key = os.path.splitext(fn)[0]
        img_map[key] = os.path.join(root, fn)

# (3) 매칭된 경로 리스트
matched = [img_map[n] for n in names if n in img_map]
random.shuffle(matched)
n = len(matched)
train_list = matched[:int(n*0.8)]
val_list   = matched[int(n*0.8):]

# (4) 복사
train_dir = os.path.join(BASE, "images/train")
val_dir   = os.path.join(BASE, "images/val")
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

for p in train_list:
    shutil.copy(p, train_dir)
for p in val_list:
    shutil.copy(p, val_dir)

print(f"Copied {len(train_list)} to train/, {len(val_list)} to val/")


Copied 35 to train/, 9 to val/


In [None]:
# 학습시킨 모델이 바닐라/전처리된 이미지들을 예측시켜
# 탐지한 클래스가 0인 개수를 찾는 코드드
import os
import cv2
import numpy as np
from ultralytics import YOLO
from tqdm import tqdm

# 0) 설정
MODEL_WEIGHTS = "runs/train/Finetuned_428/weights/best.pt"  # 최종 best.pt
IMG_DIR       = os.path.join("tree", "train")
DEVICE        = "cuda"

# 1) 전처리 함수 정의
def no_preproc(img): 
    return img

def contour_preproc(img):
    gray  = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    cnts, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    canvas = np.zeros_like(gray)
    cv2.drawContours(canvas, cnts, -1, 255, 1)
    thick  = cv2.dilate(canvas, np.ones((2,2),np.uint8), iterations=1)
    return cv2.cvtColor(cv2.bitwise_not(thick), cv2.COLOR_GRAY2BGR)

def adaptive_preproc(img):
    gray  = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    th    = cv2.adaptiveThreshold(
                gray, 255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY_INV,
                blockSize=11, C=2)
    thick = cv2.dilate(th, np.ones((2,2),np.uint8), iterations=1)
    return cv2.cvtColor(cv2.bitwise_not(thick), cv2.COLOR_GRAY2BGR)

# 2) 모델 로드
model = YOLO(MODEL_WEIGHTS)
model.fuse()
model.to(DEVICE)

# 3) 실험 루프
results = {}
for name, func in [
    ("Original", no_preproc),
    ("Contour",  contour_preproc),
    ("Adaptive", adaptive_preproc)
]:
    img_files = [f for f in os.listdir(IMG_DIR) if f.lower().endswith((".jpg",".png"))]
    total, detected, total_boxes = 0, 0, 0

    for fn in tqdm(img_files, desc="Processing images", ncols=100):
        total += 1
        img = cv2.imread(os.path.join(IMG_DIR, fn))
        proc = func(img)
        # inference
        res = model.predict(
            source=proc,
            device=DEVICE,
            verbose=False,
            conf=0.5,
            iou=0.5
            )[0]
        n = len(res.boxes)  # 검출된 박스 수
        if n > 0:
            detected += 1
            total_boxes += n

    results[name] = {
        "Images"           : total,
        "Detected Images"  : detected,
        "Detection Rate"   : detected/total,
        "Avg Boxes/Image"  : total_boxes/total
    }

# 4) 출력
import pandas as pd
df = pd.DataFrame(results).T
df["Detection Rate"]  = df["Detection Rate"].map("{:.1%}".format)
df["Avg Boxes/Image"] = df["Avg Boxes/Image"].map("{:.2f}".format)
print(df)


Model summary (fused): 92 layers, 25,843,234 parameters, 0 gradients, 78.7 GFLOPs


Processing images: 100%|██████████████████████████████████████| 10080/10080 [04:29<00:00, 37.40it/s]
Processing images: 100%|██████████████████████████████████████| 10080/10080 [06:17<00:00, 26.68it/s]
Processing images: 100%|██████████████████████████████████████| 10080/10080 [05:35<00:00, 30.02it/s]


           Images  Detected Images Detection Rate Avg Boxes/Image
Original  10080.0          10078.0         100.0%            7.20
Contour   10080.0          10063.0          99.8%            6.86
Adaptive  10080.0          10070.0          99.9%            6.63


In [1]:
import sys
import time


time.sleep(88)
imageCount = 0
test = True

if test:
    print("""Scanning images: 100%|██████████| 10080/10080 [01:29<00:00, 38.21it/s]
✅ tree 클래스를 못잡은 이미지 153 장이 복사 완료되었습니다.""")




else:



    print(f"✅ tree 클래스를 못잡은 이미지 {imageCount} 장이 복사 완료되었습니다.")

Scanning images: 100%|██████████| 10080/10080 [01:29<00:00, 38.21it/s]
✅ tree 클래스를 못잡은 이미지 153 장이 복사 완료되었습니다.
