In [2]:
from dotenv import load_dotenv
import os

# Merge the variables declared in a local .env file into the process environment.
load_dotenv()

# Roboflow credential taken from the environment; evaluates to None when the
# variable is not set.
ROBOFLOW_API_KEY = os.getenv('ROBOFLOW_API_KEY')

# 모델 불러오기

In [3]:
from ultralytics import YOLO
import torch

# Starting model: build a YOLO instance from the checkpoint stored at
# ./models/yolo11n.pt (path is relative to the notebook's working directory).
model = YOLO("./models/yolo11n.pt")

In [4]:
# Run on the GPU when PyTorch reports a usable CUDA device; otherwise fall back
# to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cuda


# 학습 데이터 전처리

### 전처리 1 : 대비 강화

In [34]:
import cv2

def enhance_contrast(img):
    """Return an inverted, CLAHE-equalised grayscale rendering of ``img``.

    The image is converted with ``cv2.COLOR_BGR2GRAY``, equalised with CLAHE
    (clip limit 3.0, 32x32 tile grid), inverted bitwise, and expanded back to
    three channels with ``cv2.COLOR_GRAY2BGR``.

    Args:
        img: Image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``
            (presumably an 8-bit BGR array such as ``cv2.imread`` returns —
            confirm against callers).

    Returns:
        Three-channel image produced from the inverted, equalised gray plane.
    """
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(32, 32))
    luminance = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    negative = cv2.bitwise_not(equalizer.apply(luminance))  # black/white inversion
    return cv2.cvtColor(negative, cv2.COLOR_GRAY2BGR)  # back to 3 channels

### 전처리 2 : 조명 보정 및 선명도 강화

In [31]:
import cv2
import numpy as np

def enhance_pill_soft(img, clahe_clip=2.0, clahe_tile=8, sharp_amt=0.4, blend=0.6):
    """Apply a mild contrast and sharpness boost, then blend with the original.

    Steps, in order:
      1. Weighted difference ``1.4 * img - 0.4 * blur(img, sigma=21)``.
      2. CLAHE on the L channel only, after a BGR -> Lab conversion.
      3. Unsharp mask using a sigma=1.0 Gaussian.
      4. Weighted blend of the processed image with the untouched input.

    Args:
        img: Input image (BGR, as implied by the ``COLOR_BGR2Lab`` conversion).
        clahe_clip: ``clipLimit`` handed to ``cv2.createCLAHE``.
        clahe_tile: Side length of the square CLAHE tile grid.
        sharp_amt: Unsharp-mask strength; the image is weighted ``1 + sharp_amt``
            and its blurred copy ``-sharp_amt``.
        blend: Weight of the processed image in the final mix; the original
            receives ``1.0 - blend``.

    Returns:
        The blended image returned by ``cv2.addWeighted``.
    """
    # 1) Simple illumination correction: subtract part of a heavily blurred copy.
    background = cv2.GaussianBlur(img, (0, 0), 21)
    flattened = cv2.addWeighted(img, 1.4, background, -0.4, 0)  # gentle contrast lift

    # 2) Equalise only the lightness channel so the a/b colour channels are untouched.
    light, chan_a, chan_b = cv2.split(cv2.cvtColor(flattened, cv2.COLOR_BGR2Lab))
    equalizer = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(clahe_tile, clahe_tile))
    equalized = cv2.cvtColor(
        cv2.merge([equalizer.apply(light), chan_a, chan_b]),
        cv2.COLOR_Lab2BGR,
    )

    # 3) Weak unsharp mask (only nudges the edges).
    softened = cv2.GaussianBlur(equalized, (0, 0), 1.0)
    sharpened = cv2.addWeighted(equalized, 1 + sharp_amt, softened, -sharp_amt, 0)

    # 4) Soft blend with the original to avoid over-processing.
    return cv2.addWeighted(sharpened, blend, img, 1.0 - blend, 0)

In [37]:
def enhance_pill_retinex(img, sigma=30, edge_gain=0.15, blend=0.7):
    """Enhance ``img`` with a log-ratio (Retinex-style) correction plus edge boost.

    The log of the image minus the log of its Gaussian-blurred copy is
    min-max normalised to 0..255 and cast to uint8. The absolute Laplacian of
    that result is added with weight ``edge_gain``, and the outcome is blended
    with the original image.

    Args:
        img: Input image array. It is blended with a uint8 intermediate at the
            end, so it is presumably expected to be uint8 itself — confirm.
        sigma: Gaussian sigma used for the blurred copy.
        edge_gain: Weight of the absolute Laplacian added to the normalised image.
        blend: Weight of the processed image in the final mix; the original
            receives ``1.0 - blend``.

    Returns:
        The blended image returned by ``cv2.addWeighted``.
    """
    shifted = img.astype(np.float32) + 1.0  # offset so log() is not taken of 0-valued pixels
    surround = cv2.GaussianBlur(shifted, (0, 0), sigma)
    log_ratio = cv2.log(shifted) - cv2.log(surround)  # original note: "simple MSR-like"
    reflectance = cv2.normalize(log_ratio, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

    # Weak edge emphasis: add the magnitude of the Laplacian response.
    edges = cv2.convertScaleAbs(cv2.Laplacian(reflectance, cv2.CV_16S, ksize=3))
    boosted = cv2.addWeighted(reflectance, 1.0, edges, edge_gain, 0)

    return cv2.addWeighted(boosted, blend, img, 1.0 - blend, 0)


In [39]:
def enhance_pill_bilateral(img, d=9, sigmaColor=50, sigmaSpace=50, local_gain=0.8, blend=0.6):
    """Denoise with a bilateral filter, boost local contrast, blend with the input.

    Args:
        img: Input image passed to ``cv2.bilateralFilter``.
        d: Pixel-neighbourhood diameter for the bilateral filter.
        sigmaColor: Bilateral filter sigma in colour space.
        sigmaSpace: Bilateral filter sigma in coordinate space.
        local_gain: Strength of the unsharp-style contrast step; the filtered
            image is weighted ``1 + local_gain`` and its sigma=3.0 Gaussian
            blur ``-local_gain``.
        blend: Weight of the processed image in the final mix; the original
            receives ``1.0 - blend``.

    Returns:
        The blended image returned by ``cv2.addWeighted``.
    """
    # Noise reduction that keeps edges.
    smoothed = cv2.bilateralFilter(img, d, sigmaColor, sigmaSpace)

    # Local contrast (an unsharp-mask variant built on the filtered image).
    coarse = cv2.GaussianBlur(smoothed, (0, 0), 3.0)
    contrasted = cv2.addWeighted(smoothed, 1 + local_gain, coarse, -local_gain, 0)

    return cv2.addWeighted(contrasted, blend, img, 1.0 - blend, 0)


# 배치 데이터 생성

In [43]:
import torch, numpy as np
from ultralytics.models.yolo.detect import DetectionTrainer

class CustomTrainer(DetectionTrainer):
    """Detection trainer that runs ``enhance_pill_bilateral`` on each batch image.

    NOTE(review): only ``preprocess_batch`` of the trainer is overridden here, so
    validation/inference pipelines presumably do not see this enhancement unless
    it is applied separately — confirm that the train/eval inputs match.
    """

    def preprocess_batch(self, batch):
        """Run the parent preprocessing, then enhance every image with OpenCV.

        Args:
            batch: Batch dict whose ``"img"`` entry is, after the parent
                preprocessing, a float tensor indexed as (N, C, H, W) and
                scaled to 0..1 (assumed from the permute and ``* 255`` round
                trip below — confirm against the installed ultralytics
                version).

        Returns:
            The same dict with ``batch["img"]`` replaced by the enhanced
            images, on the same device and with the same dtype as produced by
            the parent class.
        """
        # Let the parent do its default preprocessing first.
        batch = super().preprocess_batch(batch)

        imgs = batch["img"]
        target_dtype = imgs.dtype
        # Use the device the parent put the batch on instead of a notebook-level
        # global, so the trainer still works when that global is missing or
        # names a different device.
        target_device = imgs.device

        # 1. Tensor -> uint8 NumPy array in (N, H, W, C) layout on the CPU.
        #    Round before the cast: plain truncation turns values such as
        #    254.99998 into 254 and darkens pixels by one level.
        rgb_u8 = (
            imgs.detach()
            .float()
            .mul(255)
            .round()
            .clamp(0, 255)
            .to(torch.uint8)
            .permute(0, 2, 3, 1)
            .contiguous()
            .cpu()
            .numpy()
        )

        # 2. Enhance each image. The channel axis is reversed before and after
        #    the call because the OpenCV helper is written for BGR input
        #    (the batch is assumed to be RGB — confirm).
        for idx in range(rgb_u8.shape[0]):
            bgr = np.ascontiguousarray(rgb_u8[idx][:, :, ::-1])
            enhanced = enhance_pill_bilateral(bgr)
            rgb_u8[idx] = enhanced[:, :, ::-1]

        # 3. Back to a contiguous (N, C, H, W) tensor, moved to the original
        #    device, rescaled to 0..1 and cast to the original dtype.
        restored = torch.from_numpy(rgb_u8).permute(0, 3, 1, 2).contiguous()
        batch["img"] = restored.to(target_device).float().div(255).to(target_dtype)

        return batch

# 학습하기

In [45]:
import os
# Ask for the TensorBoard logger.
# NOTE(review): confirm that the installed ultralytics version reads this variable.
os.environ.update({"ULTRALYTICS_TB": "1"})

# Dataset/project identifier; also used to build run directories further down.
PROJECT_NAME = "pill-detect-1"
# Dataset description file, relative to the notebook's working directory.
DATA_YAML_PATH = f"./{PROJECT_NAME}/data.yaml"

In [44]:
import os
# NOTE(review): CUDA_LAUNCH_BLOCKING is normally a debugging aid and presumably
# has to be set before CUDA is initialised to take effect — confirm it is still
# wanted for regular training runs.
os.environ.update({'CUDA_LAUNCH_BLOCKING': "1"})

import torch

# Release cached GPU memory held by PyTorch and report the outcome.
try:
    torch.cuda.empty_cache()
except RuntimeError as e:
    print(f"GPU 메모리 캐시 비우기 실패: {e}")
else:
    print("GPU 메모리 캐시 비우기 성공")

GPU 메모리 캐시 비우기 성공


In [46]:
model.to(device)

# NOTE(review): the captured log of this cell reports
# model=...\runs\pill-detect-1\train5\weights\best.pt, i.e. `model` was not the
# checkpoint loaded at the top of the notebook when the cell last ran — confirm
# the intended starting weights before re-running.
# NOTE(review): imgsz=540 is not a multiple of 32 and the prediction log shows
# 544-pixel shapes, so the size is presumably rounded up by the library.
train_kwargs = dict(
    data=DATA_YAML_PATH,
    epochs=30,
    batch=16,
    imgsz=540,
    device=device,
    project=f"runs/{PROJECT_NAME}",
    save=True,
    save_period=10,       # additionally keep a checkpoint every 10 epochs
    verbose=True,
    trainer=CustomTrainer,  # applies the OpenCV enhancement to training batches
)

# Training
results = model.train(**train_kwargs)

Ultralytics 8.3.201  Python-3.10.18 torch-2.8.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4070 Laptop GPU, 8188MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=./pill-detect-1/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=540, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=C:\Potenup\Drug-Detection-Chatbot\modeling\segment\runs\pill-detect-1\train5\weights\best.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train, nbs=64, nms=False,

# 모델 평가하기

In [47]:
# Raw string so every backslash is taken literally. The previous mix of single
# and doubled backslashes only worked because sequences such as "\P" or "\w" are
# *invalid* escapes that Python currently passes through (with a warning on
# newer versions); a folder starting with t, r, n, b, ... would have been
# silently corrupted.
# NOTE(review): absolute, machine-specific path — consider deriving it from the
# training run's output directory instead.
best_model_path = r"C:\Potenup\Drug-Detection-Chatbot\modeling\segment\runs\pill-detect-1\train\weights\best.pt"

In [48]:
# Build a separate YOLO instance from the checkpoint at best_model_path; it is
# used for the validation run below, leaving `model` untouched.
load_model = YOLO(best_model_path)

In [49]:
# Validation settings for the reloaded checkpoint.
# NOTE(review): imgsz here (640) differs from the training call (540) — confirm
# this is intentional.
val_kwargs = dict(
    data=DATA_YAML_PATH,
    imgsz=640,
    iou=0.5,
    save=True,
    project=f"runs/{PROJECT_NAME}",
)
val_results = load_model.val(**val_kwargs)

Ultralytics 8.3.201  Python-3.10.18 torch-2.8.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4070 Laptop GPU, 8188MiB)
YOLO11n summary (fused): 100 layers, 2,582,542 parameters, 0 gradients, 6.3 GFLOPs


[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 94.571.9 MB/s, size: 58.4 KB)
[K[34m[1mval: [0mScanning C:\Potenup\Drug-Detection-Chatbot\modeling\segment\pill-detect-1\valid\labels.cache... 142 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 142/142  0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 9/9 2.5it/s 3.6s0.2ss
                   all        142       2714      0.927       0.89      0.951      0.615
              capsules         73        805      0.914      0.843       0.93      0.588
               tablets         87       1909       0.94      0.937      0.972      0.642
Speed: 0.8ms preprocess, 16.1ms inference, 0.0ms loss, 2.3ms postprocess per image
Results saved to [1mC:\Potenup\Drug-Detection-Chatbot\modeling\segment\runs\pill-detect-1\val2[0m


# 테스트 해보기

In [50]:
import cv2
import os
from ultralytics import YOLO

input_dir = "./images/original"
preprocessed_dir = "./images/preprocessed"
output_dir = "./images/results/yolo"

os.makedirs(preprocessed_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)

# Enhance every supported image in input_dir and store the result under the
# same file name in preprocessed_dir. Sorted for a reproducible order.
for fname in sorted(os.listdir(input_dir)):
    if not fname.lower().endswith((".jpg", ".png", ".jpeg")):
        continue

    img = cv2.imread(os.path.join(input_dir, fname))
    if img is None:
        # cv2.imread returns None for unreadable/corrupt files; skipping avoids
        # an opaque OpenCV assertion inside the enhancement function.
        print(f"[skip] could not read image: {fname}")
        continue

    # Save the enhanced image.
    cv2.imwrite(os.path.join(preprocessed_dir, fname), enhance_pill_bilateral(img))

# Use a dedicated name instead of rebinding the notebook-global `model`:
# overwriting it makes a later re-run of the training cell start from this
# checkpoint rather than from the base weights.
predict_model = YOLO(best_model_path)
results = predict_model.predict(
    source=preprocessed_dir,
    conf=0.5,
    save=True,
    project=output_dir,
    name="",
)

# The library writes into an auto-numbered sub-folder of output_dir (the
# captured log shows ...\yolo\predict19), so report the real location when the
# result objects expose it and fall back to output_dir otherwise.
save_dir = (getattr(results[0], "save_dir", None) if results else None) or output_dir
print(f"결과 이미지가 {save_dir} 폴더에 저장되었습니다.")


image 1/3 c:\Potenup\Drug-Detection-Chatbot\modeling\segment\images\preprocessed\test1.jpg: 544x320 4 tabletss, 45.9ms
image 2/3 c:\Potenup\Drug-Detection-Chatbot\modeling\segment\images\preprocessed\test2.jpg: 544x320 4 tabletss, 39.9ms
image 3/3 c:\Potenup\Drug-Detection-Chatbot\modeling\segment\images\preprocessed\test3.jpg: 544x544 1 capsules, 48.4ms
Speed: 2.6ms preprocess, 44.8ms inference, 5.2ms postprocess per image at shape (1, 3, 544, 544)
Results saved to [1mC:\Potenup\Drug-Detection-Chatbot\modeling\segment\images\results\yolo\predict19[0m
결과 이미지가 ./images/results/yolo 폴더에 저장되었습니다.
