# YOLOv8 + Tesseract OCR 노트북

이 노트북은 현재 디렉터리(`/home/work/llama_young/차량번호판인식_yolov8`)에서 YOLOv8 탐지 모델로 번호판 영역을 찾고, Tesseract OCR(번호판 패턴에 맞는 결과가 없으면 EasyOCR로 보완)로 단일 이미지, 동영상 또는 웹캠 스트림의 번호판 문자열을 인식합니다.

In [1]:
from pathlib import Path
import os
import sys

# Make the local virtual environment visible to the running notebook kernel:
# put its site-packages first on sys.path and its bin directory first on PATH.
VENV_DIR = Path('venv')
if not VENV_DIR.exists():
    raise FileNotFoundError(f'가상환경을 찾을 수 없습니다: {VENV_DIR.resolve()}')

site_packages = sorted((VENV_DIR / 'lib').glob('python*/site-packages'))
if not site_packages:
    raise FileNotFoundError('site-packages 디렉터리가 없습니다. 가상환경 구성을 확인하세요.')

# Prefer the site-packages directory whose pythonX.Y name matches the
# interpreter that is actually running; packages installed for a different
# minor version may not import. Fall back to the first match otherwise.
kernel_tag = f'python{sys.version_info.major}.{sys.version_info.minor}'
site_packages_path = next(
    (candidate for candidate in site_packages if candidate.parent.name == kernel_tag),
    site_packages[0],
).resolve()
if str(site_packages_path) not in sys.path:
    sys.path.insert(0, str(site_packages_path))

os.environ['VIRTUAL_ENV'] = str(VENV_DIR.resolve())
bin_path = (VENV_DIR / 'bin').resolve()
current_path = os.environ.get('PATH', '')
if current_path:
    # Use the platform's PATH separator rather than a hard-coded ':'.
    parts = current_path.split(os.pathsep)
    if str(bin_path) not in parts:
        os.environ['PATH'] = f"{bin_path}{os.pathsep}{current_path}"
else:
    os.environ['PATH'] = str(bin_path)

print('가상환경 경로 설정 완료:', VENV_DIR.resolve())
print('현재 커널 파이썬:', sys.executable)


가상환경 경로 설정 완료: /home/work/llama_young/차량번호판인식_yolov8/venv
현재 커널 파이썬: /usr/bin/python


In [2]:
import torch
import cv2
import pytesseract
from ultralytics import YOLO

# Report the versions of the main dependencies visible to this kernel.
torch_version = torch.__version__
print('PyTorch version:', torch_version)

cuda_available = torch.cuda.is_available()
print('CUDA 사용 가능 여부:', cuda_available)

opencv_version = cv2.__version__
print('OpenCV version:', opencv_version)

tesseract_version = pytesseract.get_tesseract_version()
print('Tesseract version:', tesseract_version)


PyTorch version: 2.9.0+cu128
CUDA 사용 가능 여부: True
OpenCV version: 4.12.0
Tesseract version: 4.1.1


In [None]:
from pathlib import Path

# --- User settings ---
DET_WEIGHTS = Path('runs/license_plate_yolov8n/weights/best.pt')  # YOLO detection model weights
INPUT_PATH = Path('samples/example.jpg')  # path of the image/video to process, or the string '0' (webcam)
OUTPUT_DIR = Path('runs/ocr_demo')  # annotated results are written here
CONF_THRES = 0.5  # passed to model.predict as conf
IOU_THRES = 0.65  # passed to model.predict as iou
IMGSZ = 512  # passed to model.predict as imgsz
USE_HALF = True  # True is recommended when using a GPU

# Create the output directory up front so later writes cannot fail on a missing folder.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
print('출력 디렉터리:', OUTPUT_DIR.resolve())


출력 디렉터리: /home/work/llama_young/차량번호판인식_yolov8/runs/ocr_demo


In [4]:
import re
import time
from pathlib import Path

import cv2
import numpy as np
import pytesseract
import torch
from ultralytics import YOLO
import easyocr

# Plate pattern: 2-3 digits, one Hangul syllable, an optional hyphen, then 4 digits,
# delimited by word boundaries.
PLATE_RE = re.compile(r"\b\d{2,3}[가-힣]-?\d{4}\b")
# Character substitutions applied to raw OCR text before filtering: the listed
# Latin letters are replaced by digits and an em dash by a hyphen.
SUBS = str.maketrans({'O': '0', 'o': '0', 'I': '1', 'l': '1', 'S': '5', 'B': '8', 'D': '0', '—': '-'})

# When True, ocr_plate writes a preprocessing variant to debug_variant_<idx>.png
# each time that variant improves the best score.
DEBUG_SAVE = False

# Shared EasyOCR reader instance; stays None until it is first requested.
EASY_OCR_READER = None


def get_easyocr_reader():
    """Return the shared EasyOCR reader, building it on the first call.

    The reader is created for Korean and English, with GPU use enabled when
    ``torch.cuda.is_available()`` reports True, and is cached in the
    module-level ``EASY_OCR_READER`` so later calls reuse it.
    """
    global EASY_OCR_READER
    if EASY_OCR_READER is not None:
        return EASY_OCR_READER
    use_gpu = torch.cuda.is_available()
    EASY_OCR_READER = easyocr.Reader(['ko', 'en'], gpu=use_gpu)
    return EASY_OCR_READER


def order_points(pts: np.ndarray) -> np.ndarray:
    """Order four 2-D points as top-left, top-right, bottom-right, bottom-left.

    The corner with the smallest x + y is taken as top-left and the largest
    as bottom-right; the smallest y - x gives top-right and the largest
    bottom-left.

    Args:
        pts: Array of shape (4, 2) holding (x, y) points in any order.

    Returns:
        A float32 array of shape (4, 2) with the points in the order above.
    """
    coord_sum = pts.sum(axis=1)
    coord_diff = np.diff(pts, axis=1).ravel()  # y - x for every point

    top_left = np.argmin(coord_sum)
    top_right = np.argmin(coord_diff)
    bottom_right = np.argmax(coord_sum)
    bottom_left = np.argmax(coord_diff)

    ordered = np.zeros((4, 2), dtype=np.float32)
    ordered[:] = pts[[top_left, top_right, bottom_right, bottom_left]]
    return ordered

def extract_plate_patch(roi: np.ndarray) -> np.ndarray:
    """Try to isolate the plate rectangle inside a detected region.

    Edges are extracted (grayscale, Gaussian blur, Canny, one dilation) and
    external contours are visited from largest to smallest area. The first
    contour whose bounding box has an aspect ratio in [1.5, 8.0] and covers
    more than 8% of the region decides the result:

    * if its polygon approximation has four corners, the quadrilateral is
      perspective-warped to an upright patch of at least 320x80 pixels;
    * otherwise the axis-aligned bounding-box crop is returned.

    Args:
        roi: BGR image region (it is converted with ``COLOR_BGR2GRAY``).

    Returns:
        The warped patch, the bounding-box crop, or ``roi`` unchanged when no
        contour qualifies.
    """
    grayscale = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    edge_map = cv2.Canny(smoothed, 50, 150)
    dilate_kernel = np.ones((3, 3), np.uint8)
    edge_map = cv2.dilate(edge_map, dilate_kernel, iterations=1)
    found, _ = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    roi_area = roi.shape[0] * roi.shape[1]

    for contour in sorted(found, key=cv2.contourArea, reverse=True):
        left, top, box_w, box_h = cv2.boundingRect(contour)
        if box_h <= 0:
            continue
        aspect = box_w / box_h
        coverage = (box_w * box_h) / max(1, roi_area)
        if not (1.5 <= aspect <= 8.0 and coverage > 0.08):
            continue

        perimeter = cv2.arcLength(contour, True)
        polygon = cv2.approxPolyDP(contour, 0.03 * perimeter, True)
        if len(polygon) != 4:
            # Not a clean quadrilateral: fall back to the bounding-box crop.
            return roi[top : top + box_h, left : left + box_w]

        corners = order_points(polygon.reshape(4, 2).astype(np.float32))
        top_edge = np.linalg.norm(corners[0] - corners[1])
        bottom_edge = np.linalg.norm(corners[2] - corners[3])
        left_edge = np.linalg.norm(corners[0] - corners[3])
        right_edge = np.linalg.norm(corners[1] - corners[2])
        # Enforce a minimum output size for the warped patch.
        out_w = max(int(max(top_edge, bottom_edge)), 320)
        out_h = max(int(max(left_edge, right_edge)), 80)
        target = np.array(
            [[0, 0], [out_w - 1, 0], [out_w - 1, out_h - 1], [0, out_h - 1]],
            dtype=np.float32,
        )
        transform = cv2.getPerspectiveTransform(corners, target)
        return cv2.warpPerspective(roi, transform, (out_w, out_h))

    return roi

def preprocess_variants(roi: np.ndarray) -> list[np.ndarray]:
    """Build several preprocessed versions of a plate region for OCR.

    The region is first narrowed with ``extract_plate_patch``, converted to
    grayscale, upscaled 2.5x, bilateral-filtered and contrast-equalised with
    CLAHE. Binarised versions are then derived from the equalised image.

    Args:
        roi: BGR image region containing the plate.

    Returns:
        Six single-channel images in this order: CLAHE-equalised, Otsu
        threshold, inverted Otsu, adaptive Gaussian threshold, inverted
        adaptive threshold, and the adaptive threshold after morphological
        closing.
    """
    patch = extract_plate_patch(roi)
    upscaled = cv2.resize(
        cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY),
        None,
        fx=2.5,
        fy=2.5,
        interpolation=cv2.INTER_CUBIC,
    )
    denoised = cv2.bilateralFilter(upscaled, 9, 75, 75)
    equalized = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8)).apply(denoised)

    _, binary = cv2.threshold(equalized, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    binary_inverted = cv2.bitwise_not(binary)

    adaptive = cv2.adaptiveThreshold(
        equalized, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 25, 3
    )
    adaptive_inverted = cv2.bitwise_not(adaptive)

    closing_kernel = np.ones((3, 3), np.uint8)
    closed = cv2.morphologyEx(adaptive, cv2.MORPH_CLOSE, closing_kernel, iterations=2)

    return [equalized, binary, binary_inverted, adaptive, adaptive_inverted, closed]

def score_text(text: str) -> tuple[int, int]:
    """Score an OCR candidate so that candidates can be compared as tuples.

    Args:
        text: Cleaned OCR output.

    Returns:
        ``(pattern_score, length)`` where ``pattern_score`` is 2 when
        ``PLATE_RE`` matches somewhere in ``text`` and 0 otherwise, and
        ``length`` is ``len(text)``.
    """
    has_plate = PLATE_RE.search(text) is not None
    pattern_score = 2 if has_plate else 0
    return (pattern_score, len(text))

def ocr_plate(roi: np.ndarray) -> tuple[str, np.ndarray]:
    """Read the plate string from a detected plate region.

    Every preprocessing variant from ``preprocess_variants`` is run through
    Tesseract with several engine / page-segmentation settings. Each raw
    result is normalised with ``SUBS`` and stripped down to digits, Hangul
    syllables and hyphens. If no Tesseract result matches ``PLATE_RE``,
    EasyOCR is run on every variant as a fallback, scoring both the
    individual fragments and the left-to-right concatenation of fragments.

    Args:
        roi: BGR image region containing the plate.

    Returns:
        ``(text, image)``. ``text`` is the best plate string with hyphens
        removed (empty when nothing was read). If any candidate contains a
        ``PLATE_RE`` match, the best such match is returned; otherwise the
        highest-scoring cleaned text. ``image`` is the preprocessing variant
        that produced the highest-scoring raw candidate.
    """

    def _clean(raw: str) -> str:
        # Normalise look-alike characters, then keep only plate characters.
        return re.sub(r"[^0-9가-힣-]", '', raw.translate(SUBS))

    variants = preprocess_variants(roi)
    # No tessedit_char_whitelist here: Tesseract treats the value as a literal
    # list of characters, so "가-힣" would allow only 가, 힣 and '-' and block
    # every other Hangul syllable. Filtering is done afterwards by _clean,
    # which also lets SUBS repair Latin look-alikes such as O -> 0.
    configs = [
        "--oem 1 --psm 7 -l kor+eng",
        "--oem 1 --psm 8 -l kor+eng",
        "--oem 3 --psm 6 -l kor+eng",
        "--oem 3 --psm 11 -l kor+eng",
    ]
    best_text, best_img, best_score = '', variants[0], (-1, -1)
    candidate_texts: set[str] = set()

    for idx, variant in enumerate(variants):
        for cfg in configs:
            cleaned = _clean(pytesseract.image_to_string(variant, config=cfg))
            if cleaned:
                candidate_texts.add(cleaned)
            score = score_text(cleaned)
            if score > best_score:
                best_text, best_img, best_score = cleaned, variant, score
                if DEBUG_SAVE:
                    cv2.imwrite(f"debug_variant_{idx}.png", variant)

    if best_score[0] < 2:
        # Tesseract produced nothing plate-shaped: fall back to EasyOCR.
        reader = get_easyocr_reader()
        for variant in variants:
            results = reader.readtext(variant, detail=1, paragraph=False)
            # Order fragments left to right by the leftmost x of their box.
            results = sorted(results, key=lambda r: min(pt[0] for pt in r[0]))
            fragments = []
            for _bbox, raw, _conf in results:
                cleaned = _clean(raw)
                if not cleaned:
                    continue
                candidate_texts.add(cleaned)
                fragments.append(cleaned)
                score = score_text(cleaned)
                if score > best_score:
                    best_text, best_img, best_score = cleaned, variant, score
            merged = "".join(fragments)
            if merged:
                candidate_texts.add(merged)
                score = score_text(merged)
                if score > best_score:
                    best_text, best_img, best_score = merged, variant, score

    # Any non-empty best_text is already in candidate_texts, so scanning the
    # candidates covers it as well.
    matches = {m for text in candidate_texts for m in PLATE_RE.findall(text)}
    if matches:
        # The text itself is the final tie-breaker so the choice does not
        # depend on set iteration order.
        best_candidate = max(matches, key=lambda t: (score_text(t), t))
        return best_candidate.replace('-', ''), best_img

    return best_text.replace('-', ''), best_img
def aspect_ratio_ok(x1: int, y1: int, x2: int, y2: int, min_area: int = 2000) -> bool:
    """Check whether a box has a plate-like shape and is large enough.

    Args:
        x1, y1: Top-left corner of the box.
        x2, y2: Bottom-right corner of the box.
        min_area: Minimum accepted box area in pixels.

    Returns:
        True when the height is positive, width / height lies in [1.5, 8.0]
        and the area is at least ``min_area``; False otherwise.
    """
    width = x2 - x1
    height = y2 - y1
    if height <= 0:
        return False
    if not 1.5 <= width / height <= 8.0:
        return False
    return width * height >= min_area

def annotate(frame: np.ndarray, box, label: str | None, color=(0, 255, 0)) -> None:
    """Draw a box, and optionally a text label above it, onto ``frame`` in place.

    Args:
        frame: Image to draw on; it is modified directly.
        box: Four values ``(x1, y1, x2, y2)``; each is converted with ``int``.
        label: Text to draw. Nothing is written when it is empty or None.
        color: Colour used for both the rectangle and the text.
    """
    left, top, right, bottom = (int(value) for value in box)
    cv2.rectangle(frame, (left, top), (right, bottom), color, 2)
    if not label:
        return
    # Keep the text baseline at least 20 px from the top edge.
    text_origin = (left, max(20, top - 8))
    cv2.putText(frame, label, text_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

def load_model(path: Path) -> YOLO:
    """Load the YOLO detection model from a weights file.

    Args:
        path: Location of the weights file.

    Returns:
        A ``YOLO`` instance constructed from ``str(path)``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    if path.exists():
        return YOLO(str(path))
    raise FileNotFoundError(f'탐지 가중치를 찾을 수 없습니다: {path}')

In [5]:
# Load the detector once and choose the inference device.
model = load_model(DET_WEIGHTS)
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('탐지 모델 로드 완료. device:', device)

# A purely numeric source that is not an existing path is treated as a webcam index.
source_str = str(INPUT_PATH)
source_is_numeric = source_str.isdigit()
is_webcam = source_is_numeric and not Path(source_str).exists()


def process_frame(frame):
    """Detect a plate in ``frame``, OCR it and draw the result on the frame.

    Detections that fail ``aspect_ratio_ok`` are discarded. Only the
    remaining detection with the highest confidence is used: its box is
    padded by 10% of its longer side (clipped to the frame), cropped, and
    passed to ``ocr_plate``. The padded box, the recognised text (or
    ``'reading...'`` when empty) and an FPS counter are drawn onto ``frame``
    in place.

    Args:
        frame: Image to process; it is modified directly.

    Returns:
        ``(frame, plate_text)`` where ``plate_text`` is '' when nothing was
        detected or read.
    """
    started = time.time()
    predictions = model.predict(
        frame,
        imgsz=IMGSZ,
        conf=CONF_THRES,
        iou=IOU_THRES,
        device=device,
        agnostic_nms=True,
        max_det=2,
        half=USE_HALF and device != 'cpu',
        verbose=False,
    )
    result = predictions[0]
    boxes_xyxy = result.boxes.xyxy.cpu().numpy()
    confidences = result.boxes.conf.cpu().numpy()

    candidates = []
    for raw_box, confidence in zip(boxes_xyxy, confidences):
        left, top, right, bottom = (int(value) for value in raw_box)
        if aspect_ratio_ok(left, top, right, bottom):
            candidates.append((float(confidence), (left, top, right, bottom)))
    # Highest confidence first.
    candidates.sort(reverse=True)

    plate_text = ''
    if candidates:
        _, (left, top, right, bottom) = candidates[0]
        frame_h, frame_w = frame.shape[:2]
        margin = int(0.1 * max(right - left, bottom - top))
        left = max(0, left - margin)
        top = max(0, top - margin)
        right = min(frame_w, right + margin)
        bottom = min(frame_h, bottom + margin)
        crop = frame[top:bottom, left:right]
        if crop.size > 0:
            plate_text, _ = ocr_plate(crop)
        annotate(frame, (left, top, right, bottom), plate_text or 'reading...', (0, 255, 0))

    elapsed = time.time() - started
    fps = 1.0 / max(1e-6, elapsed)
    cv2.putText(frame, f"{fps:.1f} FPS", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 0), 2)
    return frame, plate_text

# Dispatch on the kind of input: webcam index, single image, or video file.
if is_webcam:
    cap = cv2.VideoCapture(int(source_str))
    if not cap.isOpened():
        raise RuntimeError('웹캠을 열 수 없습니다.')
    print('웹캠 스트림 시작. 종료하려면 창에서 q 키를 누르세요.')
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            vis, plate = process_frame(frame)
            cv2.imshow('YOLO + Tesseract', vis)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close the window even if processing fails.
        cap.release()
        cv2.destroyAllWindows()
else:
    ext = INPUT_PATH.suffix.lower()
    if ext in {'.jpg', '.jpeg', '.png', '.bmp'}:
        frame = cv2.imread(str(INPUT_PATH))
        if frame is None:
            raise FileNotFoundError(f'이미지를 불러올 수 없습니다: {INPUT_PATH}')
        vis, plate = process_frame(frame)
        out_path = OUTPUT_DIR / INPUT_PATH.name
        cv2.imwrite(str(out_path), vis)
        print('저장 완료:', out_path)
        if plate:
            print('인식 결과:', plate)
    else:
        # Decode the video with OpenCV and let process_frame run the detector.
        # Iterating model.predict(stream=True) here would run detection twice
        # per frame, and the frame counter is kept locally instead of being
        # read from the prediction results.
        cap = cv2.VideoCapture(str(INPUT_PATH))
        if not cap.isOpened():
            raise FileNotFoundError(f'동영상을 열 수 없습니다: {INPUT_PATH}')
        frame_id = 0
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                vis, plate = process_frame(frame)
                out_path = OUTPUT_DIR / f"frame_{frame_id:06d}.jpg"
                cv2.imwrite(str(out_path), vis)
                if plate:
                    print(f"frame {frame_id}: {plate}")
                frame_id += 1
        finally:
            cap.release()
        print('동영상 처리가 완료되었습니다.')


탐지 모델 로드 완료. device: cuda
저장 완료: runs/ocr_demo/example.jpg
