In [1]:
import torch
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os

In [None]:
import cv2
import numpy as np
from craft_text_detector import Craft
import os

# 이미지 경로 및 출력 경로
image_dir = (
    "C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/test"  # 처리할 이미지 경로
)
output_dir = "C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed"  # 결과를 저장할 출력 경로
coordinates_dir = os.path.join(output_dir, "coordinates")  # 좌표 정보를 저장할 경로
os.makedirs(output_dir, exist_ok=True)
os.makedirs(coordinates_dir, exist_ok=True)

# CRAFT 모델 초기화
craft = Craft(output_dir=output_dir, crop_type="box")


# CRAFT 텍스트 감지 함수
def process_images_with_craft(image_dir, craft, output_dir, coordinates_dir):
    for file_name in os.listdir(image_dir):
        if file_name.lower().endswith((".png", ".jpg", ".jpeg")):
            image_path = os.path.join(image_dir, file_name)
            print(f"Processing {file_name}...")

            # 이미지 로드
            image = cv2.imread(image_path)
            if image is None:
                print(f"Failed to load image: {file_name}")
                continue

            original_image = image.copy()

            try:
                # CRAFT로 텍스트 영역 감지
                craft_result = craft.detect_text(image_path)

                # polys가 비정상적인 데이터를 포함할 수 있으므로 필터링
                text_bboxes = [
                    np.array(box).astype(np.int32)
                    for box in craft_result["boxes"]
                    if box is not None and len(box) > 0
                ]

                # 텍스트 영역이 없는 경우 건너뛰기
                if len(text_bboxes) == 0:
                    print(f"No text detected in {file_name}. Skipping...")
                    continue

                # 좌표 저장
                coordinates_path = os.path.join(
                    coordinates_dir, file_name.split(".")[0] + ".txt"
                )
                with open(coordinates_path, "w") as coord_file:
                    for box in text_bboxes:
                        coord_file.write(
                            ",".join(map(str, box.flatten())) + "\n"
                        )  # 좌표 저장

                # 텍스트 영역 표시
                for box in text_bboxes:
                    cv2.polylines(original_image, [box], True, (0, 255, 0), 2)  # 초록색

                # 결과 저장
                output_path = os.path.join(output_dir, file_name)
                cv2.imwrite(output_path, original_image)
                print(f"Saved processed image to {output_path}")
                print(f"Saved coordinates to {coordinates_path}")

            except Exception as e:
                print(f"Error processing {file_name}: {e}")


# 이미지 처리 실행
process_images_with_craft(image_dir, craft, output_dir, coordinates_dir)

# CRAFT 모델 종료
craft.unload_craftnet_model()
craft.unload_refinenet_model()

Processing 20241227_110952.jpg...
Error processing 20241227_110952.jpg: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (16,) + inhomogeneous part.
Processing images (1).jpeg...
Saved processed image to C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed\images (1).jpeg
Saved coordinates to C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed\coordinates\images (1).txt
Processing P5_1_01_34415_137209.png...
Error processing P5_1_01_34415_137209.png: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (24,) + inhomogeneous part.


In [None]:
import cv2
import numpy as np
from craft_text_detector import Craft
from PIL import Image
import os

# 경로 설정
image_dir = (
    "C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/test"  # 처리할 이미지 경로
)
output_dir = "C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed"  # 결과를 저장할 출력 경로
coordinates_dir = os.path.join(output_dir, "coordinates")  # 좌표 정보를 저장할 경로
os.makedirs(output_dir, exist_ok=True)
os.makedirs(coordinates_dir, exist_ok=True)

# CRAFT 모델 초기화
craft = Craft(output_dir=output_dir, crop_type="box")


# 파일 로드 함수
def load_image(image_path):
    try:
        # Pillow로 파일 검증
        with Image.open(image_path) as img:
            img.verify()  # 파일 형식 검증

        # OpenCV로 이미지 로드
        with open(image_path, "rb") as f:
            file_bytes = np.asarray(bytearray(f.read()), dtype=np.uint8)
            image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
        return image
    except Exception as e:
        print(f"[ERROR] Unable to load image: {image_path}, {e}")
        return None


# CRAFT 텍스트 감지 함수
def process_images_with_craft(image_dir, craft, output_dir, coordinates_dir):
    for file_name in os.listdir(image_dir):
        if file_name.lower().endswith((".png", ".jpg", ".jpeg")):
            image_path = os.path.join(image_dir, file_name)

            try:
                print(f"Processing {file_name}...")

                # 이미지 로드
                image = load_image(image_path)
                if image is None:
                    raise ValueError(f"Failed to load image: {file_name}")

                original_image = image.copy()

                # CRAFT로 텍스트 영역 감지
                craft_result = craft.detect_text(image_path)
                text_bboxes = [
                    np.array(box).astype(np.int32)
                    for box in craft_result["boxes"]
                    if box is not None and len(box) > 0
                ]

                # 텍스트 영역이 없는 경우 건너뛰기
                if len(text_bboxes) == 0:
                    print(f"No text detected in {file_name}. Skipping...")
                    continue

                # 좌표 저장
                coordinates_path = os.path.join(
                    coordinates_dir, file_name.split(".")[0] + ".txt"
                )
                with open(coordinates_path, "w") as coord_file:
                    for box in text_bboxes:
                        coord_file.write(",".join(map(str, box.flatten())) + "\n")

                # 텍스트 영역 표시
                for box in text_bboxes:
                    cv2.polylines(original_image, [box], True, (0, 255, 0), 2)

                # 결과 저장
                output_path = os.path.join(output_dir, file_name)
                cv2.imwrite(output_path, original_image)
                print(f"Saved processed image to {output_path}")
                print(f"Saved coordinates to {coordinates_path}")

            except Exception as e:
                print(f"Error processing {file_name}: {e}")


# 이미지 처리 실행
process_images_with_craft(image_dir, craft, output_dir, coordinates_dir)

# CRAFT 모델 종료
craft.unload_craftnet_model()
craft.unload_refinenet_model()

Processing 20241227_110952.jpg...
Error processing 20241227_110952.jpg: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (16,) + inhomogeneous part.
Processing images (1).jpeg...


In [10]:
import cv2
import numpy as np
from craft_text_detector import Craft
import os

# 경로 설정
image_dir = (
    "C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/test"  # 처리할 이미지 경로
)
output_dir = "C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed"  # 결과를 저장할 출력 경로
coordinates_dir = os.path.join(output_dir, "coordinates")  # 좌표 정보를 저장할 경로
os.makedirs(output_dir, exist_ok=True)
os.makedirs(coordinates_dir, exist_ok=True)

# CRAFT 모델 초기화
craft = Craft(output_dir=output_dir, crop_type="box")


# 데이터 필터링 함수
def filter_boxes(boxes):
    """
    CRAFT의 반환값인 boxes 데이터를 검증하고 유효한 데이터만 반환
    """
    filtered_boxes = []
    for box in boxes:
        if isinstance(box, np.ndarray) and len(box.shape) == 2 and box.shape[1] == 2:
            filtered_boxes.append(box.astype(np.int32))
        else:
            print(f"[WARNING] Invalid box detected and skipped: {box}")
    return filtered_boxes


# CRAFT 텍스트 감지 함수
def process_images_with_craft(image_dir, craft, output_dir, coordinates_dir):
    for file_name in os.listdir(image_dir):
        if file_name.lower().endswith((".png", ".jpg", ".jpeg")):
            image_path = os.path.join(image_dir, file_name)

            try:
                print(f"Processing {file_name}...")

                # 이미지 로드
                image = cv2.imread(image_path)
                if image is None:
                    raise ValueError(f"Failed to load image: {file_name}")

                original_image = image.copy()

                # CRAFT로 텍스트 영역 감지
                craft_result = craft.detect_text(image_path)
                raw_boxes = craft_result["boxes"]

                # boxes 데이터 필터링
                text_bboxes = filter_boxes(raw_boxes)

                # 텍스트 영역이 없는 경우 건너뛰기
                if len(text_bboxes) == 0:
                    print(f"No valid text boxes detected in {file_name}. Skipping...")
                    continue

                # 좌표 저장
                coordinates_path = os.path.join(
                    coordinates_dir, file_name.split(".")[0] + ".txt"
                )
                with open(coordinates_path, "w") as coord_file:
                    for box in text_bboxes:
                        coord_file.write(",".join(map(str, box.flatten())) + "\n")

                # 텍스트 영역 표시
                for box in text_bboxes:
                    cv2.polylines(original_image, [box], True, (0, 255, 0), 2)

                # 결과 저장
                output_path = os.path.join(output_dir, file_name)
                cv2.imwrite(output_path, original_image)
                print(f"Saved processed image to {output_path}")
                print(f"Saved coordinates to {coordinates_path}")

            except Exception as e:
                print(f"Error processing {file_name}: {e}")


# 이미지 처리 실행
process_images_with_craft(image_dir, craft, output_dir, coordinates_dir)

# CRAFT 모델 종료
craft.unload_craftnet_model()
craft.unload_refinenet_model()

Processing 20241227_110952.jpg...
Error processing 20241227_110952.jpg: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (16,) + inhomogeneous part.
Processing P5_1_01_26792_89358.png...
Saved processed image to C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed\P5_1_01_26792_89358.png
Saved coordinates to C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed\coordinates\P5_1_01_26792_89358.txt
Processing P5_1_01_26795_90125.png...
Saved processed image to C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed\P5_1_01_26795_90125.png
Saved coordinates to C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed\coordinates\P5_1_01_26795_90125.txt
Processing P5_1_01_26919_90535.png...
Saved processed image to C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed\P5_1_01_26919_90535.png
Saved coordinates to C:/Users/user/Desktop/kst_pipeline/Model/Yolov8

In [1]:
import os
import cv2
import numpy as np
from craft_text_detector import Craft
from ultralytics import YOLO

# -----------------------------------------------------------
# 환경에 맞게 수정하세요
# -----------------------------------------------------------
image_dir = (
    "C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/test"  # 처리할 이미지 경로
)
output_dir = "C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed"
coordinates_dir = os.path.join(output_dir, "coordinates")
failed_boxes_dir = os.path.join(output_dir, "failed_boxes")
os.makedirs(output_dir, exist_ok=True)
os.makedirs(coordinates_dir, exist_ok=True)
os.makedirs(failed_boxes_dir, exist_ok=True)

# YOLO 모델 로드 (이미 학습된 모델 가정)
YOLO_MODEL_PATH = "C:/Users/user/Desktop/aws_s3_Data/yolov8_text_nontext.pt"  # 사용자 환경에 맞게 수정
yolo_model = YOLO(YOLO_MODEL_PATH)

# CRAFT 모델 초기화
craft = Craft(
    output_dir=output_dir, crop_type="box"  # 저장할 디렉토리  # "poly"도 가능
)


def save_coordinates(coordinates, file_path):
    """텍스트 좌표를 저장"""
    with open(file_path, "w", encoding="utf-8") as f:
        for pts in coordinates:
            f.write(",".join(map(str, pts.flatten())) + "\n")


def save_failed_boxes(failed_boxes, file_path):
    """CRAFT 실패 영역 정보 저장"""
    with open(file_path, "w", encoding="utf-8") as f:
        for box in failed_boxes:
            f.write(
                f"Class: {box['class']}, Confidence: {box['conf']:.2f}, BBox: {box['bbox']}\n"
            )


def process_image_with_craft(image_path):
    """CRAFT로 텍스트 영역을 감지 후 (boxes, score_text 등) 리턴"""
    try:
        craft_result = craft.detect_text(image_path)
        return craft_result
    except Exception as e:
        print(f"CRAFT Error: {e}")
        return None


def process_image_with_yolo_and_craft(
    image, file_name, target_classes=None, conf_thresh=0.5
):
    """YOLO로 텍스트 감지 후 CRAFT 수행"""
    if target_classes is None:
        target_classes = ["text"]  # 본인의 YOLO 클래스명

    height, width = image.shape[:2]
    original_image = image.copy()

    results = yolo_model.predict(image, conf=conf_thresh)
    if len(results) == 0 or len(results[0].boxes) == 0:
        print(f"No objects detected by YOLO in {file_name}. Skipping...")
        return [], []

    boxes = results[0].boxes
    all_text_boxes = []
    failed_boxes_info = []

    for box in boxes:
        cls_id = int(box.cls[0].item())  # 클래스 인덱스
        cls_conf = float(box.conf[0].item())  # confidence
        cls_name = results[0].names[cls_id]

        if cls_name in target_classes and cls_conf >= conf_thresh:
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(width - 1, x2), min(height - 1, y2)

            # YOLO로 잡은 영역 Crop
            cropped_region = original_image[y1:y2, x1:x2]

            # CRAFT 수행
            try:
                craft_result = craft.detect_text(cropped_region)
                text_bboxes = [
                    np.array(pt).astype(np.int32)
                    for pt in craft_result["boxes"]
                    if pt is not None and len(pt) > 0
                ]

                for pts in text_bboxes:
                    pts[:, 0] += x1
                    pts[:, 1] += y1
                    all_text_boxes.append(pts)

            except Exception:
                failed_boxes_info.append(
                    {"class": cls_name, "conf": cls_conf, "bbox": (x1, y1, x2, y2)}
                )

    return all_text_boxes, failed_boxes_info


def main_process(image_dir):
    for file_name in os.listdir(image_dir):
        if file_name.lower().endswith((".png", ".jpg", ".jpeg")):
            image_path = os.path.join(image_dir, file_name)
            print(f"Processing {file_name}...")

            image = cv2.imread(image_path)
            if image is None:
                print(f"Failed to load image: {file_name}")
                continue

            original_image = image.copy()

            # 1) CRAFT 단독 처리
            craft_result = process_image_with_craft(image_path)
            if craft_result is not None and len(craft_result["boxes"]) > 0:
                text_bboxes = [
                    np.array(box).astype(np.int32)
                    for box in craft_result["boxes"]
                    if box is not None and len(box) > 0
                ]
                for pts in text_bboxes:
                    cv2.polylines(original_image, [pts], True, (0, 255, 0), 2)

                coordinates_path = os.path.join(
                    coordinates_dir, f"{file_name.split('.')[0]}.txt"
                )
                save_coordinates(text_bboxes, coordinates_path)

                output_path = os.path.join(output_dir, file_name)
                cv2.imwrite(output_path, original_image)
                print(f"Saved CRAFT result: {output_path}")

            else:
                # 2) YOLO + CRAFT 조합 처리
                print(f"CRAFT failed. Trying YOLO + CRAFT for {file_name}...")
                all_text_boxes, failed_boxes = process_image_with_yolo_and_craft(
                    image, file_name
                )

                for pts in all_text_boxes:
                    cv2.polylines(original_image, [pts], True, (0, 255, 0), 2)

                if failed_boxes:
                    failed_boxes_path = os.path.join(
                        failed_boxes_dir, f"{file_name.split('.')[0]}_failed_boxes.txt"
                    )
                    save_failed_boxes(failed_boxes, failed_boxes_path)

                coordinates_path = os.path.join(
                    coordinates_dir, f"{file_name.split('.')[0]}.txt"
                )
                save_coordinates(all_text_boxes, coordinates_path)

                output_path = os.path.join(output_dir, file_name)
                cv2.imwrite(output_path, original_image)
                print(f"Saved YOLO+CRAFT result: {output_path}")


# 실행
main_process(image_dir)

# 모델 해제
craft.unload_craftnet_model()
craft.unload_refinenet_model()

  from .autonotebook import tqdm as notebook_tqdm


Processing 20241227_110952.jpg...
CRAFT Error: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (16,) + inhomogeneous part.
CRAFT failed. Trying YOLO + CRAFT for 20241227_110952.jpg...

0: 640x384 1 non-text, 73.8ms
Speed: 5.0ms preprocess, 73.8ms inference, 6.0ms postprocess per image at shape (1, 3, 640, 384)
Saved YOLO+CRAFT result: C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed\20241227_110952.jpg
Processing images (1).jpeg...
Saved CRAFT result: C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed\images (1).jpeg
Processing images (2).jpeg...
CRAFT Error: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (16,) + inhomogeneous part.
CRAFT failed. Trying YOLO + CRAFT for images (2).jpeg...

0: 384x640 1 non-text, 48.1ms
Speed: 2.0ms preprocess, 48.1ms inference, 1.0ms postprocess per image a

In [2]:
import os
import cv2
import numpy as np
from craft_text_detector import Craft
from ultralytics import YOLO

# -----------------------------------------------------------
# 환경에 맞게 수정하세요
# -----------------------------------------------------------
image_dir = (
    "C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/test"  # 처리할 이미지 경로
)
output_dir = "C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed"
coordinates_dir = os.path.join(output_dir, "coordinates")
failed_boxes_dir = os.path.join(output_dir, "failed_boxes")
cropped_dir = os.path.join(output_dir, "cropped")  # 크롭된 이미지를 저장할 디렉토리
os.makedirs(output_dir, exist_ok=True)
os.makedirs(coordinates_dir, exist_ok=True)
os.makedirs(failed_boxes_dir, exist_ok=True)
os.makedirs(cropped_dir, exist_ok=True)

# YOLO 모델 로드 (이미 학습된 모델 가정)
YOLO_MODEL_PATH = "C:/Users/user/Desktop/aws_s3_Data/yolov8_text_nontext.pt"  # 사용자 환경에 맞게 수정
yolo_model = YOLO(YOLO_MODEL_PATH)

# CRAFT 모델 초기화
craft = Craft(
    output_dir=output_dir, crop_type="box"  # 저장할 디렉토리  # "poly"도 가능
)


def save_coordinates(coordinates, file_path):
    """텍스트 좌표를 저장"""
    with open(file_path, "w", encoding="utf-8") as f:
        for pts in coordinates:
            f.write(",".join(map(str, pts.flatten())) + "\n")


def save_failed_boxes(failed_boxes, file_path):
    """CRAFT 실패 영역 정보 저장"""
    with open(file_path, "w", encoding="utf-8") as f:
        for box in failed_boxes:
            f.write(
                f"Class: {box['class']}, Confidence: {box['conf']:.2f}, BBox: {box['bbox']}\n"
            )


def process_image_with_craft(image_path):
    """CRAFT로 텍스트 영역을 감지 후 (boxes, score_text 등) 리턴"""
    try:
        craft_result = craft.detect_text(image_path)
        return craft_result
    except Exception as e:
        print(f"CRAFT Error: {e}")
        return None


def process_image_with_yolo_and_craft(
    image, file_name, target_classes=None, conf_thresh=0.5
):
    """YOLO로 텍스트 감지 후 CRAFT 수행"""
    if target_classes is None:
        target_classes = ["text"]  # 본인의 YOLO 클래스명

    height, width = image.shape[:2]
    original_image = image.copy()

    results = yolo_model.predict(image, conf=conf_thresh)
    if len(results) == 0 or len(results[0].boxes) == 0:
        print(f"No objects detected by YOLO in {file_name}. Skipping...")
        return [], []

    boxes = results[0].boxes
    all_text_boxes = []
    failed_boxes_info = []

    for box in boxes:
        cls_id = int(box.cls[0].item())  # 클래스 인덱스
        cls_conf = float(box.conf[0].item())  # confidence
        cls_name = results[0].names[cls_id]

        if cls_name in target_classes and cls_conf >= conf_thresh:
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(width - 1, x2), min(height - 1, y2)

            # YOLO로 잡은 영역 Crop (원본 이미지에서 크롭)
            cropped_region = image[y1:y2, x1:x2]

            # CRAFT 수행
            try:
                craft_result = craft.detect_text(cropped_region)
                text_bboxes = [
                    np.array(pt).astype(np.int32)
                    for pt in craft_result["boxes"]
                    if pt is not None and len(pt) > 0
                ]

                for pts in text_bboxes:
                    pts[:, 0] += x1
                    pts[:, 1] += y1
                    all_text_boxes.append(pts)

            except Exception:
                failed_boxes_info.append(
                    {"class": cls_name, "conf": cls_conf, "bbox": (x1, y1, x2, y2)}
                )

    return all_text_boxes, failed_boxes_info


def main_process(image_dir):
    for file_name in os.listdir(image_dir):
        if file_name.lower().endswith((".png", ".jpg", ".jpeg")):
            image_path = os.path.join(image_dir, file_name)
            print(f"Processing {file_name}...")

            image = cv2.imread(image_path)
            if image is None:
                print(f"Failed to load image: {file_name}")
                continue

            original_image = image.copy()

            # 각 이미지별로 크롭 이미지를 저장할 폴더 생성
            image_base_name = os.path.splitext(file_name)[0]
            image_cropped_dir = os.path.join(cropped_dir, image_base_name)
            os.makedirs(image_cropped_dir, exist_ok=True)

            # 1) CRAFT 단독 처리
            craft_result = process_image_with_craft(image_path)
            if craft_result is not None and len(craft_result["boxes"]) > 0:
                text_bboxes = [
                    np.array(box).astype(np.int32)
                    for box in craft_result["boxes"]
                    if box is not None and len(box) > 0
                ]
                for idx, pts in enumerate(text_bboxes):
                    # 폴리라인 그리기 (원본 이미지에만 그림)
                    cv2.polylines(original_image, [pts], True, (0, 255, 0), 2)

                    # 바운딩 박스 좌표 추출
                    x_coords = pts[:, 0]
                    y_coords = pts[:, 1]
                    x_min, x_max = x_coords.min(), x_coords.max()
                    y_min, y_max = y_coords.min(), y_coords.max()

                    # 이미지 크롭 (원본 이미지에서 크롭)
                    cropped_image = image[y_min:y_max, x_min:x_max]
                    cropped_image_path = os.path.join(image_cropped_dir, f"{image_base_name}_craft_{idx}.png")
                    cv2.imwrite(cropped_image_path, cropped_image)
                    print(f"Saved cropped image: {cropped_image_path}")

                coordinates_path = os.path.join(
                    coordinates_dir, f"{image_base_name}.txt"
                )
                save_coordinates(text_bboxes, coordinates_path)

                output_path = os.path.join(output_dir, file_name)
                cv2.imwrite(output_path, original_image)
                print(f"Saved CRAFT result: {output_path}")

            else:
                # 2) YOLO + CRAFT 조합 처리
                print(f"CRAFT failed. Trying YOLO + CRAFT for {file_name}...")
                all_text_boxes, failed_boxes = process_image_with_yolo_and_craft(
                    image, file_name
                )

                for idx, pts in enumerate(all_text_boxes):
                    cv2.polylines(original_image, [pts], True, (0, 255, 0), 2)

                    # 바운딩 박스 좌표 추출
                    x_coords = pts[:, 0]
                    y_coords = pts[:, 1]
                    x_min, x_max = x_coords.min(), x_coords.max()
                    y_min, y_max = y_coords.min(), y_coords.max()

                    # 이미지 크롭 (원본 이미지에서 크롭)
                    cropped_image = image[y_min:y_max, x_min:x_max]
                    cropped_image_path = os.path.join(image_cropped_dir, f"{image_base_name}_yolo_craft_{idx}.png")
                    cv2.imwrite(cropped_image_path, cropped_image)
                    print(f"Saved cropped image: {cropped_image_path}")

                if failed_boxes:
                    failed_boxes_path = os.path.join(
                        failed_boxes_dir, f"{image_base_name}_failed_boxes.txt"
                    )
                    save_failed_boxes(failed_boxes, failed_boxes_path)

                coordinates_path = os.path.join(
                    coordinates_dir, f"{image_base_name}.txt"
                )
                save_coordinates(all_text_boxes, coordinates_path)

                output_path = os.path.join(output_dir, file_name)
                cv2.imwrite(output_path, original_image)
                print(f"Saved YOLO+CRAFT result: {output_path}")

    # 모델 해제
    craft.unload_craftnet_model()
    craft.unload_refinenet_model()


# 실행
if __name__ == "__main__":
    main_process(image_dir)


Processing 20241227_110952.jpg...
CRAFT Error: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (16,) + inhomogeneous part.
CRAFT failed. Trying YOLO + CRAFT for 20241227_110952.jpg...

0: 640x384 1 non-text, 55.5ms
Speed: 2.0ms preprocess, 55.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)
Saved YOLO+CRAFT result: C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed\20241227_110952.jpg
Processing images (1).jpeg...
Saved cropped image: C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed\cropped\images (1)\images (1)_craft_0.png
Saved cropped image: C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed\cropped\images (1)\images (1)_craft_1.png
Saved cropped image: C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed\cropped\images (1)\images (1)_craft_2.png
Saved cropped image: C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/proc

In [None]:
import os
import cv2
import numpy as np
from craft_text_detector import Craft
from ultralytics import YOLO

# -----------------------------------------------------------
# 환경에 맞게 수정하세요
# -----------------------------------------------------------
image_dir = (
    "C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/test"  # 처리할 이미지 경로
)
output_dir = "C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed"
coordinates_dir = os.path.join(output_dir, "coordinates")
failed_boxes_dir = os.path.join(output_dir, "failed_boxes")
os.makedirs(output_dir, exist_ok=True)
os.makedirs(coordinates_dir, exist_ok=True)
os.makedirs(failed_boxes_dir, exist_ok=True)

# YOLO 모델 로드 (이미 학습된 모델 가정)
YOLO_MODEL_PATH = "C:/Users/user/Desktop/aws_s3_Data/yolov8_text_nontext.pt"  # 사용자 환경에 맞게 수정
yolo_model = YOLO(YOLO_MODEL_PATH)

# CRAFT 모델 초기화
craft = Craft(
    output_dir=output_dir, crop_type="box"  # 저장할 디렉토리  # "poly"도 가능
)


def save_coordinates(coordinates, file_path):
    """텍스트 좌표를 저장"""
    with open(file_path, "w", encoding="utf-8") as f:
        for pts in coordinates:
            f.write(",".join(map(str, pts.flatten())) + "\n")


def save_failed_boxes(failed_boxes, file_path):
    """CRAFT 실패 영역 정보 저장"""
    with open(file_path, "w", encoding="utf-8") as f:
        for box in failed_boxes:
            f.write(
                f"Class: {box['class']}, Confidence: {box['conf']:.2f}, BBox: {box['bbox']}\n"
            )


def process_image_with_craft(image_path):
    """CRAFT로 텍스트 영역을 감지 후 (boxes, score_text 등) 리턴"""
    try:
        craft_result = craft.detect_text(image_path)
        return craft_result
    except Exception as e:
        print(f"CRAFT Error: {e}")
        return None


def process_non_text_with_yolo(image, text_boxes, conf_thresh=0.5):
    """YOLO로 비텍스트 영역 감지 후 텍스트 영역과 겹치지 않는 영역 저장"""
    height, width = image.shape[:2]
    results = yolo_model.predict(image, conf=conf_thresh)
    if len(results) == 0 or len(results[0].boxes) == 0:
        print("YOLO detected no objects.")
        return []

    non_text_boxes = []
    for box in results[0].boxes:
        cls_id = int(box.cls[0].item())
        cls_conf = float(box.conf[0].item())
        cls_name = results[0].names[cls_id]

        # YOLO가 감지한 비텍스트 영역 좌표
        x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(width - 1, x2), min(height - 1, y2)

        # CRAFT 텍스트 영역과 겹치는지 확인
        bbox = np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]])
        overlap = any(
            cv2.pointPolygonTest(np.array(text_box), (x1, y1), False) >= 0
            for text_box in text_boxes
        )

        if not overlap:
            non_text_boxes.append(
                {"class": cls_name, "conf": cls_conf, "bbox": (x1, y1, x2, y2)}
            )

    return non_text_boxes


def main_process(image_dir):
    for file_name in os.listdir(image_dir):
        if file_name.lower().endswith((".png", ".jpg", ".jpeg")):
            image_path = os.path.join(image_dir, file_name)
            print(f"Processing {file_name}...")

            image = cv2.imread(image_path)
            if image is None:
                print(f"Failed to load image: {file_name}")
                continue

            # 1) CRAFT로 텍스트 영역 감지
            craft_result = process_image_with_craft(image_path)
            text_boxes = []
            if craft_result and "boxes" in craft_result:
                text_boxes = [
                    np.array(box).astype(np.int32)
                    for box in craft_result["boxes"]
                    if box is not None and len(box) > 0
                ]

            # 2) YOLO로 비텍스트 영역 감지
            non_text_boxes = process_non_text_with_yolo(image, text_boxes)

            # 3) 결과 저장
            if text_boxes:
                coordinates_path = os.path.join(
                    coordinates_dir, f"{file_name.split('.')[0]}_text.txt"
                )
                save_coordinates(text_boxes, coordinates_path)
                print(f"Saved CRAFT text coordinates: {coordinates_path}")

            if non_text_boxes:
                failed_boxes_path = os.path.join(
                    failed_boxes_dir, f"{file_name.split('.')[0]}_non_text.txt"
                )
                save_failed_boxes(non_text_boxes, failed_boxes_path)
                print(f"Saved YOLO non-text information: {failed_boxes_path}")


# 실행
main_process(image_dir)

# 모델 해제
craft.unload_craftnet_model()
craft.unload_refinenet_model()

Processing 20241227_110952.jpg...
CRAFT Error: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (16,) + inhomogeneous part.

0: 640x384 1 non-text, 58.9ms
Speed: 2.5ms preprocess, 58.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)
Saved YOLO non-text information: C:/Users/user/Desktop/kst_pipeline/Model/Yolov8/Result/processed\failed_boxes\20241227_110952_non_text.txt
Processing images (1).jpeg...

0: 640x576 1 non-text, 84.2ms
Speed: 2.0ms preprocess, 84.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 576)


error: OpenCV(4.10.0) :-1: error: (-5:Bad argument) in function 'pointPolygonTest'
> Overload resolution failed:
>  - Can't parse 'pt'. Sequence item with index 0 has a wrong type
>  - Can't parse 'pt'. Sequence item with index 0 has a wrong type


In [None]:
!