In [5]:
# Image file processed by the cells below (relative to the notebook's working directory).
image_path = "dataset/Set_A_02/gray/1005306.jpg"
# Annotation JSON consumed by load_json_data() / extract_rectangles().
# NOTE(review): the directory is spelled "anotations" -- keep it in sync with the folder on disk.
json_path = "dataset/anotations/via_project_15Apr2024_9h49m_json.json"

In [9]:
import cv2
import numpy as np
from PIL import Image
import pytesseract
from pytesseract import Output
import json

def load_json_data(json_path):
    """Read a JSON file and return the decoded Python object.

    The file is decoded as UTF-8 explicitly instead of with the platform's
    default locale encoding, so non-ASCII content is read the same way on
    every operating system.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(json_path, "r", encoding="utf-8") as file:
        return json.load(file)


def load_image(image_path):
    """Load an image with PIL and return it as a 3-channel BGR OpenCV array.

    Images in mode "L" are expanded from one gray channel to BGR. Every other
    mode that is not already "RGB" (palette, alpha, CMYK, 1-bit, ...) is first
    converted to RGB by PIL, so the result never carries an alpha channel or
    raw palette indices.

    Args:
        image_path: Path of the image file.

    Returns:
        A C-contiguous ``numpy.ndarray`` in BGR channel order, or ``None`` when
        the file cannot be read (an error message is printed in that case).
    """
    try:
        # Image.open is lazy and keeps the file open; the context manager
        # closes it once np.array() has forced the pixel data to be read.
        with Image.open(image_path) as pil_image:
            if pil_image.mode not in ("L", "RGB"):
                pil_image = pil_image.convert("RGB")
            pixels = np.array(pil_image)
    except IOError:
        print(f"Error loading image {image_path}")
        return None

    if pixels.ndim == 2:
        return cv2.cvtColor(pixels, cv2.COLOR_GRAY2BGR)
    # cvtColor (unlike a [:, :, ::-1] view) yields a contiguous array, which
    # OpenCV drawing functions such as cv2.rectangle require.
    return cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)


def extract_rectangles(json_data):
    """Collect the bounding box of every 'rect' region in the annotation data.

    Args:
        json_data: Mapping whose values each hold a "regions" list; every
            region carries a "shape_attributes" dict with a "name" key.

    Returns:
        List of ``(x, y, width, height)`` tuples, gathered across all entries
        of ``json_data`` in iteration order. Regions whose shape name is not
        "rect" are skipped.
    """
    return [
        (attrs["x"], attrs["y"], attrs["width"], attrs["height"])
        for entry in json_data.values()
        for attrs in (region["shape_attributes"] for region in entry["regions"])
        if attrs["name"] == "rect"
    ]


def preprocess_image(image):
    """Build the edge map used for contour detection.

    The BGR input is converted to grayscale, smoothed with a 5x5 Gaussian
    kernel, and passed through Canny with thresholds 50 and 150.

    Args:
        image: BGR image array.

    Returns:
        The output of ``cv2.Canny`` for the smoothed grayscale image.
    """
    smoothed = cv2.GaussianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0)
    return cv2.Canny(smoothed, 50, 150)


def find_contours(edged):
    """Detect external contours in an edge map and describe each as a shape.

    Args:
        edged: Edge image handed directly to ``cv2.findContours``.

    Returns:
        List of ``(shape_type, x, y, w, h)`` tuples, one per contour whose
        area exceeds 100. ``shape_type`` comes from ``classify_shape`` applied
        to the polygon approximation (tolerance: 1% of the contour perimeter);
        the remaining values are that polygon's bounding rectangle.
    """
    contours, _ = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    detected = []
    for contour in contours:
        # Contours with an area of 100 or less are ignored.
        if cv2.contourArea(contour) <= 100:
            continue
        tolerance = 0.01 * cv2.arcLength(contour, True)
        outline = cv2.approxPolyDP(contour, tolerance, True)
        detected.append((classify_shape(outline), *cv2.boundingRect(outline)))
    return detected


def classify_shape(approx):
    """Name a shape from the number of vertices in its polygon approximation.

    Args:
        approx: Sequence of polygon vertices; only its length is used.

    Returns:
        "rectangle" for exactly four vertices, "ellipse" for more than four,
        and "unknown" for fewer than four.
    """
    vertex_count = len(approx)
    if vertex_count < 4:
        return "unknown"
    return "rectangle" if vertex_count == 4 else "ellipse"


def extract_text_from_regions(image, shapes):
    """Run OCR on the bounding box of each shape.

    Args:
        image: Image array indexed as ``image[rows, cols]``.
        shapes: Iterable of ``(shape, x, y, w, h)`` tuples.

    Returns:
        List of ``(shape, text)`` pairs, where ``text`` is the pytesseract
        output (Japanese language data, page segmentation mode 6) for the
        cropped region.
    """

    def ocr(x, y, w, h):
        region = image[y : y + h, x : x + w]
        return pytesseract.image_to_string(region, lang="jpn", config="--psm 6")

    return [(shape, ocr(x, y, w, h)) for shape, x, y, w, h in shapes]


def detect_shapes_and_extract_text(image_path):
    """Detect shapes in an image and OCR the area covered by each one.

    Pipeline: ``load_image`` -> ``preprocess_image`` -> ``find_contours`` ->
    ``extract_text_from_regions``.

    Args:
        image_path: Path of the image file.

    Returns:
        List of ``(shape, text)`` pairs, or an empty list when the image
        cannot be loaded.

    NOTE(review): a later cell redefines ``find_contours`` to take the BGR
    image and return raw contours; this function needs the version defined
    alongside it, which takes an edge map and returns shape tuples.
    """
    image = load_image(image_path)
    if image is None:
        return []
    edged = preprocess_image(image)
    # find_contours passes its argument straight to cv2.findContours, so it
    # must receive the edge map rather than the 3-channel image.
    shapes = find_contours(edged)
    return extract_text_from_regions(image, shapes)


def draw_rectangles(image, rectangles):
    """Outline each box on ``image`` with a 2-pixel line in colour (0, 255, 0).

    Args:
        image: Array handed to ``cv2.rectangle``; it is drawn on directly.
        rectangles: Iterable of ``(x, y, width, height)`` tuples.

    Returns:
        The same ``image`` object that was passed in.
    """
    colour = (0, 255, 0)
    for left, top, box_w, box_h in rectangles:
        top_left = (left, top)
        bottom_right = (left + box_w, top + box_h)
        cv2.rectangle(image, top_left, bottom_right, colour, 2)
    return image

In [10]:
# Load the annotations and collect every "rect" region as (x, y, width, height).
# NOTE(review): extract_rectangles() walks all entries of the JSON, so boxes
# belonging to other images are drawn too if the file describes more than one -- confirm.
json_data = load_json_data(json_path)
rectangles = extract_rectangles(json_data)
# NOTE(review): the load_image defined next to draw_rectangles returns None on a
# read error; that case is not handled here.
image = load_image(image_path)
# draw_rectangles draws on `image` itself and returns the same array.
image_with_rectangles = draw_rectangles(image, rectangles)
# Opens a native OpenCV window and waits for a key press before closing it.
# NOTE(review): this needs a GUI display; it may not work in a headless Jupyter session.
cv2.imshow("Image with Bounding Boxes", image_with_rectangles)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [2]:
import cv2
import numpy as np
from PIL import Image
import pytesseract
from pytesseract import Output


def load_image(image_path):
    """Load an image with PIL and return it as a 3-channel BGR OpenCV array.

    Mode "L" images are expanded from gray to BGR. Any other mode that is not
    already "RGB" (palette, alpha, CMYK, 1-bit, ...) is converted to RGB by
    PIL first, so alpha channels and palette indices never reach OpenCV.

    Args:
        image_path: Path of the image file.

    Returns:
        A C-contiguous ``numpy.ndarray`` in BGR channel order, or ``None`` when
        the file cannot be read (an error message is printed in that case).
    """
    try:
        # Image.open leaves the file open until closed; np.array() reads the
        # pixels while the handle is still valid, then the `with` closes it.
        with Image.open(image_path) as source:
            if source.mode not in ("L", "RGB"):
                source = source.convert("RGB")
            pixels = np.array(source)
    except IOError:
        print(f"Error loading image {image_path}")
        return None

    # cvtColor returns a fresh contiguous array; a reversed-slice view would
    # have negative strides, which OpenCV drawing calls do not accept.
    conversion = cv2.COLOR_GRAY2BGR if pixels.ndim == 2 else cv2.COLOR_RGB2BGR
    return cv2.cvtColor(pixels, conversion)


def find_contours(image):
    """Find the external contours of the edges in a BGR image.

    The image is converted to grayscale, blurred with a 5x5 Gaussian kernel
    and run through Canny (thresholds 50 and 150) before contour extraction.

    Args:
        image: BGR image array.

    Returns:
        The contours element returned by ``cv2.findContours``
        (``RETR_EXTERNAL``, ``CHAIN_APPROX_SIMPLE``).
    """
    edge_map = cv2.Canny(
        cv2.GaussianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0),
        50,
        150,
    )
    found = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours, _hierarchy = found
    return contours


def approximate_polygons(contours, accuracy=0.02):
    """Approximate each contour by a closed polygon.

    Args:
        contours: Iterable of contours accepted by ``cv2.arcLength``.
        accuracy: Fraction of each contour's closed perimeter used as the
            tolerance passed to ``cv2.approxPolyDP``.

    Returns:
        List with one approximated polygon per input contour, in input order.
    """

    def simplify(contour):
        tolerance = accuracy * cv2.arcLength(contour, True)
        return cv2.approxPolyDP(contour, tolerance, True)

    return [simplify(contour) for contour in contours]


def draw_polygons(image, polygons):
    """Draw polygons with five or more vertices on ``image`` and display it.

    Each selected polygon is drawn as a closed 2-pixel outline in colour
    (0, 255, 0) directly on ``image``. The result is shown in an OpenCV window
    titled "Polygons Detected"; the call waits for a key press and then
    closes all OpenCV windows.

    Args:
        image: Array to draw on.
        polygons: Sequence of polygons; those with fewer than five vertices
            are skipped.
    """
    many_sided = [polygon for polygon in polygons if len(polygon) >= 5]
    for outline in many_sided:
        cv2.polylines(image, [outline], True, (0, 255, 0), 2)
    cv2.imshow("Polygons Detected", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


def extract_text_from_regions(image, shapes):
    """OCR the rectangular area belonging to each shape.

    Args:
        image: Image array indexed as ``image[rows, cols]``.
        shapes: Iterable of ``(shape, x, y, w, h)`` tuples.

    Returns:
        List of ``(shape, text)`` pairs; ``text`` is what pytesseract returns
        for the crop with ``lang="jpn"`` and ``--psm 6``.
    """
    texts = []
    for label, left, top, width, height in shapes:
        rows = slice(top, top + height)
        cols = slice(left, left + width)
        recognised = pytesseract.image_to_string(
            image[rows, cols], lang="jpn", config="--psm 6"
        )
        texts.append((label, recognised))
    return texts


def detect_shapes_and_extract_text(image_path):
    """Detect shapes in an image and OCR the bounding box of each one.

    ``find_contours`` in this cell returns raw contours, while
    ``extract_text_from_regions`` expects ``(shape, x, y, w, h)`` tuples, so
    the contours are converted here: each is approximated with
    ``approximate_polygons`` and described by its bounding rectangle plus a
    label derived from the vertex count ("rectangle" for 4, "ellipse" for
    more than 4, "unknown" otherwise).

    Args:
        image_path: Path of the image file.

    Returns:
        List of ``(shape, text)`` pairs, or an empty list when the image
        cannot be loaded.

    NOTE(review): no minimum-area filter is applied, so every contour is sent
    to OCR -- confirm whether small contours should be dropped first.
    """
    image = load_image(image_path)
    if image is None:
        return []

    shapes = []
    for polygon in approximate_polygons(find_contours(image)):
        vertex_count = len(polygon)
        if vertex_count == 4:
            label = "rectangle"
        elif vertex_count > 4:
            label = "ellipse"
        else:
            label = "unknown"
        x, y, w, h = cv2.boundingRect(polygon)
        shapes.append((label, x, y, w, h))
    return extract_text_from_regions(image, shapes)

In [2]:
import cv2
import numpy as np

def detect_shapes(image_path):
    """Detect four-sided shapes in an image file.

    The image is read with ``cv2.imread``, converted to grayscale and run
    through Canny (thresholds 30 and 200). Each external contour is
    approximated with a tolerance of 2% of its perimeter; approximations with
    exactly four vertices are reported.

    Args:
        image_path: Path of the image file.

    Returns:
        List of ``("rectangle", x, y, w, h)`` tuples holding the bounding
        rectangle of every four-vertex approximation. An empty list is
        returned (and a message printed) when the image cannot be read.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        print(f"Error loading image {image_path}")
        return []

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edged = cv2.Canny(gray, 30, 200)
    contours, _ = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    shapes = []
    for cnt in contours:
        epsilon = 0.02 * cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, epsilon, True)
        # Four vertices are taken to be a rectangle (e.g. bathtub or washbasin).
        if len(approx) == 4:
            shapes.append(("rectangle", *cv2.boundingRect(approx)))
    return shapes

In [29]:
from PIL import Image
import pytesseract
from pytesseract import Output
import cv2

def load_image(image_path):
    """Open an image file with PIL and return the resulting image object.

    NOTE(review): earlier cells define ``load_image`` to return an OpenCV
    array (or ``None`` on error); running this cell replaces that definition
    with one returning a PIL image -- confirm which version callers expect.
    """
    pil_image = Image.open(image_path)
    return pil_image


def extract_text_from_coordinates(image_path, coordinates):
    """OCR the bounding box described by each coordinate tuple.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.
        coordinates: Iterable of sequences whose items 1..4 are
            ``x, y, w, h`` (item 0 is not used for cropping).

    Returns:
        List of ``(coord, text)`` pairs, where ``coord`` is the input entry
        and ``text`` the pytesseract output (``lang="jpn"``, ``--psm 6``) for
        that crop. An empty list is returned (and a message printed) when the
        image cannot be read.
    """
    image = cv2.imread(image_path)
    # cv2.imread returns None on failure; slicing None would raise a
    # confusing TypeError further down.
    if image is None:
        print(f"Error loading image {image_path}")
        return []

    texts = []
    for coord in coordinates:
        x, y, w, h = coord[1], coord[2], coord[3], coord[4]
        # Crop the bounding box and run OCR on the crop only.
        roi = image[y : y + h, x : x + w]
        text = pytesseract.image_to_string(roi, lang="jpn", config="--psm 6")
        texts.append((coord, text))
    return texts

In [5]:
# Room-name labels in Japanese: living room, kitchen, bathroom, toilet, bedroom, entrance, washroom.
# NOTE(review): presumably the candidate list for find_best_match(); no cell shown here passes it -- confirm.
floor_labels = ["リビング", "キッチン", "浴室", "トイレ", "寝室", "玄関", "洗面所"]

In [45]:


# Run the shape-detection + OCR pipeline on the sample image and print each result.
# NOTE(review): detect_shapes_and_extract_text and find_contours are each defined
# twice in this notebook, so the behaviour here depends on which cells were run last.
results = detect_shapes_and_extract_text(image_path)
for result in results:
    # Each result is a (shape label, OCR text) pair.
    print(f"Shape: {result[0]}, Extracted Text: {result[1]}")

Shape: ellipse, Extracted Text: | |

Shape: ellipse, Extracted Text: 
Shape: ellipse, Extracted Text: 
Shape: ellipse, Extracted Text: 浴

Shape: ellipse, Extracted Text: 玄関

Shape: ellipse, Extracted Text:   浴室

ゴゴッ*

_ 還E

|     洋室      に
ロ



In [23]:
import cv2
import pytesseract
from pytesseract import Output
import numpy as np

def extract_text(image):
    """Extract recognised text items and their bounding boxes with OCR.

    pytesseract is run with ``--oem 1 --psm 11 -l jpn`` and only entries whose
    confidence is above 30 are kept.

    Args:
        image: Image passed unchanged to ``pytesseract.image_to_data``.

    Returns:
        List of ``(text, (x, y, w, h))`` tuples in the order reported by
        pytesseract.
    """
    custom_config = r"--oem 1 --psm 11 -l jpn"
    data = pytesseract.image_to_data(
        image, config=custom_config, output_type=Output.DICT
    )

    text_info = []
    columns = zip(
        data["text"],
        data["conf"],
        data["left"],
        data["top"],
        data["width"],
        data["height"],
    )
    for text, conf, left, top, width, height in columns:
        # Depending on the pytesseract/Tesseract version, conf may be an int,
        # a float, or a decimal string such as "95.5"; int("95.5") would
        # raise ValueError, so compare as float.
        if float(conf) > 30:
            text_info.append((text, (left, top, width, height)))
    return text_info

In [27]:
from fuzzywuzzy import process

def find_best_match(text, labels):
    """Return the result of ``process.extractOne(text, labels)``.

    Args:
        text: Query string to match.
        labels: Candidate choices to match against.

    Returns:
        Whatever fuzzywuzzy's ``process.extractOne`` returns for the query.
    """
    return process.extractOne(text, labels)


def group_text_blocks(text_info, max_distance=10):
    """Merge consecutive text items whose bounding boxes lie close together.

    Each item is compared with the last item of the current group. The two
    are merged when the gap between their boxes is at most ``max_distance``
    on both the horizontal and the vertical axis (overlapping boxes have a
    gap of 0). The gap is measured in both directions, so an item far to the
    left of or above the previous one starts a new group instead of being
    merged.

    Args:
        text_info: Sequence of ``(text, (x, y, w, h))`` tuples.
        max_distance: Largest allowed gap, in pixels, on each axis.

    Returns:
        List of strings, one per group, with the group's texts joined by a
        single space. Empty input yields an empty list.
    """
    if not text_info:
        return []

    def is_adjacent(prev_box, box):
        prev_x, prev_y, prev_w, prev_h = prev_box
        x, y, w, h = box
        # Distance between the nearest edges on each axis; 0 when overlapping.
        gap_x = max(x - (prev_x + prev_w), prev_x - (x + w), 0)
        gap_y = max(y - (prev_y + prev_h), prev_y - (y + h), 0)
        return gap_x <= max_distance and gap_y <= max_distance

    groups = [[text_info[0]]]
    for item in text_info[1:]:
        last_box = groups[-1][-1][1]
        if is_adjacent(last_box, item[1]):
            groups[-1].append(item)
        else:
            groups.append([item])

    return [" ".join(text for text, _ in group) for group in groups]


def process_image(image_path):
    """Load an image, OCR it and return the grouped text strings.

    Pipeline: ``load_image`` -> ``extract_text`` -> ``group_text_blocks``.

    Args:
        image_path: Path of the image file.

    Returns:
        List of grouped text strings, or an empty list when ``load_image``
        returns ``None`` (some definitions of it in this notebook do so on a
        read error).
    """
    image = load_image(image_path)
    if image is None:
        return []
    return group_text_blocks(extract_text(image))

In [28]:
# OCR the sample image and print one grouped text string per line.
# NOTE(review): process_image uses whichever load_image definition was executed
# last (OpenCV-array version or PIL version) -- confirm the intended one.
resulting_texts = process_image(image_path)
for text in resulting_texts:
    print(text)

玄関
バ ル
同 洋室
