In [1]:
# Print the TESSDATA_PREFIX environment variable, which Tesseract uses to
# locate its language data files.
!printenv TESSDATA_PREFIX

In [21]:
import cv2
import numpy as np
import pytesseract
from PIL import Image
from pytesseract import Output

In [91]:
def load_image(image_path):
    """Load an image file from disk as a fully decoded PIL image.

    ``Image.open`` is lazy: it only reads the header and keeps the file open
    until the pixel data is first accessed. The pixels are decoded here and
    the file handle is released before returning, so callers never hold an
    open file.

    Args:
        image_path: Path of the image file to read.

    Returns:
        PIL.Image.Image: The decoded image; it stays usable after the
        underlying file has been closed.
    """
    with Image.open(image_path) as img:
        # Force decoding while the file is still open.
        img.load()
    return img


def enhance_image(img, scale=2):
    """Upscale a PIL image and return a contrast-enhanced, sharpened grayscale array.

    The steps run in this order: bicubic upscaling, grayscale conversion,
    CLAHE (contrast-limited adaptive histogram equalisation) and a 3x3
    sharpening convolution.

    Args:
        img: PIL image. Modes other than RGB/RGBA (for example grayscale "L"
            or palette "P") are converted to RGB first.
        scale: Integer factor applied to both width and height. Defaults to 2.

    Returns:
        numpy.ndarray: Single-channel image after CLAHE and sharpening.
    """
    # The colour conversion below needs a 3- or 4-channel array; a grayscale
    # or palette image would yield a 2-D array and make cv2.cvtColor fail.
    if img.mode not in ("RGB", "RGBA"):
        img = img.convert("RGB")

    # Upscale.
    img = img.resize((img.width * scale, img.height * scale), Image.BICUBIC)

    # Convert RGB straight to gray; the intermediate BGR image was never used.
    img_gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY)

    # CLAHE.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    img_gray = clahe.apply(img_gray)

    # Sharpen. The kernel weights sum to 1, so flat regions keep their level.
    kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
    img_gray = cv2.filter2D(img_gray, -1, kernel)

    return img_gray


def extract_text(image, min_conf=30):
    """Run Tesseract OCR on an image and return recognised text with its boxes.

    Args:
        image: Image in any form accepted by ``pytesseract.image_to_data``.
        min_conf: Entries are kept only when their confidence is strictly
            greater than this value. Defaults to 30.

    Returns:
        list[tuple]: ``(text, (x, y, w, h))`` pairs, where the box values come
        from the ``left``/``top``/``width``/``height`` columns of the OCR
        result. Whitespace-only entries are skipped.
    """
    # LSTM engine (--oem 1), sparse-text page segmentation (--psm 11), Japanese.
    custom_config = r"--oem 1 --psm 11 -l jpn"
    data = pytesseract.image_to_data(
        image, config=custom_config, output_type=Output.DICT
    )

    columns = ("text", "conf", "left", "top", "width", "height")
    # .get() keeps an empty result dict from raising KeyError.
    rows = zip(*(data.get(name, []) for name in columns))

    text_info = []
    for text, conf, x, y, w, h in rows:
        # Confidence may arrive as an int, a float or a string such as
        # "96.53"; int() alone raises ValueError on the fractional string.
        if int(float(conf)) <= min_conf:
            continue
        # Blank tokens carry no text even when their confidence is high.
        if not text.strip():
            continue
        text_info.append((text, (x, y, w, h)))
    return text_info


def extract_shapes(image, canny_low=30, canny_high=50, min_area=0):
    """Detect outer contours in an image and describe each as a simplified polygon.

    Args:
        image: Image array passed to ``cv2.GaussianBlur`` and then
            ``cv2.Canny`` (``main`` passes the output of ``enhance_image``).
        canny_low: Lower hysteresis threshold for ``cv2.Canny``. Defaults to 30.
        canny_high: Upper hysteresis threshold for ``cv2.Canny``. Defaults to 50.
        min_area: Contours whose bounding-box area (``w * h``) is below this
            value are dropped. The default of 0 keeps every contour.

    Returns:
        list[dict]: One dict per kept contour with the keys ``"type"`` (result
        of ``classify_shape``), ``"coordinates"`` (bounding box ``(x, y, w, h)``)
        and ``"vertices"`` (approximated polygon as nested lists).
    """
    blurred = cv2.GaussianBlur(image, (5, 5), 0)
    edged = cv2.Canny(blurred, canny_low, canny_high)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); index -2 is the contour list in both layouts.
    contours = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    shapes = []
    for contour in contours:
        # Approximation tolerance: 1% of the closed contour's perimeter.
        epsilon = 0.01 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        x, y, w, h = cv2.boundingRect(approx)
        if w * h < min_area:
            continue
        shapes.append(
            {
                "type": classify_shape(approx),
                "coordinates": (x, y, w, h),
                "vertices": approx.tolist(),
            }
        )
    return shapes


def classify_shape(vertices):
    """Name a shape from the number of vertices in its polygon approximation.

    Args:
        vertices: Sized collection of polygon vertices; only its length is used.

    Returns:
        str: ``"Rectangle"`` for exactly four vertices, ``"Polygon"`` for more
        than four, and ``"Unknown"`` otherwise.
    """
    count = len(vertices)
    if count < 4:
        return "Unknown"
    return "Rectangle" if count == 4 else "Polygon"


def main(image_path):
    """Run OCR and shape extraction on one image and print both result sets.

    Args:
        image_path: Path of the image file to process.
    """
    processed = enhance_image(load_image(image_path))
    ocr_results = extract_text(processed)
    detected_shapes = extract_shapes(processed)

    print("Extracted Text:")
    for entry in ocr_results:
        label, box = entry
        print(f"Text: {label}, Coordinates: {box}")

    print("\nExtracted Shapes:")
    for item in detected_shapes:
        kind = item["type"]
        box = item["coordinates"]
        points = item["vertices"]
        print(f"Shape Type: {kind}, Coordinates: {box}, Vertices: {points}")

In [98]:
# Run the Tesseract OCR and contour-extraction pipeline on one sample image;
# main() prints the recognised text and the detected shapes.
image_path = "dataset/Set_A_02/gray/1005300.jpg"
main(image_path)

Extracted Text:
Text: バ, Coordinates: (152, 22, 78, 14)
Text: コニー, Coordinates: (184, 22, 46, 14)
Text: 問, Coordinates: (208, 111, 22, 25)
Text: 7, Coordinates: (200, 136, 20, 24)
Text: 1, Coordinates: (48, 184, 22, 26)
Text: じ, Coordinates: (84, 225, 20, 20)
Text: (HH.3), Coordinates: (100, 424, 66, 26)
Text: 三, Coordinates: (149, 512, 21, 4)

Extracted Shapes:
Shape Type: Unknown, Coordinates: (88, 544, 1, 1), Vertices: [[[88, 544]]]
Shape Type: Polygon, Coordinates: (153, 543, 5, 2), Vertices: [[[153, 543]], [[153, 544]], [[157, 544]], [[157, 543]], [[156, 544]], [[154, 544]]]
Shape Type: Polygon, Coordinates: (122, 543, 23, 8), Vertices: [[[144, 543]], [[143, 543]], [[144, 544]], [[141, 547]], [[128, 547]], [[127, 548]], [[125, 546]], [[126, 545]], [[125, 547]], [[127, 548]], [[125, 550]], [[122, 550]], [[125, 550]], [[129, 547]], [[141, 547]], [[142, 546]], [[143, 547]]]
Shape Type: Unknown, Coordinates: (85, 543, 2, 2), Vertices: [[[85, 543]], [[86, 544]], [[86, 543]]]
Shape Type

In [96]:
# Interactive browser login that sets up Application Default Credentials.
# NOTE(review): the argument-less vision.ImageAnnotatorClient() in detect_text
# presumably relies on these credentials - confirm.
!gcloud auth application-default login

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8085%2F&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login&state=KCfaJO9pxzdSu3gupfwbRbMS4yNioV&access_type=offline&code_challenge=wvdNk77YzFE8I8Evr5FSL1RFQ1mV-R9XxRIgnK_iALg&code_challenge_method=S256

gio: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8085%2F&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login&state=KCfaJO9pxzdSu3gupfwbRbMS4yNioV&access_type=offlin

In [102]:
import io
import cv2
import numpy as np
from PIL import Image
from google.cloud import vision
from google.oauth2 import service_account


In [120]:
def detect_text(path):
    """Detect text in an image file with the Google Cloud Vision API.

    The file's bytes are sent to ``text_detection``; every returned annotation
    is printed and the collected annotations are returned.

    Args:
        path: Path of the image file to analyse.

    Returns:
        list[dict]: One dict per annotation with the keys ``"description"``
        (the detected text) and ``"vertices"`` (the bounding polygon as a list
        of ``{"x": ..., "y": ...}`` dicts).

    Raises:
        RuntimeError: If the API response carries an error message.
    """
    client = vision.ImageAnnotatorClient()

    with open(path, "rb") as image_file:
        content = image_file.read()

    # Run text detection.
    response = client.text_detection(image=vision.Image(content=content))

    # Check for failure before reading any annotations from the response.
    if response.error.message:
        raise RuntimeError(response.error.message)

    extracted_texts = [
        {
            "description": annotation.description,
            "vertices": [
                {"x": vertex.x, "y": vertex.y}
                for vertex in annotation.bounding_poly.vertices
            ],
        }
        for annotation in response.text_annotations
    ]

    print("Extracted Text:")
    for entry in extracted_texts:
        print(f"Text: {entry['description']}, Vertices: {entry['vertices']}")

    return extracted_texts

In [126]:
import json

# Run Cloud Vision text detection on one sample image; detect_text() prints
# the annotations and returns them as a list of dicts.
image_path = "dataset/Set_A_02/gray/1000040.jpg"
texts = detect_text(image_path)

# Optional: uncomment to save the annotations as UTF-8 JSON
# (uses the json import at the top of this cell).
# with open('texts.json', 'w', encoding='utf-8') as f:
#     json.dump(texts, f, indent=4, ensure_ascii=False)

Extracted Text:
Text: 玄関
浴室
洗面
バルコニー
キッチン
5.0帖
洋室
7.5帖
リビング
押入
和室
ダイニング
8.0%
8.8帖
和室
6.0帖
バルコニー, Vertices: [{'x': 31, 'y': 27}, {'x': 263, 'y': 27}, {'x': 263, 'y': 231}, {'x': 31, 'y': 231}]
Text: 玄関, Vertices: [{'x': 88, 'y': 48}, {'x': 96, 'y': 48}, {'x': 96, 'y': 65}, {'x': 88, 'y': 65}]
Text: 浴室, Vertices: [{'x': 31, 'y': 46}, {'x': 48, 'y': 46}, {'x': 48, 'y': 54}, {'x': 31, 'y': 54}]
Text: 洗面, Vertices: [{'x': 41, 'y': 78}, {'x': 48, 'y': 78}, {'x': 48, 'y': 94}, {'x': 41, 'y': 94}]
Text: バルコニー, Vertices: [{'x': 212, 'y': 27}, {'x': 254, 'y': 27}, {'x': 254, 'y': 33}, {'x': 212, 'y': 33}]
Text: キッチン, Vertices: [{'x': 133, 'y': 72}, {'x': 180, 'y': 72}, {'x': 180, 'y': 80}, {'x': 133, 'y': 80}]
Text: 5.0, Vertices: [{'x': 142, 'y': 85}, {'x': 160, 'y': 85}, {'x': 160, 'y': 93}, {'x': 142, 'y': 93}]
Text: 帖, Vertices: [{'x': 161, 'y': 85}, {'x': 174, 'y': 85}, {'x': 174, 'y': 93}, {'x': 161, 'y': 93}]
Text: 洋, Vertices: [{'x': 225, 'y': 74}, {'x': 237, 'y': 74}, {'x': 237, 'y': 86

In [7]:
# Install TensorFlow pinned to version 2.8.0 into the kernel's environment.
%pip install tensorflow==2.8.0

Collecting tensorflow==2.8.0
  Downloading tensorflow-2.8.0-cp310-cp310-manylinux2010_x86_64.whl (497.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m497.6/497.6 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:03[0m
Collecting tf-estimator-nightly==2.8.0.dev2021122109
  Downloading tf_estimator_nightly-2.8.0.dev2021122109-py2.py3-none-any.whl (462 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m462.5/462.5 KB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting keras<2.9,>=2.8.0rc0
  Downloading keras-2.8.0-py2.py3-none-any.whl (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0mm
Collecting tensorboard<2.9,>=2.8
  Downloading tensorboard-2.8.0-py3-none-any.whl (5.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.8/5.8 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting keras-p

In [12]:
# Import Keras through the public ``tensorflow.keras`` namespace;
# ``tensorflow.python.*`` is a private path and not a supported API.
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
from tensorflow.keras.models import Sequential

# Shape of one input sample passed to the first layer.
INPUT_SHAPE = (64, 64, 3)
# Size of the softmax output layer; adjust to the number of classes.
NUM_CLASSES = 10

# Build the model: two convolution + max-pooling stages followed by a dense
# classifier head.
model = Sequential(
    [
        Conv2D(32, (3, 3), activation="relu", input_shape=INPUT_SHAPE),
        MaxPooling2D(2, 2),
        Conv2D(64, (3, 3), activation="relu"),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(128, activation="relu"),
        Dense(NUM_CLASSES, activation="softmax"),
    ]
)

# Compile the model.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Show the model summary.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 62, 62, 32)        896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 29, 29, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 12544)             0         
_________________________________________________________________
dense (Dense)                (None, 128)               1605760   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1

2024-04-14 15:34:02.227443: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-14 15:34:02.265024: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
