## Start Up

In [1]:
import io
import os
import os.path
import re
import matplotlib.pyplot as plt
import cv2
import csv
import numpy as np
from PIL import Image
from io import BytesIO
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from googleapiclient.http import MediaIoBaseDownload
import torch
import torchvision

In [2]:
cd "C:\Users\covid\text_recognition"

C:\Users\covid\text_recognition


In [3]:
SCOPES = ['https://www.googleapis.com/auth/drive.file']
MIME_TYPE = 'application/vnd.google-apps.document'
APPLICATION_NAME = 'ipa-google-drive-api-client'

def get_service():

    # credentialの取得
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'google-drive-api.json', SCOPES)
            creds = flow.run_local_server(port=0)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
        
    # serviceの取得
    service = build('drive', 'v3', credentials=creds) 
    
    return service

def read_ocr(service, input_file, lang='jp'):
    # ファイルのアップロード

    # ローカルファイルの定義
    media_body = MediaFileUpload(input_file, mimetype=MIME_TYPE, resumable=True)

    # Google Drive上のファイル名
    newfile = 'output.pdf'

    body = {
        'name': newfile,
        'mimeType': MIME_TYPE
    }

    # 　creat関数でファイルアップロード実行
    # 同時にOCR読み取りも行う
    output = service.files().create(
        body=body,
        media_body=media_body,
        # ここで読み込み先言語の指定を行う
        ocrLanguage=lang,
    ).execute()

    # テキストファイルのダウンロード

    # リクエストオブジェクト生成
    request = service.files().export_media(
        fileId=output['id'],
        mimeType="text/plain"
    )
    output_path = 'output.txt'

    with open(output_path, 'a') as f:
        fh = io.FileIO(output_path, "wb")
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()

        service.files().delete(fileId=output['id']).execute()
    
        # テキストの取得
    with open(output_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # 読み取り結果のリストを返す
    return lines[1:]


service = get_service()

In [4]:
def process_text_file(text_file):
    output_dir = "C:/Users/covid/text_recognition/output"
    if os.path.exists(output_dir):
        file_list = [f for f in os.listdir(output_dir) if os.path.isfile(os.path.join(output_dir, f))]
        for file_name in file_list:
            file_path = os.path.join(output_dir, file_name)
            os.remove(file_path)
    else:
        os.makedirs(output_dir)

    with open(text_file, 'r') as file:
        lines = file.readlines()

        lines = sorted(lines, key=lambda line: float(line.split()[1]))

        for i, line in enumerate(lines):
            line = line.strip()
            values = line.split()

            if len(values) == 5:
                object_class = values[0]
                a = float(values[1])
                b = float(values[2])
                c = float(values[3])
                d = float(values[4])

                # Calculate coordinates and dimensions
                x_center = int(wid * a)
                y_center = int(hei * b)
                width = int(wid * c)
                height = int(hei * d)

                x_min = x_center - width // 2
                y_min = y_center - height // 2
                x_max = x_center + width // 2
                y_max = y_center + height // 2

                output_filename = os.path.join(output_dir, f'book{i+1}.jpg')
                index = 1
                while os.path.exists(output_filename):
                    output_filename = os.path.join(output_dir, f'book{i+1}_{index}.jpg')
                    index += 1

                # Crop and save the image
                cropped = image.crop((x_min, y_min, x_max, y_max))
                cropped.save(output_filename)


In [170]:
# カメラの読込み
# 内蔵カメラがある場合、下記引数の数字を変更する必要あり
cap = cv2.VideoCapture(0)

# 動画終了まで、1フレームずつ読み込んで表示する。
while(cap.isOpened()):
    # 1フレーム毎　読込み
    ret, frame = cap.read()

    # GUIに表示
    cv2.imshow("Camera", frame)
    # qキーが押されたら途中終了
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# 終了処理
cap.release()
cv2.destroyAllWindows()

In [54]:
#Variable list
# ディレクトリのパス
directory_path = "C:/Users/covid/text_recognition/yolov7/runs/detect/"
# 画像ファイルの相対パスを指定
image_relative_path = "input.png"
# テキストファイルの相対パスを指定
text_file_relative_path = "labels/input.txt"

out_path = 'C:/Users/covid/text_recognition/output'
output_file = "C:/Users/covid/text_recognition/output_results.txt"

In [171]:
cap = cv2.VideoCapture(0)

ret, frame = cap.read()

if ret:#カメラと本棚の距離→0.6m
    x = 0  # トリミングの左上のX座標
    y = 90  # トリミングの左上のY座標
    width = 640  # トリミングする幅
    height = 200  # トリミングする高さ
    
    cropped_frame = frame[y:y+height, x:x+width]

    cv2.imwrite("C:/Users/covid/text_recognition/yolov7/input.png", cropped_frame)

    cap.release()

## Execution

In [20]:
cap = cv2.VideoCapture(0)

ret, frame = cap.read()
cv2.imwrite("C:/Users/covid/text_recognition/yolov7/input.png",frame)

cap.release()

In [21]:
cd "C:\Users\covid\text_recognition\yolov7"

C:\Users\covid\text_recognition\yolov7


In [22]:
# ディレクトリ内のサブディレクトリのリストを取得
subdirectories = [d for d in os.listdir(directory_path) if os.path.isdir(os.path.join(directory_path, d))]

# サブディレクトリの中で一番新しいものを取得
newest_subdirectory = max(subdirectories, key=lambda d: os.path.getctime(os.path.join(directory_path, d)))

# 最新のサブディレクトリのパスを作成
newest_subdirectory_path = os.path.join(directory_path, newest_subdirectory)

In [23]:
!python detect.py --source C:/Users/covid/text_recognition/yolov7/input.png --weights yolov7-e6e.pt --conf 0.25 --img-size 1280 --device 0 --save-txt

Namespace(weights=['yolov7-e6e.pt'], source='C:/Users/covid/text_recognition/yolov7/input.png', img_size=1280, conf_thres=0.25, iou_thres=0.45, device='0', view_img=False, save_txt=True, save_conf=False, nosave=False, classes=None, agnostic_nms=False, augment=False, update=False, project='runs/detect', name='exp', exist_ok=False, no_trace=False)
Fusing layers... 
 Convert model to Traced-model... 
 traced_script_module saved! 
 model is traced! 

19 books, Done. (27.0ms) Inference, (43.0ms) NMS
 The image with the result is saved in: runs\detect\exp16\input.png
Done. (0.612s)


YOLOR  v0.1-126-g84932d7 torch 2.1.0+cu118 CUDA:0 (NVIDIA GeForce RTX 3090, 24575.5MB)

Model Summary: 792 layers, 151687420 parameters, 817020 gradients
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [24]:
# ディレクトリ内のサブディレクトリのリストを取得
subdirectories = [d for d in os.listdir(directory_path) if os.path.isdir(os.path.join(directory_path, d))]

# サブディレクトリの中で一番新しいものを取得
newest_subdirectory = max(subdirectories, key=lambda d: os.path.getctime(os.path.join(directory_path, d)))

# 最新のサブディレクトリのパスを作成
newest_subdirectory_path = os.path.join(directory_path, newest_subdirectory)

# 新しいディレクトリに移動
os.chdir(newest_subdirectory_path)

# 画像ファイルの絶対パスを作成
image_absolute_path = os.path.join(newest_subdirectory_path, image_relative_path)
# テキストファイルの絶対パスを作成
text_file_absolute_path = os.path.join(newest_subdirectory_path, text_file_relative_path)

# 画像をImageクラスのインスタンスに読み込む
image = Image.open(image_absolute_path)
# テキストファイルを読み込む
with open(text_file_absolute_path, 'r') as file:
    text_content = file.read()

In [25]:
# Process the text file
wid,hei = image.size
process_text_file(text_file_absolute_path)

In [None]:
#output corresponding to list format
if __name__ == '__main__':
    output_list = []

    file_list = [filename for filename in os.listdir(out_path) if filename.endswith('.jpg')]
    file_list.sort(key=lambda x: int(''.join(filter(str.isdigit, x))))

    for filename in file_list:
        input_file = os.path.join(out_path, filename)
        output = read_ocr(service, input_file, 'ja')

        # 不要な文字（スペースとバックスラッシュ）を除去して一つの文字列に結合する
        cleaned_output = ''.join(line.strip().replace(' ', '').replace('/', '').replace('\n', '').replace('\\', '') for line in output)

        # 結果をリストに追加
        output_list.append(cleaned_output)

In [56]:
# Save the results to the output file
with open(output_file, 'w', encoding='utf-8') as file:
        for result in output_list:
            file.write(result + '\n')

print(f"Results saved to {output_file}")

Results saved to C:/Users/covid/text_recognition/output_results.txt


C:\Users\covid\text_recognition


In [61]:
import difflib
def main():
    file_a_path = "database.txt"
    file_b_path = "output_results.txt"

    with open(file_a_path, "r", encoding="utf-8") as file_a:
        lines_a = file_a.readlines()

    with open(file_b_path, "r", encoding="utf-8") as file_b:
        lines_b = file_b.readlines()

    num_columns = min(len(lines_a), len(lines_b))

    for col in range(num_columns):
        text_a = lines_a[col].strip()
        text_b = lines_b[col].strip()
        sim = difflib.SequenceMatcher(None, text_a, text_b).ratio()
        print(f"Column {col + 1}: ratio = {sim:.4f}")

if __name__ == "__main__":
    main()

Column 1: ratio = 0.3636
Column 2: ratio = 0.3030
Column 3: ratio = 0.0993
Column 4: ratio = 0.0465
Column 5: ratio = 0.0440
Column 6: ratio = 0.2857
Column 7: ratio = 0.3889
Column 8: ratio = 0.1928
Column 9: ratio = 0.0000
Column 10: ratio = 0.0328
Column 11: ratio = 0.0000
Column 12: ratio = 0.0000
Column 13: ratio = 0.1818
Column 14: ratio = 0.6275
Column 15: ratio = 0.4478
Column 16: ratio = 0.6777
Column 17: ratio = 0.0225
Column 18: ratio = 0.0392


In [68]:
import difflib

def main():
    file_a_path = "output_results.txt"
    file_b_path = "database.txt"

    with open(file_a_path, "r", encoding="utf-8") as file_a:
        lines_a = file_a.readlines()

    with open(file_b_path, "r", encoding="utf-8") as file_b:
        lines_b = file_b.readlines()

    for index_a, text_a in enumerate(lines_a):
        max_similarity = 0.0
        best_match = None
        best_match_text_b = None

        for index_b, text_b in enumerate(lines_b):
            similarity = difflib.SequenceMatcher(None, text_a, text_b).ratio()

            if similarity > max_similarity:
                max_similarity = similarity
                best_match = text_b
                best_match_text_b = text_b

        print(f"Text A (line {index_a + 1}): {text_a.strip()}")
        print(f"Best Match in Text B (line {lines_b.index(best_match) + 1}): {best_match.strip()}")
        print(f"Highest Similarity Ratio: {max_similarity:.4f}\n")

if __name__ == "__main__":
    main()


Text A (line 1): 機械振動学湖著
Best Match in Text B (line 1): 機械振動学佐藤秀紀岡部佐規一共著岩田佳雄元工業調査会
Highest Similarity Ratio: 0.4000

Text A (line 2): ロボット制御から動力学までロボット制御基礎から動力学まで浅ロボット制御
Best Match in Text B (line 2): 実践ロボット制御基礎から動力学まで細田耕[著]HosodaKoh株式会社アールティ[協力]RTCorporationOhmsha
Highest Similarity Ratio: 0.3168

Text A (line 3): book情報理論と符号理論Aプログラミングスマートスピーカー×自分でつくる人工知能AmazonEchoGoogleHuneジョーンズポンダッドw.マイナビ
Best Match in Text B (line 5): Aプログラミングスマートスピーカー×自分でつくる人工知能AmazonEcho.GoogleHomeポンダッド[著]W.マイナビ
Highest Similarity Ratio: 0.8028

Text A (line 4): P情報理論と符号理論ジョーンズTMジョーンズ
Best Match in Text B (line 13): MLPP機械学習プロフェッショナルシリーズトピックモデル岩田具治
Highest Similarity Ratio: 0.1786

Text A (line 5): 29パターン認識と機械学習上CM・ビショップベイズによる
Best Match in Text B (line 7): パターン認識と機械学習上ベイズ理論による統計的予測C・M・ビショップ著元田浩栗田多喜夫樋口知之松本裕治村田昇監訳丸善
Highest Similarity Ratio: 0.4773

Text A (line 6): CM・ビショップパターン認識と機械学習1:3
Best Match in Text B (line 6): はじめてのパターン認識平井有三著8497
Highest Similarity Ratio: 0.3182

Text A (line 7): パターン認識と機械学習による
Best Matc

In [76]:
import difflib

def main():
    file_a_path = "output_results.txt"
    file_b_path = "database.txt"

    with open(file_a_path, "r", encoding="utf-8") as file_a:
        lines_a = file_a.readlines()

    with open(file_b_path, "r", encoding="utf-8") as file_b:
        lines_b = file_b.readlines()

    for index_a, text_a in enumerate(lines_a):
        for index_b, text_b in enumerate(lines_b):
            similarity = difflib.SequenceMatcher(None, text_a, text_b).ratio()

            print(f"Text A (line {index_a + 1}): {text_a.strip()}")
            print(f"Text B (line {index_b + 1}): {text_b.strip()}")
            print(f"Similarity Ratio: {similarity:.4f}\n")

if __name__ == "__main__":
    main()


Text A (line 1): 機械振動学湖著
Text B (line 1): 機械振動学佐藤秀紀岡部佐規一共著岩田佳雄元工業調査会
Similarity Ratio: 0.4000

Text A (line 1): 機械振動学湖著
Text B (line 2): 実践ロボット制御基礎から動力学まで細田耕[著]HosodaKoh株式会社アールティ[協力]RTCorporationOhmsha
Similarity Ratio: 0.1096

Text A (line 1): 機械振動学湖著
Text B (line 3): 実践ロボット制御基礎から動力学まで細田耕[著]HosodaKoh株式会社アールティ[協力]RTCorporationOhmsha
Similarity Ratio: 0.1096

Text A (line 1): 機械振動学湖著
Text B (line 4): 実践ロボット制御基礎から動力学まで細田耕[著]HosodaKoh株式会社アールティ[協力]RTCorporationOhmsha
Similarity Ratio: 0.1096

Text A (line 1): 機械振動学湖著
Text B (line 5): Aプログラミングスマートスピーカー×自分でつくる人工知能AmazonEcho.GoogleHomeポンダッド[著]W.マイナビ
Similarity Ratio: 0.0556

Text A (line 1): 機械振動学湖著
Text B (line 6): はじめてのパターン認識平井有三著8497
Similarity Ratio: 0.1379

Text A (line 1): 機械振動学湖著
Text B (line 7): パターン認識と機械学習上ベイズ理論による統計的予測C・M・ビショップ著元田浩栗田多喜夫樋口知之松本裕治村田昇監訳丸善
Similarity Ratio: 0.1493

Text A (line 1): 機械振動学湖著
Text B (line 8): パターン認識と機械学習下ベイズ理論による統計的予測C・M・ビショップ著元田浩栗田多喜夫樋口知之松本裕治村田昇監訳丸善
Similarity Ratio: 0.1493

Text A (line 1): 機械振動学湖著
Text B