In [1]:
import os
import tkinter as tk
from tkinter import filedialog
from PIL import Image, ImageTk
from paddleocr import PaddleOCR, draw_ocr

ocr = PaddleOCR(lang='ch')  # 初始化 PaddleOCR

def extract_chinese_text(processed_image_path):
    result = ocr.ocr(processed_image_path, cls=False)
    extracted_text = "\n".join([line[1][0] for res in result for line in res])
    return extracted_text

def open_file():
    file_path = filedialog.askopenfilename(filetypes=[("Image files", "*.png;*.jpg;*.jpeg;*.bmp;*.tiff")])
    if not file_path:
        return
    processed_image_path = file_path
    extracted_text = extract_chinese_text(processed_image_path)
    text_display.delete("1.0", tk.END)
    text_display.insert(tk.END, extracted_text)
    load_image(file_path)

def load_image(file_path):
    img = Image.open(file_path)
    img.thumbnail((500, 500))
    img = ImageTk.PhotoImage(img)
    image_label.config(image=img)
    image_label.image = img

# 创建GUI
root = tk.Tk()
root.title("OCR文字识别")

frame = tk.Frame(root)
frame.pack(pady=10)

btn_open = tk.Button(frame, text="打开图片", command=open_file)
btn_open.pack()

image_label = tk.Label(root)
image_label.pack()

text_display = tk.Text(root, wrap=tk.WORD, height=10, width=60)
text_display.pack(pady=10)

root.mainloop()




[2025/02/21 21:33:40] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\epiph/.paddleocr/whl\\det\\ch\\ch_PP-OCRv4_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\epiph/.paddleocr/whl\\rec\\ch\\ch_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=

In [4]:
import cv2
from paddleocr import PaddleOCR
from PIL import Image
import tkinter as tk
from tkinter import filedialog, messagebox
import numpy as np
import os

ROI_CONFIG_PATH = "roi_config.npy"  # 预设区域保存文件
ocr = PaddleOCR(use_angle_cls=True, lang='ch')  # 初始化 OCR

def read_image_with_chinese_path(image_path):
    """ 使用 numpy 方式读取包含中文路径的图片 """
    image_stream = np.fromfile(image_path, dtype=np.uint8)
    img = cv2.imdecode(image_stream, cv2.IMREAD_COLOR)
    return img

def save_roi_config(roi1, roi2):
    """ 保存两个 ROI 预设区域 """
    np.save(ROI_CONFIG_PATH, np.array([roi1, roi2], dtype=object))
    print("识别区域已保存！")

def load_roi_config():
    """ 读取 ROI 预设区域 """
    if os.path.exists(ROI_CONFIG_PATH):
        return np.load(ROI_CONFIG_PATH, allow_pickle=True)
    return None

def select_roi(image_path):
    """ 手动选择 ROI 区域 """
    img = read_image_with_chinese_path(image_path)
    if img is None:
        print("错误：无法加载图片，请检查路径是否正确。")
        return None

    roi = cv2.selectROI("Select Region", img, fromCenter=False, showCrosshair=True)
    cv2.destroyAllWindows()
    return roi

def extract_chinese_text(image_path, roi):
    """ 使用 PaddleOCR 识别指定区域的中文文本 """
    img = read_image_with_chinese_path(image_path)
    if img is None:
        print("错误：无法加载图片，请检查路径是否正确。")
        return ""

    x, y, w, h = roi
    roi_img = img[y:y+h, x:x+w]

    # 转换为适合 PaddleOCR 的格式
    processed_image_path = "processed_region.png"
    cv2.imwrite(processed_image_path, roi_img)

    # 使用 PaddleOCR 进行识别
    results = ocr.ocr(processed_image_path, cls=True)
    extracted_text = "\n".join([line[1][0] for result in results for line in result])
    
    return extracted_text.strip()

class OCRApp:
    def __init__(self, root):
        self.root = root
        self.root.title("批量OCR识别")
        
        self.roi_config = load_roi_config()
        if self.roi_config is None:
            self.roi1, self.roi2 = None, None
        else:
            self.roi1, self.roi2 = self.roi_config
        
        self.current_roi = None

        # 按钮
        self.btn_set_roi1 = tk.Button(root, text="设定区域1", command=self.set_roi1)
        self.btn_set_roi1.pack(pady=5)

        self.btn_set_roi2 = tk.Button(root, text="设定区域2", command=self.set_roi2)
        self.btn_set_roi2.pack(pady=5)

        self.btn_select_roi1 = tk.Button(root, text="使用区域1", command=self.use_roi1, state=tk.DISABLED)
        self.btn_select_roi1.pack(pady=5)

        self.btn_select_roi2 = tk.Button(root, text="使用区域2", command=self.use_roi2, state=tk.DISABLED)
        self.btn_select_roi2.pack(pady=5)

        self.btn_select_image = tk.Button(root, text="选择图片进行OCR", command=self.process_image, state=tk.DISABLED)
        self.btn_select_image.pack(pady=10)

        self.text_result = tk.Text(root, height=10, width=50)
        self.text_result.pack(pady=10)

        if self.roi1 is not None:
            self.btn_select_roi1.config(state=tk.NORMAL)
        if self.roi2 is not None:
            self.btn_select_roi2.config(state=tk.NORMAL)

    def set_roi1(self):
        image_path = filedialog.askopenfilename(title="选择图片来设定区域1")
        if image_path:
            roi = select_roi(image_path)
            if roi:
                self.roi1 = roi
                self.save_roi()
                self.btn_select_roi1.config(state=tk.NORMAL)
                messagebox.showinfo("成功", "区域1 已设定！")

    def set_roi2(self):
        image_path = filedialog.askopenfilename(title="选择图片来设定区域2")
        if image_path:
            roi = select_roi(image_path)
            if roi:
                self.roi2 = roi
                self.save_roi()
                self.btn_select_roi2.config(state=tk.NORMAL)
                messagebox.showinfo("成功", "区域2 已设定！")

    def use_roi1(self):
        if self.roi1:
            self.current_roi = self.roi1
            self.btn_select_image.config(state=tk.NORMAL)
            messagebox.showinfo("成功", "已选择 区域1 进行 OCR")

    def use_roi2(self):
        if self.roi2:
            self.current_roi = self.roi2
            self.btn_select_image.config(state=tk.NORMAL)
            messagebox.showinfo("成功", "已选择 区域2 进行 OCR")

    def process_image(self):
        if self.current_roi is None:
            messagebox.showwarning("错误", "请先选择一个识别区域！")
            return

        image_path = filedialog.askopenfilename(title="选择要识别的图片")
        if not image_path:
            return

        text = extract_chinese_text(image_path, self.current_roi)
        self.text_result.delete("1.0", tk.END)
        self.text_result.insert(tk.END, text)

    def save_roi(self):
        save_roi_config(self.roi1, self.roi2)

if __name__ == "__main__":
    root = tk.Tk()
    app = OCRApp(root)
    root.mainloop()


[2025/02/21 21:44:21] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\epiph/.paddleocr/whl\\det\\ch\\ch_PP-OCRv4_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\epiph/.paddleocr/whl\\rec\\ch\\ch_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=

Exception in Tkinter callback
Traceback (most recent call last):
  File "d:\Users\epiph\anaconda3\Lib\tkinter\__init__.py", line 1948, in __call__
    return self.func(*args)
           ^^^^^^^^^^^^^^^^
  File "C:\Users\epiph\AppData\Local\Temp\ipykernel_1364\2870875789.py", line 118, in use_roi1
    if self.roi1:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Exception in Tkinter callback
Traceback (most recent call last):
  File "d:\Users\epiph\anaconda3\Lib\tkinter\__init__.py", line 1948, in __call__
    return self.func(*args)
           ^^^^^^^^^^^^^^^^
  File "C:\Users\epiph\AppData\Local\Temp\ipykernel_1364\2870875789.py", line 118, in use_roi1
    if self.roi1:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Exception in Tkinter callback
Traceback (most recent call last):
  File "d:\Users\epiph\anaconda3\Lib\tkinter\__init__.py", line 1948, in __call__
    return se

识别区域已保存！


Exception in Tkinter callback
Traceback (most recent call last):
  File "d:\Users\epiph\anaconda3\Lib\tkinter\__init__.py", line 1948, in __call__
    return self.func(*args)
           ^^^^^^^^^^^^^^^^
  File "C:\Users\epiph\AppData\Local\Temp\ipykernel_1364\2870875789.py", line 124, in use_roi2
    if self.roi2:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()


[2025/02/21 21:44:55] ppocr DEBUG: dt_boxes num : 10, elapsed : 0.6089584827423096
[2025/02/21 21:44:55] ppocr DEBUG: cls num  : 10, elapsed : 0.17021989822387695
[2025/02/21 21:44:58] ppocr DEBUG: rec_res num  : 10, elapsed : 3.5089123249053955


In [7]:
import numpy as np

# 读取 roi_config.npy 文件
file_path = 'roi_config.npy'  # 替换为您的文件路径
try:
    roi_config = np.load(file_path, allow_pickle=True)
    print("roi_config 的形状:", roi_config.shape)
    print("roi_config 的数据类型:", roi_config.dtype)
    print("roi_config 的内容:", roi_config)
except Exception as e:
    print("读取文件时出现错误:", e)


roi_config 的形状: (2, 4)
roi_config 的数据类型: object
roi_config 的内容: [[144 101 973 655]
 [170 647 949 213]]


In [12]:
import cv2
from paddleocr import PaddleOCR
from PIL import Image, ImageTk
import tkinter as tk
from tkinter import filedialog, messagebox
import numpy as np
import os

ROI_CONFIG_PATH = "roi_config.npy"
ocr = PaddleOCR(use_angle_cls=True, lang='ch')

def read_image_with_chinese_path(image_path):
    image_stream = np.fromfile(image_path, dtype=np.uint8)
    img = cv2.imdecode(image_stream, cv2.IMREAD_COLOR)
    return img

def save_roi_config(roi1, roi2):
    np.save(ROI_CONFIG_PATH, np.array([roi1, roi2], dtype=object))
    print("识别区域已保存！")

def load_roi_config():
    if os.path.exists(ROI_CONFIG_PATH):
        return np.load(ROI_CONFIG_PATH, allow_pickle=True)
    return None

def select_roi(image_path):
    img = read_image_with_chinese_path(image_path)
    if img is None:
        print("错误：无法加载图像")
        return None
    roi = cv2.selectROI("选择区域", img, fromCenter=False, showCrosshair=True)
    cv2.destroyAllWindows()
    return roi

def extract_chinese_text(image_path, roi):
    img = read_image_with_chinese_path(image_path)
    if img is None:
        return ""

    x, y, w, h = roi
    roi_img = img[y:y+h, x:x+w]
    processed_image_path = "processed_region.png"
    cv2.imwrite(processed_image_path, roi_img)

    results = ocr.ocr(processed_image_path, cls=True)
    extracted_text = "\n".join([line[1][0] for result in results for line in result])
    
    return extracted_text.strip()

class OCRApp:
    def __init__(self, root):
        self.root = root
        self.root.title("批量OCR识别")

        self.roi_config = load_roi_config()
        self.roi1, self.roi2 = self.roi_config if self.roi_config is not None else (None, None)

        self.image_paths = []
        self.current_image_index = 0

        # 按钮
        self.btn_select_folder = tk.Button(root, text="选择文件夹", command=self.select_folder)
        self.btn_select_folder.pack(pady=5)

        self.btn_set_roi1 = tk.Button(root, text="设定区域1", command=self.set_roi1)
        self.btn_set_roi1.pack(pady=5)

        self.btn_set_roi2 = tk.Button(root, text="设定区域2", command=self.set_roi2)
        self.btn_set_roi2.pack(pady=5)

        self.btn_use_roi1 = tk.Button(root, text="使用区域1识别并显示下一张", command=self.process_next_image_roi1, state=tk.DISABLED)
        self.btn_use_roi1.pack(pady=5)

        self.btn_use_roi2 = tk.Button(root, text="使用区域2识别并显示下一张", command=self.process_next_image_roi2, state=tk.DISABLED)
        self.btn_use_roi2.pack(pady=5)

        self.btn_skip = tk.Button(root, text="跳过当前图片", command=self.skip_image, state=tk.DISABLED)
        self.btn_skip.pack(pady=5)

        self.btn_prev_image = tk.Button(root, text="返回上一张图片", command=self.prev_image, state=tk.DISABLED)
        self.btn_prev_image.pack(pady=5)

        self.img_label = tk.Label(root)
        self.img_label.pack(pady=10)

        self.text_result = tk.Text(root, height=20, width=50)
        self.text_result.pack(pady=10)

        self.btn_save_text = tk.Button(root, text="保存识别文本", command=self.save_text)
        self.btn_save_text.pack(pady=5)

    def select_folder(self):
        folder_path = filedialog.askdirectory(title="选择图片文件夹")
        if folder_path:
            self.image_paths = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.lower().endswith(('.png', '.jpg', '.jpeg'))]
            self.current_image_index = 0
            self.update_buttons()
            if self.image_paths:
                self.display_image(self.image_paths[self.current_image_index])

    def display_image(self, image_path):
        img = Image.open(image_path)
        img.thumbnail((400, 400))
        img_tk = ImageTk.PhotoImage(img)
        self.img_label.config(image=img_tk)
        self.img_label.image = img_tk

    def set_roi1(self):
        image_path = filedialog.askopenfilename(title="选择图片来设定区域1")
        if image_path:
            roi = select_roi(image_path)
            if roi:
                self.roi1 = roi
                self.save_roi()
                messagebox.showinfo("成功", "区域1 已设定！")

    def set_roi2(self):
        image_path = filedialog.askopenfilename(title="选择图片来设定区域2")
        if image_path:
            roi = select_roi(image_path)
            if roi:
                self.roi2 = roi
                self.save_roi()
                messagebox.showinfo("成功", "区域2 已设定！")

    def process_next_image_roi1(self):
        """ 使用区域1进行识别并跳到下一张图片 """
        self.process_next_image(self.roi1)

    def process_next_image_roi2(self):
        """ 使用区域2进行识别并跳到下一张图片 """
        self.process_next_image(self.roi2)

    def process_next_image(self, roi):
        if roi is None:
            messagebox.showwarning("错误", "请先设定识别区域！")
            return

        if self.current_image_index < len(self.image_paths):
            image_path = self.image_paths[self.current_image_index]
            text = extract_chinese_text(image_path, roi)
            # self.text_result.insert(tk.END, f"Image: {os.path.basename(image_path)}\n{text}\n\n")
            self.text_result.insert(tk.END, f"\n------{os.path.basename(image_path)}------\n{text}\n\n")
            self.current_image_index += 1
            self.update_buttons()
            if self.current_image_index < len(self.image_paths):
                self.display_image(self.image_paths[self.current_image_index])
            else:
                messagebox.showinfo("完成", "所有图片已处理！")

    def prev_image(self):
        """ 返回上一张图片 """
        if self.current_image_index > 0:
            self.current_image_index -= 1
            self.display_image(self.image_paths[self.current_image_index])
            self.update_buttons()

    def skip_image(self):
        """ 跳过当前图片，不进行识别 """
        if self.current_image_index < len(self.image_paths) - 1:
            self.current_image_index += 1
            self.display_image(self.image_paths[self.current_image_index])
            self.update_buttons()
        else:
            messagebox.showinfo("完成", "已到最后一张图片！")

    def save_text(self):
        with open("recognized_text.txt", "a", encoding="utf-8") as f:
            f.write(self.text_result.get("1.0", tk.END))
            messagebox.showinfo("成功", "识别文本已保存")

    def save_roi(self):
        save_roi_config(self.roi1, self.roi2)

    def update_buttons(self):
        """ 更新按钮状态 """
        has_images = self.current_image_index < len(self.image_paths)
        self.btn_use_roi1.config(state=tk.NORMAL if has_images else tk.DISABLED)
        self.btn_use_roi2.config(state=tk.NORMAL if has_images else tk.DISABLED)
        self.btn_skip.config(state=tk.NORMAL if has_images else tk.DISABLED)
        self.btn_prev_image.config(state=tk.NORMAL if self.current_image_index > 0 else tk.DISABLED)

if __name__ == "__main__":
    root = tk.Tk()
    app = OCRApp(root)
    root.mainloop()


[2025/02/21 23:09:29] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\epiph/.paddleocr/whl\\det\\ch\\ch_PP-OCRv4_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\epiph/.paddleocr/whl\\rec\\ch\\ch_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=