In [3]:
import cv2
import pytesseract
import numpy as np

# 如果在 Windows 上，确保设置 Tesseract 的路径
# pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load the input image.
IMAGE_PATH = 'test.png'
image = cv2.imread(IMAGE_PATH)
if image is None:
    # cv2.imread does not raise when the file is missing or cannot be decoded;
    # it returns None. Fail here with a clear message instead of letting the
    # cvtColor call below raise an opaque error.
    raise OSError(
        f"cv2.imread could not load {IMAGE_PATH!r}: "
        "the file is missing or is not a readable image"
    )

# Pre-processing: convert the BGR image to grayscale.
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# 1. Denoise with non-local means. The positional arguments after the
#    destination (None) are h=30, templateWindowSize=7, searchWindowSize=21.
# NOTE(review): h=30 is a very strong filter setting and may smooth away thin
# character strokes -- worth retrying with a smaller h if OCR returns nothing.
gray_image = cv2.fastNlMeansDenoising(gray_image, None, 30, 7, 21)

# 2. Binarise with Otsu's automatically selected threshold (the explicit
#    threshold argument 0 is ignored when THRESH_OTSU is set).
_, binary_image = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# 3. Morphological closing with a 3x3 kernel to clean up small specks.
kernel = np.ones((3, 3), np.uint8)
morph_image = cv2.morphologyEx(binary_image, cv2.MORPH_CLOSE, kernel)

# 尝试旋转图像的不同角度来应对可能的旋转
def rotate_image(image, angle):
    """Rotate ``image`` counter-clockwise by ``angle`` degrees without cropping.

    The previous implementation always warped onto a canvas of the original
    ``(w, h)`` size, so rotating a non-square image by 90 or 270 degrees cut
    off part of the content and padded the rest with black.

    Args:
        image: Image array whose first two axes are (height, width); an
            optional trailing channel axis is preserved.
        angle: Rotation in degrees. Positive values rotate counter-clockwise.

    Returns:
        A new array holding the rotated image. For multiples of 90 degrees the
        result is exact and has its height/width swapped when appropriate; for
        any other angle the canvas is enlarged to the rotated bounding box.
    """
    quarter_turns, remainder = divmod(angle % 360, 90)
    if remainder == 0:
        # Quarter turns are a pure re-indexing of pixels: no interpolation and
        # no cropping. .copy() yields a fresh C-contiguous array, so the
        # result never aliases the input.
        return np.rot90(image, int(quarter_turns)).copy()

    (h, w) = image.shape[:2]
    center = (w / 2.0, h / 2.0)
    matrix = cv2.getRotationMatrix2D(center, angle, 1.0)

    # Size of the axis-aligned bounding box of the rotated image.
    cos_a = abs(matrix[0, 0])
    sin_a = abs(matrix[0, 1])
    new_w = int(np.ceil(h * sin_a + w * cos_a))
    new_h = int(np.ceil(h * cos_a + w * sin_a))

    # Shift the transform so the rotated content is centred in the new canvas.
    matrix[0, 2] += new_w / 2.0 - center[0]
    matrix[1, 2] += new_h / 2.0 - center[1]

    return cv2.warpAffine(image, matrix, (new_w, new_h))

# Candidate orientations to try, in order; the first one that yields any
# non-whitespace OCR output wins.
angles = [0, 90, 180, 270]
recognized_text = None

for angle in angles:
    rotated_image = rotate_image(morph_image, angle)

    # Run Tesseract OCR on this orientation.
    text = pytesseract.image_to_string(rotated_image)

    if not text.strip():
        # Nothing but whitespace came back; move on to the next angle.
        continue

    recognized_text = text
    break

# Report the outcome.
print(f"识别到的文本：{recognized_text}" if recognized_text else "未识别到任何文本")


未识别到任何文本


In [6]:
# Retry OCR with Tesseract page-segmentation mode 6 on the preprocessed,
# unrotated image. The input is named explicitly instead of reusing
# `rotated_image`, which is only whatever the rotation loop assigned last
# (the 270-degree rotation when no angle produced text).
text = pytesseract.image_to_string(morph_image, config='--psm 6')
text

'|\n\x0c'

In [None]:
import easyocr

# Inputs for the EasyOCR attempt: language list and the image file to read.
ocr_languages = ['en']
easyocr_input = 'test.png'

# Build the reader with GPU use disabled.
reader = easyocr.Reader(ocr_languages, gpu=False)

# Run recognition on the original image file, passing detail=0.
result = reader.readtext(easyocr_input, detail=0)

print("Recognized Text:", result)
