In [1]:
import cv2
import os
import numpy as np
from matplotlib import pyplot as plt

In [2]:
# 读取单页漫画
image_path = './dataset/color_comics/001-19.jpg'
img = cv2.imread(image_path)

In [3]:
img_h,img_w,_ = img.shape

# 获取panels的坐标

In [4]:
def get_top_contours(contours, hierarchy):
    """
    获得顶层轮廓的下标及轮廓信息
    :param contours: 所有的轮廓信息列表
    :param hierarchy: 对应的轮廓等级信息列表，尺寸为(N,4)
    :return top_index, top_contours: 所有父轮廓的下标，轮廓信息
    """    
    top_contour_index = -1
 
    top_contours = []   # 最外层轮廓坐标
    top_index = []  # 最外层轮廓索引
    for i, contour in enumerate(contours):     # 顶层轮廓没有父轮廓
        if hierarchy[i][3] == top_contour_index:
            top_contours.append(contour)
            top_index.append(i)
    
    return top_index, top_contours

In [5]:
def filterContoursBySize(contours, img_h, img_w, mode=1, ratio=1/10):
    """
    根据轮廓的尺寸筛选符合要求的轮廓
    :param contours: 所有轮廓的坐标信息
    :param img_h, img_w: 原始BGR图像的高度和宽度
    :param mode: _int_ 默认使用长宽or占比方案进行筛选.若值为2，则使用&占比方案;若值为3，则使用面积占比进行筛选
    :param ratio: 设置占比比例
    :return contours, rects: 筛选后的contours及其对应的最小外界矩形坐标
    """
    total_area = img_h * img_w
    filter_contours = []
    rects = [] # 存储最小外接矩形

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        rect = np.array([x, y, w, h])
        if mode == 1:
            if (w <= img_w*ratio) or (h <= img_h*ratio):
                pass
            else:
                rects.append(rect)
                filter_contours.append(contour)
        elif mode == 2:
            if (w <= img_w*ratio) & (h <= img_h*ratio):
                pass
            else:
                rects.append(rect)
                filter_contours.append(contour)
        elif mode == 3:
            contour_area = cv2.contourArea(contour)
            area_ratio = contour_area / total_area
            if area_ratio > ratio:
                rects.append(rect)
                filter_contours.append(contour)
            else: 
                pass
    
    return filter_contours, rects

In [6]:
def panels(image, read_mode=1, filter_mode=2, ratio=0.1):
    """
    读取image并返回panels的坐标集

    Args:
        image (_type_): 编码图像
        read_mode(_int_): 默认阅读顺序为从左到右。若值为2，则阅读顺序为从右到左
        filter_mode(_int_): 默认使用长宽&占比方案进行筛选.若值为1,则使用or占比方案;若值为3，则使用面积占比进行筛选

    Returns:
        panel_contours: panel的轮廓信息列表 
        rects: panel最小外接矩形坐标列表
    """
    
    img = np.copy(image)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    threshold = 200

    # 应用阈值处理得到二值图像
    # 检测对象为白色，背景为黑色
    _, binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY_INV)

    img_h,img_w = gray.shape
    total_area = img_h * img_w

	# morphological transformation
    kernel = np.ones((5,5), np.uint8)
    closing = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel) # 去除白色对象中的黑色噪点
    gradient = cv2.morphologyEx(closing, cv2.MORPH_GRADIENT, kernel) # 留下白色对象的轮廓
	
    binary = gradient

    # 查找所有轮廓，并建立等级关系
    contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    hierarchy = hierarchy.reshape(-1, 4) # reshape hierarchy

    # 查找所有的最外层轮廓
    top_index, top_contours = get_top_contours(contours, hierarchy)
    
    # 根据长宽占比筛选符合要求的panel
    panel_contours, rects = filterContoursBySize(top_contours, img_h, img_w, filter_mode, ratio)

    # 根据阅读顺序重新排序panels列表
    sorted_panel_contours = []
    sorted_rects = []

    if(read_mode==1):
        sorted_index_rects = sorted(enumerate(rects), key=lambda x: (x[1][1], x[1][0])) # 按照y,x从小到大排序
        for index, value in sorted_index_rects:
            sorted_panel_contours.append(panel_contours[index])
            sorted_rects.append(value)
    else:
        sorted_index_rects = sorted(enumerate(rects), key=lambda x: (x[1][1], -x[1][0])) # 按照y从小到大, x从大到小
        for index, value in sorted_index_rects:
            sorted_panel_contours.append(panel_contours[index])
            sorted_rects.append(value)
                            
    return sorted_panel_contours, sorted_rects

In [7]:
# panel特征提取
panel_contours, rects = panels(img) # 坐标

In [8]:
def drawPanels(rects, image):
    """
    绘制image的panels边框
    :param rects: panels的边框信息
    :param image: 待绘制的image
    :return: image
    """
    img = np.copy(image)

    for i in range(len(rects)):
        x, y, w, h = rects[i]
        # 在图像上绘制矩形
        cv2.rectangle(img, (x, y), (x + w, y + h), (0,0,255), 10)
    
    return img

In [9]:
# 绘制并存储验证图像
panel_image = drawPanels(rects, img)

In [10]:
# 显示图像
cv2.namedWindow('Panels', cv2.WINDOW_NORMAL) 
cv2.resizeWindow('Panels', width=int(img_w*0.4), height=int(img_h*0.4))
cv2.imshow('Panels', panel_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

# 获取bubble的坐标

In [227]:
def shrinkByPixels(img, rect, pixels=0):
    """
    crop image by rect coordinates & removing some pixels
    """
    image = img.copy()
    img_h, img_w, _ = img.shape

    x, y, w, h = rect
    y_min = max(y-pixels, 0)
    y_max = min(y+h+pixels, img_h)
    x_min = max(x-pixels, 0)
    x_max = min(x+w+pixels, img_w)
    roi_image = image[y_min:y_max, x_min:x_max]
    roi_rect = [x_min, y_min, (x_max-x_min), (y_max-y_min)]

    return roi_image, roi_rect

In [228]:
def create_roi_image(img, contour, pixels=0):
    """
    提取给定轮廓信息的ROI
    :param img: BGR编码的image图像
    :param contour: 轮廓信息
    :return roi_image: BGR编码的roi图像
    """
    image = img.copy()

    # 创建img的一个全黑掩模
    h, w, _ = image.shape # 获取高度,宽度
    mask = np.zeros((h, w), dtype=np.uint8)
 
    # 在掩模上绘制所有轮廓并填充为白色（ROI）
    cv2.drawContours(mask, [contour], -1, 255, thickness=-1)
 
    # 分割ROI区域
    roi = cv2.bitwise_and(image, image, mask=mask)
    rect = cv2.boundingRect(contour)
    roi_image, roi_rect = shrinkByPixels(roi, rect, pixels)
    
    return roi_image, roi_rect

In [229]:
def create_roi_panels(img, panel_contours, pixels=0):
    """
    提取给定panel轮廓信息的ROI图像列表
    :param img: BGR编码的image图像
    :param panel_contours: panel轮廓信息列表
    :return roi_panels, roi_panel_rects: BGR编码的切割panel列表,panel在原始图像中的坐标信息 
    """
    image = img.copy()
    roi_panels = [] # 存储列表
    roi_panel_rects = [] # 坐标列表

    for contour in panel_contours:
        roi_panel, roi_panel_rect = create_roi_image(image, contour, pixels)
        roi_panels.append(roi_panel)
        roi_panel_rects.append(roi_panel_rect)

    return roi_panels, roi_panel_rects

In [230]:
def get_child_contours(contours, hierarchy, parent_index):
    """
    获得指定父轮廓的子轮廓
    :param contours: 所有的轮廓信息列表
    :param hierarchy: 对应的轮廓等级信息列表
    :param parent_index: 父轮廓对应的下标
    :return child_index, child_contours: 所有子轮廓的下标，轮廓信息
    """

    child_contours = []
    child_index = []

    for i in parent_index:
        for j, contour in enumerate(contours):
        # 存储二级轮廓的所有子轮廓
            if hierarchy[j][3] == i:
                child_contours.append(contour)
                child_index.append(j)
    
    return child_index, child_contours

In [231]:
def compute_grayscale_ratio(roi_image, lower_bound, upper_bound):
    """
    给定一个灰度值范围，计算范围内的像素点占比
    :param roi_image: 仅包含ROI区域的bgr图像
    :return background_ratio: 像素点占比
    """
    
    # 转换为灰度图像
    gray = cv2.cvtColor(roi_image, cv2.COLOR_BGR2GRAY)
    
    total_pixels = gray.size # roi_image的总像素
    
    thresholded_image = cv2.inRange(gray, lower_bound, upper_bound)

    # 统计符合条件的像素数量
    thresholded_pixels = np.sum(thresholded_image == 255)
    
    return round(thresholded_pixels/total_pixels, 2)

In [232]:
def filterContoursByGray(img, contours, lower_bound, upper_bound, ratio=0.3):
    """
    通过灰度值占比筛选轮廓
    :param img: 原始BGR图像
    :param contours: 所有轮廓的坐标信息
    :param lower_bound: 最小灰度值
    :param upper_bound: 最大灰度值
    :param ratio: 筛选区域所占最小像素比
    :return contours, rects: 筛选后的contours及其对应的最小外界矩形坐标
    """

    filter_contours = [] # 存储轮廓的坐标
    rects = [] # 存储最小外接矩形坐标
    for i, contour in enumerate(contours):
        roi_image, roi_image_rect = create_roi_image(img, contour)
        grayscale_ratio = compute_grayscale_ratio(roi_image, lower_bound, upper_bound)
        if grayscale_ratio >= ratio:
            # print(f"index: {i}, white_ratio: {grayscale_ratio}")
            filter_contours.append(contour)
            rect = cv2.boundingRect(contour)
            rects.append(rect)
    
    return filter_contours, rects

In [233]:
from paddleocr import PaddleOCR

In [234]:
# 初始化PaddleOCR
ocr = PaddleOCR(use_angle_cls=True, lang='ch')

[2025/01/14 03:39:41] ppocr DEBUG: Namespace(alpha=1.0, alphacolor=(255, 255, 255), benchmark=False, beta=1.0, binarize=False, cls_batch_num=6, cls_image_shape='3, 48, 192', cls_model_dir='C:\\Users\\lin kexiu/.paddleocr/whl\\cls\\ch_ppocr_mobile_v2.0_cls_infer', cls_thresh=0.9, cpu_threads=10, crop_res_save_dir='./output', det=True, det_algorithm='DB', det_box_type='quad', det_db_box_thresh=0.6, det_db_score_mode='fast', det_db_thresh=0.3, det_db_unclip_ratio=1.5, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_east_score_thresh=0.8, det_limit_side_len=960, det_limit_type='max', det_model_dir='C:\\Users\\lin kexiu/.paddleocr/whl\\det\\ch\\ch_PP-OCRv4_det_infer', det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, det_pse_thresh=0, det_sast_nms_thresh=0.2, det_sast_score_thresh=0.5, draw_img_save_dir='./inference_results', drop_score=0.5, e2e_algorithm='PGNet', e2e_char_dict_path='./ppocr/utils/ic15_dict.txt', e2e_limit_side_len=768, e2e_limit_type='max', e2e_model_d

In [235]:
def filterBubblesByText(img, contours, rects, text_direction=1, confidence_threshold=0.5):
    """
    通过是否包含文字筛选对话框
    :param img: roi_panel的灰度图像
    :param contours: bubble的轮廓信息
    :param rects: bubble的最小外接矩形坐标信息
    :param text_direction: 文字方向默认为横向。若为2,则为竖向
    :param confidence_threshold: 文字识别置信度阈值
    :return bubble_texts, bubble_rects, bubble_contours: 对话框文字列表,筛选对话框坐标及轮廓信息
    """
    panel = img.copy()
    panel_gray = cv2.cvtColor(panel, cv2.COLOR_BGR2GRAY)
    if text_direction == 2:
        panel_gray = cv2.rotate(panel_gray, cv2.ROTATE_90_COUNTERCLOCKWISE)  # 逆时针旋转90度
    
    bubble_texts = []
    bubble_rects = []
    bubble_contours = []
    for i in range(len(rects)):
        x, y, w, h = rects[i]
        bubble_gray = panel_gray[y:y+h, x:x+w]
        result = ocr.ocr(bubble_gray, cls=True)
        result = result[0]
        if result is not None:
            bubble_rects.append(rects[i])
            bubble_contours.append(contours[i])
            text_result = []
            for line in result:
                text = line[1][0]
                confidence = line[1][1]
                if confidence > confidence_threshold:
                    text_result.append(text)
            text = "".join(text_result)
            bubble_texts.append(text)      

    return bubble_texts, bubble_rects, bubble_contours

In [236]:
def panel_findSpeechBubbles(panelImg, mode=1, read_mode=1):
    """
    Find all speech bubbles in the given comic panel and return a list of their contours
    :param panelImg: given comic panel
    :param mode: 筛选bubble的条件
    :param read_mode(_int_): 默认文字阅读顺序为从左到右。若值为2，则阅读顺序为从右到左
    :return bubble_contours, bubble_rects: a list of speech bubbles 
    """
    image = panelImg.copy()

    # 转换为灰度图像
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    img_h,img_w = gray.shape

    # 应用阈值处理得到二值图像
    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)    # 默认对话框背景为白色，字体为深色

    # 去除黑色背景里的白色噪点
    kernel = np.ones((5,5), np.uint8)
    opening = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

    # 保留白色前景的轮廓
    kernel = np.ones((5,5), np.uint8)
    gradient = cv2.morphologyEx(opening, cv2.MORPH_GRADIENT, kernel)

    binary = gradient

    # 查找所有轮廓，并建立等级关系
    contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if hierarchy is None:
        bubble_contours = []
        bubble_rects = []
    else:
        hierarchy = hierarchy.reshape(-1,4)

        # 按照尺寸筛选所有的二级轮廓
        top_index, top_contours = get_top_contours(contours, hierarchy)
        child_index, child_contours = get_child_contours(contours, hierarchy, top_index)

        contours, rects = filterContoursBySize(child_contours, img_h, img_w, mode, ratio=1/15)

        # 通过颜色检测进一步筛选轮廓得到对话框
        bubble_contours, bubble_rects = filterContoursByGray(image, contours, 230, 255, ratio=0.3)

    sorted_bubble_contours = []
    sorted_bubble_rects = []
    if(read_mode==1):
        sorted_index_rects = sorted(enumerate(bubble_rects), key=lambda x: (x[1][0], x[1][1])) # 按照x,y从小到大排序
        for index, value in sorted_index_rects:
            sorted_bubble_contours.append(bubble_contours[index])
            sorted_bubble_rects.append(value)
    else:
        sorted_index_rects = sorted(enumerate(bubble_rects), key=lambda x: (-x[1][0], x[1][1])) # 按照x从大到小,y从小到大排序
        for index, value in sorted_index_rects:
            sorted_bubble_contours.append(bubble_contours[index])
            sorted_bubble_rects.append(value)

    return sorted_bubble_contours, sorted_bubble_rects

In [237]:
def create_roi_not_bubble(panel, bubble_contours):
    """
    提取给定轮廓信息的ROI
    :param panel: BGR编码的panel图像
    :param bubble_contours: bubble轮廓信息
    :return roi_not_bubble: BGR编码的roi图像
    """
    image = panel.copy()

    h, w, _ = image.shape # 获取高度,宽度
    # 创建一个全白的掩膜，注意这里的图像类型是uint8，所以是255
    mask = np.ones((h, w), dtype=np.uint8) * 255
 
    # 在掩模上绘制所有轮廓并填充为黑色（ROI）
    for contour in bubble_contours:
        cv2.drawContours(mask, [contour], -1, 0, thickness=-1)
 
    roi_not_bubble = cv2.bitwise_and(image, image, mask=mask)
    
    return roi_not_bubble

In [251]:
def panel_getBackgroundText(panel, confidence_threshold=0.5):
    """
    :param panel: 去除对话框的BGR panel图像
    :param confidence_threshold: 识别文字置信度阈值
    :return background_texts: 背景文字
    """
    image = panel.copy()
    panel_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    background_texts = []
    result = ocr.ocr(panel, cls=True)
    result = result[0]
    if result is not None:
        texts = []
        for line in result:
            text = line[1][0]
            confidence = line[1][1]
            if confidence > confidence_threshold:
                texts.append(text)
        background_text = "".join(texts)
        background_texts.append(background_text)

    return background_texts

In [255]:
def comic_findSpeechBubbles(img, panel_contours, pixels=10, read_mode=1, text_direction=1, mode=1, confidence_threshold=0.5):
    """
    Find all speech bubbles in the given comic page and return a list of their contours
    :param img: 原始BGR编码图像
    :param panel_contours: 图像对应的panel轮廓信息
    :param pixels: 切割panel时向外移动的像素点
    :param read_mode(_int_): 默认文字阅读顺序为从左到右。若值为2，则阅读顺序为从右到左
    :param text_direction(_int_): 默认文字方向为横向。若值为2，则文字为竖向
    :param mode: 筛选bubble的条件
    :param confidence_threshold: 文字识别的置信度阈值
    :return page_bubbles: a dict of speech bubbles {[bubble_texts], [background_texts], [bubble_rects], [bubble_nums]}
    """
    image = img.copy()
    page_bubbles = {
        "bubble_texts": [],
        "background_texts": [],
        "bubble_rects": [],
        "bubble_nums": []
    } # 漫画页的bubble信息

    roi_panels, roi_panel_rects = create_roi_panels(image, panel_contours, pixels) # 原始尺寸的panels掩码列表

    for i, roi_panel in enumerate(roi_panels):
        bubble_contours, bubble_rects = panel_findSpeechBubbles(roi_panel, mode, read_mode)

        # 还原bubble_rects
        if bubble_rects is None:
            pass
        else:
            bubble_texts, bubble_rects, bubble_contours = filterBubblesByText(roi_panel, bubble_contours, bubble_rects, 
                                                                              text_direction, confidence_threshold)
            raw_bubble_rects = []
            panel_x, panel_y, panel_w, panel_h = roi_panel_rects[i]
            for bubble_rect in bubble_rects:
                bubble_x, bubble_y, bubble_w, bubble_h = bubble_rect
                bubble_x = bubble_x + panel_x
                bubble_y = bubble_y + panel_y
                raw_bubble_rect = [bubble_x, bubble_y, bubble_w, bubble_h]
                raw_bubble_rects.append(raw_bubble_rect)

            not_bubble_image = create_roi_not_bubble(roi_panel, bubble_contours)
            background_texts = panel_getBackgroundText(not_bubble_image, confidence_threshold)
            

        page_bubbles["bubble_texts"].append(bubble_texts)
        page_bubbles["background_texts"].append(background_texts)
        page_bubbles["bubble_rects"].append(raw_bubble_rects)
        page_bubbles["bubble_nums"].append(len(bubble_rects))
    
    return page_bubbles

In [252]:
# bubble特征提取（粒度panel）
page_bubbles = comic_findSpeechBubbles(img, panel_contours, pixels=10, mode=1)
bubble_texts_list = page_bubbles["bubble_texts"]
background_texts_list =page_bubbles["background_texts"]
bubble_rects_list =page_bubbles["bubble_rects"]
bubble_nums_list = page_bubbles["bubble_nums"]

[2025/01/14 03:48:35] ppocr DEBUG: dt_boxes num : 16, elapsed : 0.343947172164917
[2025/01/14 03:48:36] ppocr DEBUG: cls num  : 16, elapsed : 0.15780305862426758
[2025/01/14 03:48:36] ppocr DEBUG: rec_res num  : 16, elapsed : 0.5913879871368408
[2025/01/14 03:48:37] ppocr DEBUG: dt_boxes num : 88, elapsed : 0.552584171295166
[2025/01/14 03:48:37] ppocr DEBUG: cls num  : 88, elapsed : 0.20780467987060547
[2025/01/14 03:48:39] ppocr DEBUG: rec_res num  : 88, elapsed : 1.5403199195861816
[2025/01/14 03:48:39] ppocr DEBUG: dt_boxes num : 3, elapsed : 0.3159821033477783
[2025/01/14 03:48:39] ppocr DEBUG: cls num  : 3, elapsed : 0.06707477569580078
[2025/01/14 03:48:39] ppocr DEBUG: rec_res num  : 3, elapsed : 0.15576863288879395
[2025/01/14 03:48:39] ppocr DEBUG: dt_boxes num : 0, elapsed : 0.32787346839904785
[2025/01/14 03:48:39] ppocr DEBUG: cls num  : 0, elapsed : 0
[2025/01/14 03:48:39] ppocr DEBUG: rec_res num  : 0, elapsed : 0.0
[2025/01/14 03:48:40] ppocr DEBUG: dt_boxes num : 0, el

In [253]:
bubble_texts_list

[['不老七心热道领百依化放睛则迎事写的是《太上人物室退选欲认出精不是求活感应篇》道长力字的风骨奇特，笔梢落处竟然隐人胎隐有金石之意..成破'],
 ['还真有些道门符道大家的手段..'],
 ['，符道大家他们一生寞凝天地气息之间..', '您是说神符！？']]

In [254]:
background_texts_list

[['不老像道百事做過大刚香能小则不当向背乘宜之目冠神亦然凡人天曾言人罪過月路每到庆中日题称三尸种在人身中人罪恶拿具斗愿迅之愿星买之算香悉之利离随之吉太花人避之是道种君在人頭自台善遥之银上回為福無門推求長天小百算又天地石上站上第善有为成神仙可发有铁段人成不仁很自無城先生板真西良善暗海品助清理而立三百善药百善欲求地仙退之种雪街昌人银興人不迅将一父窕若袍&元玫许人肾破同君教用親学事親害行成'],
 [],
 ['世上亿万人众，符道大家也不过十数人…他们一生冥想苦修方能凝天地气息于金钩银划之间..']]

# 测试代码

## 测试代码：panels掩膜可视化

In [24]:
roi_panels, roi_panel_rects = create_roi_panels(img, panel_contours, pixels=10) # 原始尺寸的panels掩码列表

In [25]:
# 验证roi_panel_rects能够还原panel坐标
x, y, w, h = roi_panel_rects[0]
roi_image = img[y:y+h, x:x+w]

In [26]:
# 显示图像
cv2.namedWindow('Panels', cv2.WINDOW_NORMAL) 
cv2.resizeWindow('Panels', width=int(img_w*0.4), height=int(img_h*0.4))
cv2.imshow('Panels', roi_image)
cv2.waitKey(0)
cv2.imshow('Panels', roi_panels[0])
cv2.waitKey(0)
cv2.destroyAllWindows()

## 测试代码: 筛选文字bubble

In [27]:
for i, roi_panel in enumerate(roi_panels):
    bubble_contours, bubble_rects = panel_findSpeechBubbles(roi_panel, mode=1)
    # bubble_text_list, bubble_rects = filterBubblesByText(roi_panel, bubble_rects, confidence_threshold = 0.5)

In [28]:
# bubble_text_list

## 测试代码：还原bubble坐标

In [29]:
for i, roi_panel in enumerate(roi_panels):
    bubble_contours, bubble_rects = panel_findSpeechBubbles(roi_panel, mode=1)
    
    # 还原bubble_rects
    raw_bubble_rects = []
    if bubble_rects is None:
        pass
    else:
        panel_x, panel_y, panel_w, panel_h = roi_panel_rects[i]
        for bubble_rect in bubble_rects:
            bubble_x, bubble_y, bubble_w, bubble_h = bubble_rect
            bubble_x = bubble_x + panel_x
            bubble_y = bubble_y + panel_y
            raw_bubble_rect = [bubble_x, bubble_y, bubble_w, bubble_h]
            raw_bubble_rects.append(raw_bubble_rect)

In [30]:
len(raw_bubble_rects)

1

In [31]:
# 绘制并存储验证图像
bubble_image = drawPanels(raw_bubble_rects, img)

In [32]:
# 显示图像
cv2.namedWindow('Panels', cv2.WINDOW_NORMAL) 
cv2.resizeWindow('Panels', width=int(img_w*0.4), height=int(img_h*0.4))
cv2.imshow('Panels', bubble_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

## 在原始图像上绘制bubble与panel

In [33]:
def drawBubbles(bubble_rects_list, image):
    """
    绘制image的bubbles边框
    :param bubble_rects_list: 按panel记录的bubbles的边框信息
    :param image: 待绘制的image
    :return: image
    """
    img = np.copy(image)

    for bubble_rects in bubble_rects_list:
        for bubble_rect in bubble_rects:
            x, y, w, h = bubble_rect
            # 在图像上绘制矩形
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 5)
    
    return img

In [34]:
bubble_image = drawBubbles(bubble_rects_list, panel_image)

In [35]:
# 显示图像
cv2.namedWindow('Panels', cv2.WINDOW_NORMAL) 
cv2.resizeWindow('Panels', width=int(img_w*0.4), height=int(img_h*0.4))
cv2.imshow('Panels', bubble_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

# 测试代码：识别背景文字

In [178]:
roi_panels, roi_panel_rects = create_roi_panels(img, panel_contours, pixels=10) # 原始尺寸的panels掩码列表

In [179]:
# 显示图像
cv2.namedWindow('Panels', cv2.WINDOW_NORMAL) 
cv2.resizeWindow('Panels', width=int(img_w*0.4), height=int(img_h*0.4))
cv2.imshow('Panels', roi_panels[0])
cv2.waitKey(0)
cv2.destroyAllWindows()

In [38]:
def create_roi_not_bubble(panel, bubble_contours):
    """
    提取给定轮廓信息的ROI
    :param panel: BGR编码的panel图像
    :param bubble_contours: bubble轮廓信息
    :return roi_not_bubble: BGR编码的roi图像
    """
    image = panel.copy()

    h, w, _ = image.shape # 获取高度,宽度
    # 创建一个全白的掩膜，注意这里的图像类型是uint8，所以是255
    mask = np.ones((h, w), dtype=np.uint8) * 255
 
    # 在掩模上绘制所有轮廓并填充为黑色（ROI）
    for contour in bubble_contours:
        cv2.drawContours(mask, [contour], -1, 0, thickness=-1)
 
    roi_not_bubble = cv2.bitwise_and(image, image, mask=mask)
    
    return roi_not_bubble

In [180]:
not_bubble_image = create_roi_not_bubble(roi_panels[0], bubble_contours_list[0])

In [181]:
# 显示图像
cv2.namedWindow('Panels', cv2.WINDOW_NORMAL) 
cv2.imshow('Panels', not_bubble_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [182]:
panel = not_bubble_image.copy()
panel_gray = cv2.cvtColor(panel, cv2.COLOR_BGR2GRAY)
    
background_texts = []
result = ocr.ocr(panel, cls=True)
result = result[0]
if result is not None:
    text_result = []
    for line in result:
        text = line[1][0]
        confidence = line[1][1]
        if confidence > 0:
            text_result.append(text)
            print(text)
            print(confidence)

[2025/01/14 03:22:37] ppocr DEBUG: dt_boxes num : 2, elapsed : 0.5299656391143799
[2025/01/14 03:22:37] ppocr DEBUG: cls num  : 2, elapsed : 0.07863664627075195
[2025/01/14 03:22:38] ppocr DEBUG: rec_res num  : 2, elapsed : 0.18123745918273926
唐帝国天启元年
0.9956974983215332
钦天监
0.983307421207428


In [184]:
text_result

['唐帝国天启元年', '钦天监']