Skip to content

python docx---I want to search and highlight 2 words in the same line #1095

@supert1123

Description

@supert1123

i want to search 2 and more word the same in line and highlight it. but it skip the first word. Some can help me fix it. this is my source:

from email.mime import base
from docx import Document
from docx2pdf import convert
from subprocess import Popen
from PyPDF2 import PdfFileReader as read
from pdf2image import convert_from_path
import numpy as np
import base64
import cv2
import os
import docx
from docx.shared import Pt
from docx.enum.style import WD_STYLE_TYPE
import subprocess
import re
from docx.enum.text import WD_COLOR_INDEX
from docx.document import Document as _Document
from docx.oxml.text.paragraph import CT_P
from docx.oxml.table import CT_Tbl
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
import re
import docx
from docx.shared import Pt
from docx.enum.style import WD_STYLE_TYPE
def findColor(filename,key,newName):
    doc = Document(filename)
    par = doc.paragraphs[0]
    font_styles = doc.styles
    font_charstyle = font_styles.add_style('CommentsStyle', WD_STYLE_TYPE.CHARACTER)
    font_object = font_charstyle.font
    if par.runs[0].font.name != None:
        font_object.name = f'{par.runs[0].font.name}'
    sizes = Size(filename)
    run = 0
    for p in doc.paragraphs:
        for i in p.runs:
            if i.font.size != None:
                font_object.size = Pt(sizes[run])
                run += 1
    for block in iter_block_items(doc):
        if isinstance(block, Paragraph):
            p1 = block.text
            match = re.findall(key,p1,re.IGNORECASE)
            if match:
                block.text = ""
                countKey=color_string(match,countKey,p1,block)
        else:
            for row in block.rows:
                for p in row.cells:
                    p1 = p.text
                    match = re.findall(key,p1,re.IGNORECASE)
                    if match: #so khớp không phân biệt hoa thường
                        p = p.add_paragraph()
                        countKey=color_string(match,countKey,p1,p)
    doc.save(newName)
    return countKey
def color_string(key,countKey,p1,p):
    substrings = p1.split(key)
    for substring in substrings[:-1]:
        countKey += 1
        b = p.add_run(substring,style = 'CommentsStyle') 
        font = p.add_run(key,style = 'CommentsStyle').font.highlight_color = WD_COLOR_INDEX.YELLOW 
        count = str(countKey)
        font = p.add_run(count,style = 'CommentsStyle').font.highlight_color = WD_COLOR_INDEX.RED 
    p.add_run(substrings[-1], style = 'CommentsStyle')
    return countKey
run :  findColor('filename(.docx)','key','newName(.docx)')

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions