-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Closed
Description
i want to search 2 and more word the same in line and highlight it. but it skip the first word. Some can help me fix it. this is my source:
from email.mime import base
from docx import Document
from docx2pdf import convert
from subprocess import Popen
from PyPDF2 import PdfFileReader as read
from pdf2image import convert_from_path
import numpy as np
import base64
import cv2
import os
import docx
from docx.shared import Pt
from docx.enum.style import WD_STYLE_TYPE
import subprocess
import re
from docx.enum.text import WD_COLOR_INDEX
from docx.document import Document as _Document
from docx.oxml.text.paragraph import CT_P
from docx.oxml.table import CT_Tbl
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
import re
import docx
from docx.shared import Pt
from docx.enum.style import WD_STYLE_TYPE
def findColor(filename,key,newName):
doc = Document(filename)
par = doc.paragraphs[0]
font_styles = doc.styles
font_charstyle = font_styles.add_style('CommentsStyle', WD_STYLE_TYPE.CHARACTER)
font_object = font_charstyle.font
if par.runs[0].font.name != None:
font_object.name = f'{par.runs[0].font.name}'
sizes = Size(filename)
run = 0
for p in doc.paragraphs:
for i in p.runs:
if i.font.size != None:
font_object.size = Pt(sizes[run])
run += 1
for block in iter_block_items(doc):
if isinstance(block, Paragraph):
p1 = block.text
match = re.findall(key,p1,re.IGNORECASE)
if match:
block.text = ""
countKey=color_string(match,countKey,p1,block)
else:
for row in block.rows:
for p in row.cells:
p1 = p.text
match = re.findall(key,p1,re.IGNORECASE)
if match: #so khớp không phân biệt hoa thường
p = p.add_paragraph()
countKey=color_string(match,countKey,p1,p)
doc.save(newName)
return countKey
def color_string(key,countKey,p1,p):
substrings = p1.split(key)
for substring in substrings[:-1]:
countKey += 1
b = p.add_run(substring,style = 'CommentsStyle')
font = p.add_run(key,style = 'CommentsStyle').font.highlight_color = WD_COLOR_INDEX.YELLOW
count = str(countKey)
font = p.add_run(count,style = 'CommentsStyle').font.highlight_color = WD_COLOR_INDEX.RED
p.add_run(substrings[-1], style = 'CommentsStyle')
return countKey
run : findColor('filename(.docx)','key','newName(.docx)')
Metadata
Metadata
Assignees
Labels
No labels