Skip to content

How to make words that already exist in the document bold #1237

@bwsoftw

Description

@bwsoftw

I'm having trouble taking existing words in the document and modifying them with the library. The times I managed to leave it in bold, it was in the paragraph where it contained the words that I wanted to leave in bold, instead of leaving just the word.
Here is my code:

``
import os
import shutil
from docx import Document
import requests
from senhaapi import API_KEY
import json
from docxtpl import DocxTemplate
import docx
from docx.shared import Pt
import re
from docx.oxml.ns import qn
from docx.oxml import OxmlElement
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.shared import RGBColor

ans = read_multiple_choice(
"Escolha a matéria da prova que será submetida",
[{"label": "Português", "value": "portugues"},
{"label": "Matemática", "value": "matematica"},
{"label": "Geografia", "value": "geografia"},
{"label": "História", "value": "historia"},
{"label": "Física", "value": "fisica"},
{"label": "Química", "value": "quimica"},
{"label": "Literatura", "value": "Literatura"},
{"label": "Inglês", "value": "ingles"},
{"label": "Espanhol", "value": "espanhol"}, ],
)

if ans == "portugues":
print("Processing the file for Português") # Verifique se chegou a esta parte
file_response = read_file("Enviar")
file_name = file_response.name

# Verificar se o arquivo tem a extensão .docx
if not file_name.endswith(".docx"):
    display("A prova enviada deve estar no formato .docx", size='medium')
else:
    # Restante do seu código para processar o arquivo .docx
    script_dir = os.getcwd()
    destination_dir = os.path.join(script_dir, "foo/bar")
    os.makedirs(destination_dir, exist_ok=True)
    original_file_path = os.path.join(destination_dir, file_name)
    with open(original_file_path, "wb") as destination_file:
        shutil.copyfileobj(file_response.file, destination_file)

    # Abrir o documento com python-docx
    document = Document(original_file_path)

    texto_a_adicionar = "Prova Adaptada"

    for paragraph in document.paragraphs:
        if not paragraph.text.strip():  # Verificar se o parágrafo está vazio
            run = paragraph.add_run(texto_a_adicionar)
            font = run.font
            font.size = Pt(8)
            break  # Parar após adicionar o texto



    for paragraph in document.paragraphs:
        for run in paragraph.runs:
            run.font.name = 'Arial'
            run.font.size = Pt(14)  # Tamanho da fonte em pontos
    questoes = {}
    questao_atual = None

    # Percorra os parágrafos do documento
    for paragraph in document.paragraphs:
        text = paragraph.text.strip()

        # Verifique se o parágrafo começa com um número seguido por um parêntese
        if text and text[0].isdigit() and text[1:2] == ")":
            # Armazene a questão anterior, se houver
            if questao_atual is not None:
                questoes[questao_numero] = questao_atual.strip()

            # Inicialize a nova questão
            questao_numero = int(text.split(")", 1)[0])
            questao_atual = text.split(")", 1)[1]
        else:
            # Continue a construir a questão atual
            if questao_atual is not None:
                questao_atual += " " + text

    # Armazene a última questão
    if questao_atual is not None:
        questoes[questao_numero] = questao_atual.strip()

    keywords = []

    # Process the questions to extract keywords
    for question_number, question_content in questoes.items():
        # Split the question content into words
        words = re.findall(r'\w+', question_content)
        keywords.extend(words)  # Add words to the keywords list

    api_message = f"Verificar dentro de {', '.join(keywords)} quais são as palavras chaves ou verbos de comando, não mude o tempo verbal das palavras-chaves ou verbos de comando. Só me mostra na resposta apenas o que eu pedi, sem texto antes."

    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
    link = "https://api.openai.com/v1/chat/completions"

    id_modelo = "gpt-3.5-turbo"

    body_api = {
       "model": id_modelo,
       "temperature": 0.3,
       "messages": [{"role": "user", "content": api_message}]
    }

    body_api = json.dumps(body_api)

    request = requests.post(link, headers=headers, data=body_api)
    response = request.json()
    message = response["choices"][0]["message"]["content"]
    print(request)
    print(request.text)

    # Converta a mensagem em minúsculas para facilitar a comparação
    message_lower = message.lower()

    # Crie uma lista para armazenar as palavras-chave encontradas no documento
    palavras_chave_encontradas = []

    # Percorra as palavras-chave e verifique se elas estão presentes na mensagem
    for keyword in keywords:
        keyword_pattern = rf'\b{re.escape(keyword.lower())}\b'  # Usamos \b para marcar limites de palavras completas
        if re.search(keyword_pattern, message_lower):
            palavras_chave_encontradas.append(keyword)

    # Imprima as palavras-chave encontradas
    print("Palavras-chave encontradas no documento:", palavras_chave_encontradas)


    def apply_bold_to_keywords(text, keywords):
        modified_text = text
        for keyword in keywords:
            keyword_pattern = rf'\b{re.escape(keyword.lower())}\b'
            modified_text = re.sub(keyword_pattern, lambda match: f'<b>{match.group()}</b>', modified_text,
                                   flags=re.IGNORECASE)
        return modified_text


    document_to_save = Document()

    for paragraph in document.paragraphs:
        modified_paragraph = document_to_save.add_paragraph()

        runs = []
        current_run_text = ''

        for char in paragraph.text:
            if char == '<':
                if current_run_text.strip():
                    runs.append(current_run_text)
                    current_run_text = ''
                current_run_text += char
            elif char == '>':
                current_run_text += char
                runs.append(current_run_text)
                current_run_text = ''
            else:
                current_run_text += char

        if current_run_text.strip():
            runs.append(current_run_text)

        for run_text in runs:
            if run_text.startswith('<') and run_text.endswith('>'):
                new_run = modified_paragraph.add_run()
                new_run.text = run_text
                new_run.bold = True
            else:
                new_run = modified_paragraph.add_run()
                new_run.text = run_text

    modified_file_path = os.path.join(destination_dir, "modified_" + file_name)
    document_to_save.save(modified_file_path)

else:
display("Selecione uma opção válida", size='medium')

display, file_response and read_multiple_choice are from a library of my work that I'm using. and that part of the code where you use this is working perfectly.

Thanks for the help!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions