# In [None]:
#Script performs the following steps:
#-Reads the content of the Word file.
#-Reads the Excel file containing the old and new document IDs pairing.
#-Replaces the old document IDs with the new ones in the Word file.
#-Saves the updated Word file.

#Requires an Excel file with the column names 'Document Number' and 'Previous Document Number'.

import os
import re

import docx
import pandas as pd

#Replaces old doc ids with new ones, based on the mapping in the provided Excel dataset

#Read in word doc
def read_docx(file_path):
    """Open the Word document stored at ``file_path``.

    Returns the object produced by ``docx.Document``. If opening fails for
    any reason, the error is printed and ``None`` is returned instead of
    raising, so callers must check for ``None``.
    """
    try:
        return docx.Document(file_path)
    except Exception as err:
        print(f"Error reading Word file: {err}")
    return None

#Replaces the ids in the main body text
def _compile_id_pattern(id_map):
    """Turn ``id_map`` into a single-pass matcher.

    Returns ``(pattern, lookup)`` where ``lookup`` maps each usable old ID
    (as text) to its new ID (as text) and ``pattern`` matches any old ID.
    Returns ``(None, {})`` when the map has no usable entry.
    """
    lookup = {}
    for old_id, new_id in id_map.items():
        # Blank spreadsheet cells show up as None or NaN; NaN is the only
        # value that compares unequal to itself.
        if old_id is None or new_id is None:
            continue
        if old_id != old_id or new_id != new_id:
            continue
        old_text = str(old_id)
        # An empty key would "match" between every pair of characters.
        if old_text:
            lookup[old_text] = str(new_id)
    if not lookup:
        return None, lookup
    # Longest alternative first, so "DOC-10" is matched as a whole instead
    # of as "DOC-1" followed by "0".
    ordered = sorted(lookup, key=len, reverse=True)
    return re.compile("|".join(map(re.escape, ordered))), lookup


def _replace_ids_in_paragraph(para, pattern, lookup):
    """Replace IDs in one paragraph, keeping run formatting when possible."""
    def swap(match):
        return lookup[match.group(0)]

    original = para.text
    # One pass over the text: freshly inserted new IDs are never re-scanned,
    # so a chain such as A -> B, B -> C cannot turn A into C.
    updated = pattern.sub(swap, original)
    if updated == original:
        return

    runs = list(getattr(para, "runs", ()))
    run_texts = [run.text for run in runs]
    if "".join(run_texts) == original:
        new_run_texts = [pattern.sub(swap, text) for text in run_texts]
        # Editing run by run is only valid when every ID lies inside a single
        # run; the joined result then equals the whole-paragraph result.
        if "".join(new_run_texts) == updated:
            for run, old_text, new_text in zip(runs, run_texts, new_run_texts):
                if new_text != old_text:
                    run.text = new_text
            return

    # Fallback (an ID is split across runs, or the runs do not account for
    # the whole paragraph text): rewrite the paragraph text as a whole.
    # In python-docx this collapses the paragraph into a single run.
    para.text = updated


def _replace_ids_in_table(table, pattern, lookup, seen):
    """Replace IDs in every cell of ``table``, including nested tables."""
    for row in table.rows:
        for cell in row.cells:
            # A merged cell is yielded once per grid position it covers.
            # ``_tc`` is python-docx's handle on the underlying XML cell, so
            # it identifies repeats; keeping the object in ``seen`` also keeps
            # its id() stable for the duration of the walk.
            tc = getattr(cell, "_tc", cell)
            if id(tc) in seen:
                continue
            seen[id(tc)] = tc
            # Work paragraph by paragraph rather than assigning cell.text,
            # which would replace the entire cell content.
            for para in cell.paragraphs:
                _replace_ids_in_paragraph(para, pattern, lookup)
            for nested in cell.tables:
                _replace_ids_in_table(nested, pattern, lookup, seen)


def replace_ids_in_paragraphs(doc, id_map):
    """Replace old document IDs with new ones in the body paragraphs.

    Args:
        doc: Document whose ``paragraphs`` are updated in place.
        id_map: Mapping of old ID -> new ID. Keys and values are converted
            to text; entries with a missing (None/NaN) or empty old ID, or a
            missing new ID, are ignored.

    Returns:
        The same ``doc`` object, for call chaining.

    All IDs are replaced in a single pass per paragraph, so a replacement is
    never itself replaced, and where one old ID is a prefix of another the
    longer one wins.
    """
    pattern, lookup = _compile_id_pattern(id_map)
    if pattern is not None:
        for para in doc.paragraphs:
            _replace_ids_in_paragraph(para, pattern, lookup)
    return doc


#Replace the ids in any tables
def replace_ids_in_tables(doc, id_map):
    """Replace old document IDs with new ones inside the document's tables.

    Args:
        doc: Document whose ``tables`` are updated in place. Tables nested
            inside cells are processed as well.
        id_map: Mapping of old ID -> new ID, interpreted exactly as in
            ``replace_ids_in_paragraphs``.

    Returns:
        The same ``doc`` object, for call chaining.

    Each underlying cell is processed once even when merging makes it appear
    several times in a row, and cells are edited paragraph by paragraph so
    their paragraph structure and nested tables are kept.
    """
    pattern, lookup = _compile_id_pattern(id_map)
    if pattern is not None:
        seen = {}
        for table in doc.tables:
            _replace_ids_in_table(table, pattern, lookup, seen)
    return doc

#Read in the mapping of the old to new ids from the dataset Excel file
def read_id_map(excel_file):
    """Load the old -> new document ID mapping from an Excel file.

    The sheet must contain the columns 'Previous Document Number' (old ID)
    and 'Document Number' (new ID).

    Args:
        excel_file: Path (or file-like object) passed to ``pd.read_excel``.

    Returns:
        A dict mapping old ID text to new ID text, or ``None`` after printing
        the error if the file cannot be read or a column is missing. Rows
        where either ID is blank are skipped. If an old ID occurs more than
        once, the last row wins.
    """
    def as_id_text(value):
        # pandas loads a numeric column that contains blanks as floats, so
        # 12345 arrives as 12345.0; render it the way it is written.
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        # Surrounding whitespace in a cell would prevent the ID from
        # matching the document text.
        return str(value).strip()

    try:
        df = pd.read_excel(excel_file)
        # Blank cells come back as NaN; a pair with a missing side is not a
        # usable mapping.
        pairs = df[['Previous Document Number', 'Document Number']].dropna()
        id_map = {}
        for old_id, new_id in zip(pairs['Previous Document Number'],
                                  pairs['Document Number']):
            old_text = as_id_text(old_id)
            new_text = as_id_text(new_id)
            if old_text and new_text:
                id_map[old_text] = new_text
        return id_map
    except Exception as e:
        print(f"Error reading Excel file: {e}")
        return None

#Run and Save
def main(word_file, excel_file, output_file):
    """Apply the Excel ID mapping to a Word document and save the result.

    Args:
        word_file: Path of the Word document to read.
        excel_file: Path of the Excel file holding the old/new ID pairs.
        output_file: Path the updated document is written to.

    Returns nothing. If the document or the mapping cannot be loaded the
    function stops early; a failure while saving is printed, not raised.
    """
    document = read_docx(word_file)
    if document is None:
        return

    mapping = read_id_map(excel_file)
    if mapping is None:
        return

    # Body text first, then tables.
    for replace in (replace_ids_in_paragraphs, replace_ids_in_tables):
        document = replace(document, mapping)

    try:
        document.save(output_file)
        print(f"Updated document saved as {output_file}")
    except Exception as err:
        print(f"Error saving updated document: {err}")

#Customize the file paths (word_file, excel_file, and output_file) as needed.
if __name__ == "__main__":
    # Source document, ID-mapping workbook, and destination for the updated
    # copy, in that order.
    word_file = 'Input_Word_File.docx'
    excel_file = 'dataset5.xlsx'
    output_file = 'Output_Word_File.docx'
    main(word_file=word_file, excel_file=excel_file, output_file=output_file)