In [1]:
import os
import win32com.client
from pypdf import PdfWriter
import shutil
import logging



In [4]:
# --- Configuration ---
# IMPORTANT: Replace DOC_FOLDER_WIN with your actual folder path.
# The 'r' prefix makes this a raw string, so every backslash is taken literally:
# write single backslashes only (a doubled backslash would stay doubled in the path).
DOC_FOLDER_WIN = r"C:\Users\silla\code_projects\shipping_labels\27-08-2025"
# The combined PDF is written next to the source documents; deriving it from
# DOC_FOLDER_WIN keeps the two paths in sync when the folder changes.
OUTPUT_PDF_WIN = os.path.join(DOC_FOLDER_WIN, "combined_document.pdf")

# Emit timestamped INFO-level messages so each conversion step is visible in the cell output.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

def _list_word_documents(doc_folder):
    """Return the sorted names of the .doc/.docx files located directly in ``doc_folder``."""
    return sorted(
        name
        for name in os.listdir(doc_folder)
        if name.lower().endswith((".doc", ".docx"))
        # Word drops "~$<name>" owner/lock files next to documents that are open;
        # they carry a Word extension but are not documents and cannot be converted.
        and not name.startswith("~$")
        and os.path.isfile(os.path.join(doc_folder, name))
    )


def _unique_pdf_name(filename, used_names):
    """Return a '<stem>.pdf' name for ``filename`` not yet in ``used_names`` and record it there."""
    stem = os.path.splitext(filename)[0]
    candidate = f"{stem}.pdf"
    suffix = 1
    # Compare case-insensitively because the intermediate files live on a Windows file system.
    while candidate.lower() in used_names:
        candidate = f"{stem}_{suffix}.pdf"
        suffix += 1
    used_names.add(candidate.lower())
    return candidate


def combine_docs_with_word(doc_folder, output_pdf):
    """
    Convert every Word document in a folder to PDF and merge them into one file.

    All ``.doc`` and ``.docx`` files found directly in ``doc_folder`` are
    converted with Microsoft Word (via COM automation) in sorted file-name
    order, then appended to a single PDF. Each original document starts on a
    new page in the final PDF.

    Args:
        doc_folder: Folder containing the Word documents. It must already
            exist; it is never created by this function.
        output_pdf: Path of the merged PDF to write (overwritten if present).

    Returns:
        None. Failures are logged rather than raised, so always check the log
        output to confirm that the combined PDF was created.

    Side effects:
        Uses (and deletes) a ``temp_pdfs_for_conversion`` sub-folder of
        ``doc_folder`` for the intermediate PDFs.
    """
    # Word resolves relative paths against its own working directory, not
    # Python's, so only absolute paths are handed to it.
    doc_folder = os.path.abspath(doc_folder)
    temp_folder = os.path.join(doc_folder, "temp_pdfs_for_conversion")

    word_instance = None
    pdf_merger = None
    try:
        # Validate before touching the disk: creating the temp folder first
        # would silently create a mistyped doc_folder as well.
        if not os.path.isdir(doc_folder):
            logging.error(f"The folder '{doc_folder}' does not exist.")
            return

        doc_files = _list_word_documents(doc_folder)
        if not doc_files:
            logging.warning(f"No .doc files were found in '{doc_folder}'.")
            return

        # Start from an empty temporary folder for the intermediate PDFs.
        if os.path.exists(temp_folder):
            shutil.rmtree(temp_folder)
        os.makedirs(temp_folder)

        # --- Part 1: Convert the Word documents to .pdf ---
        logging.info("Starting conversion from .doc to .pdf using Microsoft Word...")

        # Word is only started once we know there is something to convert.
        word_instance = win32com.client.Dispatch("Word.Application")
        # Keep Word hidden in the background.
        word_instance.Visible = False
        # 0 == wdAlertsNone: a modal dialog in a hidden Word would block forever.
        word_instance.DisplayAlerts = 0

        pdf_files = []
        used_pdf_names = set()
        for filename in doc_files:
            doc_path = os.path.join(doc_folder, filename)
            # 'label.doc' and 'label.docx' share a stem; keep their PDFs apart.
            pdf_path = os.path.join(temp_folder, _unique_pdf_name(filename, used_pdf_names))

            logging.info(f"Converting '{filename}' to PDF...")

            # Read-only is enough for an export and never modifies the source file.
            doc = word_instance.Documents.Open(doc_path, ReadOnly=True)
            try:
                # The FileFormat constant for PDF (wdFormatPDF) is 17.
                doc.SaveAs(pdf_path, FileFormat=17)
            finally:
                # 0 == wdDoNotSaveChanges. Always close, even when the export
                # failed, so no document stays open in the hidden Word instance.
                doc.Close(SaveChanges=0)
            pdf_files.append(pdf_path)

        logging.info("All .doc files have been converted to PDF successfully.")

        # --- Part 2: Merge the generated PDFs ---
        logging.info("Merging individual PDF files...")
        pdf_merger = PdfWriter()

        for pdf_path in pdf_files:
            if not os.path.exists(pdf_path):
                # Do not drop a document from the result without saying so.
                logging.warning(f"Expected PDF '{os.path.basename(pdf_path)}' is missing and was skipped.")
                continue
            pdf_merger.append(pdf_path)
            logging.info(f"Appended '{os.path.basename(pdf_path)}' to the final document.")

        # Write the merged PDF to the output file.
        with open(output_pdf, "wb") as out_file:
            pdf_merger.write(out_file)

        logging.info(f"Successfully created the combined PDF: {output_pdf}")

    except Exception as e:
        # Deliberate best-effort: report the problem in the log instead of
        # raising, so the clean-up below always runs and the cell completes.
        logging.error(f"An error occurred: {e}")
        logging.error("Please ensure Microsoft Word is installed and you have permissions to access the folders.")

    finally:
        # --- Part 3: Clean up ---
        if pdf_merger is not None:
            pdf_merger.close()

        # Ensure the Word application is closed, even if an error occurred.
        if word_instance is not None:
            try:
                word_instance.Quit()
            except Exception as quit_error:
                # A failing Quit must not prevent the temporary files from being removed.
                logging.warning(f"Could not close Microsoft Word cleanly: {quit_error}")

        # Remove the temporary folder with intermediate PDFs.
        if os.path.exists(temp_folder):
            logging.info("Cleaning up temporary files...")
            shutil.rmtree(temp_folder, ignore_errors=True)

        logging.info("Process finished.")

In [6]:
# Convert and merge the documents using the paths from the configuration cell.
combine_docs_with_word(doc_folder=DOC_FOLDER_WIN, output_pdf=OUTPUT_PDF_WIN)

2025-08-27 16:43:06,235 - Starting conversion from .doc to .pdf using Microsoft Word...
2025-08-27 16:43:08,678 - Converting 'shipping_label_#2013.docx' to PDF...
2025-08-27 16:43:10,048 - Converting 'shipping_label_#2014.docx' to PDF...
2025-08-27 16:43:10,653 - Converting 'shipping_label_#2015.docx' to PDF...
2025-08-27 16:43:11,286 - All .doc files have been converted to PDF successfully.
2025-08-27 16:43:11,289 - Merging individual PDF files...
2025-08-27 16:43:11,308 - Appended 'shipping_label_#2013.pdf' to the final document.
2025-08-27 16:43:11,328 - Appended 'shipping_label_#2014.pdf' to the final document.
2025-08-27 16:43:11,344 - Appended 'shipping_label_#2015.pdf' to the final document.
2025-08-27 16:43:11,355 - Successfully created the combined PDF: C:\Users\silla\code_projects\shipping_labels\\27-08-2025\combined_document.pdf
2025-08-27 16:43:11,361 - Cleaning up temporary files...
2025-08-27 16:43:11,367 - Process finished.
