In [None]:
!pip install markitdown[all]

In [None]:
import multiprocessing
from pathlib import Path
from markitdown import MarkItDown
import re # Import the regular expression module

# Converter cache for the current process. It is filled on the first
# conversion performed in this process, so every worker process ends up with
# its own MarkItDown instance and nothing is shared between processes.
_converter = None


def _get_converter():
    """Return this process's MarkItDown instance, creating it on first use."""
    global _converter
    if _converter is None:
        _converter = MarkItDown(enable_plugins=True)
    return _converter


# Define a function to process a single file
def convert_file(fn):
    """
    Convert a single file to Markdown with MarkItDown and write the result.

    The output is written next to the input, with the input's suffix
    replaced by ``.md``, encoded as UTF-8. Progress and errors are reported
    with ``print``; exceptions are not propagated, so one bad file does not
    abort a whole batch.

    Args:
        fn (Path | str): The path to the input file.

    Returns:
        bool: True if the Markdown file was written. False if ``fn`` is not
        a regular file or if anything went wrong during conversion/writing.
    """
    try:
        fn = Path(fn)

        # Ensure it's a file before doing any conversion work; this also
        # avoids building a converter for paths that are going to be skipped.
        if not fn.is_file():
            print(f"Skipping {fn} as it is not a file.")
            return False

        print(f"Processing {fn}...")
        result = _get_converter().convert(fn)
        output_fn = fn.with_suffix(".md")
        # Create parent directories if they don't exist
        output_fn.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding so the output does not depend on the locale.
        output_fn.write_text(result.text_content, encoding='utf-8')
        print(f"Successfully converted {fn} to {output_fn}")
        return True

    except Exception as e:
        # Deliberately broad: this is the per-file boundary of a batch job.
        print(f"Error processing {fn}: {e}")
        return False

# Main execution block
def main():
    """
    Convert every non-Markdown file under ``examples`` in parallel.

    Files are discovered recursively; anything whose name ends in ``.md``
    (case-insensitive) is skipped so existing Markdown output is never fed
    back in as input.

    Returns:
        int: 0 when the run completed or there was nothing to convert,
        1 when the root directory is missing.
    """
    # Define the root directory to search for files
    root_dir = Path("examples")

    # is_dir() rather than exists(): a regular file that happens to be named
    # like the root directory is not something we can search.
    if not root_dir.is_dir():
        print(f"Error: Directory '{root_dir}' not found.")
        return 1

    # Find all files recursively, excluding files that are already .md.
    all_files = [
        f for f in root_dir.rglob("*")
        if f.is_file() and not f.name.lower().endswith(".md")
    ]

    if not all_files:
        print(f"No files found in '{root_dir}' that are not already .md files.")
        return 0

    print(f"Found {len(all_files)} files to process.")

    # Number of worker processes; None lets multiprocessing pick a default.
    # You can specify an integer, e.g., num_processes = 4
    num_processes = None

    # Chunk size for imap_unordered: files are handed to workers in groups
    # of this many. Note that this also caps the number of independent tasks
    # at ceil(len(all_files) / batch_size), so for small inputs a smaller
    # value spreads the work over more workers.
    batch_size = 10

    # imap_unordered yields results as they become ready, which is fine here
    # because the order in which files are converted does not matter.
    print(f"Starting multiprocessing pool with chunk size {batch_size}...")
    with multiprocessing.Pool(processes=num_processes) as pool:
        # Drain the iterator so every task has finished before the pool is
        # torn down on leaving the with-block; the results are not used.
        for _ in pool.imap_unordered(convert_file, all_files, chunksize=batch_size):
            pass

    print("All files processed.")
    return 0


if __name__ == "__main__":
    # Avoid the interactive exit() helper: it is not guaranteed to exist in
    # every interpreter and, inside a notebook, it asks the kernel to shut
    # down. Only raise SystemExit for a non-zero status so a successful run
    # in a notebook finishes quietly.
    exit_code = main()
    if exit_code:
        raise SystemExit(exit_code)


Found 56 files to process.
Starting multiprocessing pool with chunk size 10...
Processing examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy 5.xlsx...
Processing examples/60_ Design Documentation_2_2 copy 6.xlsx...
Processing examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy 4.xlsx...
Processing examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 4.xlsx...Processing examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy 2.xlsx...

Processing examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2 copy 2.xlsx...
Successfully converted examples/60_ Design Documentation_2_2 copy 6.xlsx to examples/60_ Design Documentation_2_2 copy 6.md


  warn(msg)
  warn(msg)


Processing examples/60_ Design Documentation_2_2 copy.xlsx...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Successfully converted examples/60_ Design Documentation_2_2 copy.xlsx to examples/60_ Design Documentation_2_2 copy.md


  warn(msg)


Processing examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy 3.xlsx...
Successfully converted examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 4.xlsx to examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 4.md
Processing examples/30_ Design Review 07092022_2_2 copy 4.xlsx...


  warn(msg)


Successfully converted examples/30_ Design Review 07092022_2_2 copy 4.xlsx to examples/30_ Design Review 07092022_2_2 copy 4.md
Processing examples/60_ Design Documentation_2_2 copy 3.xlsx...
Successfully converted examples/60_ Design Documentation_2_2 copy 3.xlsx to examples/60_ Design Documentation_2_2 copy 3.md
Successfully converted examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy 2.xlsx to examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy 2.md
Processing examples/60_ Design Documentation_2_2 copy 4.xlsx...
Processing examples/60_ Design Documentation_2_2.xlsx...
Successfully converted examples/60_ Design Documentation_2_2.xlsx to examples/60_ Design Documentation_2_2.mdSuccessfully converted examples/60_ Design Documentation_2_2 copy 4.xlsx to examples/60_ Design Documentation_2_2 copy 4.md

Processing examples/1096-01-OHS-RA-01 - CRAW_3_3_3.xlsx...
Processing examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 3.xlsx...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Successfully converted examples/1096-01-OHS-RA-01 - CRAW_3_3_3.xlsx to examples/1096-01-OHS-RA-01 - CRAW_3_3_3.md
Processing examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy.xlsx...
Successfully converted examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 3.xlsx to examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 3.md
Processing examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2 copy 5.xlsx...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Successfully converted examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy.xlsx to examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy.md
Processing examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 2.xlsx...
Successfully converted examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 2.xlsx to examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 2.md
Processing examples/60_ Design Documentation_2_2 copy 5.xlsx...
Successfully converted examples/60_ Design Documentation_2_2 copy 5.xlsx to examples/60_ Design Documentation_2_2 copy 5.md
Processing examples/1. SEPD Monford DoR Draft_2_2 copy.xlsx...
Successfully converted examples/1. SEPD Monford DoR Draft_2_2 copy.xlsx to examples/1. SEPD Monford DoR Draft_2_2 copy.md
Processing examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2.xlsx...
Successfully converted examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy 4.xlsx to examples/08 - 26August22 Overdue and Loo

  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Successfully converted examples/30_ Design Review 07092022_2_2 copy.xlsx to examples/30_ Design Review 07092022_2_2 copy.md
Successfully converted examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy 3.xlsx to examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy 3.md
Processing examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 4.xlsx...Processing examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 2.xlsx...



  warn(msg)


Successfully converted examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 4.xlsx to examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 4.md


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Processing examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2 copy 4.xlsx...
Successfully converted examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy 6.xlsx to examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy 6.md
Processing examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 6.xlsx...
Successfully converted examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 6.xlsx to examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 6.md
Processing examples/30_ Design Review 07092022_2_2 copy 5.xlsx...


  warn(msg)


Successfully converted examples/30_ Design Review 07092022_2_2 copy 5.xlsx to examples/30_ Design Review 07092022_2_2 copy 5.md
Processing examples/60_ Design Documentation_2_2 copy 2.xlsx...
Successfully converted examples/60_ Design Documentation_2_2 copy 2.xlsx to examples/60_ Design Documentation_2_2 copy 2.md
Processing examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy 3.xlsx...
Successfully converted examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 2.xlsx to examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 2.md
Processing examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2 copy 3.xlsx...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Successfully converted examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy 3.xlsx to examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy 3.md
Successfully converted examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2 copy 5.xlsx to examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2 copy 5.md
Processing examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 5.xlsx...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Successfully converted examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2.xlsx to examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2.md
Successfully converted examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 5.xlsx to examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 5.md
Processing examples/1. SEPD Monford DoR Draft_2_2 copy 5.xlsx...
Successfully converted examples/1. SEPD Monford DoR Draft_2_2 copy 5.xlsx to examples/1. SEPD Monford DoR Draft_2_2 copy 5.md
Processing examples/1. SEPD Monford DoR Draft_2_2 copy 2.xlsx...
Successfully converted examples/1. SEPD Monford DoR Draft_2_2 copy 2.xlsx to examples/1. SEPD Monford DoR Draft_2_2 copy 2.md
Processing examples/1. SEPD Monford DoR Draft_2_2.xlsx...
Successfully converted examples/1. SEPD Monford DoR Draft_2_2.xlsx to examples/1. SEPD Monford DoR Draft_2_2.md
Processing examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2.xlsx...
Successfully converted examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installat

  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Successfully converted examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2 copy 4.xlsx to examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2 copy 4.md
Processing examples/1096-01-OHS-RA-01 - CRAW_2_2_2.xlsx...
Successfully converted examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy.xlsx to examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy.md
Processing examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy.xlsx...
Successfully converted examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy.xlsx to examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy.md
Processing examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy 5.xlsx...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Successfully converted examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2 copy 3.xlsx to examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2 copy 3.md


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Processing examples/30_ Design Review 07092022_2_2 copy 2.xlsx...


  warn(msg)


Successfully converted examples/30_ Design Review 07092022_2_2 copy 2.xlsx to examples/30_ Design Review 07092022_2_2 copy 2.md
Processing examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy 4.xlsx...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Successfully converted examples/1096-01-OHS-RA-01 - CRAW_2_2_2.xlsx to examples/1096-01-OHS-RA-01 - CRAW_2_2_2.md
Processing examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 3.xlsx...
Successfully converted examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 3.xlsx to examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 3.md
Processing examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 5.xlsx...
Successfully converted examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy 5.xlsx to examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy 5.md
Processing examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 6.xlsx...
Successfully converted examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 5.xlsx to examples/1096-01-MFG-QLT-ITC-CIV-0035_Fence Gate Installation_Final_A_2_2 copy 5.md
Processing examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy.xlsx...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Successfully converted examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy 4.xlsx to examples/1096-01-OHS-RA-01 - CRAW_2_2_2 copy 4.md
Processing examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2 copy.xlsx...
Successfully converted examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 6.xlsx to examples/1096-01-OHS-RA-01 - CRAW_3_3_3 copy 6.md
Processing examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy 2.xlsx...
Successfully converted examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy.xlsx to examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy.md
Processing examples/30_ Design Review 07092022_2_2.xlsx...


  warn(msg)


Successfully converted examples/30_ Design Review 07092022_2_2.xlsx to examples/30_ Design Review 07092022_2_2.md
Processing examples/30_ Design Review 07092022_2_2 copy 3.xlsx...


  warn(msg)


Successfully converted examples/30_ Design Review 07092022_2_2 copy 3.xlsx to examples/30_ Design Review 07092022_2_2 copy 3.md
Successfully converted examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy 2.xlsx to examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy 2.md
Processing examples/1. SEPD Monford DoR Draft_2_2 copy 3.xlsx...
Successfully converted examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2 copy.xlsx to examples/08 - 26August22 Overdue and Lookahead_BR_2_2_2 copy.md
Processing examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy 6.xlsx...
Successfully converted examples/1. SEPD Monford DoR Draft_2_2 copy 3.xlsx to examples/1. SEPD Monford DoR Draft_2_2 copy 3.md
Processing examples/30_ Design Review 07092022_2_2 copy 6.xlsx...


  warn(msg)


Successfully converted examples/30_ Design Review 07092022_2_2 copy 6.xlsx to examples/30_ Design Review 07092022_2_2 copy 6.md
Processing examples/1. SEPD Monford DoR Draft_2_2 copy 6.xlsx...
Successfully converted examples/1. SEPD Monford DoR Draft_2_2 copy 6.xlsx to examples/1. SEPD Monford DoR Draft_2_2 copy 6.md
Successfully converted examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy 6.xlsx to examples/08 - 26August22 Overdue and Lookahead_BR_3_3 copy 6.md
Processing examples/08 - 26August22 Overdue and Lookahead_BR_3_3.xlsx...
Successfully converted examples/08 - 26August22 Overdue and Lookahead_BR_3_3.xlsx to examples/08 - 26August22 Overdue and Lookahead_BR_3_3.md
All files processed.
