# In [None]:
import fitz  # This is the PyMuPDF library
import os
import sys

def split_pdf_to_markdown(pdf_path: str, output_dir: str = 'output_markdown_files') -> None:
    """
    Extract the text of each page of a PDF into its own ``.md`` file.

    Page N (1-based) is written to ``<output_dir>/page_<N>.md`` using UTF-8.
    Problems with the input path, the output directory, opening the PDF, or
    writing an individual page file are reported with ``print`` instead of
    being raised. A page whose file cannot be written is skipped and the
    remaining pages are still processed; the final summary lists any pages
    that failed.

    Exceptions raised while loading a page or extracting its text are not
    caught here, but the PDF document is closed before they propagate.

    Note:
        The content written is PyMuPDF's plain-text extraction.
        ``Page.get_text`` does not document a "markdown" output format, so
        "text" is requested explicitly. Producing structured Markdown
        (headings, lists, tables) would need a dedicated converter, e.g. the
        separate ``pymupdf4llm`` package -- TODO confirm whether adding that
        dependency is acceptable.

    Args:
        pdf_path (str): The full path to the input PDF file. Must exist and
                        end in ``.pdf`` (case-insensitive).
        output_dir (str): The directory where the ``.md`` files are saved.
                          It is created if it doesn't exist.

    Returns:
        None. Progress and errors are reported on standard output.
    """
    # --- 1. Validate the input PDF path ---
    if not os.path.isfile(pdf_path):
        print(f"Error: The file '{pdf_path}' was not found.")
        return

    if not pdf_path.lower().endswith('.pdf'):
        print(f"Error: The file '{pdf_path}' does not appear to be a PDF.")
        return

    # --- 2. Create the output directory ---
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print(f"Error: Could not create output directory '{output_dir}': {e}")
        return

    # --- 3. Open the PDF ---
    # Only the open call is guarded: a broad catch is kept here on purpose so
    # that any failure to open the file is reported rather than raised.
    try:
        pdf_document = fitz.open(pdf_path)
    except Exception as e:
        print(f"Error: Failed to open or process the PDF file: {e}")
        return

    # --- 4. Process each page ---
    failed_pages = []  # 1-based numbers of pages whose file could not be written

    # The context manager closes the document even if page loading or text
    # extraction raises part-way through the loop.
    with pdf_document:
        page_count = pdf_document.page_count
        print(f"Processing '{os.path.basename(pdf_path)}' which has {page_count} pages.")

        for page_num in range(page_count):
            page = pdf_document.load_page(page_num)

            # Plain-text extraction; see the docstring note about Markdown.
            text = page.get_text("text")

            output_filepath = os.path.join(output_dir, f"page_{page_num + 1}.md")

            try:
                with open(output_filepath, "w", encoding="utf-8") as md_file:
                    md_file.write(text)
            except OSError as e:
                print(f"Error: Could not write to file '{output_filepath}': {e}")
                # Remember the failure but keep going with the next page.
                failed_pages.append(page_num + 1)

    # --- 5. Report the outcome truthfully ---
    if failed_pages:
        written = page_count - len(failed_pages)
        failed_list = ", ".join(str(n) for n in failed_pages)
        print(f"\nFinished with errors: {written} of {page_count} pages were extracted "
              f"(failed pages: {failed_list}).")
    else:
        print(f"\nSuccess! All {page_count} pages have been extracted as Markdown files.")
    print("You can find them in the following directory:")
    print(os.path.abspath(output_dir))


if __name__ == "__main__":
    # Command-line entry point.
    # The PDF path is taken from the first command-line argument when one is
    # given; otherwise the user is prompted for it (the prompted value has
    # surrounding whitespace removed).
    pdf_file_path = (
        sys.argv[1]
        if len(sys.argv) > 1
        else input("Please enter the full path to your PDF file: ").strip()
    )

    # Drop wrapping quote characters (e.g. added by drag-and-drop into a
    # terminal) and hand the cleaned path to the extraction routine.
    split_pdf_to_markdown(pdf_file_path.strip('\'"'))

