In [None]:
# Uncomment to install the pinned markdrop release (3.5.0 was the latest version at time of writing):
# %pip install markdrop==3.5.0

In [None]:
from markdrop import markdrop, MarkDropConfig, add_downloadable_tables
from pathlib import Path
import logging

# --- Conversion inputs (Colab form fields) ---
input_doc_path = "test.pdf" # @param {type:"string"}
output_dir_name = 'output_dir' # @param {type:"string"}

# --- Markdrop processing options ---
config = MarkDropConfig(
    excel_dir='markdropped-excel-tables',  # directory for Excel table exports
    log_dir='logs',                        # directory for log files
    log_level=logging.INFO,                # logging detail level
    download_button_color='#444444',       # colour of the download buttons in the HTML
    image_resolution_scale=2.0,            # scale factor for image resolution
)

# Step 1: convert the PDF and generate HTML with images and tables.
html_path = markdrop(input_doc_path, output_dir_name, config)

# Step 2: add interactive table-download functionality to that HTML.
downloadable_html = add_downloadable_tables(html_path, config)

In [None]:
from markdrop import setup_keys
# Set up the API key for the chosen AI provider: 'gemini' here, 'openai' is the alternative.
# NOTE(review): how setup_keys obtains/stores the key is defined inside markdrop — confirm there.
setup_keys(key='gemini')

In [None]:
from markdrop import setup_keys, process_markdown, ProcessorConfig, AIProvider, logger
from pathlib import Path

########### <Default prompts> ###########
# Prompt passed as `image_prompt` to ProcessorConfig further down in this cell.
# Because of the triple-quote layout the string starts and ends with a newline.
DEFAULT_IMAGE_PROMPT = """
You are given an image embedded in a markdown document. Describe the image in detail,
focusing on visual content, structure, layout, and any relevant context. Highlight key elements,
text (if any), colors, data trends (for charts), and overall purpose. Your goal is to convey
what the image communicates so clearly that the reader doesn't need to see it.
"""

# Prompt passed as `table_prompt` to ProcessorConfig further down in this cell.
# It deliberately ends with a "Table:" line — presumably the library appends the
# table's markdown after it; verify against markdrop before editing the tail.
DEFAULT_TABLE_PROMPT = """
You are analyzing a markdown table. Explain its structure and contents clearly.
Summarize key data, trends, and insights. Highlight column meanings, significant rows,
any calculations or comparisons, and overall takeaways. Make the summary useful
as a standalone description of the table.

Table:
"""
########### </Default prompts> ###########


# Markdown file to process: "<output_dir_name>/<input document stem>-markdroped.md".
# NOTE(review): presumably this is the file written by the earlier markdrop() conversion — confirm.
input_doc_stem = Path(input_doc_path).stem
markdown_path = Path(output_dir_name, f"{input_doc_stem}-markdroped.md")


# AI processing options, grouped by concern.
config = ProcessorConfig(
    # -- input / output locations --
    input_path=str(markdown_path),
    output_dir=output_dir_name,

    # -- provider and models --
    ai_provider=AIProvider.GEMINI,             # AI provider (GEMINI or OPENAI)
    gemini_model_name="gemini-2.5-flash",      # Gemini model for images
    gemini_text_model_name="gemini-2.5-pro",   # Gemini model for text

    # -- which descriptions to generate, and with which prompts --
    image_descriptions=True,
    image_prompt=DEFAULT_IMAGE_PROMPT,
    table_descriptions=False,
    table_prompt=DEFAULT_TABLE_PROMPT,

    # -- keep (False) or remove (True) the original elements --
    remove_images=False,
    remove_tables=False,

    # -- API retry policy --
    max_retries=3,                             # number of API call retries
    retry_delay=2,                             # delay between retries, in seconds
)

# Process markdown with AI descriptions.
# Progress and failures are reported through markdrop's `logger`; on success
# `output_path` holds whatever process_markdown(config) returned.
try:
    logger.info(f"Starting markdown processing script with input {markdown_path}.")
    output_path = process_markdown(config)
    logger.info("Script completed successfully")
except Exception as e:
    logger.error(f"Script failed with error: {str(e)}", exc_info=True)
    # Re-raise instead of calling exit(1): inside a Jupyter/IPython kernel `exit`
    # is IPython's exit autocall, which asks the kernel to shut down (discarding
    # all notebook state) rather than signalling a failure status. Re-raising
    # marks the cell as failed, stops "Run All", and keeps the kernel alive for
    # debugging; in a plain Python script it still ends with a non-zero status.
    raise

In [None]:
from pathlib import Path
import zipfile

def zip_directory(directory_path, zip_filename):
    """Zip a directory tree into ``zip_filename``.

    Every regular file found under ``directory_path`` (recursively, via
    ``os.walk``) is stored with a name relative to ``directory_path`` and
    compressed with DEFLATE.

    Args:
        directory_path: Existing directory whose contents are archived.
        zip_filename: Path of the archive to create (overwritten if present).

    Raises:
        NotADirectoryError: If ``directory_path`` is not an existing directory.
            Checked before the archive is opened so no empty zip is left behind
            (``os.walk`` would otherwise silently yield nothing).
    """
    # Local import keeps the function self-contained: the notebook only imports
    # `os` after this definition.
    import os

    if not os.path.isdir(directory_path):
        raise NotADirectoryError(
            f"Cannot zip {str(directory_path)!r}: not an existing directory"
        )

    # The archive may live inside the directory being zipped; remember where it
    # is so the half-written zip is not added to itself.
    archive_abspath = os.path.abspath(zip_filename)

    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(directory_path):
            for file in files:
                file_path = os.path.join(root, file)
                if os.path.abspath(file_path) == archive_abspath:
                    continue
                zipf.write(file_path, arcname=os.path.relpath(file_path, directory_path))


import os
# Archive the output directory as "markdrop-<output_dir_name>.zip" in the current
# working directory. Anchoring on Path.cwd() instead of a hard-coded '/content/'
# gives the same paths when the working directory is /content, and keeps working
# elsewhere, because markdrop() was given the relative `output_dir_name`.
zip_directory(
    str(Path.cwd() / output_dir_name),
    str(Path.cwd() / f'markdrop-{output_dir_name}.zip'),
)