# Markdown Converter Demo

This notebook demonstrates how to use the Markdown Converter to convert various document formats to clean, readable markdown.

## Setup

First, let's set up the environment and import the necessary modules.

In [None]:
import sys
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Add the project root (the parent of the current working directory) to the
# import path. The membership check keeps this cell idempotent: re-running it
# does not stack duplicate entries onto sys.path.
project_root = str(Path.cwd().parent)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Import the markdown converter
from src.markdown_converter.core.converter import MainConverter
from src.markdown_converter.core.exceptions import ConversionError

print("✅ Setup complete!")

## Initialize the Converter

Let's create a converter instance and explore its capabilities.

In [None]:
# Maximum number of input formats listed before the output is truncated.
MAX_FORMATS_SHOWN = 20

# Create converter instance
converter = MainConverter()

# Get supported formats; the result is indexed by 'input' and 'output' below.
formats = converter.get_supported_formats()

print("🔧 Converter initialized successfully!")
print(f"📄 Supported input formats: {len(formats['input'])} formats")
print(f"📝 Supported output formats: {formats['output']}")

# Display the supported input formats as a numbered list.
input_formats = formats['input']
print("\n📋 Supported Input Formats:")
for position, fmt in enumerate(input_formats, start=1):
    if position > MAX_FORMATS_SHOWN:
        # Reached only when at least one format is left over, so the summary
        # line can never read "... and 0 more".
        print(f"  ... and {len(input_formats) - MAX_FORMATS_SHOWN} more")
        break
    print(f"  {position:2d}. {fmt}")

## Single File Conversion

Let's convert a single file to markdown and examine the results.

In [None]:
# Single-file demo: one HTML input converted to one markdown output.
input_file = "test_documents/simple_test.html"
output_file = "output/simple_test.md"

source_path = Path(input_file)
target_path = Path(output_file)

# When the input is missing, write a small sample document in its place.
if not source_path.exists():
    print(f"❌ Input file not found: {input_file}")
    print("📝 Creating a sample HTML file for demonstration...")

    # Sample HTML with headings, a list, and inline emphasis.
    sample_html = """
<!DOCTYPE html>
<html>
<head><title>Sample Test Document</title></head>
<body>
    <h1>Sample Test Document</h1>
    <p>This is a sample HTML document for testing the markdown converter.</p>
    <ul>
        <li>Item 1</li>
        <li>Item 2</li>
        <li>Item 3</li>
    </ul>
    <h2>Section 2</h2>
    <p>This is another section with some <strong>bold text</strong> and <em>italic text</em>.</p>
</body>
</html>
    """

    # Make sure the test_documents directory is there before writing.
    Path("test_documents").mkdir(exist_ok=True)
    source_path.write_text(sample_html)
    print(f"✅ Created sample file: {input_file}")

# Make sure the output directory is there as well.
target_path.parent.mkdir(parents=True, exist_ok=True)

print(f"🔄 Converting {input_file} to {output_file}...")

# Run the conversion and report on the returned result object.
result = converter.convert_file(input_file, output_file)

if not result.success:
    print(f"❌ Conversion failed: {result.error_message}")
else:
    print("✅ Conversion successful!")
    print(f"📊 File size: {result.file_size_mb:.2f} MB")
    print(f"⏱️  Processing time: {result.processing_time:.2f} seconds")

    # Show a short preview of the converted markdown, if it was written.
    if target_path.exists():
        content = target_path.read_text()
        separator = "-" * 50
        if len(content) > 200:
            preview = content[:200] + "..."
        else:
            preview = content
        print("\n📝 Converted content (first 200 characters):")
        print(separator)
        print(preview)
        print(separator)

## Batch File Conversion

Let's convert multiple files and analyze the results.

In [None]:
# List of test files to convert
test_files = [
    "test_documents/simple_test.html",
    "test_documents/simple_test.txt",
    "test_documents/comprehensive_test.html",
    "test_documents/comprehensive_test.txt",
]

# Sample content, keyed by path, written only for files that do not exist yet.
# simple_test.html has no entry here; if it is absent it is dropped by the
# existence filter further down.
sample_files = {
    "test_documents/simple_test.txt": "This is a simple text file.\n\nIt contains:\n- Line 1\n- Line 2\n- Line 3",
    "test_documents/comprehensive_test.html": """
<!DOCTYPE html>
<html>
<head><title>Comprehensive Test</title></head>
<body>
    <h1>Comprehensive Test Document</h1>
    <p>This is a more comprehensive test document.</p>
    <h2>Features</h2>
    <ul>
        <li>Multiple sections</li>
        <li>Different formatting</li>
        <li>Complex structure</li>
    </ul>
</body>
</html>
    """,
    "test_documents/comprehensive_test.txt": "Comprehensive text file\n\nThis file contains:\n1. Multiple lines\n2. Different content\n3. Various formatting"
}

# Create missing files
for file_path, content in sample_files.items():
    sample_path = Path(file_path)
    if not sample_path.exists():
        sample_path.parent.mkdir(parents=True, exist_ok=True)
        sample_path.write_text(content)
        print(f"✅ Created sample file: {file_path}")

# Filter to only existing files
existing_files = [f for f in test_files if Path(f).exists()]
print(f"📁 Found {len(existing_files)} files to convert")

# Create the output directory here so this cell does not depend on an earlier
# cell having created it.
Path("output").mkdir(parents=True, exist_ok=True)

# Convert files one by one
results = []
for file_path in existing_files:
    source = Path(file_path)
    # Several inputs share a stem (simple_test.html / simple_test.txt), so the
    # extension is folded into the output name; with the stem alone, the later
    # conversion would overwrite the earlier one's output.
    output_name = "_".join(
        part for part in (source.stem, source.suffix.lstrip(".")) if part
    )
    output_file = f"output/{output_name}.md"

    print(f"🔄 Converting {file_path}...")
    result = converter.convert_file(file_path, output_file)

    results.append({
        'input_file': file_path,
        'output_file': output_file,
        'success': result.success,
        'file_size_mb': result.file_size_mb,
        'processing_time': result.processing_time,
        'error': result.error_message
    })

# Create results DataFrame only if we have results
if results:
    df_results = pd.DataFrame(results)
    print("\n📊 Conversion Results:")
    print(df_results[['input_file', 'success', 'file_size_mb', 'processing_time']])
else:
    print("\n❌ No files were converted.")

## Error Handling and Validation

Let's test error handling with invalid files and unsupported formats.

In [None]:
# Test error handling
print("🧪 Testing Error Handling...")

# Tests 1 and 2 both target output/test.md; create the directory here so the
# cell does not depend on an earlier cell having created it.
Path("output").mkdir(parents=True, exist_ok=True)

# Test 1: Non-existent file
print("\n1. Testing non-existent file:")
result = converter.convert_file("non_existent_file.txt", "output/test.md")
print(f"   Result: {'Success' if result.success else 'Failed'}")
if not result.success:
    print(f"   Error: {result.error_message}")

# Test 2: Unsupported format
print("\n2. Testing unsupported format:")
unsupported_file = Path("test_documents/test.xyz")
unsupported_file.parent.mkdir(parents=True, exist_ok=True)
unsupported_file.write_text("This is a test file with unsupported format.")
try:
    result = converter.convert_file(str(unsupported_file), "output/test.md")
    print(f"   Result: {'Success' if result.success else 'Failed'}")
    if not result.success:
        print(f"   Error: {result.error_message}")
finally:
    # Clean up even if the conversion raises, so the temporary file is never
    # left behind in test_documents.
    if unsupported_file.exists():
        unsupported_file.unlink()

# Test 3: Check if file can be converted
# test.xyz has already been deleted by this point.
# NOTE(review): presumably can_convert decides by extension and does not need
# the file to exist; confirm against the converter implementation.
print("\n3. Testing format support:")
test_files = [
    "test_documents/simple_test.html",
    "test_documents/simple_test.txt",
    "test_documents/test.xyz"
]

for file_path in test_files:
    can_convert = converter.can_convert(file_path)
    status = "✅" if can_convert else "❌"
    print(f"   {status} {file_path}: {'Supported' if can_convert else 'Not supported'}")

## Summary

This notebook demonstrated:

- ✅ **Basic Usage:** Single file conversion with error handling
- ✅ **Batch Processing:** Converting multiple files with detailed results
- ✅ **Error Handling:** Testing with invalid files and unsupported formats

The Markdown Converter provides a robust, flexible API for converting various document formats to clean markdown.