# Markdown to PDF Migration & Supabase Upload

This notebook converts all markdown files in the documents folder to PDF and uploads them to Supabase storage bucket.

## Features:
- Batch convert all .md files to PDF
- Upload PDFs to Supabase storage bucket
- Track progress and handle errors
- Clean up local PDF files after upload

## 2. Import Libraries

In [1]:
import os
import glob
from pathlib import Path
from markdown_pdf import MarkdownPdf, Section
from supabase import create_client, Client
from dotenv import load_dotenv
from datetime import datetime

# Load environment variables
load_dotenv()

print("✓ Libraries imported successfully")

✓ Libraries imported successfully


## 3. Configuration

In [2]:
# Directory paths
DOCUMENTS_DIR = Path(os.getcwd())
PDF_OUTPUT_DIR = DOCUMENTS_DIR / "pdfs"
PDF_OUTPUT_DIR.mkdir(exist_ok=True)

# Supabase configuration
SUPABASE_URL = os.getenv('SUPABASE_URL')
SUPABASE_KEY = os.getenv('SUPABASE_SECRET_KEY')
BUCKET_NAME = "enterprise-documents"  # Change this to your bucket name if different

# Validate configuration
if not SUPABASE_URL or not SUPABASE_KEY:
    raise ValueError("SUPABASE_URL and SUPABASE_SECRET_KEY must be set in .env file")

print(f"✓ Configuration loaded")
print(f"  Documents directory: {DOCUMENTS_DIR}")
print(f"  PDF output directory: {PDF_OUTPUT_DIR}")
print(f"  Supabase URL: {SUPABASE_URL}")
print(f"  Bucket name: {BUCKET_NAME}")

✓ Configuration loaded
  Documents directory: /Users/leechin/Documents/enterprise-ai-powered-sys/enterprise_documents
  PDF output directory: /Users/leechin/Documents/enterprise-ai-powered-sys/enterprise_documents/pdfs
  Supabase URL: https://rkxkcdxmuddkiqdgoeep.supabase.co
  Bucket name: enterprise-documents


## 4. Initialize Supabase Client

In [3]:
# Initialize Supabase client
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

print("✓ Supabase client initialized")

# Optional: Create bucket if it doesn't exist
# Note: If you get errors here, you can create the bucket manually in Supabase dashboard
try:
    # Check if bucket exists
    buckets = supabase.storage.list_buckets()
    bucket_exists = any(bucket.get('name') == BUCKET_NAME or bucket.get('id') == BUCKET_NAME for bucket in buckets)
    
    if not bucket_exists:
        # Create bucket - you may need to create this manually in Supabase dashboard
        # Go to Storage section and create a bucket named "enterprise-documents"
        try:
            supabase.storage.create_bucket(BUCKET_NAME)
            print(f"✓ Created bucket: {BUCKET_NAME}")
        except:
            print(f"⚠ Could not auto-create bucket. Please create bucket '{BUCKET_NAME}' manually in Supabase dashboard")
            print(f"   Storage > New Bucket > Name: {BUCKET_NAME} > Public: Yes")
    else:
        print(f"✓ Bucket already exists: {BUCKET_NAME}")
except Exception as e:
    print(f"Note: {e}")
    print("Continuing anyway - bucket operations will fail if bucket doesn't exist")

✓ Supabase client initialized
Storage endpoint URL should have a trailing slash.
Note: 'SyncBucket' object has no attribute 'get'
Continuing anyway - bucket operations will fail if bucket doesn't exist


## 5. Discover Markdown Files

In [4]:
# Find all markdown files
markdown_files = sorted(glob.glob(str(DOCUMENTS_DIR / "*.md")))

print(f"\nFound {len(markdown_files)} markdown files:")
for i, file_path in enumerate(markdown_files, 1):
    print(f"  {i}. {Path(file_path).name}")


Found 14 markdown files:
  1. 01_company_manifesto.md
  2. 02_active_personnel.md
  3. 03_refund_return_policy.md
  4. 04_employee_handbook.md
  5. 05_vinyl_grading_guide.md
  6. 06_shipping_packaging_policy.md
  7. 07_trade_in_consignment_policy.md
  8. 08_customer_privacy_policy.md
  9. 09_store_events_programs.md
  10. 10_inventory_sourcing_guide.md
  11. 11_customer_service_standards.md
  12. 12_financial_operations_guide.md
  13. 13_emergency_security_procedures.md
  14. 14_online_store_operations.md


## 6. Convert Markdown to PDF

In [5]:
# Source - https://stackoverflow.com/a
# Posted by Morgana, modified by community. See post 'Timeline' for change history
# Retrieved 2026-01-09, License - CC BY-SA 4.0

def convert_md_to_pdf(md_file_path: str, output_dir: Path) -> str:
    """
    Convert a markdown file to PDF
    
    Args:
        md_file_path: Path to markdown file
        output_dir: Directory to save PDF
    
    Returns:
        Path to generated PDF file
    """
    md_path = Path(md_file_path)
    pdf_filename = md_path.stem + ".pdf"
    pdf_path = output_dir / pdf_filename
    
    try:
        # Read markdown content
        with open(md_file_path, 'r', encoding='utf-8') as f:
            markdown_content = f.read()
        
        # Create PDF with minimal metadata to avoid errors
        pdf = MarkdownPdf()
        
        # Only set title and author - no other metadata
        try:
            pdf.meta["title"] = md_path.stem.replace('_', ' ').title()
        except:
            pass  # Skip if title not supported
        
        try:
            pdf.meta["author"] = "Enterprise AI System"
        except:
            pass  # Skip if author not supported
        
        # Add content and save
        pdf.add_section(Section(markdown_content, toc=False))
        pdf.save(str(pdf_path))
        
        return str(pdf_path)
    
    except Exception as e:
        raise Exception(f"Failed to convert {md_path.name} to PDF: {e}")


# IMPORTANT: Clear any previous results before running
converted_pdfs = []
failed_conversions = []

print("\nConverting markdown files to PDF...\n")

for i, md_file in enumerate(markdown_files, 1):
    md_name = Path(md_file).name
    try:
        print(f"[{i}/{len(markdown_files)}] Converting {md_name}...", end=" ")
        pdf_path = convert_md_to_pdf(md_file, PDF_OUTPUT_DIR)
        converted_pdfs.append(pdf_path)
        print("✓")
    except Exception as e:
        print(f"✗ Error: {e}")
        failed_conversions.append({"file": md_name, "error": str(e)})

print(f"\n{'='*60}")
print(f"Conversion Summary:")
print(f"  Success: {len(converted_pdfs)}")
print(f"  Failed: {len(failed_conversions)}")
if failed_conversions:
    print("\nFailed conversions:")
    for item in failed_conversions:
        print(f"  - {item['file']}: {item['error']}")


Converting markdown files to PDF...

[1/14] Converting 01_company_manifesto.md... ✓
[2/14] Converting 02_active_personnel.md... ✓
[3/14] Converting 03_refund_return_policy.md... ✓
[4/14] Converting 04_employee_handbook.md... ✓
[5/14] Converting 05_vinyl_grading_guide.md... ✓
[6/14] Converting 06_shipping_packaging_policy.md... ✓
[7/14] Converting 07_trade_in_consignment_policy.md... ✓
[8/14] Converting 08_customer_privacy_policy.md... ✓
[9/14] Converting 09_store_events_programs.md... ✓
[10/14] Converting 10_inventory_sourcing_guide.md... ✓
[11/14] Converting 11_customer_service_standards.md... ✓
[12/14] Converting 12_financial_operations_guide.md... ✓
[13/14] Converting 13_emergency_security_procedures.md... ✓
[14/14] Converting 14_online_store_operations.md... ✓

Conversion Summary:
  Success: 14
  Failed: 0


## 7. Upload PDFs to Supabase Storage

In [6]:
def upload_to_supabase(pdf_path: str, bucket_name: str) -> dict:
    """
    Upload a PDF file to Supabase storage
    
    Args:
        pdf_path: Path to PDF file
        bucket_name: Name of Supabase bucket
    
    Returns:
        Dict with upload result information
    """
    pdf_file = Path(pdf_path)
    file_name = pdf_file.name
    
    try:
        # Read PDF file
        with open(pdf_path, 'rb') as f:
            file_data = f.read()
        
        # Upload to Supabase
        # Note: If file exists, it will be overwritten
        response = supabase.storage.from_(bucket_name).upload(
            file=file_data,
            path=file_name,
            file_options={"content-type": "application/pdf", "upsert": "true"}
        )
        
        # Get public URL
        public_url = supabase.storage.from_(bucket_name).get_public_url(file_name)
        
        return {
            "success": True,
            "file_name": file_name,
            "public_url": public_url,
            "size": len(file_data)
        }
    
    except Exception as e:
        return {
            "success": False,
            "file_name": file_name,
            "error": str(e)
        }


# Upload all converted PDFs
upload_results = []
failed_uploads = []

print("\nUploading PDFs to Supabase...\n")

for i, pdf_path in enumerate(converted_pdfs, 1):
    pdf_name = Path(pdf_path).name
    try:
        print(f"[{i}/{len(converted_pdfs)}] Uploading {pdf_name}...", end=" ")
        result = upload_to_supabase(pdf_path, BUCKET_NAME)
        
        if result["success"]:
            upload_results.append(result)
            size_kb = result["size"] / 1024
            print(f"✓ ({size_kb:.1f} KB)")
        else:
            failed_uploads.append(result)
            print(f"✗ Error: {result['error']}")
    
    except Exception as e:
        print(f"✗ Error: {e}")
        failed_uploads.append({"file_name": pdf_name, "error": str(e)})

print(f"\n{'='*60}")
print(f"Upload Summary:")
print(f"  Success: {len(upload_results)}")
print(f"  Failed: {len(failed_uploads)}")
if failed_uploads:
    print("\nFailed uploads:")
    for item in failed_uploads:
        print(f"  - {item['file_name']}: {item.get('error', 'Unknown error')}")


Uploading PDFs to Supabase...

[1/14] Uploading 01_company_manifesto.pdf... ✓ (183.1 KB)
[2/14] Uploading 02_active_personnel.pdf... ✓ (191.6 KB)
[3/14] Uploading 03_refund_return_policy.pdf... ✓ (203.6 KB)
[4/14] Uploading 04_employee_handbook.pdf... ✓ (227.0 KB)
[5/14] Uploading 05_vinyl_grading_guide.pdf... ✓ (278.2 KB)
[6/14] Uploading 06_shipping_packaging_policy.pdf... ✓ (616.0 KB)
[7/14] Uploading 07_trade_in_consignment_policy.pdf... ✓ (627.0 KB)
[8/14] Uploading 08_customer_privacy_policy.pdf... ✓ (616.8 KB)
[9/14] Uploading 09_store_events_programs.pdf... ✓ (230.9 KB)
[10/14] Uploading 10_inventory_sourcing_guide.pdf... ✓ (630.4 KB)
[11/14] Uploading 11_customer_service_standards.pdf... ✓ (289.7 KB)
[12/14] Uploading 12_financial_operations_guide.pdf... ✓ (689.5 KB)
[13/14] Uploading 13_emergency_security_procedures.pdf... ✓ (250.0 KB)
[14/14] Uploading 14_online_store_operations.pdf... ✓ (604.3 KB)

Upload Summary:
  Success: 14
  Failed: 0


## 8. Display Upload Results

In [7]:
import pandas as pd

if upload_results:
    # Create DataFrame for better visualization
    df = pd.DataFrame(upload_results)
    df['size_kb'] = (df['size'] / 1024).round(2)
    df = df[['file_name', 'size_kb', 'public_url']]
    
    print("\n" + "="*60)
    print("Successfully Uploaded Files:")
    print("="*60)
    print(df.to_string(index=False))
    
    # Save to CSV for reference
    csv_path = DOCUMENTS_DIR / f"upload_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    df.to_csv(csv_path, index=False)
    print(f"\n✓ Results saved to: {csv_path}")
else:
    print("\nNo files were successfully uploaded.")


Successfully Uploaded Files:
                           file_name  size_kb                                                                                                                  public_url
            01_company_manifesto.pdf   183.15             https://rkxkcdxmuddkiqdgoeep.supabase.co/storage/v1/object/public/enterprise-documents/01_company_manifesto.pdf
             02_active_personnel.pdf   191.63              https://rkxkcdxmuddkiqdgoeep.supabase.co/storage/v1/object/public/enterprise-documents/02_active_personnel.pdf
         03_refund_return_policy.pdf   203.57          https://rkxkcdxmuddkiqdgoeep.supabase.co/storage/v1/object/public/enterprise-documents/03_refund_return_policy.pdf
            04_employee_handbook.pdf   227.02             https://rkxkcdxmuddkiqdgoeep.supabase.co/storage/v1/object/public/enterprise-documents/04_employee_handbook.pdf
          05_vinyl_grading_guide.pdf   278.24           https://rkxkcdxmuddkiqdgoeep.supabase.co/storage/v1/object/publi

## 9. Optional: Clean Up Local PDF Files

In [None]:
# Uncomment the code below to delete local PDF files after successful upload

# cleanup = input("Do you want to delete local PDF files? (yes/no): ")

# if cleanup.lower() in ['yes', 'y']:
#     deleted_count = 0
#     for result in upload_results:
#         pdf_path = PDF_OUTPUT_DIR / result['file_name']
#         if pdf_path.exists():
#             pdf_path.unlink()
#             deleted_count += 1
#     
#     print(f"\n✓ Deleted {deleted_count} local PDF files")
#     
#     # Remove PDF directory if empty
#     if not list(PDF_OUTPUT_DIR.iterdir()):
#         PDF_OUTPUT_DIR.rmdir()
#         print(f"✓ Removed empty directory: {PDF_OUTPUT_DIR}")
# else:
#     print("\nLocal PDF files kept in:", PDF_OUTPUT_DIR)

print("\nLocal PDF files are in:", PDF_OUTPUT_DIR)
print("Uncomment the code above if you want to enable automatic cleanup.")

## 10. Verify Files in Supabase Bucket

In [8]:
# List all files in the bucket
try:
    files = supabase.storage.from_(BUCKET_NAME).list()
    
    print(f"\n{'='*60}")
    print(f"Files in Supabase bucket '{BUCKET_NAME}':")
    print(f"{'='*60}")
    
    if files:
        for i, file in enumerate(files, 1):
            file_name = file.get('name', 'Unknown')
            file_size = file.get('metadata', {}).get('size', 0)
            size_kb = file_size / 1024 if file_size else 0
            print(f"{i:2d}. {file_name:50s} ({size_kb:.1f} KB)")
        
        print(f"\nTotal files in bucket: {len(files)}")
    else:
        print("No files found in bucket.")
        
except Exception as e:
    print(f"Error listing bucket files: {e}")


Files in Supabase bucket 'enterprise-documents':
 1. 01_company_manifesto.pdf                           (183.1 KB)
 2. 02_active_personnel.pdf                            (191.6 KB)
 3. 03_refund_return_policy.pdf                        (203.6 KB)
 4. 04_employee_handbook.pdf                           (227.0 KB)
 5. 05_vinyl_grading_guide.pdf                         (278.2 KB)
 6. 06_shipping_packaging_policy.pdf                   (616.0 KB)
 7. 07_trade_in_consignment_policy.pdf                 (627.0 KB)
 8. 08_customer_privacy_policy.pdf                     (616.8 KB)
 9. 09_store_events_programs.pdf                       (230.9 KB)
10. 10_inventory_sourcing_guide.pdf                    (630.4 KB)
11. 11_customer_service_standards.pdf                  (289.7 KB)
12. 12_financial_operations_guide.pdf                  (689.5 KB)
13. 13_emergency_security_procedures.pdf               (250.0 KB)
14. 14_online_store_operations.pdf                     (604.3 KB)

Total files in bucket: 14

## 11. Final Summary

In [9]:
print("\n" + "="*60)
print("MIGRATION COMPLETE")
print("="*60)
print(f"\nMarkdown files found:     {len(markdown_files)}")
print(f"PDFs created:             {len(converted_pdfs)}")
print(f"PDFs uploaded:            {len(upload_results)}")
print(f"Failed conversions:       {len(failed_conversions)}")
print(f"Failed uploads:           {len(failed_uploads)}")
print(f"\nSupabase bucket:          {BUCKET_NAME}")
print(f"Local PDF directory:      {PDF_OUTPUT_DIR}")

if upload_results:
    total_size = sum(r['size'] for r in upload_results)
    print(f"\nTotal uploaded size:      {total_size / 1024 / 1024:.2f} MB")

print("\n" + "="*60)


MIGRATION COMPLETE

Markdown files found:     14
PDFs created:             14
PDFs uploaded:            14
Failed conversions:       0
Failed uploads:           0

Supabase bucket:          enterprise-documents
Local PDF directory:      /Users/leechin/Documents/enterprise-ai-powered-sys/enterprise_documents/pdfs

Total uploaded size:      5.51 MB

