# Generate Sample Contracts and Invoices

This notebook generates sample contracts and invoices for testing the Invoice Processing Agent.

**Usage:**
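- Run this notebook manually when you need to regenerate sample documents. Generation is skipped if `docs/contracts/` or `docs/invoices/` already contains files, so delete the existing files first.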
- The main notebook will automatically execute this if sample documents are missing

**Output:**
- Sample contracts in `docs/contracts/`
- Sample invoices in `docs/invoices/`


In [None]:
# Generate Sample Contracts and Invoices

from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from docx import Document
from docx.shared import Pt, Inches, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from datetime import datetime, timedelta
from pathlib import Path

# Output locations, relative to the current working directory:
# docs/contracts for contracts and docs/invoices for invoices.
data_dir = Path("docs")
contracts_dir, invoices_dir = (data_dir / sub for sub in ("contracts", "invoices"))

# Make sure both folders exist before any generator writes into them.
for _output_dir in (contracts_dir, invoices_dir):
    _output_dir.mkdir(parents=True, exist_ok=True)

print("Creating sample contracts and invoices (PDF + DOCX)...\n")


# Platform-specific font configuration
def setup_pdf_fonts():
    """Prepare fonts for PDF generation; intentionally does nothing.

    Helvetica ships with ReportLab and is available on every platform,
    so there is no font to register.
    """
    return None


setup_pdf_fonts()


# ========== CREATE PDF CONTRACT ==========
def create_sample_contract_pdf(filename, net_days=30, client_name="ABC Corporation", vendor_name="XYZ Services Inc.",
                               program_code=None, po_number=None, start_date=None, end_date=None):
    """Create a sample service contract PDF (cross-platform).

    The file is written to ``contracts_dir / filename`` as a single
    letter-size page.

    Args:
        filename: Name of the PDF file to create inside ``contracts_dir``.
        net_days: Number of days in the "Net N" payment-terms line.
        client_name: Client party name printed under "BETWEEN:".
        vendor_name: Vendor party name printed under "AND:".
        program_code: Optional program code. When truthy it is mentioned in
            the services clause and gets its own "PROGRAM CODE" section.
        po_number: Optional purchase order number. When truthy a
            "PO NUMBER" section is added.
        start_date: Contract start (``datetime``); defaults to now.
        end_date: Contract end (``datetime``); defaults to ``start_date``
            plus 365 days.
    """
    filepath = contracts_dir / filename

    # Use provided dates or default to a one-year contract starting now.
    if start_date is None:
        start_date = datetime.now()
    if end_date is None:
        end_date = start_date + timedelta(days=365)

    signature_date = start_date.strftime('%m/%d/%Y')

    if program_code:
        services_line = (
            f"Vendor agrees to provide software development and consulting services for the {program_code} Program."
        )
    else:
        services_line = "Vendor agrees to provide software development and consulting services."

    lines = [
        f"This Service Agreement is entered into as of {start_date.strftime('%B %d, %Y')}",
        "",
        "BETWEEN:",
        f"{client_name} (Client)",
        "123 Business Street, New York, NY 10001",
        "",
        "AND:",
        f"{vendor_name} (Vendor)",
        "456 Service Avenue, Los Angeles, CA 90001",
        "",
        "1. SERVICES",
        services_line,
        "",
        "2. PAYMENT TERMS",
        f"- Payment terms: Net {net_days} days from invoice date",
        "- Invoices shall be submitted monthly",
        "- All invoices must include a valid Purchase Order (PO) number",
        f"- Contract Period: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}",
        "",
    ]

    # Optional sections are numbered with a running counter so the headings
    # stay sequential whichever of them are present (previously a PO-only
    # contract printed "4. PO NUMBER" followed by "3. INVOICE SUBMISSION...").
    section_num = 3
    if program_code:
        lines.extend([
            f"{section_num}. PROGRAM CODE",
            f"This contract is associated with Program Code: {program_code}",
            "",
        ])
        section_num += 1

    if po_number:
        lines.extend([
            f"{section_num}. PO NUMBER",
            f"All services under this agreement require a Purchase Order. The primary PO for this contract is {po_number}.",
            "",
        ])
        section_num += 1

    # Standard closing sections and signature block.
    lines.extend([
        f"{section_num}. INVOICE SUBMISSION REQUIREMENTS",
        "All invoices must include:",
        "- Valid PO number (format: PO-YYYY-####)",
        "- Detailed description of services",
        "- Invoice date and due date",
        "- Vendor tax identification number",
        "",
        f"{section_num + 1}. INVOICE APPROVAL PROCESS",
        "- All invoices must be approved by the Project Manager",
        "- Approval required within 5 business days",
        "- Finance department will process payment after approval",
        "",
        f"{section_num + 2}. PENALTIES AND FEES",
        "- Late payment penalty: 1.5% per month on overdue balance",
        "- Missing PO number: Automatic rejection",
        "",
        "",
        "___________________________        ___________________________",
        "Client Representative              Vendor Representative",
        f"Date: {signature_date}                    Date: {signature_date}",
    ])

    c = canvas.Canvas(str(filepath), pagesize=letter)
    width, height = letter

    # Title, centred near the top of the page.
    c.setFont("Helvetica-Bold", 16)
    c.drawCentredString(width / 2, height - 50, "SERVICE AGREEMENT")

    # Body text: one text object, one line per entry, starting below the title.
    c.setFont("Helvetica", 10)
    text_obj = c.beginText(50, height - 100)
    text_obj.setFont("Helvetica", 9)
    for line in lines:
        text_obj.textLine(line)
    c.drawText(text_obj)
    c.save()
    print(f"  PDF: {filename} (Net {net_days} terms, {vendor_name})")


# ========== CREATE DOCX CONTRACT ==========
def create_sample_contract_docx(filename, net_days=30, client_name="ABC Corporation", vendor_name="XYZ Services Inc.",
                                program_code=None, po_number=None, start_date=None, end_date=None):
    """Create a sample service contract in MS Word format (cross-platform).

    The document is saved to ``contracts_dir / filename``.

    Args:
        filename: Name of the DOCX file to create inside ``contracts_dir``.
        net_days: Number of days in the "Net N" payment-terms line.
        client_name: Client party name written under "BETWEEN:".
        vendor_name: Vendor party name written under "AND:".
        program_code: Optional program code. When truthy it is mentioned in
            the services clause and gets its own "PROGRAM CODE" section.
        po_number: Optional purchase order number. When truthy a
            "PO NUMBER" section is added.
        start_date: Contract start (``datetime``); defaults to now.
        end_date: Contract end (``datetime``); defaults to ``start_date``
            plus 365 days.
    """
    filepath = contracts_dir / filename
    doc = Document()

    # Use provided dates or default to a one-year contract starting now.
    if start_date is None:
        start_date = datetime.now()
    if end_date is None:
        end_date = start_date + timedelta(days=365)

    signature_date = start_date.strftime('%m/%d/%Y')

    def add_bold_paragraph(text):
        # Bold is a run-level property in python-docx; assigning ``.bold`` on
        # the paragraph object itself has no effect on the document.
        doc.add_paragraph().add_run(text).bold = True

    def add_section(heading, body_lines):
        doc.add_heading(heading, level=1)
        for body_line in body_lines:
            doc.add_paragraph(body_line)

    # Title
    title = doc.add_heading("SERVICE AGREEMENT", level=0)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Date
    date_para = doc.add_paragraph(
        f"This Service Agreement is entered into as of {start_date.strftime('%B %d, %Y')}"
    )
    date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    doc.add_paragraph()

    # Parties
    add_bold_paragraph("BETWEEN:")
    doc.add_paragraph(f"{client_name} (Client)")
    doc.add_paragraph("123 Business Street, New York, NY 10001")

    doc.add_paragraph()
    add_bold_paragraph("AND:")
    doc.add_paragraph(f"{vendor_name} (Vendor)")
    doc.add_paragraph("456 Service Avenue, Los Angeles, CA 90001")

    doc.add_paragraph()

    # Section 1: Services
    if program_code:
        services_text = (
            f"Vendor agrees to provide software development and consulting services for the {program_code} Program."
        )
    else:
        services_text = "Vendor agrees to provide software development and consulting services."
    add_section("1. SERVICES", [services_text])

    # Section 2: Payment Terms
    add_section("2. PAYMENT TERMS", [
        f"- Payment terms: Net {net_days} days from invoice date",
        "- Invoices shall be submitted monthly",
        "- All invoices must include a valid Purchase Order (PO) number",
        f"- Contract Period: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}",
    ])

    # Optional sections share a running counter so numbering stays sequential.
    section_num = 3
    if program_code:
        add_section(f"{section_num}. PROGRAM CODE", [
            f"This contract is associated with Program Code: {program_code}",
        ])
        section_num += 1

    if po_number:
        add_section(f"{section_num}. PO NUMBER", [
            f"All services under this agreement require a Purchase Order. The primary PO for this contract is {po_number}.",
        ])
        section_num += 1

    add_section(f"{section_num}. INVOICE SUBMISSION REQUIREMENTS", [
        "All invoices must include:",
        "- Valid PO number (format: PO-YYYY-####)",
        "- Detailed description of services",
        "- Invoice date and due date",
        "- Vendor tax identification number",
    ])

    add_section(f"{section_num + 1}. INVOICE APPROVAL PROCESS", [
        "- All invoices must be approved by the Project Manager",
        "- Approval required within 5 business days",
        "- Finance department will process payment after approval",
    ])

    add_section(f"{section_num + 2}. PENALTIES AND FEES", [
        "- Late payment penalty: 1.5% per month on overdue balance",
        "- Missing PO number: Automatic rejection",
    ])

    doc.add_paragraph()
    doc.add_paragraph()

    # Signatures
    doc.add_paragraph("___________________________        ___________________________")
    doc.add_paragraph("Client Representative              Vendor Representative")
    doc.add_paragraph(
        f"Date: {signature_date}                    Date: {signature_date}"
    )

    doc.save(str(filepath))
    print(f"  DOCX: {filename} (Net {net_days} terms, {vendor_name})")


# ========== CREATE PDF INVOICE (FIXED FORMATTING) ==========
def create_sample_invoice_pdf(
    filename, invoice_num, po_num, days_offset=0, amount=5000.00, vendor_name="XYZ Services Inc.",
    program_code=None, client_name="ABC Corporation"
):
    """Create a sample one-page invoice PDF (cross-platform).

    The file is written to ``invoices_dir / filename``.

    Args:
        filename: Name of the PDF file to create inside ``invoices_dir``.
        invoice_num: Invoice number shown in the header.
        po_num: Purchase order number; when falsy the PO row is omitted.
        days_offset: Days added to the current date to obtain the invoice
            date (negative values place it in the past). The due date is
            always 30 days after the invoice date.
        amount: Amount shown on the single line item and as the total. The
            printed hours ("40") and rate ("$125.00") are fixed text and
            are not derived from ``amount``.
        vendor_name: Vendor name printed at the top left.
        program_code: Optional program code appended to the line-item
            description.
        client_name: Name printed under "Bill To:".
    """
    filepath = invoices_dir / filename
    c = canvas.Canvas(str(filepath), pagesize=letter)
    _page_width, height = letter

    invoice_date = datetime.now() + timedelta(days=days_offset)
    due_date = invoice_date + timedelta(days=30)
    amount_text = f"${amount:,.2f}"

    label_x, value_x, label_gap = 380, 460, 6

    def draw_field(label, value, y_pos):
        """Draw a bold label and its value on one header row."""
        c.setFont("Helvetica-Bold", 10)
        c.drawString(label_x, y_pos, label)
        # Short labels keep the value in the fixed column; a label wider than
        # the column (e.g. "Purchase Order Number:") pushes the value right
        # so the two strings never overprint each other.
        label_end = label_x + c.stringWidth(label, "Helvetica-Bold", 10) + label_gap
        c.setFont("Helvetica", 10)
        c.drawString(max(value_x, label_end), y_pos, value)

    # Header
    c.setFont("Helvetica-Bold", 20)
    c.drawString(50, height - 50, "INVOICE")

    # Vendor info (left side)
    c.setFont("Helvetica-Bold", 12)
    c.drawString(50, height - 100, vendor_name)
    c.setFont("Helvetica", 10)
    c.drawString(50, height - 115, "456 Service Avenue")
    c.drawString(50, height - 130, "Los Angeles, CA 90001")
    c.drawString(50, height - 145, "Tax ID: 12-3456789")

    # Invoice details (right side)
    draw_field("Invoice #:", f"{invoice_num}", height - 100)
    draw_field("Invoice Date:", invoice_date.strftime('%m/%d/%Y'), height - 115)
    draw_field("Due Date:", due_date.strftime('%m/%d/%Y'), height - 130)
    if po_num:
        draw_field("Purchase Order Number:", f"{po_num}", height - 145)

    # Bill To
    c.setFont("Helvetica-Bold", 10)
    c.drawString(50, height - 180, "Bill To:")
    c.setFont("Helvetica", 10)
    c.drawString(50, height - 195, client_name)
    c.drawString(50, height - 210, "123 Business Street")
    c.drawString(50, height - 225, "New York, NY 10001")

    # Line items header
    c.setFont("Helvetica-Bold", 10)
    y = height - 270
    c.drawString(50, y, "Description")
    c.drawString(350, y, "Hours")
    c.drawString(420, y, "Rate")
    c.drawString(500, y, "Amount")
    c.line(50, y - 5, 550, y - 5)

    # Single line item
    c.setFont("Helvetica", 10)
    y -= 25
    if program_code:
        description = f"Software Development Services - {program_code} Program"
    else:
        description = "Software Development Services"
    c.drawString(50, y, description)
    c.drawString(355, y, "40")
    c.drawString(415, y, "$125.00")
    c.drawString(500, y, amount_text)

    # Horizontal rule above the total
    y -= 30
    c.line(400, y, 550, y)

    # Total
    y -= 25
    c.setFont("Helvetica-Bold", 12)
    c.drawString(370, y, "Total Amount Due:")
    c.drawString(500, y, amount_text)

    # Payment terms
    y -= 50
    c.setFont("Helvetica", 9)
    c.drawString(50, y, "Payment Terms: Net 30 days")
    c.drawString(50, y - 15, "Late payments subject to 1.5% monthly penalty")

    c.save()
    print(f"  PDF: {filename} (Invoice #{invoice_num}, {amount_text}, {vendor_name})")


# ========== CREATE DOCX INVOICE ==========
def create_sample_invoice_docx(
    filename, invoice_num, po_num, days_offset=0, amount=5000.00, vendor_name="XYZ Services Inc.",
    program_code=None, client_name="ABC Corporation"
):
    """Create a sample invoice in MS Word format (cross-platform).

    The document is saved to ``invoices_dir / filename``.

    Args:
        filename: Name of the DOCX file to create inside ``invoices_dir``.
        invoice_num: Invoice number shown in the header table.
        po_num: Purchase order number; when falsy the header shows
            "PO Number: N/A" instead.
        days_offset: Days added to the current date to obtain the invoice
            date (negative values place it in the past). The due date is
            always 30 days after the invoice date.
        amount: Amount shown on the single line item and as the total. The
            hours ("40") and rate ("$125.00") cells are fixed text and are
            not derived from ``amount``.
        vendor_name: Vendor name written in the header table.
        program_code: Optional program code appended to the line-item
            description.
        client_name: Name written under "Bill To:".
    """
    filepath = invoices_dir / filename
    doc = Document()

    invoice_date = datetime.now() + timedelta(days=days_offset)
    due_date = invoice_date + timedelta(days=30)
    amount_text = f"${amount:,.2f}"

    def embolden(cell):
        """Mark every existing run in *cell* as bold."""
        for paragraph in cell.paragraphs:
            for run in paragraph.runs:
                run.font.bold = True

    # Header
    title = doc.add_heading("INVOICE", level=0)
    title.alignment = WD_ALIGN_PARAGRAPH.LEFT

    # Two-column table: vendor info on the left, invoice details on the right.
    # (The original also assigned ``allow_autofit``, which is not a
    # python-docx Table attribute and had no effect, so it is omitted.)
    header_table = doc.add_table(rows=4, cols=2)
    header_table.autofit = False

    vendor_column = [
        vendor_name,
        "456 Service Avenue",
        "Los Angeles, CA 90001",
        "Tax ID: 12-3456789",
    ]
    details_column = [
        f"Invoice #: {invoice_num}",
        f"Invoice Date: {invoice_date.strftime('%m/%d/%Y')}",
        f"Due Date: {due_date.strftime('%m/%d/%Y')}",
        f"Purchase Order Number: {po_num}" if po_num else "PO Number: N/A",
    ]
    for row_index, (left_text, right_text) in enumerate(zip(vendor_column, details_column)):
        header_table.cell(row_index, 0).text = left_text
        header_table.cell(row_index, 1).text = right_text

    doc.add_paragraph()

    # Bill To. Bold is a run-level property in python-docx; assigning
    # ``.bold`` on the paragraph object itself has no effect.
    doc.add_paragraph().add_run("Bill To:").bold = True
    doc.add_paragraph(client_name)
    doc.add_paragraph("123 Business Street")
    doc.add_paragraph("New York, NY 10001")

    doc.add_paragraph()

    # Line items table: header row, one data row, total row.
    items_table = doc.add_table(rows=3, cols=4)
    items_table.style = "Light Grid Accent 1"

    header_cells = items_table.rows[0].cells
    for cell, caption in zip(header_cells, ("Description", "Hours", "Rate", "Amount")):
        cell.text = caption
        embolden(cell)

    if program_code:
        description = f"Software Development Services - {program_code} Program"
    else:
        description = "Software Development Services"
    data_cells = items_table.rows[1].cells
    for cell, value in zip(data_cells, (description, "40", "$125.00", amount_text)):
        cell.text = value

    total_cells = items_table.rows[2].cells
    for cell, value in zip(total_cells, ("", "", "Total Amount Due:", amount_text)):
        cell.text = value
    embolden(total_cells[2])
    embolden(total_cells[3])

    doc.add_paragraph()
    doc.add_paragraph()

    # Payment terms
    doc.add_paragraph("Payment Terms: Net 30 days")
    doc.add_paragraph("Late payments subject to 1.5% monthly penalty")

    doc.save(str(filepath))
    print(f"  DOCX: {filename} (Invoice #{invoice_num}, {amount_text}, {vendor_name})")



# Check if sample documents already exist.
# Generation is skipped as soon as EITHER output directory contains at least
# one regular file (subdirectories are ignored by the is_file() filter).
# Note: Directories are created above, so they always exist; the .exists()
# checks only guard against iterdir() on a missing path.
# NOTE(review): because the test below uses `or`, a run where only one of the
# two directories holds files still skips generation for both - confirm this
# is the intended behavior.
contracts_has_files = contracts_dir.exists() and any(f.is_file() for f in contracts_dir.iterdir())
invoices_has_files = invoices_dir.exists() and any(f.is_file() for f in invoices_dir.iterdir())

if contracts_has_files or invoices_has_files:
    print("Sample documents already exist. Skipping generation.")
    print("To regenerate, delete files in docs/contracts/ and docs/invoices/ directories.")
else:
    # Both directories are empty: everything below in this branch generates documents.
    print("Generating sample documents...")


    # ========== GENERATE ALL DOCUMENTS ==========

    print("CONTRACTS (PDF + DOCX):")
    print("-" * 70)

    # One PDF and one DOCX contract for each payment term.
    for term_days in (30, 60):
        create_sample_contract_pdf(f"sample_contract_net{term_days}.pdf", net_days=term_days)
        create_sample_contract_docx(f"sample_contract_net{term_days}.docx", net_days=term_days)

    print("\nINVOICES (PDF + DOCX):")
    print("-" * 70)

    # (file stem, invoice number, PO number, day offset from today, amount)
    basic_invoice_specs = [
        ("invoice_001_valid", "INV-2025-001", "PO-2025-1234", -10, 5000.00),    # Valid invoice
        ("invoice_002_no_po", "INV-2025-002", "", -5, 3500.00),                 # Missing PO
        ("invoice_003_overdue", "INV-2025-003", "PO-2025-1235", -45, 7500.00),  # Overdue
        ("invoice_004_recent", "INV-2025-004", "PO-2025-1236", -2, 4200.00),    # Recent
    ]

    # Each spec is emitted as a PDF first, then as a DOCX.
    for doc_stem, inv_number, inv_po, inv_offset, inv_amount in basic_invoice_specs:
        create_sample_invoice_pdf(f"{doc_stem}.pdf", inv_number, inv_po, inv_offset, inv_amount)
        create_sample_invoice_docx(f"{doc_stem}.docx", inv_number, inv_po, inv_offset, inv_amount)


    # ========== GENERATE MATCHING CONTRACT/INVOICE PAIRS ==========

    print("\nMATCHING CONTRACT/INVOICE PAIRS:")
    print("-" * 70)

    # The banner icons are written as Unicode escapes so they survive a
    # re-encoding of this file (they previously printed as mojibake).

    # Pair 1: TechVendor Solutions + GlobalCorp Contract with BCH Program Code
    print("\n\U0001F4C4 Pair 1: TechVendor Solutions + GlobalCorp (BCH Program)")
    contract1_start = datetime(2022, 1, 1)
    contract1_end = datetime(2023, 12, 31)
    # The invoice generators take a day offset from "now", so the contract
    # midpoint is converted to an offset to place the invoice inside the period.
    contract1_midpoint = contract1_start + (contract1_end - contract1_start) / 2
    invoice1_date_offset = (contract1_midpoint - datetime.now()).days

    contract1_terms = dict(
        client_name="GlobalCorp Inc.",
        vendor_name="TechVendor Solutions",
        program_code="BCH",
        po_number="PO-2022-5678",
        start_date=contract1_start,
        end_date=contract1_end,
        net_days=30,
    )
    # NOTE(review): the contract names the vendor "TechVendor Solutions" while
    # the invoices use "TechVendor Solutions Inc." - confirm whether this
    # difference is deliberate (e.g. to exercise fuzzy vendor matching).
    invoice1_fields = dict(
        days_offset=invoice1_date_offset,  # Invoice date within contract period
        amount=15000.00,
        vendor_name="TechVendor Solutions Inc.",
        program_code="BCH",
        client_name="GlobalCorp Inc.",
    )
    create_sample_contract_pdf("contract_techvendor_bch_2022.pdf", **contract1_terms)
    create_sample_contract_docx("contract_techvendor_bch_2022.docx", **contract1_terms)
    create_sample_invoice_pdf(
        "invoice_techvendor_bch_po5678.pdf", "INV-TECH-001", "PO-2022-5678", **invoice1_fields
    )
    create_sample_invoice_docx(
        "invoice_techvendor_bch_po5678.docx", "INV-TECH-001", "PO-2022-5678", **invoice1_fields
    )

    # Pair 2: ACME Corp Contract
    print("\n\U0001F4C4 Pair 2: ACME Corp (ACME Program)")
    contract2_start = datetime(2024, 1, 1)
    contract2_end = datetime(2024, 12, 31)
    # Same midpoint-to-offset conversion as for pair 1.
    contract2_midpoint = contract2_start + (contract2_end - contract2_start) / 2
    invoice2_date_offset = (contract2_midpoint - datetime.now()).days

    contract2_terms = dict(
        client_name="Client Inc.",
        vendor_name="ACME Corp",
        program_code="ACME",
        po_number="PO-2024-9999",
        start_date=contract2_start,
        end_date=contract2_end,
        net_days=60,
    )
    invoice2_fields = dict(
        days_offset=invoice2_date_offset,  # Invoice date within contract period
        amount=25000.00,
        vendor_name="ACME Corp",
        program_code="ACME",
        client_name="Client Inc.",
    )
    create_sample_contract_pdf("contract_acme_2024.pdf", **contract2_terms)
    create_sample_contract_docx("contract_acme_2024.docx", **contract2_terms)
    create_sample_invoice_pdf(
        "invoice_acme_po9999.pdf", "INV-ACME-001", "PO-2024-9999", **invoice2_fields
    )
    create_sample_invoice_docx(
        "invoice_acme_po9999.docx", "INV-ACME-001", "PO-2024-9999", **invoice2_fields
    )

    # ========== GENERATE INVALID TEST INVOICES (for testing validation) ==========

    print("\nINVALID TEST INVOICES (for validation testing):")
    print("-" * 70)

    # Invoice date 30 days before the start of contract 1 (outside its period),
    # expressed as a day offset from "now" as the invoice generators expect.
    invalid_date_offset = (contract1_start - timedelta(days=30) - datetime.now()).days

    # Baseline fields matching the TechVendor/BCH invoices; each invalid case
    # below overrides only the field(s) it is meant to break.
    bch_invoice_fields = dict(
        days_offset=invoice1_date_offset,
        amount=15000.00,
        vendor_name="TechVendor Solutions Inc.",
        program_code="BCH",
        client_name="GlobalCorp Inc.",
    )

    # The banner icons are written as Unicode escapes (\u274c cross mark,
    # \u2705 check mark) so they survive a re-encoding of this file; they
    # previously printed as mojibake.
    # (banner, file stem, invoice number, PO number, keyword fields)
    validation_cases = [
        # Invalid Invoice 1: Missing PO number (should be REJECTED)
        ("\n\u274c Invalid Invoice 1: Missing PO Number",
         "invoice_techvendor_bch_no_po", "INV-TECH-002", "",
         {**bch_invoice_fields, "amount": 12000.00}),
        # Invalid Invoice 2: Wrong PO number (doesn't match any contract)
        ("\n\u274c Invalid Invoice 2: Wrong PO Number",
         "invoice_techvendor_wrong_po", "INV-TECH-003", "PO-2022-9999",
         {**bch_invoice_fields, "amount": 18000.00}),
        # Invalid Invoice 3: Valid PO, but date before the contract period
        ("\n\u274c Invalid Invoice 3: Date Outside Contract Period",
         "invoice_techvendor_out_of_range", "INV-TECH-004", "PO-2022-5678",
         {**bch_invoice_fields, "days_offset": invalid_date_offset}),
        # Invalid Invoice 4: Valid PO for the TechVendor contract, wrong vendor
        ("\n\u274c Invalid Invoice 4: Wrong Vendor Name",
         "invoice_wrong_vendor", "INV-WRONG-001", "PO-2022-5678",
         {**bch_invoice_fields, "vendor_name": "Different Vendor Inc."}),
        # Invalid Invoice 5: Valid PO, wrong program code (should be BCH)
        ("\n\u274c Invalid Invoice 5: Wrong Program Code",
         "invoice_wrong_program", "INV-WRONG-002", "PO-2022-5678",
         {**bch_invoice_fields, "program_code": "XYZ"}),
        # Valid Invoice 6: Perfect match against the ACME contract (for comparison)
        ("\n\u2705 Valid Invoice 6: Perfect Match (for comparison)",
         "invoice_acme_valid", "INV-ACME-002", "PO-2024-9999",
         dict(
             days_offset=invoice2_date_offset,  # Within contract period
             amount=22000.00,
             vendor_name="ACME Corp",
             program_code="ACME",
             client_name="Client Inc.",
         )),
    ]

    # Each case prints its banner, then is emitted as a PDF and as a DOCX.
    for case_banner, case_stem, case_number, case_po, case_fields in validation_cases:
        print(case_banner)
        create_sample_invoice_pdf(f"{case_stem}.pdf", case_number, case_po, **case_fields)
        create_sample_invoice_docx(f"{case_stem}.docx", case_number, case_po, **case_fields)



    # ========== CREATE IMAGE FILES FOR OCR TESTING ==========

    # Section title followed by a 70-character rule, each on its own line.
    print("\nIMAGES FOR OCR TESTING (PNG):", "-" * 70, sep="\n")



    # Platform-independent font loader
    def load_system_fonts(title_size=80, text_size=40):
        """Load a (title_font, text_font) pair in a platform-independent way.

        Well-known font locations on Windows, macOS and Linux are tried in
        order; the first pair whose files both exist and load is used.

        Args:
            title_size: Size for the title (bold where available) font.
            text_size: Size for the body text font.

        Returns:
            Tuple ``(title_font, text_font)`` of Pillow font objects. When no
            system font can be loaded, Pillow's default font is returned for
            both, at the requested sizes where the installed Pillow supports
            sizing the default font.
        """
        from PIL import ImageFont
        import os

        # Candidate (text font, title font) path pairs.
        font_options = [
            # Windows
            (r"C:\Windows\Fonts\arial.ttf", r"C:\Windows\Fonts\arialbd.ttf"),
            (r"C:\Windows\Fonts\calibri.ttf", r"C:\Windows\Fonts\calibrib.ttf"),
            # macOS
            ("/System/Library/Fonts/Helvetica.ttc", "/System/Library/Fonts/Helvetica.ttc"),
            (
                "/System/Library/Fonts/Supplemental/Arial.ttf",
                "/System/Library/Fonts/Supplemental/Arial Bold.ttf",
            ),
            # Linux
            (
                "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
                "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
            ),
            (
                "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
                "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
            ),
        ]

        for text_path, title_path in font_options:
            # Skip a pair unless both files are present.
            if not (os.path.isfile(text_path) and os.path.isfile(title_path)):
                continue
            try:
                title_font = ImageFont.truetype(title_path, title_size)
                text_font = ImageFont.truetype(text_path, text_size)
            except (OSError, ImportError):
                # Unreadable font file or FreeType support missing: try the
                # next candidate. Unlike a bare ``except``, this lets
                # KeyboardInterrupt and programming errors propagate.
                continue
            return title_font, text_font

        # Fallback to Pillow's default font. Newer Pillow releases accept a
        # ``size`` argument; without it the default is a small fixed-size font
        # that is hard to read on a 300 DPI page image.
        try:
            return (
                ImageFont.load_default(size=title_size),
                ImageFont.load_default(size=text_size),
            )
        except (TypeError, OSError, ImportError):
            # Older Pillow (no ``size`` parameter) or no FreeType support.
            return ImageFont.load_default(), ImageFont.load_default()



    def create_sample_contract_image(filename, net_days=30, client_name="ABC Corporation",
                                     vendor_name="XYZ Services Inc."):
        """Create a sample contract as an image file for OCR testing.

        The image is a white 2550x3300 pixel page (letter size at 300 DPI)
        saved to ``contracts_dir / filename``; the image format follows the
        file extension.

        Args:
            filename: Name of the image file to create inside ``contracts_dir``.
            net_days: Number of days in the "Net N" payment-terms line.
            client_name: Client party name; defaults to the name used by the
                other sample contract generators.
            vendor_name: Vendor party name; defaults to the name used by the
                other sample contract generators.
        """
        from PIL import Image, ImageDraw

        filepath = contracts_dir / filename

        # Letter size: 612x792 points = 2550x3300 pixels at 300 DPI.
        page_width, page_height = 2550, 3300
        img = Image.new("RGB", (page_width, page_height), color="white")
        draw = ImageDraw.Draw(img)

        # Load fonts (platform-independent)
        title_font, text_font = load_system_fonts(title_size=80, text_size=40)

        y = 200

        # Title, anchored by its middle at the horizontal centre of the page.
        draw.text(
            (page_width // 2, y), "SERVICE AGREEMENT", fill="black", font=title_font, anchor="mm"
        )
        y += 150

        # Contract text
        lines = [
            f"This Service Agreement is entered into as of {datetime.now().strftime('%B %d, %Y')}",
            "",
            "BETWEEN:",
            f"{client_name} (Client)",
            "123 Business Street, New York, NY 10001",
            "",
            "AND:",
            f"{vendor_name} (Vendor)",
            "456 Service Avenue, Los Angeles, CA 90001",
            "",
            "1. SERVICES",
            "Vendor agrees to provide software development services.",
            "",
            "2. PAYMENT TERMS",
            f"- Payment terms: Net {net_days} days from invoice date",
            "- Invoices shall be submitted monthly",
            "- All invoices must include a valid PO number",
            "- Late payments: 1.5% monthly penalty",
            "",
            "3. INVOICE SUBMISSION REQUIREMENTS",
            "All invoices must include:",
            "- Valid PO number (format: PO-YYYY-####)",
            "- Detailed description of services",
            "- Invoice date and due date",
            "- Vendor tax identification number",
            "",
            "4. APPROVAL PROCESS",
            "- Approved by Project Manager within 5 days",
            "- Finance processes payment after approval",
            "",
            "5. PENALTIES",
            "- Late payment: 1.5% per month",
            "- Missing PO: Automatic rejection",
        ]

        # One line every 60 pixels, left-aligned at x=200.
        for line in lines:
            draw.text((200, y), line, fill="black", font=text_font)
            y += 60

        img.save(str(filepath), dpi=(300, 300))
        print(f"  IMG: {filename} (Net {net_days} terms)")



    def create_sample_invoice_image(
        filename, invoice_num, po_num, days_offset=0, amount=5000.00, net_days=30
    ):
        """Create a sample invoice as an image file for OCR testing.

        Renders a 2550x3300 px white page (letter size at 300 DPI) containing
        vendor details, invoice metadata, a bill-to block, one line item, the
        total, and the payment terms, then saves it into ``invoices_dir``.

        Args:
            filename: Name of the image file to write inside ``invoices_dir``.
            invoice_num: Invoice number printed in the header.
            po_num: Purchase order number. The "PO Number" line is omitted
                when this is empty (or otherwise falsy).
            days_offset: Days added to the current date to obtain the invoice
                date; negative values back-date the invoice.
            amount: Invoice amount, shown on the line item and as the total.
            net_days: Payment term in days. Drives both the due date and the
                "Payment Terms" line. Defaults to 30.
        """
        from PIL import Image, ImageDraw

        filepath = invoices_dir / filename

        invoice_date = datetime.now() + timedelta(days=days_offset)
        due_date = invoice_date + timedelta(days=net_days)
        amount_text = f"${amount:,.2f}"

        # Create image (letter size at 300 DPI)
        img = Image.new("RGB", (2550, 3300), color="white")
        draw = ImageDraw.Draw(img)

        # Load fonts (platform-independent). The second call is made only for
        # its size-50 first font, which is used for headings and labels.
        title_font, text_font = load_system_fonts(title_size=100, text_size=40)
        bold_font, _ = load_system_fonts(title_size=50, text_size=40)

        def put(x, y, text, font=text_font):
            """Draw one run of black text with its top-left corner at (x, y)."""
            draw.text((x, y), text, fill="black", font=font)

        left = 200  # x position of the left-hand column

        # Title
        y = 150
        put(left, y, "INVOICE", title_font)
        y += 200

        # Vendor info
        put(left, y, "XYZ Services Inc.", bold_font)
        y += 60
        put(left, y, "456 Service Avenue")
        y += 50
        put(left, y, "Los Angeles, CA 90001")
        y += 50
        put(left, y, "Tax ID: 12-3456789")

        # Invoice details (right side), laid out independently of the left column
        right = 1700
        y2 = 350
        put(right, y2, f"Invoice #: {invoice_num}")
        y2 += 60
        put(right, y2, f"Invoice Date: {invoice_date.strftime('%m/%d/%Y')}")
        y2 += 60
        put(right, y2, f"Due Date: {due_date.strftime('%m/%d/%Y')}")
        y2 += 60
        if po_num:
            put(right, y2, f"PO Number: {po_num}")

        # Bill To
        y = 650
        put(left, y, "Bill To:", bold_font)
        y += 60
        put(left, y, "ABC Corporation")
        y += 50
        put(left, y, "123 Business Street")
        y += 50
        put(left, y, "New York, NY 10001")

        # Line items: header row with a rule underneath
        y = 900
        put(left, y, "Description", bold_font)
        put(1300, y, "Hours", bold_font)
        put(1600, y, "Rate", bold_font)
        put(1900, y, "Amount", bold_font)
        draw.line([(200, y + 50), (2300, y + 50)], fill="black", width=3)

        # NOTE(review): hours and rate are fixed sample values and are not
        # derived from `amount`; they only multiply out to the total for the
        # default amount of 5000.00.
        y += 100
        put(left, y, "Software Development Services")
        put(1330, y, "40")
        put(1580, y, "$125.00")
        put(1900, y, amount_text)

        # Total
        y += 150
        draw.line([(1500, y), (2300, y)], fill="black", width=3)
        y += 60
        put(1500, y, "Total Amount Due:", bold_font)
        put(1900, y, amount_text, bold_font)

        # Payment terms
        y += 150
        put(left, y, f"Payment Terms: Net {net_days} days")
        y += 60
        put(left, y, "Late payments subject to 1.5% monthly penalty")

        img.save(str(filepath), dpi=(300, 300))
        print(f"  IMG: {filename} (Invoice #{invoice_num}, {amount_text})")



    # Create image versions for OCR testing
    create_sample_contract_image("sample_contract_net30.png", net_days=30)

    # One tuple per OCR invoice:
    # (filename, invoice number, PO number, days offset, amount)
    ocr_invoice_specs = [
        ("invoice_005_ocr_valid.png", "INV-2025-005", "PO-2025-1237", -8, 6000.00),
        ("invoice_006_ocr_no_po.png", "INV-2025-006", "", -3, 4500.00),
    ]
    for spec in ocr_invoice_specs:
        create_sample_invoice_image(*spec)


# Summary: list every generated file, count files per format, and print the
# expected outcome of each test scenario.
banner = "=" * 70
print("\n" + banner)
print("Summary:")
print(banner)
contracts = list(contracts_dir.glob("*.*"))
invoices = list(invoices_dir.glob("*.*"))

# File listing per document category, sorted by path
for heading, location, found in (
    ("Contracts", "docs/contracts/", contracts),
    ("Invoices", "docs/invoices/", invoices),
):
    print(f"\n{heading}: {len(found)} files in {location}")
    for f in sorted(found):
        print(f"  - {f.name}")

# Per-format counts, obtained by globbing each directory by extension
print("\nDocument Formats:")
for label, directory, pattern in (
    ("PDF contracts", contracts_dir, "*.pdf"),
    ("DOCX contracts", contracts_dir, "*.docx"),
    ("PNG contracts", contracts_dir, "*.png"),
    ("PDF invoices", invoices_dir, "*.pdf"),
    ("DOCX invoices", invoices_dir, "*.docx"),
    ("PNG invoices (OCR)", invoices_dir, "*.png"),
):
    print(f"  - {label}: {len(list(directory.glob(pattern)))}")

# Expected result for each generated test document
print("\nTest scenarios:")
for scenario_line in (
    "  PDF/DOCX:",
    "    1. invoice_001_valid - Should be APPROVED",
    "    2. invoice_002_no_po - Should be REJECTED (missing PO)",
    "    3. invoice_003_overdue - Should be FLAGGED (overdue)",
    "    4. invoice_004_recent - Should be APPROVED",
    "  Matching Pairs:",
    "    5. invoice_techvendor_bch_po5678 - Should MATCH contract_techvendor_bch_2022",
    "    6. invoice_acme_po9999 - Should MATCH contract_acme_2024",
    "  Invalid Test Invoices:",
    "    7. invoice_techvendor_bch_no_po - Missing PO (should be REJECTED)",
    "    8. invoice_techvendor_wrong_po - Wrong PO (should be UNMATCHED)",
    "    9. invoice_techvendor_out_of_range - Date outside contract (should be UNMATCHED)",
    "    10. invoice_wrong_vendor - Wrong vendor (should be UNMATCHED)",
    "    11. invoice_wrong_program - Wrong program code (should be UNMATCHED)",
    "  PNG (OCR):",
    "    12. invoice_005_ocr_valid - Should be APPROVED (tests OCR)",
    "    13. invoice_006_ocr_no_po - Should be REJECTED (tests OCR + missing PO)",
):
    print(scenario_line)

print("\n[OK] All documents created successfully!")
print("[OK] OCR test files (PNG) ready for pytesseract testing!")


Creating sample contracts and invoices (PDF + DOCX)...

CONTRACTS (PDF + DOCX):
----------------------------------------------------------------------
  PDF: sample_contract_net30.pdf (Net 30 terms)
  DOCX: sample_contract_net30.docx (Net 30 terms)
  PDF: sample_contract_net60.pdf (Net 60 terms)
  DOCX: sample_contract_net60.docx (Net 60 terms)

INVOICES (PDF + DOCX):
----------------------------------------------------------------------
  PDF: invoice_001_valid.pdf (Invoice #INV-2025-001, $5,000.00)
  DOCX: invoice_001_valid.docx (Invoice #INV-2025-001, $5,000.00)
  PDF: invoice_002_no_po.pdf (Invoice #INV-2025-002, $3,500.00)
  DOCX: invoice_002_no_po.docx (Invoice #INV-2025-002, $3,500.00)
  PDF: invoice_003_overdue.pdf (Invoice #INV-2025-003, $7,500.00)
  DOCX: invoice_003_overdue.docx (Invoice #INV-2025-003, $7,500.00)
  PDF: invoice_004_recent.pdf (Invoice #INV-2025-004, $4,200.00)
  DOCX: invoice_004_recent.docx (Invoice #INV-2025-004, $4,200.00)

IMAGES FOR OCR TESTING (PNG):
-