In [30]:
# ==================== CELL 1: Imports ====================
import requests
import json
import time
from pprint import pprint
from typing import Dict, List

print("✓ Imports successful")

✓ Imports successful


In [31]:
# ==================== CELL 2: Configuration ====================

# API Configuration
# BASE_URL is the prefix every request in this notebook is built from;
# HEADERS is passed as the `headers=` argument of the POST requests.
BASE_URL = "http://localhost:8000"
HEADERS = {"Content-Type": "application/json"}

# Test URLs to index
# Sent as the "urls" list in the bulk-index request payload.
TEST_URLS = [
    "https://www.illinoislegalaid.org/legal-information/written-eviction-notices",
    "https://www.illinoislegalaid.org/legal-information/personal-service-written-eviction-notice",
    "https://www.illinoislegalaid.org/legal-information/understanding-eviction-tenant",
    "https://www.illinoislegalaid.org/legal-information/getting-domestic-violence-order-protection-common-questions",
    "https://www.illinoislegalaid.org/legal-information/getting-domestic-violence-order-protection"

]

# Test law change scenarios
# Both constants hold the same law name and are sent as "changed_law" in the
# flag requests; the WITH_CHANGE scenario additionally sends TEST_WHAT_CHANGED.
TEST_LAW_WITHOUT_CHANGE = "Illinois Domestic Violence Act"

TEST_LAW_WITH_CHANGE = "Illinois Domestic Violence Act"
# Free-text description of the amendment, sent unmodified as "what_changed"
# in the second flagging scenario. The literal starts and ends with a newline
# because of where the triple quotes sit.
TEST_WHAT_CHANGED = """
Section 103(14) was amended to expand the definition of 'abuse' to explicitly include 
coercive control, defined as a pattern of behavior that unreasonably interferes with a 
person's free will and personal liberty through isolation, degradation, exploitation, 
or regulation of everyday activities. Section 214(b)(1) now requires judges to 
specifically inquire about the presence of tracking devices or spyware during Order of 
Protection hearings and may order respondents to disable or remove such technology. 
Section 220 was modified to extend Emergency Orders of Protection from 14-21 days to 
21-30 days to provide survivors additional time to file for plenary orders. Additionally, 
Section 217(a)(5) now grants courts authority to order exclusive care, custody, and 
control of pets to survivors, recognizing the role of pet abuse in domestic violence 
situations. The amendments also clarify that survivors can request address confidentiality 
for children's schools in all Order of Protection filings.
"""

# Helper function to pretty print responses
def print_response(title: str, response: "requests.Response") -> None:
    """Print a banner, the HTTP status code, and the body of a response.

    The body is pretty-printed as parsed JSON when it decodes (nesting deeper
    than three levels is elided by ``pprint``); otherwise the raw text is
    printed instead.

    Args:
        title: Heading shown between the separator rules.
        response: Response to display. Only ``status_code``, ``json()`` and
            ``text`` are used.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"{title}")
    print(rule)
    print(f"Status Code: {response.status_code}")
    try:
        body = response.json()
    except ValueError:
        # A body that is not valid JSON surfaces as a ValueError subclass.
        # Catching only that keeps Ctrl-C and genuine bugs from being
        # silently turned into a raw-text dump.
        print(response.text)
    else:
        pprint(body, width=100, depth=3)
    print(f"{rule}\n")

# Echo the active configuration; sep="\n" places each item on its own line.
print(
    "✓ Configuration set",
    f"Base URL: {BASE_URL}",
    f"Test URLs: {len(TEST_URLS)} documents",
    sep="\n",
)

✓ Configuration set
Base URL: http://localhost:8000
Test URLs: 5 documents


In [32]:
# ==================== CELL 3: Health Check ====================
# Confirms the service answers on /health before any other cell talks to it.

# Bound the request so an unresponsive server fails this cell with a timeout
# error rather than leaving the notebook hanging indefinitely.
response = requests.get(f"{BASE_URL}/health", timeout=10)
print_response("HEALTH CHECK", response)

assert response.status_code == 200, "API is not healthy!"
print("✓ API is healthy and running")


HEALTH CHECK
Status Code: 200
{'stats': {'collection_name': 'legal_documents',
           'total_chunks': 285,
           'total_documents': 5,
           'total_flagged': 2},
 'status': 'healthy'}

✓ API is healthy and running


In [33]:
# ==================== System Reset ====================
# Calls DELETE /reset so the cells that follow start from an empty store.
print("⚠️  WARNING: This will DELETE ALL DATA!")
print("=" * 60)

# Bounded so a stuck server cannot hang the notebook.
response = requests.delete(f"{BASE_URL}/reset", timeout=30)
print_response("SYSTEM RESET", response)

# Stop here if the reset was rejected: later cells assume a clean slate and
# would otherwise index on top of whatever data is still present.
response.raise_for_status()


SYSTEM RESET
Status Code: 200
{'message': 'All data deleted successfully',
 'mongodb': {'deleted': {'documents': 5, 'flags': 2, 'jobs': 3}, 'status': 'success'},
 'status': 'success',
 'vector_db': {'message': 'Vector database cleared', 'status': 'success'}}



In [34]:
# ==================== CELL 4: Bulk Index Documents ====================
# Submits every test URL in one request; the service replies with a job id
# that a later cell polls.

payload = {
    "urls": TEST_URLS
}

response = requests.post(
    f"{BASE_URL}/index/bulk",
    headers=HEADERS,
    json=payload,
    timeout=30,  # fail instead of hanging if the server never answers
)

print_response("BULK INDEX REQUEST", response)

# A rejected request carries no job to track; fail here rather than
# announcing "job started: None" and polling a job that does not exist.
response.raise_for_status()

# Save job_id for tracking
index_job_id = response.json().get("job_id")
assert index_job_id, "Bulk index response did not include a job_id"
print(f"✓ Indexing job started: {index_job_id}")


BULK INDEX REQUEST
Status Code: 200
{'job_id': 'e931a67e-cf73-43f6-aa10-cdc6536709c3',
 'message': 'Indexing 5 documents',
 'status': 'pending'}

✓ Indexing job started: e931a67e-cf73-43f6-aa10-cdc6536709c3


In [24]:
# ==================== CELL 5: Monitor Indexing Job ====================

def wait_for_job(job_id: str, max_wait: int = 300, interval: int = 5,
                 request_timeout: float = 10):
    """Poll ``GET {BASE_URL}/job/{job_id}`` until the job finishes or time runs out.

    Args:
        job_id: Identifier of the job to monitor.
        max_wait: Overall budget in seconds for the whole wait.
        interval: Pause in seconds between polls.
        request_timeout: Per-request timeout in seconds, so that a single
            stalled poll cannot block far beyond ``max_wait``.

    Returns:
        The job's JSON body (a dict) once its ``status`` is ``"completed"`` or
        ``"failed"``; ``None`` if ``max_wait`` elapses first.
    """
    print(f"Monitoring job: {job_id}")

    # Monotonic clock: elapsed time stays correct if the wall clock is adjusted.
    start_time = time.monotonic()
    while time.monotonic() - start_time < max_wait:
        try:
            response = requests.get(f"{BASE_URL}/job/{job_id}", timeout=request_timeout)
        except requests.exceptions.Timeout:
            # A slow poll is not fatal; report it and try again next round.
            print(f"⚠️ Poll timed out | Elapsed: {int(time.monotonic() - start_time)}s")
        else:
            if response.status_code == 200:
                job = response.json()
                status = job.get("status")

                print(f"⏳ Status: {status} | Elapsed: {int(time.monotonic() - start_time)}s")

                if status == "completed":
                    print_response("JOB COMPLETED", response)
                    return job
                if status == "failed":
                    print_response("JOB FAILED", response)
                    return job
            else:
                # Surface unexpected answers (e.g. an unknown job id) instead
                # of polling silently until the deadline.
                print(
                    f"⚠️ Unexpected HTTP {response.status_code} | "
                    f"Elapsed: {int(time.monotonic() - start_time)}s"
                )

        # Never sleep past the deadline.
        remaining = max_wait - (time.monotonic() - start_time)
        time.sleep(max(0, min(interval, remaining)))

    print(f"⚠️ Job timeout after {max_wait}s")
    return None

# Wait for indexing to complete
index_result = wait_for_job(index_job_id)

if index_result and index_result.get("status") == "completed":
    # The job body reports a per-URL tally under "result" ("failed",
    # "failed_urls", "indexed"), so a "completed" status alone does not
    # prove that every document was indexed.
    index_summary = index_result.get("result") or {}
    if index_summary.get("failed", 0):
        print(f"⚠️ Indexing completed with {index_summary.get('failed')} failed document(s):")
        for failed_url in index_summary.get("failed_urls", []):
            print(f"   - {failed_url}")
    else:
        print("✓ All documents indexed successfully!")
else:
    print("⚠️ Indexing job did not complete successfully")

Monitoring job: 63595437-d412-4950-869a-4c2a7913cb2d
⏳ Status: completed | Elapsed: 0s

JOB COMPLETED
Status Code: 200
{'created_at': '2026-02-11T13:08:44.397000',
 'error': None,
 'job_id': '63595437-d412-4950-869a-4c2a7913cb2d',
 'params': {'urls': ['https://www.illinoislegalaid.org/legal-information/written-eviction-notices',
                     'https://www.illinoislegalaid.org/legal-information/personal-service-written-eviction-notice',
                     'https://www.illinoislegalaid.org/legal-information/understanding-eviction-tenant',
                     'https://www.illinoislegalaid.org/legal-information/getting-domestic-violence-order-protection-common-questions',
                     'https://www.illinoislegalaid.org/legal-information/getting-domestic-violence-order-protection']},
 'result': {'failed': 0, 'failed_urls': [], 'indexed': 5},
 'status': 'completed',
 'type': 'bulk_index',
 'updated_at': '2026-02-11T13:09:34.679000'}

✓ All documents indexed successfully!


In [25]:
# ==================== CELL 6: Check Indexed Documents ====================
# Reads the document and chunk counters that /health reports under "stats".

response = requests.get(f"{BASE_URL}/health", timeout=10)

# Show the raw response first, then fail on an error status before parsing,
# so a non-JSON error page is displayed rather than hidden behind a decode error.
print_response("SYSTEM STATS AFTER INDEXING", response)
response.raise_for_status()
health_data = response.json()

stats = health_data.get("stats", {})
total_docs = stats.get("total_documents", 0)
total_chunks = stats.get("total_chunks", 0)

print(f"✓ Total Documents: {total_docs}")
print(f"✓ Total Chunks: {total_chunks}")


SYSTEM STATS AFTER INDEXING
Status Code: 200
{'stats': {'collection_name': 'legal_documents',
           'total_chunks': 285,
           'total_documents': 5,
           'total_flagged': 0},
 'status': 'healthy'}

✓ Total Documents: 5
✓ Total Chunks: 285


In [36]:
# ==================== CELL 7: Flag Documents (WITHOUT Analysis) ====================
# Sends only the law name (no "what_changed"), then polls the resulting job.

print("TEST SCENARIO 1: Flag documents WITHOUT change analysis")
print("=" * 60)

payload = {
    "changed_law": TEST_LAW_WITHOUT_CHANGE,
    "similarity_threshold": 0.5
}

response = requests.post(
    f"{BASE_URL}/flag",
    headers=HEADERS,
    json=payload,
    timeout=30,  # fail instead of hanging if the server never answers
)

print_response("FLAG REQUEST (NO ANALYSIS)", response)

# Without an accepted request there is no job to poll.
response.raise_for_status()
flag_job_id_1 = response.json().get("job_id")
assert flag_job_id_1, "Flag response did not include a job_id"
print(f"✓ Flagging job started: {flag_job_id_1}")

# Wait for flagging to complete
print("\nWaiting for flagging job to complete...")
flag_result_1 = wait_for_job(flag_job_id_1, max_wait=60)

# wait_for_job also returns the job body when it ended as "failed", so the
# success summary is printed only for a completed job.
if flag_result_1 and flag_result_1.get("status") == "completed":
    # "result" may be present but null; treat that the same as missing.
    result = flag_result_1.get("result") or {}
    print(f"\n✓ Documents found: {result.get('total_found', 0)}")
    print(f"✓ Documents flagged: {result.get('flagged', 0)}")
    print(f"✓ Documents analyzed: {result.get('analyzed', 0)} (should be 0)")
elif flag_result_1:
    print(
        f"\n⚠️ Flagging job ended with status {flag_result_1.get('status')!r}: "
        f"{flag_result_1.get('error')}"
    )
else:
    print("\n⚠️ Flagging job did not finish within the wait window")

TEST SCENARIO 1: Flag documents WITHOUT change analysis

FLAG REQUEST (NO ANALYSIS)
Status Code: 200
{'job_id': '9de5b2eb-1559-40e5-a705-8d217357fc10',
 'message': 'Searching and flagging documents...',
 'status': 'processing',
 'total_documents_found': 0}

✓ Flagging job started: 9de5b2eb-1559-40e5-a705-8d217357fc10

Waiting for flagging job to complete...
Monitoring job: 9de5b2eb-1559-40e5-a705-8d217357fc10
⏳ Status: processing | Elapsed: 3s
⏳ Status: completed | Elapsed: 8s

JOB COMPLETED
Status Code: 200
{'created_at': '2026-02-11T13:37:26.510000',
 'error': None,
 'job_id': '9de5b2eb-1559-40e5-a705-8d217357fc10',
 'params': {'law': 'Illinois Domestic Violence Act', 'threshold': 0.5, 'what_changed': None},
 'result': {'analyzed': 0,
            'flagged': 1,
            'flagged_documents': [{...}],
            'message': 'Successfully flagged 1 documents. Use GET /flagged to see detailed '
                       'suggestions.',
            'total_found': 3,
            'validated'

In [None]:
# ==================== CELL 9: Flag Documents (WITH Analysis & Suggestions) ====================
# Sends the law name together with the change description, then polls the job.

print("TEST SCENARIO 2: Flag documents WITH detailed change analysis")
print("=" * 60)

payload = {
    "changed_law": TEST_LAW_WITH_CHANGE,
    "what_changed": TEST_WHAT_CHANGED,
    "similarity_threshold": 0.5
}

response = requests.post(
    f"{BASE_URL}/flag",
    headers=HEADERS,
    json=payload,
    timeout=30,  # fail instead of hanging if the server never answers
)

print_response("FLAG REQUEST (WITH ANALYSIS)", response)

# Without an accepted request there is no job to poll.
response.raise_for_status()
flag_job_id_2 = response.json().get("job_id")
assert flag_job_id_2, "Flag response did not include a job_id"
print(f"✓ Flagging job started: {flag_job_id_2}")

# Wait for flagging with analysis (takes longer due to LLM calls)
print("\nWaiting for analysis job to complete (this may take a while)...")
flag_result_2 = wait_for_job(flag_job_id_2, max_wait=600, interval=10)

# wait_for_job also returns the job body when it ended as "failed", so the
# success summary is printed only for a completed job.
if flag_result_2 and flag_result_2.get("status") == "completed":
    # "result" may be present but null; treat that the same as missing.
    result = flag_result_2.get("result") or {}
    print(f"\n✓ Documents found: {result.get('total_found', 0)}")
    print(f"✓ Documents flagged: {result.get('flagged', 0)}")
    print(f"✓ Documents analyzed: {result.get('analyzed', 0)}")
elif flag_result_2:
    print(
        f"\n⚠️ Analysis job ended with status {flag_result_2.get('status')!r}: "
        f"{flag_result_2.get('error')}"
    )
else:
    print("\n⚠️ Analysis job did not finish within the wait window")

TEST SCENARIO 2: Flag documents WITH detailed change analysis

FLAG REQUEST (WITH ANALYSIS)
Status Code: 200
{'job_id': '5964dd9b-f293-43b6-9646-6680a22279d6',
 'message': 'Searching and flagging documents...',
 'status': 'processing',
 'total_documents_found': 0}

✓ Flagging job started: 5964dd9b-f293-43b6-9646-6680a22279d6

Waiting for analysis job to complete (this may take a while)...
Monitoring job: 5964dd9b-f293-43b6-9646-6680a22279d6
