In [3]:
import os
import sys
import json
import requests
from pprint import pprint
from pymongo import MongoClient

def test_mongodb_connection(uri="mongodb://db:27017/", db_name="chelle"):
    """Check that MongoDB is reachable and show a sample of ``raw_assets``.

    Pings the server, prints the number of documents in the ``raw_assets``
    collection, and pretty-prints up to three of them.

    Args:
        uri: MongoDB connection string to connect to.
        db_name: Name of the database holding the ``raw_assets`` collection.

    Returns:
        True when every step succeeded, False when any step raised (the
        error is printed rather than propagated).
    """
    client = None
    try:
        print("\n=== Testing MongoDB Connection ===")
        client = MongoClient(uri)
        db = client[db_name]

        # Test connection
        client.admin.command('ping')
        print("✓ MongoDB connection successful")

        # Check raw_assets collection
        raw_assets = db["raw_assets"]
        count = raw_assets.count_documents({})
        print(f"✓ Found {count} documents in raw_assets collection")

        # Get sample documents
        print("\nFirst few documents in raw_assets:")
        for doc in raw_assets.find().limit(3):
            # Convert ObjectId to string for printing
            doc['_id'] = str(doc['_id'])
            pprint(doc)
            print("-" * 80)

    except Exception as e:
        # Top-level boundary of this check: report any failure as a result.
        print(f"✗ MongoDB Error: {str(e)}")
        return False
    finally:
        # Always release the client's sockets and background threads,
        # whether the check passed or failed.
        if client is not None:
            client.close()

    return True

def test_api_endpoints(base_url="http://api:8000", timeout=10):
    """Exercise the ``/files`` and ``/files/debug`` endpoints of the API.

    Both endpoints are requested against the same ``base_url``. Status
    codes, headers (for ``/files``) and decoded JSON bodies are printed.

    Args:
        base_url: Scheme, host and port of the API service.
        timeout: Seconds to wait for each request before giving up, so an
            unresponsive service cannot hang the script.

    Returns:
        True only when both endpoints answered with a successful status and
        a JSON body; False on a request error, a non-OK status, an
        unparseable body, or any unexpected exception.
    """
    all_ok = True
    try:
        print("\n=== Testing API Endpoints ===")
        base_url = base_url.rstrip("/")

        # Test main files endpoint
        files_url = f"{base_url}/files"
        print(f"\nTesting GET {files_url}")
        response = requests.get(files_url, timeout=timeout)
        print(f"Status Code: {response.status_code}")
        print("Headers:")
        pprint(dict(response.headers))

        if response.ok:
            try:
                data = response.json()
            except ValueError:
                # Every JSON decode error raised by requests is a ValueError.
                all_ok = False
                print("\n✗ Could not parse JSON response")
                print("Raw Response:")
                print(response.text)
            else:
                print("\nResponse Data:")
                pprint(data)
                print(f"\n✓ Found {len(data)} files in response")
        else:
            all_ok = False
            print(f"\n✗ API request failed: {response.text}")

        # Test debug endpoint (same host as the main endpoint)
        debug_url = f"{base_url}/files/debug"
        print(f"\nTesting GET {debug_url}")
        debug_response = requests.get(debug_url, timeout=timeout)
        print(f"Status Code: {debug_response.status_code}")

        if debug_response.ok:
            try:
                debug_data = debug_response.json()
            except ValueError:
                all_ok = False
                print("\n✗ Could not parse JSON response")
                print("Raw Response:")
                print(debug_response.text)
            else:
                print("\nDebug Response:")
                pprint(debug_data)
        else:
            all_ok = False
            print(f"\n✗ Debug endpoint failed: {debug_response.text}")

    except requests.RequestException as e:
        print(f"\n✗ API Request Error: {str(e)}")
        return False
    except Exception as e:
        # Top-level boundary of this check: report any failure as a result.
        print(f"\n✗ Unexpected Error: {str(e)}")
        return False

    return all_ok

def main():
    """Run the MongoDB check, then the API check; exit with status 1 on the first failure."""
    print("=== API Testing Script ===")

    # Ordered stages: each check is paired with the message printed when it
    # fails. A failing stage stops the run, so later stages are not executed.
    stages = (
        (test_mongodb_connection, "\n✗ MongoDB tests failed - stopping"),
        (test_api_endpoints, "\n✗ API tests failed"),
    )
    for run_check, failure_message in stages:
        if run_check():
            continue
        print(failure_message)
        sys.exit(1)

    print("\n✓ All tests completed")

# Run the checks only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

=== API Testing Script ===

=== Testing MongoDB Connection ===
✓ MongoDB connection successful
✓ Found 1 documents in raw_assets collection

First few documents in raw_assets:
{'_id': '672e06bbae0bfef8f7d00fd4',
 'file_hash': 'f0d2591e65a7e60e4c0dc5e4656a95905e4e891215cf134f61ebf6e98cdbc1f7',
 'file_path': '/app/filestore/raw/f0d2591e65a7e60e4c0dc5e4656a95905e4e891215cf134f61ebf6e98cdbc1f7.docx',
 'file_size': 156537,
 'file_type': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
 'has_images': True,
 'has_tables': True,
 'image_count': 1,
 'job_ids': {'images': 'images_f0d2591e65a7e60e4c0dc5e4656a95905e4e891215cf134f61ebf6e98cdbc1f7',
             'refined': 'refined_f0d2591e65a7e60e4c0dc5e4656a95905e4e891215cf134f61ebf6e98cdbc1f7',
             'tables': 'tables_f0d2591e65a7e60e4c0dc5e4656a95905e4e891215cf134f61ebf6e98cdbc1f7'},
 'metadata': {'documentMetadata': {'accessibility': {'features': [{'coverage': 100,
                                               

SystemExit: 1