# Day 5 Checkpoint 1: Qdrant Client & Collection Setup

This notebook tests the Qdrant client wrapper and collection schema implementation.

## Test Coverage
1. ✅ Qdrant server connection
2. ✅ Health check functionality
3. ✅ Collection creation
4. ✅ Collection schema verification
5. ✅ Payload index creation
6. ✅ Collection recreation

---

In [None]:
# Setup
import sys
from pathlib import Path

# Add project root to path
# The imports further down need the directory that contains the `app` package
# (with its `core` sub-package) on sys.path. The kernel's working directory is
# not guaranteed to sit exactly one level below that directory, so probe a few
# candidates instead of assuming it. The parent of the cwd is tried first so
# that the result is unchanged whenever the original assumption held.
cwd = Path.cwd()
candidate_roots = [cwd.parent, cwd, *cwd.parents]
project_root = next(
    (root for root in candidate_roots if (root / "app" / "core").is_dir()),
    cwd.parent,  # nothing matched: keep the original behaviour
)
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

print(f"Project root: {project_root}")

# Import modules
# These are project-local imports. They are expected to resolve through the
# project root that the preceding lines put on sys.path, so they must run
# after that path setup.
from app.vector_db import (
    QdrantClientWrapper,
    get_qdrant_client,
    CollectionSchema,
    initialize_vector_db,
    setup_collection,
    verify_collection_schema,
)
# NOTE(review): `settings` is not referenced by any cell visible in this
# notebook — confirm whether the import is still needed.
from app.core.config import settings

# Reached only if every import above succeeded.
print("‚úÖ Imports successful")

## Test 1: Client Initialization & Health Check

In [None]:
# Construct the wrapper with no arguments and show how it renders itself
client = QdrantClientWrapper()
print(f"Client: {client}")

# Fetch the health report once and echo the fields this checkpoint cares about
health = client.health_check()
print("\nüè• Health Check:")
for label, key in (
    ("Status", "status"),
    ("Connected", "connected"),
    ("Host", "host"),
    ("Port", "port"),
):
    print(f"  {label}: {health[key]}")
# 'collections' is read with a default, unlike the required keys above
print(f"  Collections: {health.get('collections', [])}")

# Later cells talk to the server through `client`, so stop here if it is not usable
assert health['status'] == 'healthy', "‚ùå Qdrant health check failed"
assert health['connected'] is True, "‚ùå Qdrant not connected"

print("\n‚úÖ Test 1 passed: Client initialization and health check successful")

## Test 2: Collection Schema Information

In [None]:
# Ask CollectionSchema for its self-description
schema_info = CollectionSchema.get_schema_info()

# Headline settings
print("üìã Collection Schema:")
for label, key in (
    ("Collection Name", "collection_name"),
    ("Vector Size", "vector_size"),
    ("Distance Metric", "distance_metric"),
):
    print(f"  {label}: {schema_info[key]}")

# Payload fields and the type reported for each
print("\n  Payload Schema:")
for field_name, field_type in schema_info['payload_schema'].items():
    print(f"    - {field_name}: {field_type}")

# Payload indexes: one entry per indexed field
print("\n  Payload Indexes:")
for index_spec in schema_info['payload_indexes']:
    print(f"    - {index_spec['field']}: {index_spec['type']}")

# Pin the values the rest of the checkpoint relies on
assert schema_info['collection_name'] == 'research_articles'
assert schema_info['vector_size'] == 1536
assert schema_info['distance_metric'] == 'Cosine'

print("\n‚úÖ Test 2 passed: Schema information retrieved successfully")

## Test 3: Collection Creation

In [None]:
# Record whether the collection is already present before this test touches it
collection_name = CollectionSchema.COLLECTION_NAME
exists_before = client.collection_exists(collection_name)
print(f"Collection '{collection_name}' exists before creation: {exists_before}")

# Start from a clean slate: an existing collection is deleted first.
# NOTE(review): this removes the collection named by CollectionSchema itself,
# not a scratch copy — presumably any stored points go with it; confirm before
# running against a server that holds real data.
if exists_before:
    print("Deleting existing collection...")
    client.delete_collection(collection_name)
    exists_after_delete = client.collection_exists(collection_name)
    print(f"Collection exists after deletion: {exists_after_delete}")
    assert not exists_after_delete, "‚ùå Failed to delete collection"

# Create it through the schema helper, without asking for recreation
print(f"\nCreating collection '{collection_name}'...")
success = setup_collection(client, recreate=False)
assert success, "‚ùå Failed to create collection"

# The server should now report the collection as present
exists_after = client.collection_exists(collection_name)
print(f"Collection exists after creation: {exists_after}")
assert exists_after, "‚ùå Collection was not created"

print("\n‚úÖ Test 3 passed: Collection created successfully")

## Test 4: Collection Information & Schema Verification

In [None]:
# Pull the collection's metadata and echo each reported field
info = client.get_collection_info(collection_name)
print("üìä Collection Information:")
for label, key in (
    ("Name", "name"),
    ("Vector Size", "vector_size"),
    ("Points Count", "points_count"),
    ("Status", "status"),
    ("Optimizer Status", "optimizer_status"),
):
    print(f"  {label}: {info[key]}")

# A freshly created collection must have the expected dimension and no points
assert info['vector_size'] == 1536, "‚ùå Vector size mismatch"
assert info['points_count'] == 0, "‚ùå Collection should be empty"

# Run the schema verifier and report its findings
print("\nüîç Verifying Schema...")
verification = verify_collection_schema(client)
print(f"  Exists: {verification['exists']}")
print(f"  Schema Valid: {verification['schema_valid']}")
schema_errors = verification['errors']
if schema_errors:
    print(f"  Errors: {schema_errors}")

assert verification['exists'], "‚ùå Collection does not exist"
assert verification['schema_valid'], f"‚ùå Schema validation failed: {verification['errors']}"

print("\n‚úÖ Test 4 passed: Collection info and schema verification successful")

## Test 5: Collection Recreation

In [None]:
# Ask the wrapper to recreate the collection
print(f"Recreating collection '{collection_name}'...")
success = client.recreate_collection(collection_name)
assert success, "‚ùå Failed to recreate collection"

# Re-query the server: the collection should be back and hold no points
exists = client.collection_exists(collection_name)
info = client.get_collection_info(collection_name)

print(f"  Collection exists: {exists}")
points_count = info['points_count']
print(f"  Points count: {points_count}")

assert exists, "‚ùå Collection does not exist after recreation"
assert points_count == 0, "‚ùå Collection should be empty after recreation"

print("\n‚úÖ Test 5 passed: Collection recreation successful")

## Test 6: Full Initialization (End-to-End)

In [None]:
# Run the one-call initialization entry point, requesting recreation
print("üöÄ Running full vector DB initialization...\n")
success = initialize_vector_db(recreate=True)
assert success, "‚ùå Vector DB initialization failed"

# Snapshot health, schema verification and collection info afterwards
final_health = client.health_check()
final_verification = verify_collection_schema(client)
final_info = client.get_collection_info(collection_name)

print("\nüìä Final State:")
print(f"  Server Health: {final_health['status']}")
print(f"  Collections: {final_health.get('collections', [])}")
schema_ok = final_verification['schema_valid']
print(f"  Schema Valid: {schema_ok}")
print(f"  Vector Size: {final_info['vector_size']}")
print(f"  Points Count: {final_info['points_count']}")

# End state: healthy server, collection listed, schema valid
assert final_health['status'] == 'healthy'
assert collection_name in final_health['collections']
assert schema_ok

print("\n‚úÖ Test 6 passed: Full initialization successful")

## Test 7: Global Client Singleton

In [None]:
# Two consecutive calls to the accessor should hand back the very same object
global_client1 = get_qdrant_client()
global_client2 = get_qdrant_client()

print(f"Global Client 1: {global_client1}")
print(f"Global Client 2: {global_client2}")
same_instance = global_client1 is global_client2
print(f"\nAre they the same instance? {same_instance}")

assert same_instance, "‚ùå Global client is not a singleton"

# The shared instance must also be able to reach the server
health = global_client1.health_check()
assert health['status'] == 'healthy', "‚ùå Global client health check failed"

print("\n‚úÖ Test 7 passed: Global client singleton working correctly")

## Test 8: Context Manager

In [None]:
# Use the wrapper in a `with` block and run a health check inside it
print("Testing context manager...")
with QdrantClientWrapper() as ctx_client:
    health = ctx_client.health_check()
    ctx_status = health['status']
    print(f"  Health status: {ctx_status}")
    assert ctx_status == 'healthy'

# Reaching this line means the `with` block exited without raising
print("Context manager exited successfully")
print("\n‚úÖ Test 8 passed: Context manager working correctly")

## Summary

In [None]:
# Closing banner: a fixed-width rule, the headline, then one line per area covered
banner = "=" * 60

print("\n" + banner)
print("üéâ CHECKPOINT 1 - ALL TESTS PASSED! üéâ")
print(banner)

# The first entry carries the blank line that separates it from the banner
summary_lines = [
    "\n‚úÖ Qdrant Client Wrapper Implementation",
    "‚úÖ Collection Schema Definition",
    "‚úÖ Collection Creation & Management",
    "‚úÖ Health Checks & Verification",
    "‚úÖ Payload Indexes Created",
    "‚úÖ Full Initialization Pipeline",
    "‚úÖ Global Singleton Pattern",
    "‚úÖ Context Manager Support",
]
for summary_line in summary_lines:
    print(summary_line)

print("\nüöÄ Ready to proceed to Checkpoint 2: Embedding Pipeline")
print(banner)