In [None]:
# Cell 3: Final Production-Ready Models with Annotated Types

from pydantic import BaseModel, Field, field_validator, model_validator, ConfigDict, StringConstraints
from typing import Optional, List, Dict, Any, Annotated, Union
from datetime import datetime, date, timedelta, timezone
from decimal import Decimal
import re
import unicodedata

# ===== ANNOTATED TYPE DEFINITIONS =====

# Transaction ID with all constraints.
# Digits are spelled [0-9] because \d also matches non-ASCII Unicode digits.
# NOTE(review): pydantic-core appears to check `pattern` before it applies
# `to_upper`, so the pattern accepts lower-case letters and relies on
# `to_upper=True` to normalise them - confirm against the pinned pydantic version.
TransactionId = Annotated[
    str,
    StringConstraints(
        min_length=16,
        max_length=16,
        pattern=r'^[Tt][Pp][A-Za-z]{2}[0-9]{12}$',
        strip_whitespace=True,
        to_upper=True
    ),
    Field(
        description="Transaction ID in format TP[A-Z]{2}[0-9]{12}",
        examples=["TPAB123456789012", "TPXY987654321098"]
    )
]

# Country and Currency codes (2-3 characters for countries, exactly 3 for currencies).
# The United Kingdom example is "GB": "UK" is not an ISO 3166-1 code.
CountryCode = Annotated[
    str,
    StringConstraints(min_length=2, max_length=3, strip_whitespace=True, to_upper=True),
    Field(description="ISO country code", examples=["US", "IN", "GB"])
]

CurrencyCode = Annotated[
    str,
    StringConstraints(min_length=3, max_length=3, strip_whitespace=True, to_upper=True),
    Field(description="ISO currency code", examples=["USD", "INR", "GBP"])
]

# Mandatory string fields: whitespace is stripped and at least one character is required
MandatoryString = Annotated[
    str,
    StringConstraints(min_length=1, strip_whitespace=True),
    Field(description="Non-empty string field")
]

# Amount field: strictly positive, capped at 10,000,000
# (the same value as QueryConfig.MAX_AMOUNT, which is defined further down)
Amount = Annotated[
    float,
    Field(gt=0, le=10_000_000, description="Transaction amount", examples=[50000.00, 100000.50])
]

# Date fields
DateFilter = Annotated[
    date,
    Field(description="Date in YYYY-MM-DD format", examples=["2025-06-08"])
]

# ===== CONFIGURATION =====

class QueryConfig:
    """Named limits and defaults for transaction queries."""

    # Ceilings
    MAX_BATCH_SIZE = 20
    MAX_AMOUNT = 10_000_000

    # Date-window settings, in days
    MAX_DAYS_BACK = 90
    DEFAULT_DAYS_BACK = 30

    # Default query timeout, in seconds
    DEFAULT_TIMEOUT_SECONDS = 30

# ===== VALIDATION FUNCTIONS =====

def deep_validate_transaction_id(v: str) -> str:
    """Normalize and strictly validate a transaction ID.

    The value is NFKC-normalized, stripped of all whitespace and upper-cased
    before it is checked, so ``"  tpab123456789012 "`` is returned as
    ``"TPAB123456789012"``.

    Args:
        v: Raw transaction ID.

    Returns:
        The normalized ID: ``TP`` + 2 ASCII upper-case letters + 12 ASCII digits.

    Raises:
        ValueError: If the value is empty, contains a quote, semicolon,
            SQL comment marker or NUL byte, or does not match the format.
    """
    if not v:
        raise ValueError("Transaction ID cannot be empty")

    # Fold compatibility forms (e.g. full-width letters and digits) to plain ones
    v = unicodedata.normalize('NFKC', v)

    # Remove ALL whitespace and convert to upper
    v = ''.join(v.split()).upper()

    # Security checks. The whitespace entries can no longer occur after the
    # split/join above; they are kept as defence in depth.
    dangerous_patterns = ("'", '"', ';', '--', '/*', '*/', '\x00', '\n', '\r', '\t')
    if any(token in v for token in dangerous_patterns):
        raise ValueError("Transaction ID contains potentially dangerous characters")

    # Final format check. [0-9] rather than \d: \d also matches non-ASCII
    # Unicode digits (e.g. Arabic-Indic), which NFKC does not fold to ASCII.
    if not re.fullmatch(r'TP[A-Z]{2}[0-9]{12}', v):
        raise ValueError(
            f"Invalid format. Expected: TP + 2 letters + 12 digits, got: '{v}'"
        )

    return v

# ===== REQUEST MODELS =====

class TransactionIdRequest(BaseModel):
    """Single transaction lookup request.

    Unless ``search_all_dates`` is set, the lookup is limited to
    ``start_date``..``end_date`` (by default the last
    ``QueryConfig.DEFAULT_DAYS_BACK`` days up to today) and ``start_date``
    may not be older than ``QueryConfig.MAX_DAYS_BACK`` days.
    """
    model_config = ConfigDict(validate_assignment=True)

    transaction_id: TransactionId
    start_date: Optional[DateFilter] = Field(
        default_factory=lambda: date.today() - timedelta(days=QueryConfig.DEFAULT_DAYS_BACK)
    )
    end_date: Optional[DateFilter] = Field(
        default_factory=date.today
    )
    search_all_dates: Annotated[
        bool,
        Field(
            default=False,
            description="Override date range filter (impacts performance)",
            examples=[False, True]
        )
    ]

    @field_validator('transaction_id')
    @classmethod
    def extra_validation(cls, v: str) -> str:
        """Apply the shared normalisation and security checks to the ID."""
        return deep_validate_transaction_id(v)

    @model_validator(mode='after')
    def validate_request_logic(self):
        """Validate the date window unless ``search_all_dates`` overrides it.

        Raises:
            ValueError: If a date is missing, the range is inverted, or
                ``start_date`` is older than ``QueryConfig.MAX_DAYS_BACK`` days.
        """
        if self.search_all_dates:
            return self

        # An explicit None would otherwise raise TypeError on the comparison
        # below instead of a validation error.
        if self.start_date is None or self.end_date is None:
            raise ValueError(
                "start_date and end_date are required unless search_all_dates=true"
            )

        # Date range validation
        if self.start_date > self.end_date:
            raise ValueError("start_date cannot be after end_date")

        # Prevent too old queries
        max_allowed_start = date.today() - timedelta(days=QueryConfig.MAX_DAYS_BACK)
        if self.start_date < max_allowed_start:
            raise ValueError(
                f"start_date older than {QueryConfig.MAX_DAYS_BACK} days. "
                f"Set search_all_dates=true to override."
            )

        return self

class BatchTransactionRequest(BaseModel):
    """Batch transaction lookup request.

    Accepts 1..``QueryConfig.MAX_BATCH_SIZE`` unique transaction IDs. Unless
    ``search_all_dates`` is set, the same date-window rules as for a single
    lookup apply.
    """
    model_config = ConfigDict(validate_assignment=True)

    transaction_ids: Annotated[
        List[TransactionId],
        Field(
            min_length=1,
            max_length=QueryConfig.MAX_BATCH_SIZE,
            description=f"List of transaction IDs (max {QueryConfig.MAX_BATCH_SIZE})",
            examples=[["TPAB123456789012", "TPCD234567890123"]]
        )
    ]
    start_date: Optional[DateFilter] = Field(
        default_factory=lambda: date.today() - timedelta(days=QueryConfig.DEFAULT_DAYS_BACK)
    )
    end_date: Optional[DateFilter] = Field(
        default_factory=date.today
    )
    search_all_dates: bool = Field(default=False)
    include_warnings: Annotated[
        bool,
        Field(default=True, description="Include data quality warnings")
    ]
    timeout_seconds: Annotated[
        int,
        Field(
            default=QueryConfig.DEFAULT_TIMEOUT_SECONDS,
            ge=5,
            le=60,
            description="Query timeout in seconds"
        )
    ]

    @field_validator('transaction_ids')
    @classmethod
    def validate_batch_ids(cls, v: List[str]) -> List[str]:
        """Validate and normalize all IDs, rejecting duplicates.

        Raises:
            ValueError: If any ID is invalid (reported with its 1-based
                position) or the normalized list contains duplicates.
        """
        validated = []
        for position, txn_id in enumerate(v, 1):
            try:
                validated.append(deep_validate_transaction_id(txn_id))
            except ValueError as e:
                raise ValueError(f"Invalid ID at position {position}: {e}") from e

        # Check for duplicates AFTER normalization, in a single pass
        seen = set()
        duplicates = set()
        for txn_id in validated:
            if txn_id in seen:
                duplicates.add(txn_id)
            seen.add(txn_id)
        if duplicates:
            raise ValueError(f"Duplicate IDs found: {duplicates}")

        return validated

    @model_validator(mode='after')
    def validate_date_window(self):
        """Apply the single-lookup date-window rules to batch requests.

        Raises:
            ValueError: If a date is missing, the range is inverted, or
                ``start_date`` is older than ``QueryConfig.MAX_DAYS_BACK`` days.
        """
        if self.search_all_dates:
            return self

        if self.start_date is None or self.end_date is None:
            raise ValueError(
                "start_date and end_date are required unless search_all_dates=true"
            )

        if self.start_date > self.end_date:
            raise ValueError("start_date cannot be after end_date")

        max_allowed_start = date.today() - timedelta(days=QueryConfig.MAX_DAYS_BACK)
        if self.start_date < max_allowed_start:
            raise ValueError(
                f"start_date older than {QueryConfig.MAX_DAYS_BACK} days. "
                f"Set search_all_dates=true to override."
            )

        return self

# ===== RESPONSE MODELS =====

class TransactionResponse(BaseModel):
    """Validated view of one transaction row, plus data-quality warnings.

    Several fields are populated through aliases (``hub_transaction_id``,
    ``modified_on``, ``transaction_date_time_local``); field names are
    accepted as well because ``populate_by_name`` is enabled.
    """
    model_config = ConfigDict(
        populate_by_name=True,
        str_strip_whitespace=True,
        validate_assignment=True
    )

    # --- Required fields ---
    transaction_id: Annotated[str, Field(alias="hub_transaction_id")]
    response_code: MandatoryString
    response_message: MandatoryString

    # --- Required only for successful transactions (reported as a warning) ---
    credited_on: Annotated[
        Optional[datetime],
        Field(
            None,
            alias="modified_on",
            description="Credit timestamp (mandatory for success)",
            examples=["2025-05-24T10:35:00Z"]
        )
    ]

    # --- Optional fields ---
    created_on: Annotated[
        Optional[datetime],
        Field(None, alias="transaction_date_time_local")
    ]
    source_country_code: Optional[CountryCode] = None
    destination_country_code: Optional[CountryCode] = None
    source_currency_code: Optional[CurrencyCode] = None
    destination_currency_code: Optional[CurrencyCode] = None
    total_amount_destination: Optional[Amount] = None
    sender_name: Annotated[
        Optional[str],
        Field(None, max_length=500)
    ]
    receiver_name: Annotated[
        Optional[str],
        Field(None, max_length=500)
    ]
    source_partner_id: Optional[str] = None
    dest_partner_id: Optional[str] = None
    source_partner_name: Optional[str] = None
    dest_partner_name: Optional[str] = None

    # Warnings collected by final_business_validation
    _warnings: List[str] = []

    @field_validator('transaction_id')
    @classmethod
    def validate_txn_id_response(cls, v: str) -> str:
        """Run the shared transaction-ID validation on the stored ID."""
        return deep_validate_transaction_id(v)

    @field_validator('response_code', 'response_message')
    @classmethod
    def ensure_not_empty(cls, v: Optional[str]) -> str:
        """Return the stripped value; reject missing or blank strings."""
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("Cannot be empty or null")
        return stripped

    @field_validator('total_amount_destination')
    @classmethod
    def validate_amount_field(cls, v: Optional[Union[float, Decimal]]) -> Optional[float]:
        """Range-check the amount and round it to two decimal places."""
        if v is None:
            return None

        amount = v
        if isinstance(amount, Decimal):
            amount = float(amount)

        if amount <= 0:
            raise ValueError("Amount must be positive")
        if amount > QueryConfig.MAX_AMOUNT:
            raise ValueError(f"Amount exceeds maximum: {QueryConfig.MAX_AMOUNT}")

        return round(amount, 2)

    @field_validator('created_on', 'credited_on')
    @classmethod
    def validate_datetime_fields(cls, v: Optional[datetime]) -> Optional[datetime]:
        """Reject implausible years and tag naive datetimes as UTC."""
        if v is None:
            return None

        latest_year = date.today().year + 1
        if not (2020 <= v.year <= latest_year):
            raise ValueError(f"Unrealistic date: {v}")

        # Naive values get a UTC tzinfo attached; aware values pass through
        return v if v.tzinfo is not None else v.replace(tzinfo=timezone.utc)

    @model_validator(mode='after')
    def final_business_validation(self):
        """Collect business-rule warnings; never rejects the record."""
        message = self.response_message
        notes = []

        # credited_on must not precede created_on
        if self.created_on and self.credited_on and self.credited_on < self.created_on:
            notes.append("credited_on is before created_on")

        # A successful remit should carry its credit timestamp
        if not self.credited_on and "Remit Success" in message:
            notes.append("Success transaction missing credited_on")

        # Failed/Delayed messages shorter than 15 characters carry no detail
        # ("Remit Failed" alone is 12 characters)
        failed_or_delayed = "Remit Failed" in message or "Remit Delayed" in message
        if failed_or_delayed and len(message) < 15:
            notes.append("Failed/Delayed transaction has generic message")

        # Wording that suggests the transaction has not settled yet
        lowered = message.lower()
        for indicator in ('pending', 'processing', 'in progress', 'initiated'):
            if indicator in lowered:
                notes.append("Transaction may still be in progress")
                break

        self._warnings = notes
        return self

    @property
    def has_warnings(self) -> bool:
        """True when at least one warning was recorded."""
        return bool(self._warnings)

    @property
    def warnings(self) -> List[str]:
        """A copy of the recorded warnings."""
        return list(self._warnings)

# ===== API RESPONSE WRAPPERS =====

class SingleTransactionResponse(BaseModel):
    """Envelope returned for a single-transaction lookup."""
    # Outcome flag plus either the transaction or an error message
    success: bool
    data: Optional[TransactionResponse] = None
    error: Optional[str] = None
    # Required, non-negative duration in milliseconds
    processing_time_ms: float = Field(ge=0)
    # Free-form details about how the query was executed
    query_metadata: Dict[str, Any] = Field(
        examples=[{"date_range": {"start": "2025-05-08", "end": "2025-06-08"}}],
        default_factory=dict
    )

class BatchTransactionResponse(BaseModel):
    """Envelope returned for a batch lookup; results are keyed by transaction ID."""
    success: bool
    # Required counters, e.g. requested / found / not_found / errors
    summary: Dict[str, int] = Field(
        examples=[{"requested": 5, "found": 3, "not_found": 2, "errors": 0}]
    )
    # Per-transaction results, error messages and warnings
    data: Dict[str, TransactionResponse] = Field(default_factory=dict)
    errors: Dict[str, str] = Field(default_factory=dict)
    warnings: Dict[str, List[str]] = Field(
        description="Per-transaction warnings",
        default_factory=dict
    )
    # Required, non-negative duration in milliseconds
    processing_time_ms: float = Field(ge=0)
    query_metadata: Dict[str, Any] = Field(default_factory=dict)

# ===== FINAL VALIDATION TESTS =====

print("Running comprehensive validation tests...")

# (input, expected to validate?, description)
tests = [
    # Valid cases
    ("TPAB123456789012", True, "Valid ID"),
    ("tpab123456789012", True, "Lowercase conversion"),
    ("  TPAB123456789012  ", True, "Whitespace handling"),

    # Invalid cases
    ("TPAB12345678901", False, "Too short"),
    ("TPAB1234567890123", False, "Too long"),
    ("ABCD123456789012", False, "Wrong prefix"),
    ("TP12123456789012", False, "Numbers in letter position"),
    ("TPAB12345678901A", False, "Letter in number position"),
    ("TP'; DROP TABLE--", False, "SQL injection attempt"),
    ("ΤPAB123456789012", False, "Unicode lookalike"),
    ("", False, "Empty string"),
    ("TP\x00AB123456789012", False, "Null byte injection"),
]

# Count mismatches so the closing banner reflects the real outcome
failures = 0
for test_id, should_pass, description in tests:
    try:
        validated = deep_validate_transaction_id(test_id)
    except ValueError as e:
        if should_pass:
            failures += 1
            print(f"❌ {description}: Should have passed but failed - {e}")
        else:
            print(f"✅ {description}: Correctly rejected - {e}")
    else:
        if should_pass:
            print(f"✅ {description}: '{test_id}' → '{validated}'")
        else:
            failures += 1
            print(f"❌ {description}: Should have failed but passed!")

if failures:
    print(f"\n❌ {failures} validation test(s) failed - models are NOT ready for production use")
else:
    print("\n✅ Final models ready for production use!")

In [None]:
# Cell 4: Database Connection and Query Helpers

import time
from typing import Dict, List, Optional, Any, Tuple
from sqlalchemy import text, create_engine, pool
from sqlalchemy.engine import Engine
from contextlib import contextmanager
import logging

# Configure logging at INFO level and create this module's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Pool and engine settings read by create_db_engine
DATABASE_CONFIG = dict(
    pool_size=5,
    max_overflow=10,
    pool_timeout=30,
    pool_recycle=3600,  # Recycle connections after 1 hour
    echo=False,  # Set to True for SQL debugging
)

# Create database engine with connection pooling
def create_db_engine(database_url: str) -> Engine:
    """Build a QueuePool-backed SQLAlchemy engine configured from DATABASE_CONFIG."""
    option_names = ("pool_size", "max_overflow", "pool_timeout", "pool_recycle", "echo")
    engine_options = {name: DATABASE_CONFIG[name] for name in option_names}
    return create_engine(database_url, poolclass=pool.QueuePool, **engine_options)

# Module-level engine used by get_db_connection(); it stays None until
# test_database_connection() assigns it.
engine: Optional[Engine] = None

# Database connection context manager
@contextmanager
def get_db_connection():
    """Yield a connection from the module-level engine and always close it.

    Yields:
        An open connection obtained from ``engine.connect()``.

    Raises:
        RuntimeError: If the module-level ``engine`` has not been initialised.
        Exception: Any error raised while connecting or inside the ``with``
            body is logged and re-raised unchanged.
    """
    # Fail with a clear message instead of an AttributeError on None
    if engine is None:
        raise RuntimeError(
            "Database engine is not initialised; call test_database_connection() first"
        )

    conn = None
    try:
        conn = engine.connect()
        yield conn
    except Exception as e:
        logger.error(f"Database connection error: {e}")
        raise
    finally:
        if conn is not None:
            conn.close()

# Query performance tracker
class QueryTimer:
    """Context manager that measures elapsed time in milliseconds.

    ``elapsed_ms`` can be read while the ``with`` block is still running (it
    then reports the time elapsed so far) or after it has exited (it then
    reports the final duration). Before the timer is started it is ``0.0``.
    """
    def __init__(self):
        self.start_time = None
        self.end_time = None

    def __enter__(self):
        # Reset so a reused timer does not report the previous run's end time.
        # perf_counter is monotonic, so the duration cannot go negative when
        # the wall clock is adjusted.
        self.end_time = None
        self.start_time = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end_time = time.perf_counter()

    @property
    def elapsed_ms(self) -> float:
        """Milliseconds since the timer started (final value once stopped)."""
        # Compare against None explicitly: a start value of 0.0 is valid
        if self.start_time is None:
            return 0.0
        end = self.end_time if self.end_time is not None else time.perf_counter()
        return (end - self.start_time) * 1000

# SQL query templates
# Single-transaction lookup: selects the transaction columns plus both partner
# names (via LEFT JOINs) for the row matching the :transaction_id bind
# parameter. query_single_transaction appends DATE_FILTER_CLAUSE to this
# template when the date filter is active.
# NOTE(review): the source side selects sp.partner_name while the destination
# side selects dp.dest_partner_name - confirm both column names against the schema.
SINGLE_TRANSACTION_QUERY = """
SELECT 
    t.hub_transaction_id,
    t.response_code,
    t.response_message,
    t.modified_on,  -- credited_on
    t.transaction_date_time_local,  -- created_on
    t.source_country_code,
    t.destination_country_code,
    t.source_currency_code,
    t.destination_currency_code,
    t.total_amount_destination,
    t.sender_name,
    t.receiver_name,
    t.source_partner_id,
    t.dest_partner_id,
    sp.partner_name as source_partner_name,
    dp.dest_partner_name as dest_partner_name
FROM iox_hub_transaction t
LEFT JOIN source_partner sp ON t.source_partner_id = sp.source_partner_id
LEFT JOIN dest_partner dp ON t.dest_partner_id = dp.dest_partner_id
WHERE t.hub_transaction_id = :transaction_id
"""

# Date filter clause appended to the query templates.
# The upper bound is exclusive at the start of the day AFTER :end_date so that
# rows timestamped during :end_date itself are included; a plain
# "<= :end_date" compares against midnight and drops them (with the default
# end_date of today, that is every transaction made today).
# NOTE(review): assumes PostgreSQL and a timestamp (or date) column - confirm.
DATE_FILTER_CLAUSE = """
AND t.transaction_date_time_local >= :start_date
AND t.transaction_date_time_local < (CAST(:end_date AS date) + INTERVAL '1 day')
"""

# Batch query template
# Same column list and joins as the single-transaction template, but matches
# hub_transaction_id against the :transaction_ids bind parameter with ANY(...).
# query_batch_transactions appends DATE_FILTER_CLAUSE when the date filter is active.
BATCH_TRANSACTION_QUERY = """
SELECT 
    t.hub_transaction_id,
    t.response_code,
    t.response_message,
    t.modified_on,
    t.transaction_date_time_local,
    t.source_country_code,
    t.destination_country_code,
    t.source_currency_code,
    t.destination_currency_code,
    t.total_amount_destination,
    t.sender_name,
    t.receiver_name,
    t.source_partner_id,
    t.dest_partner_id,
    sp.partner_name as source_partner_name,
    dp.dest_partner_name as dest_partner_name
FROM iox_hub_transaction t
LEFT JOIN source_partner sp ON t.source_partner_id = sp.source_partner_id
LEFT JOIN dest_partner dp ON t.dest_partner_id = dp.dest_partner_id
WHERE t.hub_transaction_id = ANY(:transaction_ids)
"""

# Test database connection
def test_database_connection(database_url: str) -> bool:
    """Create the module-level engine and verify connectivity and table structure.

    Args:
        database_url: SQLAlchemy database URL.

    Returns:
        True when the connection works and ``iox_hub_transaction`` exposes all
        required columns; False on any error or when a column is missing.
    """
    global engine
    required_columns = {
        'hub_transaction_id', 'response_code', 'response_message',
        'modified_on', 'transaction_date_time_local'
    }
    try:
        # Create engine
        engine = create_db_engine(database_url)

        with get_db_connection() as conn:
            # Test basic connectivity
            conn.execute(text("SELECT 1"))
            logger.info("✓ Database connection successful")

            # Verify table exists
            table_check = conn.execute(text("""
                SELECT column_name, data_type 
                FROM information_schema.columns 
                WHERE table_name = 'iox_hub_transaction' 
                AND column_name IN (
                    'hub_transaction_id', 'response_code', 'response_message',
                    'modified_on', 'transaction_date_time_local'
                )
                ORDER BY ordinal_position
            """))

            # Compare by name: a bare row count would pass with columns
            # missing, or double-count the table if it exists in two schemas
            found_columns = {row[0] for row in table_check}
            missing_columns = required_columns - found_columns
            if missing_columns:
                logger.error(
                    f"✗ iox_hub_transaction is missing required columns: {sorted(missing_columns)}"
                )
                return False
            logger.info(f"✓ Found {len(found_columns)} required columns in iox_hub_transaction")

            # Get a sample transaction to verify data
            sample_check = conn.execute(text("""
                SELECT hub_transaction_id, response_message 
                FROM iox_hub_transaction 
                LIMIT 1
            """))

            sample = sample_check.fetchone()
            if sample:
                logger.info(f"✓ Sample transaction found: {sample[0]}")

            return True

    except Exception as e:
        logger.error(f"✗ Database connection failed: {e}")
        return False

# Initialize database connection
if __name__ == "__main__":
    import os

    # Credentials are read from the environment so that no secret lives in
    # source control. Special characters in the password (e.g. "@", "%")
    # must be percent-encoded in the URL, otherwise the URL is mis-parsed.
    DATABASE_URL = os.environ.get("DATABASE_URL")

    if not DATABASE_URL:
        print("\n❌ Database setup failed. DATABASE_URL environment variable is not set.")
    elif test_database_connection(DATABASE_URL):
        print("\n✅ Database setup complete! Ready for queries.")
    else:
        print("\n❌ Database setup failed. Please check connection details.")

In [None]:
# Cell 5: Single Transaction Query Implementation

from datetime import datetime, date
from typing import Optional, Dict, Any
import traceback

def query_single_transaction(
    transaction_id: str,
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
    search_all_dates: bool = False
) -> Tuple[Optional[Dict[str, Any]], Optional[str], float]:
    """
    Query single transaction from database

    Args:
        transaction_id: Value bound to :transaction_id.
        start_date: Lower bound of the date filter (ignored with search_all_dates).
        end_date: Upper bound of the date filter (ignored with search_all_dates).
        search_all_dates: When True, no date filter is appended to the query.

    Returns:
        Tuple of (transaction_data, error_message, query_time_ms). Exactly one
        of transaction_data / error_message is None. Database errors are
        logged and reported through error_message, never raised.
    """
    query_timer = QueryTimer()

    # Build query based on date filter preference
    if search_all_dates:
        query = SINGLE_TRANSACTION_QUERY
        params = {"transaction_id": transaction_id}
    else:
        query = SINGLE_TRANSACTION_QUERY + DATE_FILTER_CLAUSE
        params = {
            "transaction_id": transaction_id,
            "start_date": start_date,
            "end_date": end_date
        }

    try:
        with query_timer:
            with get_db_connection() as conn:
                row = conn.execute(text(query), params).fetchone()
                # Convert row to dictionary
                transaction_data = dict(row._mapping) if row is not None else None
    except Exception as e:
        error_msg = f"Database error: {str(e)}"
        logger.error(f"Query error for {transaction_id}: {error_msg}")
        logger.error(traceback.format_exc())
        return None, error_msg, query_timer.elapsed_ms

    # Read the timer only after its context has exited; reading it inside the
    # `with` block reported 0.0 because the end time was not yet recorded.
    elapsed_ms = query_timer.elapsed_ms

    if transaction_data is None:
        return None, "Transaction not found", elapsed_ms

    # Validate critical fields aren't NULL
    for critical_field in ('response_code', 'response_message'):
        if not transaction_data.get(critical_field):
            return (
                None,
                f"Transaction has corrupted data - {critical_field} is NULL",
                elapsed_ms,
            )

    return transaction_data, None, elapsed_ms

def process_single_transaction_request(request: TransactionIdRequest) -> SingleTransactionResponse:
    """
    Process single transaction request and return formatted response

    Outcomes:
        * found and valid      -> success=True, data set
        * not found            -> success=True, data=None, error="Transaction not found"
        * database/data error  -> success=False, error set
    Exceptions are logged and converted into a success=False response.
    """
    not_found_message = "Transaction not found"
    # perf_counter is monotonic, so processing_time_ms can never go negative
    # (the response model rejects negative values)
    started = time.perf_counter()

    def _respond(success, data=None, error=None, metadata=None):
        """Build the response envelope with the elapsed processing time."""
        return SingleTransactionResponse(
            success=success,
            data=data,
            error=error,
            processing_time_ms=(time.perf_counter() - started) * 1000,
            query_metadata=metadata if metadata is not None else {}
        )

    try:
        # Query database
        transaction_data, error_msg, db_time = query_single_transaction(
            transaction_id=request.transaction_id,
            start_date=request.start_date,
            end_date=request.end_date,
            search_all_dates=request.search_all_dates
        )

        # Build metadata
        query_metadata = {
            "date_range": {
                "start": str(request.start_date),
                "end": str(request.end_date)
            },
            "search_all_dates": request.search_all_dates,
            "database_response_time_ms": db_time
        }

        # Not found case. It must be checked before the generic error case:
        # the query reports "not found" through error_msg, which previously
        # made this branch unreachable and turned every miss into a failure.
        if error_msg == not_found_message or (not error_msg and not transaction_data):
            return _respond(True, error=not_found_message, metadata=query_metadata)

        if error_msg:
            # Error case
            return _respond(False, error=error_msg, metadata=query_metadata)

        # Success case - create response model
        try:
            transaction_response = TransactionResponse(**transaction_data)
        except Exception as e:
            # Data validation error
            logger.error(f"Response validation error: {e}")
            return _respond(
                False,
                error=f"Data validation error: {str(e)}",
                metadata=query_metadata
            )

        # Add warnings to metadata if any
        if transaction_response.has_warnings:
            query_metadata["warnings"] = transaction_response.warnings

        return _respond(True, data=transaction_response, metadata=query_metadata)

    except Exception as e:
        # Unexpected error
        logger.error(f"Unexpected error: {e}")
        logger.error(traceback.format_exc())
        return _respond(False, error=f"Internal error: {str(e)}", metadata={})

# Test function
def test_single_transaction_query():
    """Run a few single-lookup scenarios and print each outcome."""
    print("Testing single transaction queries...\n")

    # (label, request) pairs; the placeholder IDs need real values from the database
    scenarios = (
        (
            "Valid transaction with date range",
            TransactionIdRequest(
                transaction_id="TPAB123456789012",  # Replace with actual ID
                search_all_dates=False
            ),
        ),
        (
            "Valid transaction - all dates",
            TransactionIdRequest(
                transaction_id="TPAB123456789012",  # Replace with actual ID
                search_all_dates=True
            ),
        ),
        (
            "Non-existent transaction",
            TransactionIdRequest(
                transaction_id="TPZZ999999999999"
            ),
        ),
    )

    separator = "-" * 50
    for label, request in scenarios:
        print(f"Test: {label}")
        print(separator)

        response = process_single_transaction_request(request)

        print(f"Success: {response.success}")
        print(f"Processing time: {response.processing_time_ms:.2f}ms")

        txn = response.data
        if txn:
            print(f"Transaction found: {txn.transaction_id}")
            print(f"Status: {txn.response_message}")
            print(f"Response code: {txn.response_code}")
            if txn.credited_on:
                print(f"Credited on: {txn.credited_on}")

        if response.error:
            print(f"Error: {response.error}")

        print(f"Metadata: {response.query_metadata}")
        print("\n")

# Run test if needed
# test_single_transaction_query()

In [None]:
# Cell 6: Batch Transaction Query Implementation

from typing import List, Dict, Set
import asyncio
from concurrent.futures import ThreadPoolExecutor, TimeoutError

def query_batch_transactions(
    transaction_ids: List[str],
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
    search_all_dates: bool = False,
    timeout_seconds: int = 30
) -> Tuple[Dict[str, Dict], Dict[str, str], float]:
    """
    Query multiple transactions in a single database call

    Args:
        transaction_ids: IDs bound to :transaction_ids.
        start_date: Lower bound of the date filter (ignored with search_all_dates).
        end_date: Upper bound of the date filter (ignored with search_all_dates).
        search_all_dates: When True, no date filter is appended to the query.
        timeout_seconds: Server-side statement timeout applied to the query;
            a falsy or non-positive value disables it.

    Returns:
        Tuple of (transactions_dict, errors_dict, query_time_ms). Every
        requested ID appears in exactly one of the two dicts. Database errors
        are logged and reported for every ID, never raised.
    """
    query_timer = QueryTimer()
    transactions_dict = {}
    errors_dict = {}

    # Build query
    if search_all_dates:
        query = BATCH_TRANSACTION_QUERY
        params = {"transaction_ids": transaction_ids}
    else:
        query = BATCH_TRANSACTION_QUERY + DATE_FILTER_CLAUSE
        params = {
            "transaction_ids": transaction_ids,
            "start_date": start_date,
            "end_date": end_date
        }

    try:
        with query_timer:
            with get_db_connection() as conn:
                # Execute with timeout: limit the statement on the server so
                # an abandoned query does not keep running. set_config(...,
                # true) is transaction-local, so the pooled connection is not
                # left with a changed setting.
                # NOTE(review): PostgreSQL-specific, like ANY(...) in the template.
                if timeout_seconds and timeout_seconds > 0:
                    conn.execute(
                        text("SELECT set_config('statement_timeout', :timeout_ms, true)"),
                        {"timeout_ms": str(int(timeout_seconds * 1000))}
                    )
                rows = conn.execute(text(query), params).fetchall()
    except Exception as e:
        error_msg = f"Database error: {str(e)}"
        logger.error(f"Batch query error: {error_msg}")
        logger.error(traceback.format_exc())

        # Return all as errors
        for txn_id in transaction_ids:
            errors_dict.setdefault(txn_id, error_msg)

        return transactions_dict, errors_dict, query_timer.elapsed_ms

    # Process found transactions
    for row in rows:
        try:
            transaction_data = dict(row._mapping)
            txn_id = transaction_data['hub_transaction_id']

            # Validate critical fields
            if not transaction_data.get('response_code'):
                errors_dict[txn_id] = "Transaction has corrupted data - response_code is NULL"
                continue

            if not transaction_data.get('response_message'):
                errors_dict[txn_id] = "Transaction has corrupted data - response_message is NULL"
                continue

            transactions_dict[txn_id] = transaction_data

        except Exception as e:
            txn_id = row[0] if row else "Unknown"
            errors_dict[txn_id] = f"Error processing transaction: {str(e)}"

    # Identify not found transactions: requested IDs with neither data nor an error
    for txn_id in set(transaction_ids) - set(transactions_dict) - set(errors_dict):
        errors_dict[txn_id] = "Transaction not found"

    # The timer context has exited, so elapsed_ms is the real query duration
    return transactions_dict, errors_dict, query_timer.elapsed_ms

def process_batch_transaction_request(request: BatchTransactionRequest) -> BatchTransactionResponse:
    """
    Process batch transaction request with timeout and validation

    The query runs in a worker thread and is abandoned after
    ``request.timeout_seconds``. Rows are validated individually, so one bad
    row only produces an error entry for its own ID. ``success`` is True when
    the only errors, if any, are "Transaction not found". Exceptions are
    logged and converted into an all-failed response.
    """
    not_found_message = "Transaction not found"
    # perf_counter is monotonic, so processing_time_ms can never go negative
    # (the response model rejects negative values)
    started = time.perf_counter()

    def _elapsed_ms():
        return (time.perf_counter() - started) * 1000

    def _all_failed(reason, metadata):
        """Build the response used when no transaction could be processed."""
        requested = len(request.transaction_ids)
        return BatchTransactionResponse(
            success=False,
            summary={
                "requested": requested,
                "found": 0,
                "not_found": 0,
                "errors": requested
            },
            data={},
            errors={txn_id: reason for txn_id in request.transaction_ids},
            warnings={},
            processing_time_ms=_elapsed_ms(),
            query_metadata=metadata
        )

    # Initialize response containers
    validated_transactions = {}
    all_errors = {}
    all_warnings = {}

    try:
        # Query database with timeout. The executor is not used as a context
        # manager: leaving a `with` block waits for the worker to finish,
        # which would block the timeout response until the query completed.
        executor = ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(
                query_batch_transactions,
                transaction_ids=request.transaction_ids,
                start_date=request.start_date,
                end_date=request.end_date,
                search_all_dates=request.search_all_dates,
                timeout_seconds=request.timeout_seconds
            )

            try:
                transactions_dict, errors_dict, db_time = future.result(
                    timeout=request.timeout_seconds
                )
            except TimeoutError:
                # Timeout occurred
                return _all_failed(
                    "Query timeout exceeded",
                    {
                        "timeout_occurred": True,
                        "timeout_seconds": request.timeout_seconds
                    }
                )
        finally:
            # wait=False: never hold the response for a still-running worker
            executor.shutdown(wait=False)

        # Process and validate each transaction
        for txn_id, txn_data in transactions_dict.items():
            try:
                # Create response model
                transaction_response = TransactionResponse(**txn_data)
            except Exception as e:
                # Validation error
                all_errors[txn_id] = f"Data validation error: {str(e)}"
                continue

            validated_transactions[txn_id] = transaction_response

            # Collect warnings if requested
            if request.include_warnings and transaction_response.has_warnings:
                all_warnings[txn_id] = transaction_response.warnings

        # Add query errors
        all_errors.update(errors_dict)

        # Calculate summary
        not_found_count = sum(
            1 for message in all_errors.values() if message == not_found_message
        )
        error_count = len(all_errors) - not_found_count
        summary = {
            "requested": len(request.transaction_ids),
            "found": len(validated_transactions),
            "not_found": not_found_count,
            "errors": error_count
        }

        # Build metadata
        query_metadata = {
            "date_range": {
                "start": str(request.start_date),
                "end": str(request.end_date)
            },
            "search_all_dates": request.search_all_dates,
            "database_response_time_ms": db_time,
            "include_warnings": request.include_warnings
        }

        # Overall success: nothing went wrong other than IDs not being found
        return BatchTransactionResponse(
            success=error_count == 0,
            summary=summary,
            data=validated_transactions,
            errors=all_errors,
            warnings=all_warnings,
            processing_time_ms=_elapsed_ms(),
            query_metadata=query_metadata
        )

    except Exception as e:
        # Unexpected error
        logger.error(f"Batch processing error: {e}")
        logger.error(traceback.format_exc())

        return _all_failed(f"Internal error: {str(e)}", {"error": str(e)})

# Test function for batch queries
def test_batch_transaction_query():
    """Run a few batch lookups and print the outcome of each.

    All request models are built before the first scenario runs. Each
    scenario is then sent through process_batch_transaction_request and its
    success flag, summary, timing and the first three entries of data,
    errors and warnings are printed. An exception raised while handling one
    scenario is printed with its traceback; the remaining scenarios still run.
    """
    print("Testing batch transaction queries...\n")

    # Scenario label -> transaction IDs submitted in that batch
    scenario_ids = {
        "Small batch - all valid": [
            "TPAB123456789012",  # Replace with actual IDs
            "TPCD234567890123",
            "TPEF345678901234",
        ],
        "Mixed batch - some not found": [
            "TPAB123456789012",  # Real ID
            "TPZZ999999999999",  # Non-existent
            "TPYY888888888888",  # Non-existent
        ],
        "Batch with duplicates": [
            "TPAB123456789012",
            "TPAB123456789012",  # Duplicate
            "TPCD234567890123",
        ],
        "Maximum batch size": [
            f"TP{chr(65+i//10)}{chr(65+i%10)}{str(i).zfill(12)}"
            for i in range(20)
        ],
    }
    scenarios = [
        (label, BatchTransactionRequest(transaction_ids=ids))
        for label, ids in scenario_ids.items()
    ]

    # (heading, response attribute, how to render one value of that mapping)
    sections = (
        ("Found transactions", "data", lambda txn: txn.response_message),
        ("Errors", "errors", lambda value: value),
        ("Warnings", "warnings", lambda value: value),
    )

    for label, batch_request in scenarios:
        try:
            print(f"Test: {label}")
            print("-" * 70)

            response = process_batch_transaction_request(batch_request)

            print(f"Success: {response.success}")
            print(f"Summary: {response.summary}")
            print(f"Processing time: {response.processing_time_ms:.2f}ms")

            for heading, attribute, render in sections:
                mapping = getattr(response, attribute)
                if not mapping:
                    continue
                print(f"\n{heading} ({len(mapping)}):")
                for txn_id, value in list(mapping.items())[:3]:  # Show first 3
                    print(f"  - {txn_id}: {render(value)}")

            print("\n")

        except Exception as exc:
            print(f"Test failed with exception: {exc}")
            print(traceback.format_exc())
            print("\n")

# Run test if needed
# test_batch_transaction_query()

In [None]:
# Cell 7: API Endpoint Simulation and Integration Testing

from typing import Union
import json
from datetime import datetime

# Simulate API endpoint functions
async def get_single_transaction(
    transaction_id: str,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    search_all_dates: bool = False
) -> Dict[str, Any]:
    """
    Simulate GET /api/v1/transactions/{transaction_id} endpoint

    Date strings are parsed as YYYY-MM-DD, a TransactionIdRequest is built
    and processed, and the response model is dumped in JSON mode. Any
    exception is turned into an error dictionary instead of propagating:
    ValueError is reported as a validation error, everything else as an
    internal server error.
    """
    def _failure(label: str, exc: Exception) -> Dict[str, Any]:
        # Shared shape of the error payload for both failure kinds
        return {
            "success": False,
            "error": f"{label}: {str(exc)}",
            "processing_time_ms": 0,
            "query_metadata": {}
        }

    def _to_date(raw: Optional[str]):
        # Missing/empty strings mean "no bound"
        if not raw:
            return None
        return datetime.strptime(raw, "%Y-%m-%d").date()

    try:
        # Dates are parsed before the request model is touched
        range_start = _to_date(start_date)
        range_end = _to_date(end_date)

        lookup = TransactionIdRequest(
            transaction_id=transaction_id,
            start_date=range_start,
            end_date=range_end,
            search_all_dates=search_all_dates
        )

        outcome = process_single_transaction_request(lookup)

        # Convert to JSON-serializable format
        return outcome.model_dump(mode='json')

    except ValueError as exc:
        return _failure("Validation error", exc)
    except Exception as exc:
        return _failure("Internal server error", exc)

async def post_batch_transactions(
    transaction_ids: List[str],
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    search_all_dates: bool = False,
    include_warnings: bool = True,
    timeout_seconds: int = 30
) -> Dict[str, Any]:
    """
    Simulate POST /api/v1/transactions/batch endpoint

    Date strings are parsed as YYYY-MM-DD, a BatchTransactionRequest is built
    and processed, and the response model is dumped in JSON mode. On any
    exception a batch-shaped error dictionary is returned in which every
    requested ID carries the same error text: ValueError is labelled a
    validation error, everything else an internal server error.
    """
    def _rejected(label: str, exc: Exception) -> Dict[str, Any]:
        # Whole batch counted as errors; same message attached to each ID
        return {
            "success": False,
            "summary": {
                "requested": len(transaction_ids),
                "found": 0,
                "not_found": 0,
                "errors": len(transaction_ids)
            },
            "data": {},
            "errors": {tid: f"{label}: {str(exc)}" for tid in transaction_ids},
            "warnings": {},
            "processing_time_ms": 0,
            "query_metadata": {}
        }

    def _to_date(raw: Optional[str]):
        # Missing/empty strings mean "no bound"
        if not raw:
            return None
        return datetime.strptime(raw, "%Y-%m-%d").date()

    try:
        # Dates are parsed before the request model is touched
        range_start = _to_date(start_date)
        range_end = _to_date(end_date)

        batch = BatchTransactionRequest(
            transaction_ids=transaction_ids,
            start_date=range_start,
            end_date=range_end,
            search_all_dates=search_all_dates,
            include_warnings=include_warnings,
            timeout_seconds=timeout_seconds
        )

        outcome = process_batch_transaction_request(batch)

        # Convert to JSON-serializable format
        return outcome.model_dump(mode='json')

    except ValueError as exc:
        return _rejected("Validation error", exc)
    except Exception as exc:
        return _rejected("Internal server error", exc)

# Integration test suite
class TransactionAPITester:
    """Integration-style checks for the simulated transaction endpoints.

    Each ``test_*`` coroutine prints what it is doing and uses ``assert`` for
    its checks. ``run_all_tests`` runs them in order through ``_run_case`` so
    that a failing check is recorded as ``FAIL`` and the remaining tests still
    run. Outcomes accumulate in ``self.test_results`` as ``(name, status)``
    tuples and are printed by ``print_summary``.
    """

    def __init__(self):
        self.test_results = []

    async def _run_case(self, label, case):
        """Await one test and make sure exactly one outcome is recorded for it.

        Args:
            label: Name stored in ``test_results`` when the case fails, or
                when it passes without recording a result of its own.
            case: Zero-argument callable returning an awaitable.
        """
        recorded_before = len(self.test_results)
        try:
            await case()
        except Exception as exc:  # includes AssertionError from failed checks
            print(f"❌ {label} failed: {exc!r}")
            print("\n")
            self.test_results.append((label, "FAIL"))
        else:
            # The validate_* helpers append their own PASS entry; only add
            # one here when the case did not.
            if len(self.test_results) == recorded_before:
                self.test_results.append((label, "PASS"))

    async def run_all_tests(self):
        """Run all integration tests, then print the summary.

        A failure in one test no longer stops the run: it is recorded and the
        next test starts.
        """
        print("🧪 Running Transaction API Integration Tests\n")

        cases = [
            # Test 1: Single transaction - success case
            ("Single Success", self.test_single_transaction_success),
            # Test 2: Single transaction - not found
            ("Single Not Found", self.test_single_transaction_not_found),
            # Test 3: Single transaction - invalid format
            ("Single Invalid Format", self.test_single_transaction_invalid),
            # Test 4: Batch - all found
            ("Batch All Found", self.test_batch_all_found),
            # Test 5: Batch - mixed results
            ("Batch Mixed Results", self.test_batch_mixed_results),
            # Test 6: Batch - edge cases
            ("Batch Edge Cases", self.test_batch_edge_cases),
            # Test 7: Performance test
            ("Performance", self.test_performance),
        ]
        for label, case in cases:
            await self._run_case(label, case)

        # Print summary
        self.print_summary()

    async def test_single_transaction_success(self):
        """Test successful single transaction lookup"""
        print("Test 1: Single Transaction - Success Case")
        print("-" * 50)

        # Use a real transaction ID from your database
        result = await get_single_transaction(
            transaction_id="TPAB123456789012",  # Replace with actual ID
            search_all_dates=False
        )

        self.validate_single_response(result, "Single Success")
        print(json.dumps(result, indent=2, default=str))
        print("\n")

    async def test_single_transaction_not_found(self):
        """Test single transaction not found"""
        print("Test 2: Single Transaction - Not Found")
        print("-" * 50)

        result = await get_single_transaction(
            transaction_id="TPZZ999999999999"
        )

        assert result["success"] is True  # Request succeeded
        assert result["error"] == "Transaction not found"
        assert result["data"] is None

        print("✅ Correctly handled not found case")
        print(f"Error message: {result['error']}")
        print("\n")

    async def test_single_transaction_invalid(self):
        """Test invalid transaction ID format"""
        print("Test 3: Single Transaction - Invalid Format")
        print("-" * 50)

        test_cases = [
            ("short_id", "Too short ID"),
            ("ABCD123456789012", "Wrong prefix"),
            ("TP12123456789012", "Numbers in letter position"),
            ("TP'; DROP TABLE--", "SQL injection attempt")
        ]

        for invalid_id, description in test_cases:
            result = await get_single_transaction(transaction_id=invalid_id)

            assert result["success"] is False
            assert "Validation error" in result["error"]

            print(f"✅ {description}: {invalid_id}")
            print(f"   Error: {result['error']}")

        print("\n")

    async def test_batch_all_found(self):
        """Test batch with all transactions found"""
        print("Test 4: Batch - All Found")
        print("-" * 50)

        # Use real transaction IDs
        result = await post_batch_transactions(
            transaction_ids=[
                "TPAB123456789012",  # Replace with actual IDs
                "TPCD234567890123",
                "TPEF345678901234"
            ]
        )

        self.validate_batch_response(result, "Batch All Found")
        print(f"Summary: {result['summary']}")
        print(f"Processing time: {result['processing_time_ms']:.2f}ms")
        print("\n")

    async def test_batch_mixed_results(self):
        """Test batch with mixed results"""
        print("Test 5: Batch - Mixed Results")
        print("-" * 50)

        result = await post_batch_transactions(
            transaction_ids=[
                "TPAB123456789012",  # Real
                "TPZZ999999999999",  # Not found
                "TPYY888888888888"   # Not found
            ]
        )

        assert result["success"] is True  # Partial success is still success
        assert result["summary"]["found"] >= 1
        assert result["summary"]["not_found"] >= 2

        print("✅ Mixed results handled correctly")
        print(f"Summary: {result['summary']}")
        print(f"Errors: {list(result['errors'].keys())}")
        print("\n")

    async def test_batch_edge_cases(self):
        """Test batch edge cases"""
        print("Test 6: Batch - Edge Cases")
        print("-" * 50)

        # Test max batch size
        max_batch = [f"TP{chr(65+i//10)}{chr(65+i%10)}{str(i).zfill(12)}"
                     for i in range(20)]

        result = await post_batch_transactions(transaction_ids=max_batch)

        assert result["summary"]["requested"] == 20
        print(f"✅ Max batch size (20): {result['summary']}")

        # Test exceeding max batch size
        too_large = max_batch + ["TPXX000000000000"]
        result = await post_batch_transactions(transaction_ids=too_large)

        assert result["success"] is False
        assert "Validation error" in str(result["errors"])
        print("✅ Correctly rejected batch > 20")

        # Test empty batch
        result = await post_batch_transactions(transaction_ids=[])
        assert result["success"] is False
        print("✅ Correctly rejected empty batch")

        print("\n")

    async def test_performance(self):
        """Test performance metrics"""
        print("Test 7: Performance Test")
        print("-" * 50)

        # Single transaction performance
        single_times = []
        for i in range(5):
            result = await get_single_transaction(
                transaction_id=f"TPAB{str(i).zfill(12)}"
            )
            single_times.append(result["processing_time_ms"])

        avg_single = sum(single_times) / len(single_times)
        print(f"Average single query time: {avg_single:.2f}ms")

        # Batch performance
        batch_result = await post_batch_transactions(
            transaction_ids=[f"TPAB{str(i).zfill(12)}" for i in range(10)]
        )

        print(f"Batch query time (10 items): {batch_result['processing_time_ms']:.2f}ms")
        print(f"Per-transaction time: {batch_result['processing_time_ms']/10:.2f}ms")

        print("\n")

    def validate_single_response(self, response: Dict, test_name: str):
        """Validate single transaction response structure.

        Asserts the top-level fields are present and, for a successful
        response carrying data, that the core transaction fields exist.
        Records ``(test_name, "PASS")`` when every assertion holds.
        """
        required_fields = ["success", "processing_time_ms", "query_metadata"]
        for field in required_fields:
            assert field in response, f"{test_name}: Missing field {field}"

        if response["success"] and response.get("data"):
            data = response["data"]
            assert "transaction_id" in data
            assert "response_code" in data
            assert "response_message" in data

        self.test_results.append((test_name, "PASS"))

    def validate_batch_response(self, response: Dict, test_name: str):
        """Validate batch response structure.

        Asserts the top-level fields are present and that the summary counts
        add up to the requested total. Records ``(test_name, "PASS")`` when
        every assertion holds.
        """
        required_fields = ["success", "summary", "data", "errors", "warnings", "processing_time_ms"]
        for field in required_fields:
            assert field in response, f"{test_name}: Missing field {field}"

        summary = response["summary"]
        assert summary["requested"] == summary["found"] + summary["not_found"] + summary["errors"]

        self.test_results.append((test_name, "PASS"))

    def print_summary(self):
        """Print one line per recorded result followed by the totals."""
        print("=" * 70)
        print("TEST SUMMARY")
        print("=" * 70)

        for test_name, status in self.test_results:
            print(f"{test_name}: {status}")

        total = len(self.test_results)
        passed = sum(1 for _, status in self.test_results if status == "PASS")

        print(f"\nTotal: {total}, Passed: {passed}, Failed: {total - passed}")
        print("=" * 70)

# Run integration tests
async def run_integration_tests():
    """Create a fresh TransactionAPITester and await its full test run."""
    await TransactionAPITester().run_all_tests()

# For Jupyter notebook - use await directly
# await run_integration_tests()

# For regular Python script
if __name__ == "__main__":
    import asyncio

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop is running (plain script): asyncio.run may own one.
        asyncio.run(run_integration_tests())
    else:
        # A loop is already running, e.g. when this cell is executed by a
        # notebook kernel, where __name__ is "__main__" as well and
        # asyncio.run() would raise RuntimeError. Schedule the tests on the
        # running loop instead; the module-level reference keeps the task
        # alive until it finishes.
        _integration_test_task = asyncio.ensure_future(run_integration_tests())

In [None]:
# Cell 8: Performance Optimization and Caching Layer

from functools import lru_cache
from typing import Optional, Dict, Any, Tuple
import hashlib
import pickle
from datetime import datetime, timedelta
import json

# Simple in-memory cache implementation
class TransactionCache:
    """In-memory cache for transaction data with TTL.

    Entries live in ``self.cache`` as ``(data, cached_at)`` tuples keyed by an
    MD5 digest of the transaction ID and optional date range. Because the
    digest does not contain the ID in readable form, a side index maps each
    transaction ID to the digests stored for it so ``invalidate`` can find
    them. ``hits``, ``misses`` and ``evictions`` are running counters
    reported by ``get_stats``.
    """

    def __init__(self, ttl_seconds: int = 300):  # 5 minutes default
        self.cache = {}
        # transaction_id -> set of cache keys created for that ID
        self._keys_by_transaction = {}
        self.ttl_seconds = ttl_seconds
        self.hits = 0
        self.misses = 0
        self.evictions = 0

    def _make_key(self, transaction_id: str, date_range: Optional[Tuple[date, date]] = None) -> str:
        """Create cache key from transaction ID and date range"""
        key_parts = [transaction_id]
        if date_range:
            key_parts.extend([str(date_range[0]), str(date_range[1])])

        key_str = "|".join(key_parts)
        # MD5 is used only to derive a compact dictionary key
        return hashlib.md5(key_str.encode()).hexdigest()

    def _forget_key(self, transaction_id: str, key: str) -> None:
        """Drop one entry and its index record, if present."""
        self.cache.pop(key, None)
        keys = self._keys_by_transaction.get(transaction_id)
        if keys is not None:
            keys.discard(key)
            if not keys:
                del self._keys_by_transaction[transaction_id]

    def get(self, transaction_id: str, date_range: Optional[Tuple[date, date]] = None) -> Optional[Dict]:
        """Get transaction from cache if not expired.

        Returns the cached data on a hit. Returns None on a miss; an expired
        entry is removed, counted as an eviction and also counted as a miss.
        """
        key = self._make_key(transaction_id, date_range)

        entry = self.cache.get(key)
        if entry is not None:
            cached_data, cached_time = entry

            # Check if expired
            if datetime.now() - cached_time < timedelta(seconds=self.ttl_seconds):
                self.hits += 1
                logger.debug(f"Cache hit for {transaction_id}")
                return cached_data

            # Expired - remove from cache
            self._forget_key(transaction_id, key)
            self.evictions += 1

        self.misses += 1
        return None

    def set(self, transaction_id: str, data: Dict, date_range: Optional[Tuple[date, date]] = None):
        """Store transaction in cache, replacing any entry under the same key"""
        key = self._make_key(transaction_id, date_range)
        self.cache[key] = (data, datetime.now())
        self._keys_by_transaction.setdefault(transaction_id, set()).add(key)
        logger.debug(f"Cached transaction {transaction_id}")

    def invalidate(self, transaction_id: str):
        """Remove all cached entries for a transaction ID.

        Every date-range variant stored through ``set`` for this ID is
        removed; each removed entry counts as one eviction.
        """
        for key in self._keys_by_transaction.pop(transaction_id, ()):
            # The entry may already be gone if self.cache was edited directly
            if self.cache.pop(key, None) is not None:
                self.evictions += 1

    def clear(self):
        """Clear entire cache and reset all counters"""
        self.cache.clear()
        self._keys_by_transaction.clear()
        self.hits = 0
        self.misses = 0
        self.evictions = 0

    def get_stats(self) -> Dict[str, Any]:
        """Get cache statistics.

        ``hit_rate`` is a percentage string with two decimals, computed over
        hits + misses ("0.00%" when nothing has been requested yet).
        """
        total_requests = self.hits + self.misses
        hit_rate = (self.hits / total_requests * 100) if total_requests > 0 else 0

        return {
            "size": len(self.cache),
            "hits": self.hits,
            "misses": self.misses,
            "evictions": self.evictions,
            "hit_rate": f"{hit_rate:.2f}%",
            "ttl_seconds": self.ttl_seconds
        }

# Module-level cache instance shared by the cached query helpers and the
# FastAPI handlers below; entries expire after 300 seconds (5 minutes).
transaction_cache = TransactionCache(ttl_seconds=300)

# Optimized query functions with caching
def query_single_transaction_cached(
    transaction_id: str,
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
    search_all_dates: bool = False,
    use_cache: bool = True
) -> Tuple[Optional[Dict[str, Any]], Optional[str], float, bool]:
    """
    Look up one transaction, consulting the shared cache before the database

    The cache entry is keyed by the transaction ID plus the (start_date,
    end_date) pair, or by the ID alone when search_all_dates is set. A
    truthy cached value is returned straight away with a query time of 0.0.
    Otherwise query_single_transaction is called and its result is cached
    only when data came back and no error was reported.

    Returns:
        Tuple of (transaction_data, error_message, query_time_ms, from_cache)
    """
    if search_all_dates:
        range_key = None
    else:
        range_key = (start_date, end_date)

    # Check cache first
    if use_cache:
        hit = transaction_cache.get(transaction_id, range_key)
        if hit:
            return hit, None, 0.0, True

    # Query database
    record, failure, elapsed_ms = query_single_transaction(
        transaction_id, start_date, end_date, search_all_dates
    )

    # Cache successful results
    skip_caching = not use_cache or not record or failure
    if not skip_caching:
        transaction_cache.set(transaction_id, record, range_key)

    # Anything that reaches this point was not served from the cache
    return record, failure, elapsed_ms, False

# Batch query optimization
def query_batch_transactions_optimized(
    transaction_ids: List[str],
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
    search_all_dates: bool = False,
    timeout_seconds: int = 30,
    use_cache: bool = True
) -> Tuple[Dict[str, Dict], Dict[str, str], float, Dict[str, bool]]:
    """
    Batch lookup that serves what it can from the shared cache

    With caching enabled each ID is first looked up in the cache; only the
    IDs without a truthy cached value are sent to query_batch_transactions,
    and whatever that call returns is stored in the cache. If nothing needs
    to be queried the function returns early with no errors and a query time
    of 0.0.

    Returns:
        Tuple of (transactions_dict, errors_dict, query_time_ms, cache_status_dict)
    """
    range_key = None if search_all_dates else (start_date, end_date)

    from_cache = {}
    served_from_cache = {}

    if not use_cache:
        # Cache bypassed: every ID goes to the database
        pending_ids = transaction_ids
        served_from_cache = {txn_id: False for txn_id in transaction_ids}
    else:
        pending_ids = []
        for txn_id in transaction_ids:
            hit = transaction_cache.get(txn_id, range_key)
            if hit:
                from_cache[txn_id] = hit
                served_from_cache[txn_id] = True
            else:
                pending_ids.append(txn_id)
                served_from_cache[txn_id] = False

    # If all found in cache, return immediately
    if not pending_ids:
        logger.info(f"All {len(transaction_ids)} transactions found in cache")
        return from_cache, {}, 0.0, served_from_cache

    # Query database for uncached transactions
    logger.info(f"Querying database for {len(pending_ids)} uncached transactions")
    fetched, errors, query_time = query_batch_transactions(
        pending_ids, start_date, end_date, search_all_dates, timeout_seconds
    )

    # Cache the newly fetched transactions
    if use_cache:
        for txn_id, txn_data in fetched.items():
            transaction_cache.set(txn_id, txn_data, range_key)

    # Combine cached and fresh data (fresh rows win on a key clash)
    combined = dict(from_cache)
    combined.update(fetched)

    return combined, errors, query_time, served_from_cache

# Query optimization strategies
class QueryOptimizer:
    """Keeps simple usage counters and derives tuning hints from them"""

    def __init__(self):
        self.query_stats = dict(
            single_queries=0,
            batch_queries=0,
            avg_batch_size=0,
            common_date_ranges={},
            frequent_transactions={},
        )

    def analyze_query_pattern(self, transaction_ids: List[str], date_range: Tuple[date, date]):
        """Count one query: bump each transaction ID and the date range used"""
        per_transaction = self.query_stats["frequent_transactions"]
        for txn_id in transaction_ids:
            per_transaction[txn_id] = per_transaction.get(txn_id, 0) + 1

        # Ranges are tallied under a "<start>_<end>" key
        per_range = self.query_stats["common_date_ranges"]
        range_key = f"{date_range[0]}_{date_range[1]}"
        per_range[range_key] = per_range.get(range_key, 0) + 1

    def get_optimization_hints(self) -> Dict[str, Any]:
        """Build recommendation lists from the counters gathered so far

        Returns a dict with the keys cache_recommendations,
        index_recommendations and query_recommendations; the last one is
        always an empty list.
        """
        def most_counted(tally, limit):
            # Highest count first; ties keep insertion order (stable sort)
            ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
            return ranked[:limit]

        cache_recs = []
        index_recs = []

        # Identify hot transactions
        hottest = most_counted(self.query_stats["frequent_transactions"], 10)
        if hottest:
            top_ids = [txn_id for txn_id, _ in hottest[:5]]
            cache_recs.append(
                f"Consider longer TTL for frequently accessed transactions: {top_ids}"
            )

        # Date range analysis
        busiest_ranges = most_counted(self.query_stats["common_date_ranges"], 5)
        if busiest_ranges:
            index_recs.append(
                "Consider partial index on transaction_date_time_local for common date ranges"
            )

        return {
            "cache_recommendations": cache_recs,
            "index_recommendations": index_recs,
            "query_recommendations": []
        }

# Module-level optimizer instance; test_cache_performance below feeds it
# sample query patterns and prints the hints it produces.
query_optimizer = QueryOptimizer()

# Performance monitoring decorator
def monitor_performance(func):
    """Decorator to monitor query performance.

    The wrapped callable is invoked with the original arguments and its
    return value is passed through unchanged. When a call takes longer than
    100 ms a warning naming the function and the elapsed time is logged.
    Exceptions from the wrapped callable propagate and are not logged here.

    Args:
        func: Synchronous callable to time.

    Returns:
        A wrapper that keeps ``func``'s name, docstring and other metadata.
    """
    from functools import wraps

    @wraps(func)  # keep __name__/__doc__ so logs and introspection stay accurate
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so clock adjustments cannot skew timings
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = (time.perf_counter() - start_time) * 1000

        # Log slow queries
        if elapsed > 100:  # More than 100ms
            logger.warning(f"Slow query detected: {func.__name__} took {elapsed:.2f}ms")

        return result
    return wrapper

# Test performance improvements
def test_cache_performance():
    """Print a walkthrough of the caching and query-pattern helpers

    Runs the same single lookup twice, runs two overlapping batch lookups
    after clearing the shared cache, prints the cache statistics, then feeds
    a repeated query pattern to the shared optimizer and prints its hints.
    """
    divider = "-" * 50
    print("🚀 Testing Performance Optimizations\n")

    # Test transaction IDs
    sample_ids = ["TPAB123456789012", "TPCD234567890123", "TPEF345678901234"]

    print("1. Testing single query caching:")
    print(divider)

    # Same ID twice: the first lookup is expected to miss, the second to hit
    _, _, cold_ms, cold_cached = query_single_transaction_cached(sample_ids[0])
    print(f"First query: {cold_ms:.2f}ms (from_cache: {cold_cached})")

    _, _, warm_ms, warm_cached = query_single_transaction_cached(sample_ids[0])
    print(f"Second query: {warm_ms:.2f}ms (from_cache: {warm_cached})")

    # The 0.1 floor avoids dividing by the 0.0 reported for cache hits
    print(f"Speed improvement: {cold_ms/max(warm_ms, 0.1):.1f}x faster")

    print("\n2. Testing batch query optimization:")
    print(divider)

    # Clear cache for fair test
    transaction_cache.clear()

    # First batch query
    _, _, first_ms, first_status = query_batch_transactions_optimized(sample_ids)
    first_hits = len([flag for flag in first_status.values() if flag])
    print(f"First batch: {first_ms:.2f}ms ({first_hits}/{len(sample_ids)} from cache)")

    # Second batch query with overlap
    overlapping_ids = sample_ids[:2] + ["TPGH456789012345"]
    _, _, second_ms, second_status = query_batch_transactions_optimized(overlapping_ids)
    second_hits = len([flag for flag in second_status.values() if flag])
    print(f"Second batch: {second_ms:.2f}ms ({second_hits}/{len(overlapping_ids)} from cache)")

    print("\n3. Cache Statistics:")
    print(divider)
    for stat_name, stat_value in transaction_cache.get_stats().items():
        print(f"{stat_name}: {stat_value}")

    print("\n4. Query Pattern Analysis:")
    print(divider)

    # Simulate some query patterns
    for _ in range(5):
        query_optimizer.analyze_query_pattern(
            [sample_ids[0]],
            (date.today() - timedelta(days=30), date.today())
        )

    print("Optimization hints:")
    for category, recommendations in query_optimizer.get_optimization_hints().items():
        if not recommendations:
            continue
        print(f"\n{category}:")
        for recommendation in recommendations:
            print(f"  - {recommendation}")

# Run performance tests
# NOTE(review): this call executes every time the cell/module is run, and the
# helpers it invokes go on to query the database — presumably meant for
# interactive notebook use only; confirm before importing this code elsewhere.
test_cache_performance()

In [None]:
# Cell 9: Production-Ready FastAPI Implementation

from fastapi import FastAPI, HTTPException, Query, Path, Depends, Request
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
import uvicorn
from typing import Optional
import asyncio
from datetime import datetime

# Create lifecycle manager for FastAPI
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application lifecycle - startup and shutdown.

    Startup checks the database with ``test_database_connection`` and raises
    RuntimeError when that check reports failure. Shutdown disposes the
    module-level ``engine`` if one exists; it runs even when the application
    exits through an exception.

    The connection URL is read from the ``TRANSACTION_API_DATABASE_URL``
    environment variable and falls back to the built-in default when the
    variable is not set.

    Raises:
        RuntimeError: If the database connection check fails at startup.
    """
    import os

    # Startup
    print("🚀 Starting Transaction Query API...")

    # Initialize database connection
    # SECURITY NOTE(review): the fallback below embeds credentials in source;
    # prefer supplying the URL through the environment and rotating this
    # password. The password also contains an unescaped "@" — URL parsers
    # may split user/host at the wrong place; confirm and percent-encode.
    DATABASE_URL = os.environ.get(
        "TRANSACTION_API_DATABASE_URL",
        "postgresql://rr_replic_cp:rr_replic_cp!@!$%@127.0.0.1:54326/terra_core_db",
    )

    if not test_database_connection(DATABASE_URL):
        raise RuntimeError("Failed to connect to database")

    print("✅ Database connected")
    print(f"✅ Cache initialized with {transaction_cache.ttl_seconds}s TTL")
    print("✅ API ready to serve requests")

    try:
        yield
    finally:
        # Shutdown
        print("\n🛑 Shutting down Transaction Query API...")
        # `engine` is not assigned in this function (presumably bound by the
        # connection helper — confirm); look it up defensively so a missing
        # name cannot turn shutdown into a NameError.
        db_engine = globals().get("engine")
        if db_engine:
            db_engine.dispose()
        print("✅ Cleanup complete")

# Create FastAPI app
# The lifespan handler above runs the startup/shutdown steps; interactive
# docs are served at /docs (Swagger UI) and /redoc.
app = FastAPI(
    title="Transaction Query API",
    description="Production-ready API for querying transaction status",
    version="1.0.0",
    lifespan=lifespan,
    docs_url="/docs",
    redoc_url="/redoc"
)

# Add CORS middleware
# NOTE(review): wildcard origins are combined with allow_credentials=True.
# FastAPI's CORS documentation says origins, methods and headers must be
# listed explicitly when credentials are allowed — confirm the intended
# policy and replace "*" with the real origin list before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=True,
    allow_methods=["GET", "POST"],  # only the verbs this API exposes
    allow_headers=["*"],
)

# Request ID middleware for tracking
@app.middleware("http")
async def add_request_id(request: Request, call_next):
    """Tag each request with a fresh UUID4 and echo it in X-Request-ID

    The ID is stored on request.state.request_id before the request is
    passed on, and added to the response headers afterwards.
    """
    from uuid import uuid4

    tracking_id = str(uuid4())
    request.state.request_id = tracking_id

    downstream_response = await call_next(request)
    downstream_response.headers["X-Request-ID"] = tracking_id
    return downstream_response

# Health check endpoint
@app.get("/health", tags=["Health"])
async def health_check():
    """Report service health

    Runs "SELECT 1" on a database connection and collects the cache
    statistics. Returns a "healthy" payload when both succeed; any exception
    produces a 503 response with an "unhealthy" payload carrying the error
    text.
    """
    try:
        # Check database connectivity
        with get_db_connection() as db_conn:
            db_conn.execute(text("SELECT 1"))

        stats = transaction_cache.get_stats()

        return dict(
            status="healthy",
            timestamp=datetime.utcnow().isoformat(),
            database="connected",
            cache=stats,
        )
    except Exception as exc:
        failure_body = {
            "status": "unhealthy",
            "timestamp": datetime.utcnow().isoformat(),
            "error": str(exc)
        }
        return JSONResponse(content=failure_body, status_code=503)

# Single transaction endpoint
@app.get(
    "/api/v1/transactions/{transaction_id}",
    response_model=SingleTransactionResponse,
    tags=["Transactions"],
    summary="Get single transaction",
    description="Retrieve details of a single transaction by ID"
)
async def get_transaction(
    transaction_id: str = Path(
        ...,
        description="Transaction ID in format TP[A-Z]{2}[0-9]{12}",
        example="TPAB123456789012"
    ),
    start_date: Optional[str] = Query(
        None,
        description="Start date for search range (YYYY-MM-DD)",
        example="2025-05-01"
    ),
    end_date: Optional[str] = Query(
        None,
        description="End date for search range (YYYY-MM-DD)",
        example="2025-06-08"
    ),
    search_all_dates: bool = Query(
        False,
        description="Search without date restrictions (impacts performance)"
    ),
    use_cache: bool = Query(
        True,
        description="Use cache for faster response"
    )
):
    """Get single transaction details.

    The date strings are parsed as YYYY-MM-DD and validated together with the
    ID through ``TransactionIdRequest``; the lookup then goes through
    ``query_single_transaction_cached``.

    Returns:
        A ``SingleTransactionResponse``. A query error yields
        ``success=False`` with the error text; a missing transaction yields
        ``success=True`` with ``data=None`` and the error
        "Transaction not found".

    Raises:
        HTTPException: 400 when a ValueError is raised while parsing or
            validating; 500 for any other exception.
    """
    try:
        # Parse dates
        parsed_start = datetime.strptime(start_date, "%Y-%m-%d").date() if start_date else None
        parsed_end = datetime.strptime(end_date, "%Y-%m-%d").date() if end_date else None

        # Create request model
        request = TransactionIdRequest(
            transaction_id=transaction_id,
            start_date=parsed_start,
            end_date=parsed_end,
            search_all_dates=search_all_dates
        )

        # Query with caching (use the validated values from the model)
        data, error, query_time, from_cache = query_single_transaction_cached(
            transaction_id=request.transaction_id,
            start_date=request.start_date,
            end_date=request.end_date,
            search_all_dates=request.search_all_dates,
            use_cache=use_cache
        )

        # Build response
        query_metadata = {
            "date_range": {
                "start": str(request.start_date),
                "end": str(request.end_date)
            },
            "search_all_dates": request.search_all_dates,
            "from_cache": from_cache,
            # No database round trip happened on a cache hit
            "database_response_time_ms": query_time if not from_cache else None
        }

        if error:
            return SingleTransactionResponse(
                success=False,
                data=None,
                error=error,
                processing_time_ms=query_time,
                query_metadata=query_metadata
            )

        if not data:
            # The request itself succeeded; there is just nothing to return
            return SingleTransactionResponse(
                success=True,
                data=None,
                error="Transaction not found",
                processing_time_ms=query_time,
                query_metadata=query_metadata
            )

        # Create response model
        transaction_response = TransactionResponse(**data)

        return SingleTransactionResponse(
            success=True,
            data=transaction_response,
            error=None,
            processing_time_ms=query_time,
            query_metadata=query_metadata
        )

    except ValueError as e:
        # Chain the cause so the original validation failure stays visible
        raise HTTPException(status_code=400, detail=f"Validation error: {str(e)}") from e
    except Exception as e:
        # logger.exception records the traceback along with the message; the
        # client only ever sees the generic detail below.
        logger.exception(f"Error processing request: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e

# Batch transaction endpoint
@app.post(
    "/api/v1/transactions/batch",
    response_model=BatchTransactionResponse,
    tags=["Transactions"],
    summary="Get multiple transactions",
    description="Retrieve details of multiple transactions in a single request"
)
async def get_batch_transactions(
    request: BatchTransactionRequest
):
    """Get multiple transaction details in batch.

    Runs one optimized, cache-enabled lookup for every requested ID, validates
    each returned row against ``TransactionResponse``, and reports per-ID
    failures alongside the successfully validated transactions.

    Args:
        request: Batch request carrying the transaction IDs, the date window,
            the ``search_all_dates`` flag, the query timeout and the
            ``include_warnings`` switch.

    Returns:
        BatchTransactionResponse with the validated transactions, the merged
        query/validation errors, optional warnings, a count summary and query
        metadata. ``success`` is False only when a returned row failed
        response-model validation.

    Raises:
        HTTPException: 400 when a ValueError escapes processing, 500 for any
            other unexpected failure.
    """
    start_time = time.time()
    not_found_message = "Transaction not found"

    try:
        # Query with optimization
        transactions_dict, errors_dict, query_time, cache_status = query_batch_transactions_optimized(
            transaction_ids=request.transaction_ids,
            start_date=request.start_date,
            end_date=request.end_date,
            search_all_dates=request.search_all_dates,
            timeout_seconds=request.timeout_seconds,
            use_cache=True
        )

        # Process and validate transactions
        validated_transactions = {}
        validation_errors = {}
        warnings = {}

        for txn_id, txn_data in transactions_dict.items():
            try:
                transaction_response = TransactionResponse(**txn_data)
            except Exception as e:
                # A single malformed row must not fail the whole batch;
                # it is reported per-ID in the response instead.
                validation_errors[txn_id] = f"Data validation error: {str(e)}"
                continue

            validated_transactions[txn_id] = transaction_response
            if request.include_warnings and transaction_response.has_warnings:
                warnings[txn_id] = transaction_response.warnings

        # Combine all errors (validation errors win on a key clash)
        all_errors = {**errors_dict, **validation_errors}

        # Calculate summary: every error is either "not found" or a real error
        cache_hits = sum(1 for hit in cache_status.values() if hit)
        not_found_count = sum(
            1 for message in all_errors.values() if message == not_found_message
        )
        summary = {
            "requested": len(request.transaction_ids),
            "found": len(validated_transactions),
            "not_found": not_found_count,
            "errors": len(all_errors) - not_found_count
        }

        # Build metadata
        query_metadata = {
            "date_range": {
                "start": str(request.start_date),
                "end": str(request.end_date)
            },
            "search_all_dates": request.search_all_dates,
            "cache_hits": cache_hits,
            "cache_misses": len(request.transaction_ids) - cache_hits,
            "database_response_time_ms": query_time
        }

        return BatchTransactionResponse(
            success=len(validation_errors) == 0,
            summary=summary,
            data=validated_transactions,
            errors=all_errors,
            warnings=warnings,
            processing_time_ms=(time.time() - start_time) * 1000,
            query_metadata=query_metadata
        )

    except ValueError as e:
        raise HTTPException(status_code=400, detail=f"Validation error: {str(e)}")
    except Exception as e:
        # logger.exception keeps the traceback; the client only sees a generic 500.
        logger.exception("Batch processing error: %s", e)
        raise HTTPException(status_code=500, detail="Internal server error")

# Cache management endpoints
@app.post("/api/v1/cache/clear", tags=["Cache Management"])
async def clear_cache():
    """Clear transaction cache"""
    # Empty the shared transaction cache, then acknowledge with a fixed payload.
    transaction_cache.clear()
    confirmation = {"message": "Cache cleared successfully"}
    return confirmation

@app.get("/api/v1/cache/stats", tags=["Cache Management"])
async def get_cache_stats():
    """Get cache statistics"""
    # Return whatever the cache reports, unmodified.
    cache_statistics = transaction_cache.get_stats()
    return cache_statistics

# API documentation models for OpenAPI
from pydantic import BaseModel as PydanticBaseModel

class ErrorResponse(PydanticBaseModel):
    """Error response model"""
    # Human-readable error text; the handlers in this file raise
    # HTTPException(detail=...) with a plain string.
    detail: str
    
# Shared error documentation, keyed by HTTP status code
responses = {
    status_code: {"description": label, "model": ErrorResponse}
    for status_code, label in (
        (400, "Bad Request"),
        (404, "Not Found"),
        (500, "Internal Server Error"),
    )
}

# Attach the shared error documentation to every route exposing a `responses` mapping.
# NOTE(review): this mutates routes after they were registered; verify that the
# "model" entries actually appear in the generated OpenAPI schema.
for route in app.routes:
    if not hasattr(route, "responses"):
        continue
    route.responses.update(responses)

# Main entry point for testing
def run_api(
    host: str = "0.0.0.0",
    port: int = 8000,
    app_path: str = "app:app",
    reload: bool = True,
    log_level: str = "info",
):
    """Run the API server with uvicorn (blocking call).

    Args:
        host: Interface to bind. The default binds every interface.
        port: TCP port to listen on.
        app_path: Import string of the ASGI app, ``"<module>:<attribute>"``.
            Adjust it to the module that actually defines ``app``.
        reload: Forwarded to uvicorn's ``reload`` option. Defaults to True
            because this entry point is meant for testing.
        log_level: Forwarded to uvicorn's ``log_level`` option.
    """
    uvicorn.run(
        app_path,
        host=host,
        port=port,
        reload=reload,
        log_level=log_level
    )

# API client for testing
class TransactionAPIClient:
    """Client for testing the API.

    Attributes:
        base_url: Root URL of the API, stored without trailing slashes.
    """

    def __init__(self, base_url: str = "http://localhost:8000"):
        # Strip trailing slashes so f"{base_url}/health" never becomes "...//health".
        self.base_url = base_url.rstrip("/")

    async def test_endpoints(self) -> None:
        """Call the health, single-transaction and batch endpoints and print the results."""
        # Imported lazily so the module loads even when aiohttp is not installed.
        import aiohttp

        health_url = f"{self.base_url}/health"
        single_url = f"{self.base_url}/api/v1/transactions/TPAB123456789012"
        batch_url = f"{self.base_url}/api/v1/transactions/batch"
        batch_payload = {
            "transaction_ids": ["TPAB123456789012", "TPCD234567890123"]
        }

        async with aiohttp.ClientSession() as session:
            # Test health
            async with session.get(health_url) as resp:
                print(f"Health check: {resp.status}")
                print(await resp.json())

            # Test single transaction
            async with session.get(single_url) as resp:
                print(f"\nSingle transaction: {resp.status}")
                result = await resp.json()
                print(f"Success: {result.get('success')}")
                if result.get('data'):
                    print(f"Transaction: {result['data']['transaction_id']}")

            # Test batch
            async with session.post(batch_url, json=batch_payload) as resp:
                print(f"\nBatch transaction: {resp.status}")
                result = await resp.json()
                print(f"Summary: {result.get('summary')}")

# Print API startup message
# run_api() is a plain (non-async) function that calls the blocking uvicorn.run(),
# so it cannot be awaited; the Jupyter hint uses the awaitable Server.serve() instead.
print("""
🚀 FastAPI Transaction Query API Ready!

To run the API:
1. In terminal: uvicorn app:app --reload
2. In Jupyter: await uvicorn.Server(uvicorn.Config(app)).serve() in async cell
3. Access docs: http://localhost:8000/docs

Example usage:
- GET /api/v1/transactions/TPAB123456789012
- POST /api/v1/transactions/batch
  Body: {"transaction_ids": ["TPAB123456789012", "TPCD234567890123"]}
""")

In [None]:
# Cell 10: Docker Configuration and Deployment Setup

# Create Dockerfile content
# The image installs curl explicitly because the HEALTHCHECK instruction shells
# out to it and the apt step is the only place system packages are added.
dockerfile_content = """
# Use Python 3.11 slim image
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies (curl is required by the HEALTHCHECK below)
RUN apt-get update && apt-get install -y \\
    curl \\
    gcc \\
    postgresql-client \\
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create non-root user
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
USER appuser

# Expose port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \\
    CMD curl -f http://localhost:8000/health || exit 1

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
"""

# Pinned Python dependencies, one requirement per line (rendered as requirements.txt).
# The leading and trailing newline mirror a triple-quoted block literal.
requirements_content = "\n" + "\n".join((
    "fastapi==0.109.0",
    "uvicorn[standard]==0.27.0",
    "pydantic==2.5.3",
    "sqlalchemy==2.0.25",
    "psycopg2-binary==2.9.9",
    "python-dateutil==2.8.2",
    "python-multipart==0.0.6",
    "asyncpg==0.29.0",
    "redis==5.0.1",
    "prometheus-client==0.19.0",
    "python-json-logger==2.0.7",
)) + "\n"

# Create docker-compose.yml content
# Credentials are injected through Compose variable interpolation instead of
# being hard-coded: the previous literal password contained "$%", "@" and "%",
# which break Compose interpolation and URL parsing of DATABASE_URL.
docker_compose_content = """
version: '3.8'

# Credentials are read from the environment or a .env file next to this file.
# POSTGRES_PASSWORD is embedded verbatim in DATABASE_URL, so use URL-safe
# characters only (no @ : / % ? #).

services:
  api:
    build: .
    container_name: transaction-api
    ports:
      - "8000:8000"
    environment:
      - DATABASE_URL=postgresql://${POSTGRES_USER:-rr_replic_cp}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}@postgres:5432/${POSTGRES_DB:-terra_core_db}
      - REDIS_URL=redis://redis:6379
      - LOG_LEVEL=INFO
      - WORKERS=4
    depends_on:
      - postgres
      - redis
    networks:
      - app-network
    restart: unless-stopped
    volumes:
      - ./logs:/app/logs

  postgres:
    image: postgres:15-alpine
    container_name: transaction-db
    environment:
      - POSTGRES_USER=${POSTGRES_USER:-rr_replic_cp}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}
      - POSTGRES_DB=${POSTGRES_DB:-terra_core_db}
    ports:
      - "54326:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    networks:
      - app-network
    restart: unless-stopped

  redis:
    image: redis:7-alpine
    container_name: transaction-cache
    ports:
      - "6379:6379"
    networks:
      - app-network
    restart: unless-stopped
    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru

  prometheus:
    image: prom/prometheus
    container_name: transaction-metrics
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    networks:
      - app-network
    restart: unless-stopped

networks:
  app-network:
    driver: bridge

volumes:
  postgres_data:
  prometheus_data:
"""

# Create main.py - Production ready application
# Template for the container entry point (main:app). It sets up JSON logging,
# Prometheus metrics, CORS/trusted-host middleware, and the /health and
# /metrics routes; the business endpoints still have to be wired in.
main_py_content = '''
import os
import logging
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.trustedhost import TrustedHostMiddleware
from prometheus_client import Counter, Histogram, generate_latest
from fastapi.responses import PlainTextResponse
import time
import json
from pythonjsonlogger import jsonlogger

# Configure structured logging
logHandler = logging.StreamHandler()
formatter = jsonlogger.JsonFormatter()
logHandler.setFormatter(formatter)
logger = logging.getLogger()
logger.addHandler(logHandler)
# Honour LOG_LEVEL from the environment; unknown names fall back to INFO
_log_level = logging.getLevelName(os.getenv("LOG_LEVEL", "INFO").upper())
logger.setLevel(_log_level if isinstance(_log_level, int) else logging.INFO)

# Metrics
REQUEST_COUNT = Counter(
    'http_requests_total', 
    'Total HTTP requests', 
    ['method', 'endpoint', 'status']
)
REQUEST_LATENCY = Histogram(
    'http_request_duration_seconds', 
    'HTTP request latency'
)

# Import your models and functions here
# from models import *
# from database import *

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifecycle management"""
    logger.info("Starting Transaction Query API", extra={
        "event": "startup",
        "workers": os.getenv("WORKERS", 1)
    })
    
    # Initialize database
    # Initialize cache
    # Initialize other services
    
    yield
    
    # Cleanup
    logger.info("Shutting down Transaction Query API", extra={
        "event": "shutdown"
    })

# Create app
app = FastAPI(
    title="Transaction Query API",
    version="1.0.0",
    lifespan=lifespan
)

# Security middleware
app.add_middleware(
    TrustedHostMiddleware,
    allowed_hosts=["*"]  # Configure for your domain
)

# CORS middleware
cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ORIGINS", "*").split(",")
    if origin.strip()
]
app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    # Credentials are only allowed for an explicit origin list, never with "*"
    allow_credentials="*" not in cors_origins,
    allow_methods=["GET", "POST"],
    allow_headers=["*"],
)

# Metrics middleware
@app.middleware("http")
async def metrics_middleware(request: Request, call_next):
    start_time = time.time()
    
    response = await call_next(request)
    
    REQUEST_COUNT.labels(
        method=request.method,
        endpoint=request.url.path,
        status=response.status_code
    ).inc()
    
    REQUEST_LATENCY.observe(time.time() - start_time)
    
    return response

# Logging middleware
@app.middleware("http")
async def logging_middleware(request: Request, call_next):
    start_time = time.time()
    
    response = await call_next(request)
    
    logger.info("Request processed", extra={
        "method": request.method,
        "path": request.url.path,
        "status_code": response.status_code,
        "duration_ms": (time.time() - start_time) * 1000,
        # request.client may be None (no peer address available)
        "client_ip": request.client.host if request.client else None
    })
    
    return response

# Health endpoint (target of the container health check and orchestrator probes)
@app.get("/health")
async def health():
    return {"status": "healthy"}

# Metrics endpoint
@app.get("/metrics", include_in_schema=False)
async def metrics():
    return PlainTextResponse(generate_latest())

# Add your endpoints here
# app.include_router(transaction_router, prefix="/api/v1")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=int(os.getenv("PORT", 8000)),
        workers=int(os.getenv("WORKERS", 4)),
        log_config=None  # Use our custom logging
    )
'''

# Create deployment files
def create_deployment_files(write: bool = False, output_dir: str = "."):
    """Create all deployment configuration files.

    Lists the Dockerfile, requirements.txt, docker-compose.yml, main.py and
    .dockerignore, then prints the deployment instructions.

    Args:
        write: When True, write each file to ``output_dir``. The default
            (False) is a dry run that only prints the file names.
        output_dir: Directory that receives the files when ``write`` is True;
            it is created if missing.
    """
    from pathlib import Path

    files = {
        "Dockerfile": dockerfile_content,
        "requirements.txt": requirements_content,
        "docker-compose.yml": docker_compose_content,
        "main.py": main_py_content,
        ".dockerignore": """
__pycache__
*.pyc
*.pyo
*.pyd
.Python
env/
venv/
.venv/
pip-log.txt
pip-delete-this-directory.txt
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.log
.git
.gitignore
.mypy_cache
.pytest_cache
.hypothesis
.ipynb_checkpoints
"""
    }

    target_dir = Path(output_dir)
    if write:
        target_dir.mkdir(parents=True, exist_ok=True)

    print("📁 Creating deployment files:\n")
    for filename, content in files.items():
        if write:
            # The templates start with a blank line; strip it and end the
            # file with exactly one newline.
            (target_dir / filename).write_text(content.strip() + "\n", encoding="utf-8")
        print(f"✅ {filename}")

    print("\n📋 Deployment Instructions:")
    print("1. Build: docker-compose build")
    print("2. Run: docker-compose up -d")
    print("3. Check logs: docker-compose logs -f api")
    print("4. Scale: docker-compose up -d --scale api=3")

# Kubernetes configuration
# Three manifests in one YAML stream:
#   1. Deployment "transaction-api": 3 replicas of image transaction-api:latest,
#      DATABASE_URL read from secret db-secret/url, CPU/memory requests and
#      limits, liveness and readiness probes on GET /health (port 8000).
#   2. Service "transaction-api-service": LoadBalancer mapping port 80 -> 8000.
#   3. HorizontalPodAutoscaler: 2-10 replicas, targeting 70% CPU and 80% memory.
# The string is only defined here; create_deployment_files() does not list it.
kubernetes_yaml = """
apiVersion: apps/v1
kind: Deployment
metadata:
  name: transaction-api
spec:
  replicas: 3
  selector:
    matchLabels:
      app: transaction-api
  template:
    metadata:
      labels:
        app: transaction-api
    spec:
      containers:
      - name: api
        image: transaction-api:latest
        ports:
        - containerPort: 8000
        env:
        - name: DATABASE_URL
          valueFrom:
            secretKeyRef:
              name: db-secret
              key: url
        resources:
          requests:
            memory: "256Mi"
            cpu: "250m"
          limits:
            memory: "512Mi"
            cpu: "500m"
        livenessProbe:
          httpGet:
            path: /health
            port: 8000
          initialDelaySeconds: 30
          periodSeconds: 10
        readinessProbe:
          httpGet:
            path: /health
            port: 8000
          initialDelaySeconds: 5
          periodSeconds: 5
---
apiVersion: v1
kind: Service
metadata:
  name: transaction-api-service
spec:
  selector:
    app: transaction-api
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8000
  type: LoadBalancer
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: transaction-api-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: transaction-api
  minReplicas: 2
  maxReplicas: 10
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80
"""

# Static summary banner for the deployment cell. The feature list and next
# steps are hard-coded text; nothing here inspects the configuration strings
# defined above.
print("""
🚀 Deployment Configuration Ready!

Key Features:
1. ✅ Dockerized application
2. ✅ Docker Compose for local development
3. ✅ Production-ready logging
4. ✅ Prometheus metrics
5. ✅ Health checks
6. ✅ Kubernetes ready
7. ✅ Auto-scaling configuration
8. ✅ Resource limits
9. ✅ Security best practices

Next Steps:
1. Review and adjust configuration
2. Set up CI/CD pipeline
3. Configure monitoring dashboards
4. Set up alerts
5. Load testing
6. Security scanning
""")

# Show deployment checklist
# Static, human-readable checklist grouped by area (environment variables,
# database, security, monitoring, performance, operations). It is only
# printed below; nothing parses or enforces it.
deployment_checklist = """
📋 Production Deployment Checklist:

□ Environment Variables
  - DATABASE_URL (with connection pooling)
  - REDIS_URL (for distributed caching)
  - API_KEY (for authentication)
  - LOG_LEVEL
  
□ Database
  - Connection pooling configured
  - Read replicas for scaling
  - Backup strategy
  - Migration scripts
  
□ Security
  - API authentication
  - Rate limiting
  - Input validation
  - SQL injection prevention
  - HTTPS/TLS
  
□ Monitoring
  - Prometheus metrics
  - Grafana dashboards
  - Alert rules
  - Log aggregation (ELK/EFK)
  
□ Performance
  - Load testing completed
  - Caching strategy
  - Database indexes
  - Query optimization
  
□ Operations
  - CI/CD pipeline
  - Rollback strategy
  - Documentation
  - Runbooks
"""

print(deployment_checklist)