In [None]:
import argparse
import os
import uuid
from sqlalchemy.orm import Session

# --- Core Application Imports ---
from rfiprocessor.utils.logger import get_logger
from rfiprocessor.services.run_inference import run_inference_for_rfi

# --- Database Imports ---
from rfiprocessor.db.database import init_db, get_db_session
from rfiprocessor.db.db_models import RfiDocument, RfiStatus

# --- Initial Setup ---
# Module-level logger obtained from the project's logging helper, keyed by this module's name.
logger = get_logger(__name__)

# Sample document used when main() is called without an explicit path.
DEFAULT_INPUT_FILE_PATH = "/Users/nagurshareefshaik/Desktop/Cogstate-RFI/data/blank/incoming/Roche on-market cognitive screener solution_2024-12-12 Blank.docx"


def main(input_file_path=DEFAULT_INPUT_FILE_PATH):
    """
    Main function to run the inference pipeline test.

    Steps:
      1. Initialize the database.
      2. Insert an ``RfiDocument`` row in ``IN_PROGRESS`` state (simulating
         what the API does before handing off to the pipeline).
      3. Call ``run_inference_for_rfi`` with the new record's ID.
      4. Re-read the record and log its final status and progress.

    Args:
        input_file_path: Path of the document to run through the pipeline.
            Defaults to the sample file in ``DEFAULT_INPUT_FILE_PATH``.

    Returns:
        None. The function logs and returns early if the input file is
        missing or the initial record cannot be created.
    """
    if not os.path.exists(input_file_path):
        logger.error(f"Input file not found: {input_file_path}")
        return

    logger.info("--- Starting Inference Pipeline Test ---")

    # 1. Initialize the database
    init_db()

    # 2. Simulate the API's pre-computation step: Create the RFI Document record
    db_session_gen = get_db_session()
    db: Session = next(db_session_gen)

    # Keep the ID as a plain value so nothing below depends on reading
    # attributes from an ORM instance whose session is already closed.
    rfi_doc_id = None
    try:
        logger.info("Simulating API: Creating initial RfiDocument record in the database...")

        source_filename = os.path.basename(input_file_path)

        # We create the record first to get a valid ID, just like the real API
        new_rfi_doc = RfiDocument(
            id=str(uuid.uuid4()),  # Generate ID upfront
            title=os.path.splitext(source_filename)[0],
            source_filename=source_filename,
            status=RfiStatus.IN_PROGRESS,
            updated_by_user="test_runner",
        )
        db.add(new_rfi_doc)
        db.commit()
        db.refresh(new_rfi_doc)
        rfi_doc_id = new_rfi_doc.id

        logger.info(f"Record created with ID: {rfi_doc_id}")
    except Exception:
        # Undo the partial transaction and fall through to the abort branch
        # below instead of crashing with the session in a failed state.
        db.rollback()
        logger.exception("Error while creating the initial RfiDocument record.")
    finally:
        db.close()

    if not rfi_doc_id:
        logger.error("Failed to create the initial RFI document record. Aborting.")
        return

    # 3. Call the core inference pipeline function
    # This function will handle its own database sessions internally.
    logger.info(f"Calling the core inference pipeline for RFI ID: {rfi_doc_id}")
    run_inference_for_rfi(
        rfi_doc_id=rfi_doc_id,
        temp_file_path=input_file_path,
        user="test_runner"
    )
    logger.info("--- Inference Pipeline Test Function Call Completed ---")

    # 4. (Optional) Verify the final state in the database
    # A fresh session is used so the values read reflect what the pipeline
    # persisted, not state cached from the creation step.
    verify_session_gen = get_db_session()
    verify_db: Session = next(verify_session_gen)
    try:
        final_doc = verify_db.query(RfiDocument).filter(RfiDocument.id == rfi_doc_id).first()
        if final_doc:
            logger.info(f"Final status for RFI ID {final_doc.id}: {final_doc.status.value}")
            logger.info(f"Final progress: {final_doc.progress}%")
            # To inspect the full final payload, dump final_doc.payload as JSON here.
        else:
            logger.error("Could not retrieve final document state from DB.")
    finally:
        verify_db.close()


# Run the pipeline test only when this module is the entry point
# (executed directly, or run as a notebook cell where __name__ is "__main__").
if __name__ == "__main__":
    main()

2025-07-28 12:42:42,293 - __main__ - INFO - --- Starting Inference Pipeline Test ---
2025-07-28 12:42:42,294 - rfiprocessor.db.database - INFO - Initializing database and creating tables if they don't exist...
2025-07-28 12:42:42,296 - rfiprocessor.db.database - INFO - Database initialization complete.
2025-07-28 12:42:42,296 - __main__ - INFO - Simulating API: Creating initial RfiDocument record in the database...
2025-07-28 12:42:42,302 - __main__ - INFO - Record created with ID: 6407ec8e-12be-4f74-8054-350e25117ce0
2025-07-28 12:42:42,303 - __main__ - INFO - Calling the core inference pipeline for RFI ID: 6407ec8e-12be-4f74-8054-350e25117ce0
2025-07-28 12:42:42,303 - __main__ - INFO - Inference pipeline started for RFI ID: 6407ec8e-12be-4f74-8054-350e25117ce0
2025-07-28 12:42:42,321 - rfiprocessor.services.llm_provider - INFO - Initialized Advanced LLM Provider: gpt-4o
2025-07-28 12:42:42,321 - rfiprocessor.services.prompt_loader - INFO - Loading prompt 'rfi_parser_summary' from: rf