In [1]:
import os
import shutil
from tqdm import tqdm

# --- Core Application Imports ---
from config.config import Config
from rfiprocessor.utils.logger import get_logger
from rfiprocessor.utils.wlak_dir import list_all_file_paths
from rfiprocessor.services.markdown_converter import MarkdownConverter, ProcessorType
from rfiprocessor.core.agents.document_classifier import DocumentClassifierAgent

# --- Database Imports ---
from rfiprocessor.db.database import init_db, get_db_session
from rfiprocessor.services.db_handler import DatabaseHandler
from rfiprocessor.db.db_models import IngestionStatus

# --- Initial Setup ---
config = Config()
logger = get_logger(__name__)

def run_markdown_conversion_pipeline():
    """
    Scans for new files, converts them to markdown, and tracks progress in the database.
    """

    # --- Step 1: Markdown Conversion ---
    logger.info("--- Starting Markdown Conversion Pipeline ---")
    
    # Initialize services
    converter = MarkdownConverter()
    classifier = DocumentClassifierAgent()
    
    # Get a database session and initialize the handler
    db_session_generator = get_db_session()
    db_session = next(db_session_generator)
    try:
        db_handler = DatabaseHandler(db_session)

        # 1. Scan the incoming directory for all files
        all_files = list_all_file_paths(config.INCOMING_DATA_PATH)
        
        # 2. Register all found files in the database if they don't exist
        logger.info(f"Found {len(all_files)} files. Registering new files in the database...")
        for file_path in all_files:
            # This will add the document if new, or get the existing one.
            db_handler.add_or_get_document(source_filepath=file_path)
        
        # 3. Fetch only the documents that are pending conversion
        pending_docs = db_handler.get_documents_by_status(IngestionStatus.PENDING)
        
        # Filter out unsupported files
        valid_docs = [
            doc for doc in pending_docs 
            if any(doc.source_filename.lower().endswith(ext) for ext in config.VALID_FILE_EXTNS)
        ]
        
        if not valid_docs:
            logger.info("No new documents to convert. Pipeline finished.")
            return

        logger.info(f"Found {len(valid_docs)} new documents to process.")

        # 4. Process each pending document
        for doc in tqdm(valid_docs, desc="Converting files to Markdown"):
            try:
                logger.info(f"Processing document: {doc.source_filename} (ID: {doc.id})")
                
                # Determine which processor to use
                processor_to_use = ProcessorType.MARKITDOWN
                if any(doc.source_filename.lower().endswith(ext) for ext in config.UNSTRD_FILE_EXTNS):
                    processor_to_use = ProcessorType.UNSTRUCTURED

                # Convert the file to markdown
                markdown_content, markdown_path = converter.convert_to_markdown(
                    file_path=doc.source_filepath,
                    processor=processor_to_use
                )
                
                # Move the original raw file to the processed directory
                destination_path = os.path.join(config.PROCESSED_DATA_PATH, doc.source_filename)
                os.makedirs(os.path.dirname(destination_path), exist_ok=True)
                shutil.move(doc.source_filepath, destination_path)
                
                # Update the document record in the database with the new status and paths
                updates = {
                    "markdown_filepath": markdown_path,
                    "processed_filepath": destination_path,
                    "ingestion_status": IngestionStatus.MARKDOWN_CONVERTED,
                    "error_message": None # Clear any previous errors
                }
                db_handler.update_document(doc.id, updates)
                logger.info(f"Successfully converted and moved document ID {doc.id}.")

            except Exception as e:
                logger.error(f"Error processing document ID {doc.id} ('{doc.source_filename}'): {e}", exc_info=True)
                # Update the document record to reflect the failure
                db_handler.update_document(
                    doc.id,
                    {
                        "ingestion_status": IngestionStatus.FAILED,
                        "error_message": str(e)
                    }
                )

    finally:
        # Ensure the database session is closed
        db_session.close()
        logger.info("--- Markdown Conversion Pipeline Finished ---")

    # Get a database session and initialize the handler
    db_session_generator = get_db_session()
    db_session = next(db_session_generator)
    try:
        db_handler = DatabaseHandler(db_session)

        # --- Step 2: Document Classification ---
        logger.info("--- Starting Document Classification Step ---")
        docs_to_classify = db_handler.get_documents_by_status(IngestionStatus.MARKDOWN_CONVERTED)
        
        if not docs_to_classify:
            logger.info("No new documents to classify.")
        else:
            logger.info(f"Found {len(docs_to_classify)} documents to classify.")

        for doc in tqdm(docs_to_classify, desc="Classifying documents"):
            try:
                logger.info(f"Classifying document: {doc.source_filename} (ID: {doc.id})")
                
                # Read the markdown content from the file
                with open(doc.markdown_filepath, 'r', encoding='utf-8') as f:
                    markdown_content = f.read()
                
                # Call the classifier agent
                classification = classifier.classify(markdown_content)
                
                # Update the document in the database
                updates = {
                    "document_type": classification.get("document_type"),
                    "document_grade": classification.get("document_grade"),
                    "ingestion_status": IngestionStatus.CLASSIFIED,
                    "error_message": None
                }
                db_handler.update_document(doc.id, updates)
                logger.info(f"Successfully classified document ID {doc.id}.")

            except Exception as e:
                logger.error(f"Error classifying document ID {doc.id}: {e}", exc_info=True)
                db_handler.update_document(
                    doc.id,
                    {"ingestion_status": IngestionStatus.FAILED, "error_message": f"Classification failed: {e}"}
                )

    finally:
        db_session.close()
        logger.info("--- Ingestion Pipeline Finished ---")


if __name__ == "__main__":
    logger.info("Application started.")
    
    # Ensure the database and tables are created before running the pipeline
    init_db()
    
    # Run the main processing function
    run_markdown_conversion_pipeline()
    
    logger.info("Application finished.")

2025-07-22 13:31:37,004 - __main__ - INFO - Application started.
2025-07-22 13:31:37,005 - rfiprocessor.db.database - INFO - Initializing database and creating tables if they don't exist...
2025-07-22 13:31:37,012 - rfiprocessor.db.database - INFO - Database initialization complete.
2025-07-22 13:31:37,014 - __main__ - INFO - --- Starting Markdown Conversion Pipeline ---
2025-07-22 13:31:38,496 - rfiprocessor.services.llm_provider - INFO - Initialized Fast LLM Provider: gpt-4-turbo
2025-07-22 13:31:38,498 - rfiprocessor.services.prompt_loader - INFO - Loading prompt 'document_classifier' from: rfiprocessor\prompts\document_classifier.txt
2025-07-22 13:31:38,511 - rfiprocessor.core.agents.document_classifier - INFO - DocumentClassifierAgent initialized successfully.
2025-07-22 13:31:38,513 - rfiprocessor.utils.wlak_dir - DEBUG - Found file: data/raw/incoming\.DS_Store
2025-07-22 13:31:38,514 - rfiprocessor.utils.wlak_dir - DEBUG - Found file: data/raw/incoming\Cogstate Quality Managemen

In [2]:
from tqdm import tqdm

# --- Core Application Imports ---
from config.config import Config
from rfiprocessor.utils.logger import get_logger
from rfiprocessor.core.agents.document_classifier import DocumentClassifierAgent

# --- Database Imports ---
from rfiprocessor.db.database import init_db, get_db_session
from rfiprocessor.services.db_handler import DatabaseHandler
from rfiprocessor.db.db_models import IngestionStatus

# --- Initial Setup ---
config = Config()
logger = get_logger(__name__)

def run_document_classification_pipeline():
    """
    Scans for markdown_converted files, classify them to RFI/RFO or Supporting Document with grades, and tracks progress in the database.
    """

    classifier = DocumentClassifierAgent()

    # Get a database session and initialize the handler
    db_session_generator = get_db_session()
    db_session = next(db_session_generator)
    try:
        db_handler = DatabaseHandler(db_session)

        # --- Step 2: Document Classification ---
        logger.info("--- Starting Document Classification Step ---")
        docs_to_classify = db_handler.get_documents_by_status(IngestionStatus.MARKDOWN_CONVERTED)

        if not docs_to_classify:
            logger.info("No new documents to classify.")
        else:
            logger.info(f"Found {len(docs_to_classify)} documents to classify.")

        for doc in tqdm(docs_to_classify, desc="Classifying documents"):
            try:
                logger.info(f"Classifying document: {doc.source_filename} (ID: {doc.id})")
                
                # Read the markdown content from the file
                with open(doc.markdown_filepath, 'r', encoding='utf-8') as f:
                    markdown_content = f.read()
                
                # Call the classifier agent
                classification = classifier.classify(markdown_content)
                
                # Update the document in the database
                updates = {
                    "document_type": classification.get("document_type"),
                    "document_grade": classification.get("document_grade"),
                    "ingestion_status": IngestionStatus.CLASSIFIED,
                    "error_message": None
                }
                db_handler.update_document(doc.id, updates)
                logger.info(f"Successfully classified document ID {doc.id}.")

            except Exception as e:
                logger.error(f"Error classifying document ID {doc.id}: {e}", exc_info=True)
                db_handler.update_document(
                    doc.id,
                    {"ingestion_status": IngestionStatus.FAILED, "error_message": f"Classification failed: {e}"}
                )

    finally:
        db_session.close()
        logger.info("--- Ingestion Pipeline Finished ---")

if __name__ == "__main__":
    logger.info("Application started.")
    
    # Ensure the database and tables are created before running the pipeline
    init_db()
    
    # Run the main processing function
    run_document_classification_pipeline()
    
    logger.info("Application finished.")


2025-07-22 13:31:45,738 - __main__ - INFO - Application started.
2025-07-22 13:31:45,740 - rfiprocessor.db.database - INFO - Initializing database and creating tables if they don't exist...
2025-07-22 13:31:45,743 - rfiprocessor.db.database - INFO - Database initialization complete.
2025-07-22 13:31:45,746 - rfiprocessor.services.llm_provider - INFO - Initialized Fast LLM Provider: gpt-4-turbo
2025-07-22 13:31:45,747 - rfiprocessor.services.prompt_loader - DEBUG - Returning cached prompt: 'document_classifier'
2025-07-22 13:31:45,749 - rfiprocessor.core.agents.document_classifier - INFO - DocumentClassifierAgent initialized successfully.
2025-07-22 13:31:45,749 - __main__ - INFO - --- Starting Document Classification Step ---
2025-07-22 13:31:45,750 - rfiprocessor.services.db_handler - DEBUG - Querying for documents with status: MARKDOWN_CONVERTED
2025-07-22 13:31:45,752 - __main__ - INFO - No new documents to classify.
Classifying documents: 0it [00:00, ?it/s]
2025-07-22 13:31:45,755 

In [3]:
from tqdm import tqdm
from pydantic import ValidationError

# --- Core Application Imports ---
from config.config import Config
from rfiprocessor.utils.logger import get_logger
from rfiprocessor.core.agents.rfi_parser import RfiParserAgent

# --- Database Imports ---
from rfiprocessor.db.database import init_db, get_db_session
from rfiprocessor.services.db_handler import DatabaseHandler
from rfiprocessor.db.db_models import IngestionStatus
from rfiprocessor.models.data_models import RFIJson

import os
import json

# --- Initial Setup ---
config = Config()
logger = get_logger(__name__)

def run_rfi_parser_pipeline():
    """
    Scans for classified RFI/RFP files and processes them through the RFI parser pipeline.
    """
    logger.info("--- Starting RFI Parsing Pipeline ---")
    rfi_parser = RfiParserAgent()
    
    db_session_generator = get_db_session()
    db_session = next(db_session_generator)
    try:
        db_handler = DatabaseHandler(db_session)

        # --- Step 3: RFI/RFP Parsing ---
        logger.info("--- Starting RFI/RFP Parsing Step ---")
        docs_to_parse = db_handler.get_documents_by_status(IngestionStatus.CLASSIFIED)

        if not docs_to_parse:
            logger.info("No new classified documents to parse.")
        else:
            logger.info(f"Found {len(docs_to_parse)} documents to potentially parse.")

        for doc in tqdm(docs_to_parse, desc="Parsing RFI/RFP documents"):
            try:
                # Only process documents that were classified as RFI/RFP
                if doc.document_type != "RFI/RFP":
                    logger.info(f"Skipping parsing for doc ID {doc.id} (type: {doc.document_type}). Marking as parsed.")
                    db_handler.update_document(doc.id, {"ingestion_status": IngestionStatus.PARSED})
                    continue

                logger.info(f"Parsing RFI/RFP document: {doc.source_filename} (ID: {doc.id})")
                
                # Read the markdown content
                with open(doc.markdown_filepath, 'r', encoding='utf-8') as f:
                    markdown_content = f.read()
                
                # Call the RFI Parser Agent
                parsed_data = rfi_parser.parse(markdown_content)
                
                # --- SAVE TO /json_output FOLDER AT PROJECT ROOT ---
                output_dir = "json_output"
                os.makedirs(output_dir, exist_ok=True)
                json_filename = os.path.splitext(doc.source_filename)[0] + ".json"
                output_path = os.path.join(output_dir, json_filename)
                with open(output_path, "w", encoding="utf-8") as f:
                    json.dump(parsed_data, f, indent=2, ensure_ascii=False)
                logger.info(f"Saved parsed output to {output_path}")
                # --- END SAVE SECTION ---

                # Validate the structured output with Pydantic
                try:
                    validated_data = RFIJson.model_validate(parsed_data)
                    logger.info(f"Successfully validated JSON structure for doc ID {doc.id}.")
                except ValidationError as ve:
                    # If validation fails, log the specific error and mark the doc as failed
                    error_details = f"Pydantic validation failed for doc ID {doc.id}: {ve}"
                    logger.error(error_details)
                    db_handler.update_document(
                        doc.id,
                        {"ingestion_status": IngestionStatus.FAILED, "error_message": error_details}
                    )
                    continue  # Skip to the next document
                
                # Update the document record with the JSON payload and new status
                updates = {
                    "rfi_json_payload": validated_data.model_dump(),  # Use the validated data
                    "ingestion_status": IngestionStatus.PARSED,
                    "error_message": None
                }
                db_handler.update_document(doc.id, updates)
                logger.info(f"Successfully parsed and stored JSON for document ID {doc.id}.")

            except Exception as e:
                error_msg = f"Error parsing document ID {doc.id}: {e}"
                logger.error(error_msg, exc_info=True)
                db_handler.update_document(
                    doc.id,
                    {"ingestion_status": IngestionStatus.FAILED, "error_message": error_msg}
                )

    finally:
        db_session.close()
        logger.info("--- Ingestion Pipeline Finished ---")

# The if __name__ == "__main__": block remains the same
if __name__ == "__main__":
    logger.info("Application started.")
    init_db()
    run_rfi_parser_pipeline()
    logger.info("Application finished.")


2025-07-22 13:31:45,782 - __main__ - INFO - Application started.
2025-07-22 13:31:45,783 - rfiprocessor.db.database - INFO - Initializing database and creating tables if they don't exist...
2025-07-22 13:31:45,784 - rfiprocessor.db.database - INFO - Database initialization complete.
2025-07-22 13:31:45,785 - __main__ - INFO - --- Starting RFI Parsing Pipeline ---
2025-07-22 13:31:46,303 - rfiprocessor.services.llm_provider - INFO - Initialized Advanced LLM Provider: gpt-4o
2025-07-22 13:31:46,305 - rfiprocessor.services.prompt_loader - INFO - Loading prompt 'rfi_parser_summary' from: rfiprocessor\prompts\rfi_parser_summary.txt
2025-07-22 13:31:46,319 - rfiprocessor.services.prompt_loader - INFO - Loading prompt 'rfi_parser_chunking' from: rfiprocessor\prompts\rfi_parser_chunking.txt
2025-07-22 13:31:46,337 - rfiprocessor.core.agents.rfi_parser - INFO - RfiParserAgent initialized successfully.
2025-07-22 13:31:46,338 - __main__ - INFO - --- Starting RFI/RFP Parsing Step ---
2025-07-22 1

In [4]:
from rfiprocessor.db.database import get_db_session
from rfiprocessor.services.db_handler import DatabaseHandler
from rfiprocessor.db.db_models import Document, IngestionStatus
from rfiprocessor.services.chunker import ChunkerService
from config.config import Config

# Setup config and chunker
config = Config()
chunk_size = getattr(config, "CHUNK_SIZE", 1000)
chunk_overlap = getattr(config, "CHUNK_OVERLAP", 150)
chunker = ChunkerService(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

# Get a database session and handler
db_session_generator = get_db_session()
db_session = next(db_session_generator)
db_handler = DatabaseHandler(db_session)

# Fetch all documents
docs = db_session.query(Document).all()
if not docs:
    raise ValueError("No documents found in the database.")

for doc in docs:
    print("="*60)
    print(f"Document: {doc.source_filename} (type: {doc.document_type})")
    try:
        if doc.document_type == "RFI/RFP" and doc.rfi_json_payload:
            markdown_content = ""  # Not used for RFI/RFP
            print("Chunking as RFI/RFP (using JSON payload)...")
        elif doc.markdown_filepath:
            with open(doc.markdown_filepath, 'r', encoding='utf-8') as f:
                markdown_content = f.read()
            print("Chunking as supporting document (using markdown content)...")
        else:
            print("Skipping: Document is missing required data for chunking.")
            continue

        chunks_data = chunker.create_chunks_for_document(doc, markdown_content)
        print(f"Number of chunks: {len(chunks_data)}")

        if chunks_data:
            db_handler.add_chunks_to_document(doc.id, chunks_data)
            # Optionally, update the document status to CHUNKED
            db_handler.update_document(doc.id, {"ingestion_status": IngestionStatus.CHUNKED})
            print("Chunks saved to database.")
        else:
            print("No chunks created for this document.")

    except Exception as e:
        print(f"Error processing document: {e}")

print("="*60)
print("Done chunking and saving all documents.")

2025-07-22 13:33:23,563 - rfiprocessor.services.chunker - INFO - ChunkerService initialized with chunk_size=2000 and chunk_overlap=200
2025-07-22 13:33:23,569 - rfiprocessor.services.chunker - INFO - Creating chunks for document ID 3 (Type: RFI/RFP)
2025-07-22 13:33:23,570 - rfiprocessor.services.chunker - INFO - Created 135 Q&A-based chunks for document ID 3.
2025-07-22 13:33:23,590 - rfiprocessor.services.db_handler - INFO - Added 135 chunks to document ID 3.
2025-07-22 13:33:23,633 - rfiprocessor.services.db_handler - INFO - Updating document ID 3 with: {'ingestion_status': <IngestionStatus.CHUNKED: 'CHUNKED'>}
2025-07-22 13:33:23,636 - rfiprocessor.services.chunker - INFO - Creating chunks for document ID 5 (Type: Supporting Document)
2025-07-22 13:33:23,654 - rfiprocessor.services.chunker - INFO - Creating chunks for document ID 6 (Type: Supporting Document)
2025-07-22 13:33:23,656 - rfiprocessor.services.chunker - INFO - Created 2 semantic chunks for document ID 6.
2025-07-22 13:

Document: .DS_Store (type: None)
Skipping: Document is missing required data for chunking.
Document: Copy of AbbVie Remediation Assessment- IT Security Privacy- Cogstate v10 (Additional Info) 3 Feb 2016.xlsx (type: None)
Skipping: Document is missing required data for chunking.
Document: Cogstate_RFI_AbbVie S&E Vendor Communication Plan.docx (type: RFI/RFP)
Chunking as RFI/RFP (using JSON payload)...
Number of chunks: 135
Chunks saved to database.
Document: Attachment 2.xlsx (type: None)
Skipping: Document is missing required data for chunking.
Document: Corporate Org Chart.pdf (type: Supporting Document)
Chunking as supporting document (using markdown content)...
Number of chunks: 0
No chunks created for this document.
Document: Attachment 1 - ISO Certificate.pdf (type: Supporting Document)
Chunking as supporting document (using markdown content)...
Number of chunks: 2
Chunks saved to database.
Document: Abbvie.pdf (type: Supporting Document)
Chunking as supporting document (using mar

2025-07-22 13:33:23,773 - rfiprocessor.services.db_handler - INFO - Updating document ID 10 with: {'ingestion_status': <IngestionStatus.CHUNKED: 'CHUNKED'>}
2025-07-22 13:33:23,779 - rfiprocessor.services.chunker - INFO - Creating chunks for document ID 12 (Type: RFI/RFP)
2025-07-22 13:33:23,780 - rfiprocessor.services.chunker - INFO - Created 122 Q&A-based chunks for document ID 12.
2025-07-22 13:33:23,794 - rfiprocessor.services.db_handler - INFO - Added 122 chunks to document ID 12.
2025-07-22 13:33:23,847 - rfiprocessor.services.db_handler - INFO - Updating document ID 12 with: {'ingestion_status': <IngestionStatus.CHUNKED: 'CHUNKED'>}
2025-07-22 13:33:23,857 - rfiprocessor.services.chunker - INFO - Creating chunks for document ID 14 (Type: RFI/RFP)
2025-07-22 13:33:23,860 - rfiprocessor.services.chunker - INFO - Created 157 Q&A-based chunks for document ID 14.
2025-07-22 13:33:23,876 - rfiprocessor.services.db_handler - INFO - Added 157 chunks to document ID 14.
2025-07-22 13:33:2

Chunks saved to database.
Document: .DS_Store (type: None)
Skipping: Document is missing required data for chunking.
Document: Cogstate Quality Management System_Quality Questionnaire Blank.pdf (type: RFI/RFP)
Chunking as RFI/RFP (using JSON payload)...
Number of chunks: 122
Chunks saved to database.
Document: Diligent Pharma - Harmony Biosciences Core RFI 3.0 - 27-01-2025 Blank.xlsx (type: None)
Skipping: Document is missing required data for chunking.
Document: LEXEO GCP service provider audit questionnaire Blank.docx (type: RFI/RFP)
Chunking as RFI/RFP (using JSON payload)...
Number of chunks: 157
Chunks saved to database.
Done chunking and saving all documents.


In [5]:
import os
import shutil
from tqdm import tqdm

# --- Core Application Imports ---
from config.config import Config
from rfiprocessor.utils.logger import get_logger
from rfiprocessor.core.agents.rfi_parser import RfiParserAgent

# --- Database Imports ---
from rfiprocessor.db.database import init_db, get_db_session
from rfiprocessor.services.db_handler import DatabaseHandler
from rfiprocessor.db.db_models import IngestionStatus
from rfiprocessor.models.data_models import RFIJson

# --- Initial Setup ---
config = Config()
logger = get_logger(__name__)


def run_chunking_pipeline():
    """
    Scans for new files and processes them through the ingestion pipeline.
    """
    logger.info("--- Starting Ingestion Pipeline ---")

    # Initialize services and agents
    chunker = ChunkerService(config.CHUNK_SIZE, config. CHUNK_OVERLAP)

    db_session_generator = get_db_session()
    db_session = next(db_session_generator)
    try:
        db_handler = DatabaseHandler(db_session)
        # --- Step 4: Document Chunking ---
        logger.info("--- Starting Document Chunking Step ---")
        docs_to_chunk = db_handler.get_documents_by_status(IngestionStatus.PARSED)

        if not docs_to_chunk:
            logger.info("No new parsed documents to chunk.")
        else:
            logger.info(f"Found {len(docs_to_chunk)} documents to chunk.")

        for doc in tqdm(docs_to_chunk, desc="Chunking documents"):
            try:
                logger.info(f"Chunking document: {doc.source_filename} (ID: {doc.id})")
                
                # We need the markdown content for supporting docs.
                # RFI docs don't need it for chunking, but it's cleaner to always have it.
                with open(doc.markdown_filepath, 'r', encoding='utf-8') as f:
                    markdown_content = f.read()
                    
                # Call the chunker service
                chunks_data = chunker.create_chunks_for_document(doc, markdown_content)
                
                if not chunks_data:
                    logger.warning(f"No chunks were created for document ID {doc.id}. Moving to next stage.")
                else:
                    # Add the created chunks to the database
                    db_handler.add_chunks_to_document(doc.id, chunks_data)
                
                # Update the document status to CHUNKED
                db_handler.update_document(doc.id, {"ingestion_status": IngestionStatus.CHUNKED})
                logger.info(f"Successfully chunked and stored chunks for document ID {doc.id}.")

            except Exception as e:
                error_msg = f"Error chunking document ID {doc.id}: {e}"
                logger.error(error_msg, exc_info=True)
                db_handler.update_document(
                    doc.id,
                    {"ingestion_status": IngestionStatus.FAILED, "error_message": error_msg}
                )

    finally:
        db_session.close()
        logger.info("--- Ingestion Pipeline Finished ---")

# The if __name__ == "__main__": block remains the same
if __name__ == "__main__":
    logger.info("Application started.")
    init_db()
    run_ingestion_pipeline()
    logger.info("Application finished.")

2025-07-22 13:33:23,945 - __main__ - INFO - Application started.
2025-07-22 13:33:23,946 - rfiprocessor.db.database - INFO - Initializing database and creating tables if they don't exist...
2025-07-22 13:33:23,950 - rfiprocessor.db.database - INFO - Database initialization complete.


NameError: name 'run_ingestion_pipeline' is not defined