# Data Ingestion Test

This notebook demonstrates how to ingest documents into the Agentic RAG system.

In [None]:
import sys
import os
from pathlib import Path

# Add src to Python path.
# The path is resolved to an absolute location so the entry keeps working
# even if the working directory changes later in the session, and the insert
# is skipped when the entry is already present so re-running this cell does
# not stack duplicate entries at the front of sys.path.
_src_dir = str((Path('..') / 'src').resolve())
if _src_dir not in sys.path:
    sys.path.insert(0, _src_dir)

from processing.loader import DocumentLoader
from components.vector_stores import VectorStoreManager
from utils.config_manager import ConfigManager

In [None]:
# Load configuration
# Builds a ConfigManager from '../config.yaml' (a path relative to the current
# working directory) and keeps its `.config` attribute in `config`, which the
# following cells pass to the loader and the vector store manager.
config_manager = ConfigManager('../config.yaml')
config = config_manager.config
# Sanity check: print the LLM and embeddings model names read from the config.
print(f"Loaded configuration: LLM={config.llm.model}, Embeddings={config.embeddings.model}")

In [None]:
# Initialize components
# Both objects are constructed from the same `config` object loaded above.
# `loader` is used below to find and load documents; `vector_store` receives
# the resulting chunks.
loader = DocumentLoader(config)
vector_store = VectorStoreManager(config)

# Reached only if both constructors returned without raising.
print("Components initialized successfully")

In [None]:
# Find documents in data directory
# '../data' is relative to the current working directory.
data_path = Path('../data')
# NOTE(review): recursive=True presumably makes the search descend into
# subdirectories - confirm against DocumentLoader.find_documents.
documents = loader.find_documents(data_path, recursive=True)
# Each returned item must expose a `.name` attribute (presumably pathlib.Path
# objects - verify); only the names are printed, not the full paths.
print(f"Found {len(documents)} documents: {[doc.name for doc in documents]}")

In [None]:
# Ingest one document end to end: load it into chunks, store the chunks,
# then report the store's document count.
if not documents:
    # Nothing was discovered in the previous cell, so there is nothing to ingest.
    print("No documents found. Add some .pdf, .md, or .txt files to the data/ directory.")
else:
    # Only the first discovered document is ingested in this demo.
    doc_path = documents[0]
    print(f"Processing: {doc_path}")

    # Let the loader turn the document into chunks.
    chunks = loader.load_document(doc_path)
    print(f"Created {len(chunks)} chunks")

    # Hand the chunks to the vector store; the document path is passed
    # along as a plain string.
    vector_store.add_documents(chunks, str(doc_path))
    print("Documents added to vector store")

    # Ask the store for its current document count and report it.
    total_docs = vector_store.get_document_count()
    print(f"Total documents in store: {total_docs}")