# RAG System Manual Tests

This notebook provides manual tests and examples for the RAG system. Run the cells in order: every later cell relies on the names imported in the Setup cell.

## Setup

In [None]:
import sys
from pathlib import Path

# Put the parent of the current working directory at the front of the import
# path so the `rag_app` imports below can resolve. This depends on where the
# kernel was started; presumably the notebook sits one level below the
# project root — confirm if the imports fail.
sys.path.insert(0, str(Path.cwd().parent))

from rag_app.config import DATA_DIR, CHROMA_PERSIST_DIR
from rag_app.ingest import ingest_documents
from rag_app.rag_chain import answer_question
from rag_app.vectorstore import get_vectorstore

## Test 1: Check Configuration

In [None]:
# Show the two configured paths and whether the data directory is present.
# A single print with a newline separator emits the same three lines.
print(
    f"Data directory: {DATA_DIR}",
    f"Chroma directory: {CHROMA_PERSIST_DIR}",
    f"Data directory exists: {DATA_DIR.exists()}",
    sep="\n",
)

## Test 2: Ingest Sample Documents

In [None]:
# Call the ingestion entry point (verbose=True is passed through to it) and
# keep the returned statistics for inspection.
stats = ingest_documents(verbose=True)

# The result is iterated with .items(), so it is expected to be mapping-like;
# print one indented "name: value" line per entry.
print("\nIngestion statistics:")
for stat_name, stat_value in stats.items():
    print(f"  {stat_name}: {stat_value}")

## Test 3: Check Vector Store

In [None]:
# Obtain the vector store handle and print the count it reports.
# NOTE(review): assumes the object returned by get_vectorstore() exposes a
# count() method — confirm against rag_app.vectorstore.
vectorstore = get_vectorstore()
doc_count = vectorstore.count()
print(f"Documents in vector store: {doc_count}")

## Test 4: Ask Questions

In [None]:
# Ask one example question and display the answer with its cited sources.
question = "What is this document about?"

# k=4 is forwarded to answer_question; presumably it is the number of chunks
# to retrieve — confirm against rag_app.rag_chain.
result = answer_question(question, k=4)

print("Question:", question)
print("\nAnswer:")
print(result["answer"])
print("\nSources:")

# Maximum number of snippet characters shown per source.
snippet_preview_chars = 100

for i, source in enumerate(result["sources"], 1):
    snippet = source["snippet"]
    # Only append an ellipsis when the snippet was actually cut off, so a
    # short snippet is not presented as if it had been truncated.
    if len(snippet) > snippet_preview_chars:
        preview = f"{snippet[:snippet_preview_chars]}..."
    else:
        preview = snippet

    print(f"\n[{i}] {source['filename']} ({source['file_type']})")
    print(f"    Location: {source['location']}")
    print(f"    Snippet: {preview}")

## Test 5: Test Different Questions

In [None]:
# Run a small batch of questions, printing each answer beneath a banner
# followed by the retrieval count reported in the result.
questions = [
    "What are the main topics covered?",
    "Summarize the key points",
    "What technologies are mentioned?",
]

# 80-character rule drawn above and below each question.
banner = "=" * 80

for current_question in questions:
    # Blank line, rule, question, rule — emitted as one print call.
    print(f"\n{banner}", f"Q: {current_question}", banner, sep="\n")

    result = answer_question(current_question, k=3)
    print(result["answer"])
    print(f"\n(Retrieved {result['retrieved_docs']} documents)")

## Test 6: Embeddings Test

In [None]:
from rag_app.embeddings import get_embeddings

# Embed a single sample query and inspect the resulting vector.
embeddings = get_embeddings()
query = "What is machine learning?"
embedding = embeddings.embed_query(query)

# Report the query text, the vector length, and its first five components.
print(
    f"Query: {query}",
    f"Embedding dimension: {len(embedding)}",
    f"First 5 values: {embedding[:5]}",
    sep="\n",
)