In [None]:
import sys
sys.path.append('..')

import torch
from src.config import settings
from src.models import TutorLLM
from src.ingestion import VectorStoreManager
from src.rag import create_retriever, RAGChain

# Report whether a CUDA device is visible and, when one is, the name and
# total memory of device 0 (converted from bytes to GiB).
cuda_ok = torch.cuda.is_available()
print(f"CUDA Available: {cuda_ok}")
if cuda_ok:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"VRAM: {gpu_props.total_memory / 1024**3:.2f} GB")

## 1. Test Document Ingestion

In [None]:
from src.ingestion import load_documents, chunk_documents, get_document_stats

# Load sample documents (update paths as needed)
docs = load_documents(
    source_paths=["../data/raw/sample.pdf"],  # Replace with actual PDF
    subject="ML"
)

# Summarise what was loaded so an empty or partial load is visible right away.
stats = get_document_stats(docs)
print(f"Documents loaded: {stats['total_documents']}")
print(f"Total characters: {stats['total_characters']:,}")

# Chunk documents
chunks = chunk_documents(docs)
print(f"\nChunks created: {len(chunks)}")

# Stop here with an actionable message instead of a bare IndexError on
# chunks[0]; every later cell needs at least one chunk to work with.
if not chunks:
    raise ValueError(
        "No chunks were produced - check that source_paths points to a "
        "readable, non-empty document."
    )

# Preview the first 300 characters of the first chunk as a sanity check.
print(f"\nSample chunk:\n{chunks[0].page_content[:300]}...")

## 2. Build Vector Store

In [None]:
# Create the manager for the "test_tutor" collection and build the store
# from the chunks produced by the ingestion step.
vectorstore_manager = VectorStoreManager(collection_name="test_tutor")
vectorstore_manager.build_vectorstore(chunks)

# Print every statistic the manager reports, one "name: value" pair per line.
vs_stats = vectorstore_manager.get_stats()
print("Vector store stats:")
for stat_name, stat_value in vs_stats.items():
    print(f"  {stat_name}: {stat_value}")

## 3. Test Retrieval

In [None]:
# Query the vector store directly (no LLM involved), asking for k=3 results,
# and show where each hit came from plus a 200-character preview.
query = "What is gradient descent?"
results = vectorstore_manager.similarity_search(query, k=3)

print(f"Query: {query}\n")
for rank, hit in enumerate(results, start=1):
    meta = hit.metadata
    print(f"Result {rank}:")
    # Fall back to placeholders when a hit carries no source/page metadata.
    print(f"  Source: {meta.get('source_file', 'Unknown')}")
    print(f"  Page: {meta.get('page', 'N/A')}")
    print(f"  Content: {hit.page_content[:200]}...\n")

## 4. Load LLM

In [None]:
# Instantiate the tutor model with quantization enabled.
# (This will take a few minutes on first run.)
print("Loading LLM...")
llm = TutorLLM(use_quantization=True)

# Dump whatever the model reports about itself, one field per line.
info = llm.get_info()
print("\nModel Info:")
for field, detail in info.items():
    print(f"  {field}: {detail}")

## 5. Test RAG Pipeline

In [None]:
# Wrap the vector store in a retriever (k=5) and combine it with the loaded
# LLM into a RAG chain for the "ML" subject.
retriever = create_retriever(vectorstore_manager, k=5)
rag_chain = RAGChain(llm=llm, retriever=retriever, subject="ML")

print("RAG chain ready!")

In [None]:
# Send one question through the RAG chain and print the answer along with
# the timing and source information returned with it.
question = "Explain overfitting and how to prevent it"
result = rag_chain.ask(question)

print(f"Question: {question}\n")
print(f"Answer:\n{result['answer']}\n")
print(f"Inference time: {result['inference_time']}s")
print(f"Sources used: {result['num_sources']}")

# Citations are optional: list them only when the result includes a
# non-empty 'sources' entry.
sources = result.get('sources')
if sources:
    print("\nSource citations:")
    for n, src in enumerate(sources, start=1):
        print(f"  {n}. {src['source']} (Page {src['page']})")

## 6. Test Practice Question Generation

In [None]:
from src.rag import PracticeQuestionGenerator

# Build a practice-question generator on top of the same LLM and retriever,
# then request three questions on one topic and print them.
generator = PracticeQuestionGenerator(llm=llm, retriever=retriever, subject="ML")

questions = generator.generate(topic="Neural Networks", num_questions=3)

print(questions['questions'])

## 7. Evaluate Multiple Questions

In [None]:
# Run a small batch of questions through the RAG chain, printing each answer
# under a banner together with its timing and source count.
test_questions = [
    "What is the difference between supervised and unsupervised learning?",
    "Explain backpropagation in simple terms",
    "What is regularization and why is it important?",
]

divider = '=' * 60  # banner line, identical for every question

for i, q in enumerate(test_questions, start=1):
    print(f"\n{divider}")
    print(f"Question {i}: {q}")
    print(divider)

    result = rag_chain.ask(q)
    print(f"\n{result['answer']}")
    print(f"\n[Time: {result['inference_time']}s, Sources: {result['num_sources']}]")

## 8. Monitor VRAM Usage

In [None]:
def bytes_to_gib(num_bytes):
    """Convert a byte count to gibibytes (1 GiB = 1024**3 bytes)."""
    return num_bytes / 1024**3


# Report current GPU memory usage, then release the allocator's unused cache
# and report again so the effect of clearing is visible.
if torch.cuda.is_available():
    print(f"VRAM Allocated: {bytes_to_gib(torch.cuda.memory_allocated()):.2f} GB")
    print(f"VRAM Reserved: {bytes_to_gib(torch.cuda.memory_reserved()):.2f} GB")
    print(f"Max VRAM Used: {bytes_to_gib(torch.cuda.max_memory_allocated()):.2f} GB")

    # empty_cache() only hands back cached blocks that no tensor is using.
    # Memory held by live tensors stays allocated, so the "Reserved" figure
    # is the one expected to drop; "Allocated" is printed for comparison.
    torch.cuda.empty_cache()
    print("\nCache cleared")
    print(f"VRAM Allocated: {bytes_to_gib(torch.cuda.memory_allocated()):.2f} GB")
    print(f"VRAM Reserved: {bytes_to_gib(torch.cuda.memory_reserved()):.2f} GB")

## 9. Experiment with Different Subjects

In [None]:
# Build one RAG chain per subject label and ask each for a key concept,
# printing the first 200 characters of every answer.
# NOTE(review): all chains share the single `retriever` created earlier,
# which is backed by the documents loaded with subject="ML" - confirm that
# reusing it for "DL" and "DSA" is intended.
subjects = ["ML", "DL", "DSA"]

for subject in subjects:
    prompt = f"Give me a key concept in {subject}"
    chain = RAGChain(llm=llm, retriever=retriever, subject=subject)
    result = chain.ask(prompt)

    preview = result['answer'][:200]
    print(f"\n{subject}: {preview}...")

## 10. Cleanup

In [None]:
# Release the CUDA allocator's unused cached memory when a GPU is present.
# Objects still referenced by the notebook are not touched by this call.
cuda_ready = torch.cuda.is_available()
if cuda_ready:
    torch.cuda.empty_cache()

print("Cleanup complete!")