In [1]:
# RAG Pipeline Implementation - Jupyter Notebook
# Run each cell sequentially to build and test the RAG system

In [3]:
# Install and Import Dependencies
"""
!pip install sentence-transformers scikit-learn beautifulsoup4 requests numpy transformers torch
"""

'\n!pip install sentence-transformers scikit-learn beautifulsoup4 requests numpy transformers torch\n'

In [4]:
import os
import requests
from bs4 import BeautifulSoup
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import re
from typing import List, Tuple, Dict, Optional
import logging
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch
import warnings
import time
warnings.filterwarnings('ignore')

In [5]:
# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [6]:
print("✅ All dependencies imported successfully!")

✅ All dependencies imported successfully!


In [7]:
# RAG System Class Definition

In [8]:
class RAGSystem:
    def __init__(self, embedding_model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
                 llm_model_name: str = "microsoft/DialoGPT-medium"):
        """
        Initialize RAG System with embedding model and local LLM
        
        Args:
            embedding_model_name: Name of the sentence transformer model for embeddings
            llm_model_name: Name of the local LLM model
        """
        self.embedding_model_name = embedding_model_name
        self.llm_model_name = llm_model_name
        self.chunks = []
        self.embeddings = None
        self.embedding_model = None
        self.llm_pipeline = None
        self.tokenizer = None
        self.source_url = ""
        
        print(f"🔧 Initializing RAG System...")
        print(f"📊 Embedding Model: {embedding_model_name}")
        print(f"🧠 LLM Model: {llm_model_name}")
        
        # Initialize models
        self._load_models()
    
    def _load_models(self):
        """Load embedding model and local LLM"""
        try:
            print(f"📥 Loading embedding model: {self.embedding_model_name}")
            self.embedding_model = SentenceTransformer(self.embedding_model_name)
            print("✅ Embedding model loaded successfully!")
            
            print(f"📥 Loading local LLM: {self.llm_model_name}")
            # Use a lightweight model for local inference
            device = 0 if torch.cuda.is_available() else -1
            self.llm_pipeline = pipeline(
                "text-generation",
                model=self.llm_model_name,
                device=device,
                max_new_tokens=150,
                do_sample=True,
                temperature=0.7,
                pad_token_id=50256
            )
            print("✅ Local LLM loaded successfully!")
            
        except Exception as e:
            print(f"❌ Error loading models: {str(e)}")
            # Fallback to simpler models
            self.embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
            print("⚠️ Using fallback embedding model")
    
    def load_web_content(self, url: str) -> str:
        """Load and extract text content from a web URL"""
        try:
            print(f"🌐 Loading content from: {url}")
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            
            response = requests.get(url, headers=headers, timeout=30)
            response.raise_for_status()
            
            soup = BeautifulSoup(response.content, 'html.parser')
            
            # Remove script and style elements
            for script in soup(["script", "style"]):
                script.decompose()
            
            # Extract text
            text = soup.get_text()
            
            # Clean up text
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            text = ' '.join(chunk for chunk in chunks if chunk)
            
            self.source_url = url
            print(f"✅ Successfully loaded {len(text)} characters from URL")
            return text
            
        except Exception as e:
            print(f"❌ Error loading web content: {str(e)}")
            raise
    
    def split_text_into_chunks(self, text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
        """Split text into overlapping chunks"""
        print(f"✂️ Splitting text into chunks (size: {chunk_size}, overlap: {overlap})")
        
        # Clean text
        text = re.sub(r'\s+', ' ', text).strip()
        
        chunks = []
        start = 0
        
        while start < len(text):
            end = start + chunk_size
            
            # Try to end at a sentence boundary
            if end < len(text):
                sentence_end = max(
                    text.rfind('.', start, end),
                    text.rfind('!', start, end),
                    text.rfind('?', start, end)
                )
                
                if sentence_end > start + chunk_size // 2:
                    end = sentence_end + 1
            
            chunk = text[start:end].strip()
            if chunk:
                chunks.append(chunk)
            
            start = max(start + chunk_size - overlap, end)
        
        self.chunks = chunks
        print(f"✅ Created {len(chunks)} chunks")
        return chunks
    
    def create_embeddings(self) -> np.ndarray:
        """Create embeddings for all chunks"""
        if not self.chunks:
            raise ValueError("No chunks available. Please load and split text first.")
        
        print("🔢 Creating embeddings for chunks...")
        self.embeddings = self.embedding_model.encode(self.chunks, show_progress_bar=True)
        print(f"✅ Created embeddings with shape: {self.embeddings.shape}")
        return self.embeddings
    
    def retrieve_relevant_chunks(self, query: str, top_k: int = 3) -> List[Tuple[str, float, int]]:
        """
        Retrieve most relevant chunks using cosine similarity
        
        Method Explanation:
        1. Convert query to embedding using same model as chunks
        2. Calculate cosine similarity between query and all chunk embeddings
        3. Cosine similarity measures angle between vectors (0-1, higher = more similar)
        4. Select top-k chunks with highest similarity scores
        """
        if self.embeddings is None:
            raise ValueError("Embeddings not created. Please create embeddings first.")
        
        print(f"🔍 Retrieving relevant chunks for query: '{query}'")
        print(f"📊 Using cosine similarity for vector search")
        
        # Create query embedding
        query_embedding = self.embedding_model.encode([query])
        
        # Calculate cosine similarities
        similarities = cosine_similarity(query_embedding, self.embeddings)[0]
        
        # Get top-k most similar chunks
        top_indices = np.argsort(similarities)[::-1][:top_k]
        
        relevant_chunks = []
        for idx in top_indices:
            relevant_chunks.append((
                self.chunks[idx],
                float(similarities[idx]),
                int(idx)
            ))
        
        print(f"✅ Retrieved {len(relevant_chunks)} relevant chunks")
        print(f"📈 Similarity scores: {[f'{score:.3f}' for _, score, _ in relevant_chunks]}")
        return relevant_chunks
    
    def extract_answer_from_chunks(self, query: str, relevant_chunks: List[Tuple[str, float, int]], 
                                 similarity_threshold: float = 0.2) -> Dict:
        """Extract answer from relevant chunks using content analysis"""
        print("🧠 Extracting answer from chunks using content analysis...")
        
        # Filter chunks by similarity threshold
        filtered_chunks = [(chunk, score, idx) for chunk, score, idx in relevant_chunks 
                          if score >= similarity_threshold]
        
        if not filtered_chunks:
            return {
                "answer": "❌ I couldn't find relevant information to answer your question in the provided content. The similarity scores were too low, suggesting the content may not contain information related to your query.",
                "chunks_used": [],
                "method": "Content Analysis - No relevant chunks found above similarity threshold",
                "similarity_scores": [],
                "confidence": 0.0
            }
        
        # Content analysis approach
        query_words = set(query.lower().split())
        query_words = {word for word in query_words if len(word) > 2}  # Filter short words
        
        best_chunk = filtered_chunks[0]  # Highest similarity chunk
        answer_text = best_chunk[0]
        
        # Extract sentences that contain query keywords
        sentences = re.split(r'[.!?]+', answer_text)
        relevant_sentences = []
        
        for sentence in sentences:
            sentence = sentence.strip()
            if sentence:
                sentence_words = set(sentence.lower().split())
                # Check if sentence contains query keywords
                if query_words.intersection(sentence_words):
                    relevant_sentences.append(sentence)
        
        if relevant_sentences:
            answer = '. '.join(relevant_sentences[:2])  # Take first 2 relevant sentences
            confidence = filtered_chunks[0][1]  # Use highest similarity as confidence
        else:
            # Fallback to first part of best chunk
            answer = answer_text[:400] + "..." if len(answer_text) > 400 else answer_text
            confidence = filtered_chunks[0][1] * 0.8  # Lower confidence for fallback
        
        return {
            "answer": answer,
            "chunks_used": [{"chunk_index": idx, "chunk_text": chunk, "similarity_score": score}
                           for chunk, score, idx in filtered_chunks],
            "method": "Content Analysis - Keyword matching and sentence extraction from most similar chunks",
            "similarity_scores": [score for _, score, _ in filtered_chunks],
            "confidence": confidence,
            "source_url": self.source_url
        }
    
    def generate_llm_answer(self, query: str, context: str) -> str:
        """Generate answer using local LLM"""
        try:
            print("🧠 Generating answer using local LLM...")
            prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
            
            if self.llm_pipeline:
                response = self.llm_pipeline(prompt, max_new_tokens=100, do_sample=True, temperature=0.7)
                generated_text = response[0]['generated_text']
                
                # Extract only the answer part
                answer_start = generated_text.find("Answer:") + len("Answer:")
                answer = generated_text[answer_start:].strip()
                
                return answer if answer else "Unable to generate answer with local LLM."
            else:
                return "Local LLM not available."
                
        except Exception as e:
            print(f"❌ Error generating LLM answer: {str(e)}")
            return "Error occurred while generating answer with local LLM."
    
    def query(self, question: str, use_llm: bool = False, top_k: int = 3) -> Dict:
        """Main query method that orchestrates the RAG pipeline"""
        print(f"\n{'='*60}")
        print(f"🔍 PROCESSING QUERY: {question}")
        print(f"{'='*60}")
        
        try:
            start_time = time.time()
            
            # Retrieve relevant chunks
            relevant_chunks = self.retrieve_relevant_chunks(question, top_k=top_k)
            
            if not use_llm:
                # Extract answer without LLM
                result = self.extract_answer_from_chunks(question, relevant_chunks)
                result["llm_used"] = False
            else:
                # Use local LLM for answer generation
                context = " ".join([chunk for chunk, _, _ in relevant_chunks[:2]])
                llm_answer = self.generate_llm_answer(question, context)
                
                result = {
                    "answer": llm_answer,
                    "chunks_used": [{"chunk_index": idx, "chunk_text": chunk, "similarity_score": score}
                                   for chunk, score, idx in relevant_chunks],
                    "method": "Local LLM generation with retrieved context",
                    "similarity_scores": [score for _, score, _ in relevant_chunks],
                    "confidence": max([score for _, score, _ in relevant_chunks]) if relevant_chunks else 0.0,
                    "llm_used": True,
                    "source_url": self.source_url
                }
            
            result["query"] = question
            result["processing_time"] = time.time() - start_time
            
            print(f"✅ Query processed in {result['processing_time']:.2f} seconds")
            return result
            
        except Exception as e:
            print(f"❌ Error processing query: {str(e)}")
            return {
                "answer": f"Error processing query: {str(e)}",
                "chunks_used": [],
                "method": "Error occurred",
                "similarity_scores": [],
                "query": question,
                "confidence": 0.0,
                "llm_used": use_llm,
                "processing_time": 0.0
            }

print("✅ RAG System class defined successfully!")

✅ RAG System class defined successfully!


In [9]:
 # Initialize RAG System

In [10]:
print("🚀 Initializing RAG System...")
rag = RAGSystem()

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda:0
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


🚀 Initializing RAG System...
🔧 Initializing RAG System...
📊 Embedding Model: sentence-transformers/all-MiniLM-L6-v2
🧠 LLM Model: microsoft/DialoGPT-medium
📥 Loading embedding model: sentence-transformers/all-MiniLM-L6-v2
✅ Embedding model loaded successfully!
📥 Loading local LLM: microsoft/DialoGPT-medium


Device set to use cuda:0


✅ Local LLM loaded successfully!


In [11]:
# Load and Process Content
# # Choose one of the sample URLs or provide your own

In [13]:
sample_urls = [
    "https://genai.owasp.org/initiatives/",
    "https://www.stanford.edu/research/",
    "https://openai.com/about/"
]

In [14]:
# Select URL (change index to try different URLs)
selected_url = sample_urls[0]  # Change to 1 or 2 for other URLs
print(f"🌐 Selected URL: {selected_url}")

🌐 Selected URL: https://genai.owasp.org/initiatives/


In [15]:
# Load content
try:
    content = rag.load_web_content(selected_url)
    print(f"📄 Content preview (first 300 chars):\n{content[:300]}...")
    
    # Split into chunks
    chunks = rag.split_text_into_chunks(content, chunk_size=500, overlap=50)
    print(f"\n📝 Sample chunks:")
    for i, chunk in enumerate(chunks[:3]):
        print(f"\nChunk {i+1}:")
        print(f"{chunk[:150]}...")
    
    # Create embeddings
    embeddings = rag.create_embeddings()
    print(f"\n🔢 Embeddings created successfully!")
    print(f"Shape: {embeddings.shape}")
    print(f"Sample embedding (first 10 values): {embeddings[0][:10]}")
    
except Exception as e:
    print(f"❌ Error: {e}")

🌐 Loading content from: https://genai.owasp.org/initiatives/
✅ Successfully loaded 6267 characters from URL
📄 Content preview (first 300 chars):
Initiatives - OWASP Gen AI Security Project Skip to content GETTING STARTED Introduction LEARNING MEETINGS RESOURCES CONTRIBUTING GLOSSARY INITIATIVES LLM TOP 10 LLM TOP 10 FOR 2025 LLM TOP 10 FOR 2023/24 AI Security Landscape GOVERNANCE CHECKLIST Threat Intelligence AGENTIC APP SECURITY Secure AI A...
✂️ Splitting text into chunks (size: 500, overlap: 50)
✅ Created 14 chunks

📝 Sample chunks:

Chunk 1:
Initiatives - OWASP Gen AI Security Project Skip to content GETTING STARTED Introduction LEARNING MEETINGS RESOURCES CONTRIBUTING GLOSSARY INITIATIVES...

Chunk 2:
NDUSTRY RECOGNITION Governance CONTACT BRANDING T10 FOR GEN AI Project Initiatives Initiatives The goal of initiatives within the project are to addre...

Chunk 3:
lligence Secure Gen AI Adoption Risk and Data Gathering AI Red Teaming and Eval Agentic Application Security AI Cyber Th

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

✅ Created embeddings with shape: (14, 384)

🔢 Embeddings created successfully!
Shape: (14, 384)
Sample embedding (first 10 values): [-0.00336468 -0.04504144  0.01070712  0.0016239   0.06725973  0.02016295
  0.05085123 -0.0131855  -0.03454284  0.01050522]


In [16]:
# Test Queries - Without LLM

In [17]:
print("\n" + "="*80)
print("🧪 TESTING QUERIES WITHOUT LLM")
print("="*80)

test_queries = [
    "What are the main initiatives mentioned?",
    "What topics are covered in this content?",
    "What are the key focus areas?",
    "Tell me about the research mentioned"
]

for i, query in enumerate(test_queries, 1):
    print(f"\n{'='*60}")
    print(f"Query {i}: {query}")
    print('='*60)
    
    result = rag.query(query, use_llm=False, top_k=3)
    
    print(f"🤖 ANSWER:")
    print(f"{result['answer']}")
    print(f"\n📊 METADATA:")
    print(f"Method: {result['method']}")
    print(f"Confidence: {result['confidence']:.3f}")
    print(f"Processing Time: {result['processing_time']:.2f}s")
    print(f"Chunks Used: {len(result['chunks_used'])}")
    
    print(f"\n📄 CHUNKS USED:")
    for j, chunk_info in enumerate(result['chunks_used'][:2]):  # Show first 2 chunks
        print(f"Chunk {j+1} (Index: {chunk_info['chunk_index']}, Similarity: {chunk_info['similarity_score']:.3f}):")
        print(f"{chunk_info['chunk_text'][:200]}...")
        print()



🧪 TESTING QUERIES WITHOUT LLM

Query 1: What are the main initiatives mentioned?

🔍 PROCESSING QUERY: What are the main initiatives mentioned?
🔍 Retrieving relevant chunks for query: 'What are the main initiatives mentioned?'
📊 Using cosine similarity for vector search


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

✅ Retrieved 3 relevant chunks
📈 Similarity scores: ['0.564', '0.413', '0.409']
🧠 Extracting answer from chunks using content analysis...
✅ Query processed in 0.17 seconds
🤖 ANSWER:
Initiatives - OWASP Gen AI Security Project Skip to content GETTING STARTED Introduction LEARNING MEETINGS RESOURCES CONTRIBUTING GLOSSARY INITIATIVES LLM TOP 10 LLM TOP 10 FOR 2025 LLM TOP 10 FOR 2023/24 AI Security Landscape GOVERNANCE CHECKLIST Threat Intelligence AGENTIC APP SECURITY Secure AI Adoption AI Red Teaming Data Security PROJECT Mission and Charter ROADMAP LEADERSHIP CONTRIBUTORS SPONSORS SUPPORTERS SPONSORSHIP NEWSLETTER OWASP PROJECT PAGE PROJECT WIKI BLOG ABOUT EVENTS NEWSROOM I

📊 METADATA:
Method: Content Analysis - Keyword matching and sentence extraction from most similar chunks
Confidence: 0.564
Processing Time: 0.17s
Chunks Used: 3

📄 CHUNKS USED:
Chunk 1 (Index: 0, Similarity: 0.564):
Initiatives - OWASP Gen AI Security Project Skip to content GETTING STARTED Introduction LEARNING MEE

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

✅ Retrieved 3 relevant chunks
📈 Similarity scores: ['0.333', '0.266', '0.266']
🧠 Extracting answer from chunks using content analysis...
✅ Query processed in 0.06 seconds
🤖 ANSWER:
Initiatives - OWASP Gen AI Security Project Skip to content GETTING STARTED Introduction LEARNING MEETINGS RESOURCES CONTRIBUTING GLOSSARY INITIATIVES LLM TOP 10 LLM TOP 10 FOR 2025 LLM TOP 10 FOR 2023/24 AI Security Landscape GOVERNANCE CHECKLIST Threat Intelligence AGENTIC APP SECURITY Secure AI Adoption AI Red Teaming Data Security PROJECT Mission and Charter ROADMAP LEADERSHIP CONTRIBUTORS SPO...

📊 METADATA:
Method: Content Analysis - Keyword matching and sentence extraction from most similar chunks
Confidence: 0.267
Processing Time: 0.06s
Chunks Used: 3

📄 CHUNKS USED:
Chunk 1 (Index: 0, Similarity: 0.333):
Initiatives - OWASP Gen AI Security Project Skip to content GETTING STARTED Introduction LEARNING MEETINGS RESOURCES CONTRIBUTING GLOSSARY INITIATIVES LLM TOP 10 LLM TOP 10 FOR 2025 LLM TOP 10 FOR 2

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

✅ Retrieved 3 relevant chunks
📈 Similarity scores: ['0.274', '0.215', '0.202']
🧠 Extracting answer from chunks using content analysis...
✅ Query processed in 0.04 seconds
🤖 ANSWER:
Initiatives - OWASP Gen AI Security Project Skip to content GETTING STARTED Introduction LEARNING MEETINGS RESOURCES CONTRIBUTING GLOSSARY INITIATIVES LLM TOP 10 LLM TOP 10 FOR 2025 LLM TOP 10 FOR 2023/24 AI Security Landscape GOVERNANCE CHECKLIST Threat Intelligence AGENTIC APP SECURITY Secure AI Adoption AI Red Teaming Data Security PROJECT Mission and Charter ROADMAP LEADERSHIP CONTRIBUTORS SPO...

📊 METADATA:
Method: Content Analysis - Keyword matching and sentence extraction from most similar chunks
Confidence: 0.219
Processing Time: 0.04s
Chunks Used: 3

📄 CHUNKS USED:
Chunk 1 (Index: 0, Similarity: 0.274):
Initiatives - OWASP Gen AI Security Project Skip to content GETTING STARTED Introduction LEARNING MEETINGS RESOURCES CONTRIBUTING GLOSSARY INITIATIVES LLM TOP 10 LLM TOP 10 FOR 2025 LLM TOP 10 FOR 2

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

✅ Retrieved 3 relevant chunks
📈 Similarity scores: ['0.270', '0.234', '0.218']
🧠 Extracting answer from chunks using content analysis...
✅ Query processed in 0.05 seconds
🤖 ANSWER:
Research Initiative: AI Red Teaming & Evaluation Red Teaming: The Power of Adversarial Thinking in AI Security (AI hackers, tech wizards, and code sorcerers, we need you. Research Initiative – Securing and Scrutinizing LLMS in Exploit Generation Challenge Currently limited actionable data exists in understanding how different LLMS are being leveraged in exploit generation, and what mechanisms can be used to

📊 METADATA:
Method: Content Analysis - Keyword matching and sentence extraction from most similar chunks
Confidence: 0.270
Processing Time: 0.05s
Chunks Used: 3

📄 CHUNKS USED:
Chunk 1 (Index: 11, Similarity: 0.270):
. Research Initiative: AI Red Teaming & Evaluation Red Teaming: The Power of Adversarial Thinking in AI Security (AI hackers, tech wizards, and code sorcerers, we need you!) This is your inv

In [18]:
# Test Queries - With LLM

In [19]:
print("\n" + "="*80)
print("🧠 TESTING QUERIES WITH LOCAL LLM")
print("="*80)

for i, query in enumerate(test_queries[:2], 1):  # Test first 2 queries with LLM
    print(f"\n{'='*60}")
    print(f"LLM Query {i}: {query}")
    print('='*60)
    
    result = rag.query(query, use_llm=True, top_k=3)
    
    print(f"🤖 LLM ANSWER:")
    print(f"{result['answer']}")
    print(f"\n📊 METADATA:")
    print(f"Method: {result['method']}")
    print(f"Confidence: {result['confidence']:.3f}")
    print(f"Processing Time: {result['processing_time']:.2f}s")
    print(f"Chunks Used: {len(result['chunks_used'])}")


🧠 TESTING QUERIES WITH LOCAL LLM

LLM Query 1: What are the main initiatives mentioned?

🔍 PROCESSING QUERY: What are the main initiatives mentioned?
🔍 Retrieving relevant chunks for query: 'What are the main initiatives mentioned?'
📊 Using cosine similarity for vector search


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

✅ Retrieved 3 relevant chunks
📈 Similarity scores: ['0.564', '0.413', '0.409']
🧠 Generating answer using local LLM...
✅ Query processed in 0.55 seconds
🤖 LLM ANSWER:
Unable to generate answer with local LLM.

📊 METADATA:
Method: Local LLM generation with retrieved context
Confidence: 0.564
Processing Time: 0.55s
Chunks Used: 3

LLM Query 2: What topics are covered in this content?

🔍 PROCESSING QUERY: What topics are covered in this content?
🔍 Retrieving relevant chunks for query: 'What topics are covered in this content?'
📊 Using cosine similarity for vector search


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

✅ Retrieved 3 relevant chunks
📈 Similarity scores: ['0.333', '0.266', '0.266']
🧠 Generating answer using local LLM...
✅ Query processed in 0.12 seconds
🤖 LLM ANSWER:
Unable to generate answer with local LLM.

📊 METADATA:
Method: Local LLM generation with retrieved context
Confidence: 0.333
Processing Time: 0.12s
Chunks Used: 3


In [20]:
# Interactive Query Interface

In [21]:
def interactive_query():
    """Interactive interface for testing queries"""
    print("\n" + "="*80)
    print("🎯 INTERACTIVE QUERY INTERFACE")
    print("="*80)
    print("Enter your questions below (type 'quit' to exit)")
    
    while True:
        try:
            user_query = input("\n❓ Your question: ").strip()
            
            if user_query.lower() in ['quit', 'exit', 'q']:
                print("👋 Goodbye!")
                break
            
            if not user_query:
                continue
                
            # Ask for LLM preference
            use_llm_input = input("🧠 Use local LLM? (y/n, default=n): ").strip().lower()
            use_llm = use_llm_input in ['y', 'yes', '1', 'true']
            
            # Process query
            result = rag.query(user_query, use_llm=use_llm, top_k=3)
            
            print(f"\n🤖 ANSWER:")
            print(f"{result['answer']}")
            
            print(f"\n📊 DETAILS:")
            print(f"Method: {result['method']}")
            print(f"Confidence: {result['confidence']:.3f}")
            print(f"LLM Used: {'Yes' if result['llm_used'] else 'No'}")
            print(f"Chunks Used: {len(result['chunks_used'])}")
            
            # Show chunk sources
            if result['chunks_used']:
                print(f"\n📄 SOURCE CHUNKS:")
                for i, chunk_info in enumerate(result['chunks_used'][:2]):
                    print(f"  {i+1}. Similarity: {chunk_info['similarity_score']:.3f}")
                    print(f"     Preview: {chunk_info['chunk_text'][:100]}...")
            
        except KeyboardInterrupt:
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            print(f"❌ Error: {e}")

In [None]:
 interactive_query()