# 🦜⛓️ RAG with LangChain Demo

This notebook demonstrates how to build a robust RAG system using the LangChain framework:
- **🦜⛓️ LangChain**: Industry-standard framework for LLM applications
- **📄 Document Processing**: Advanced text splitters and loaders
- **🔢 Vector Embeddings**: Ollama integration with mxbai-embed-large model
- **🗂️ Vector Storage**: Qdrant vector store integration, Qdrant Dashboard: http://localhost:6333/dashboard
- **🤖 LLM Integration**: Ollama LLM with conversation chains
- **🔍 Phoenix Tracing**: End-to-end observability and debugging, Phoenix Dashboard: http://localhost:6006
- **💬 Chat Interface**: Conversational RAG with memory


## 📖 **Sample Document**
We'll use a comprehensive document about "Vector stores and modern data analytics" as our knowledge base.


## 1. Environment Setup and Dependencies

In [None]:
# Phoenix observability bootstrap.
# Tries to register a tracer provider with auto-instrumentation enabled and
# records the outcome in `phoenix_initialized`; the rest of the notebook runs
# whether or not tracing could be set up.
phoenix_initialized = False
try:
    import phoenix as px
    from phoenix.otel import register

    print("🔍 Setting up comprehensive Phoenix tracing...")

    # Endpoint handed to register(); traces are sent here.
    phoenix_endpoint = "http://phoenix:4317"

    tracer_provider = register(
        project_name="langchain-rag-demo",
        endpoint=phoenix_endpoint,
        auto_instrument=True,
    )
except ImportError as e:
    # Phoenix (or one of its instrumentation packages) is not installed.
    print(f"❌ Phoenix instrumentation not available: {e}")
    print("💡 Make sure openinference-instrumentation-langchain is installed")
except Exception as e:
    # Anything else raised while registering the tracer.
    print(f"❌ Phoenix setup failed: {e}")
else:
    print("✅ Phoenix tracer registered for LangChain RAG demo")
    print(f"📡 Traces will be sent to: {phoenix_endpoint}")
    print("🌐 Phoenix Dashboard: http://localhost:6006")
    phoenix_initialized = True

# Status summary; only the observability line depends on the outcome above.
print("\n".join([
    "\n🔧 RAG System Status:",
    "LangChain Framework: ✅ Enabled",
    "Vector Database: ✅ Enabled",
    "LLM & Embeddings: ✅ Enabled",
    f"Observability: {'✅ Enabled' if phoenix_initialized else '❌ Disabled'}",
]))

if phoenix_initialized:
    print("\n".join([
        "\n📊 Phoenix Tracing Features:",
        "  🔗 LangChain chains and agents",
        "  🤖 Ollama LLM calls and embeddings",
        "  📄 Document retrieval operations",
        "  💭 Conversation memory tracking",
        "  🧮 Token usage and latency metrics",
    ]))

In [None]:
# Import Required Libraries and LangChain Components
print("📦 Importing required libraries...")

# Standard library imports
import time
import requests
from typing import List, Dict, Any

# OpenTelemetry for Phoenix tracing
from opentelemetry import trace

# LangChain Core Components
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# LangChain Community Components  
from langchain_community.vectorstores import Qdrant

# LangChain Integration Components
from langchain_ollama import OllamaEmbeddings, OllamaLLM
from langchain_qdrant import QdrantVectorStore

print("✅ All required libraries imported successfully!")
print("🦜⛓️ LangChain components ready")
print("🔧 Text processing utilities loaded")
print("🤖 Ollama integrations available")
print("🗂️ Vector store connectors ready")

In [None]:
# Check service health and available models.
# Produces two results used by later cells:
#   service_status - per-service reachability (True only on HTTP 200)
#   SERVICES_READY - True when Ollama and Qdrant respond and both required
#                    models are listed by Ollama (Phoenix is optional)
import requests
print("🔍 RAG Service Health Check:\n")

services = {
    "Ollama LLM & Embeddings": "http://ollama:11434/api/tags",
    "Qdrant Vector Database": "http://qdrant:6333/",
    "Phoenix AI Observability": "http://phoenix:6006/health"
}

service_status = {}
for service, url in services.items():
    try:
        response = requests.get(url, timeout=5)
        if response.status_code == 200:
            print(f"✅ {service}: Healthy")
            service_status[service] = True
        else:
            print(f"⚠️ {service}: Responding but status {response.status_code}")
            service_status[service] = False
    except requests.exceptions.RequestException as e:
        print(f"❌ {service}: Not accessible ({e})")
        service_status[service] = False

print("\n🤖 Available Ollama Models:")
required_models = ['gemma3:4b', 'mxbai-embed-large:latest']
available_models = []
models_ready = False  # single default; only the success path flips it
try:
    response = requests.get("http://ollama:11434/api/tags", timeout=5)
    if response.status_code == 200:
        models = response.json().get('models', [])
        available_models = [model.get('name', 'Unknown') for model in models]
        for model_name in available_models:
            print(f"  📋 {model_name}")

        # Substring match: a required name counts as present when it occurs
        # inside any listed model name.
        models_ready = all(
            any(req in name for name in available_models)
            for req in required_models
        )
        print(f"\n🎯 Required models available: {'✅ Yes' if models_ready else '❌ No'}")
    else:
        print("  ⚠️ Could not retrieve model list")
except Exception as e:
    # Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit
    # and hid the reason. Still best-effort: any failure means "not ready".
    print(f"  ❌ Ollama not accessible ({e})")
    models_ready = False

# Store global status for later use
SERVICES_READY = (
    service_status.get("Ollama LLM & Embeddings", False)
    and service_status.get("Qdrant Vector Database", False)
    and models_ready
)

print(f"\n🚀 System Ready for RAG: {'✅ Yes' if SERVICES_READY else '❌ No'}")

## 2. Sample Document - Modern Data Analytics Knowledge Base

In [None]:
# Enhanced sample document about modern data analytics
SAMPLE_DOCUMENT = """
# Modern Data Analytics: A Comprehensive Guide

## Introduction to Data Analytics

Data analytics is the science of analyzing raw data to make conclusions about information. It involves applying algorithmic or mechanical processes to derive insights and encompasses a variety of techniques and methodologies used across industries to improve decision-making processes.

The field has evolved significantly with the advent of big data, cloud computing, and artificial intelligence, transforming how organizations extract value from their data assets.

## Key Components of Modern Data Analytics

### 1. Data Collection and Storage

Modern data analytics begins with robust data collection systems. Organizations typically use data lakes and data warehouses to store structured and unstructured data. Cloud-based solutions like Amazon S3, Google Cloud Storage, and Azure Data Lake have become popular choices for scalable storage solutions.

Data lakes allow for storing raw data in its native format until needed, while data warehouses provide structured, processed data optimized for analytical queries. The modern approach often combines both in a lakehouse architecture.

### 2. Data Processing and ETL

Extract, Transform, Load (ETL) processes are crucial for preparing data for analysis. Modern tools like Apache Spark, Apache Kafka, and cloud-based services enable real-time and batch processing of large datasets. These tools can handle petabytes of data across distributed systems.

ETL has evolved into ELT (Extract, Load, Transform) in many cloud environments, where raw data is loaded first and then transformed as needed, providing more flexibility and faster ingestion.

### 3. Machine Learning and AI Integration

Machine learning has become integral to modern data analytics. Techniques such as supervised learning, unsupervised learning, and reinforcement learning help identify patterns and make predictions. Popular frameworks include TensorFlow, PyTorch, and scikit-learn.

AutoML platforms are democratizing machine learning by automating model selection, hyperparameter tuning, and feature engineering, making advanced analytics accessible to non-experts.

### 4. Data Visualization and Business Intelligence

Effective data visualization transforms complex data into understandable insights. Tools like Tableau, Power BI, Apache Superset, and custom dashboards help stakeholders make data-driven decisions. Interactive visualizations enable exploration of data from multiple perspectives.

Modern BI tools incorporate AI-powered features like natural language queries, automated insights, and predictive analytics directly in the visualization layer.

### 5. Real-time Analytics and Stream Processing

Modern businesses require real-time insights to respond quickly to changing conditions. Stream processing frameworks like Apache Kafka, Apache Flink, and Apache Storm enable organizations to process and analyze data as it arrives.

Real-time analytics powers applications like fraud detection, recommendation engines, IoT monitoring, and dynamic pricing systems.

## Vector Databases and Similarity Search

Vector databases have emerged as a crucial component for AI-powered applications. They store high-dimensional vectors representing embeddings of text, images, or other data types. These databases enable similarity search, recommendation systems, and retrieval-augmented generation (RAG) applications.

Popular vector databases include:
- **Qdrant**: High-performance vector database with excellent API design and hybrid search capabilities
- **Pinecone**: Managed vector database service with easy scaling
- **Weaviate**: Open-source vector database with GraphQL interface and semantic search
- **Chroma**: Lightweight vector database perfect for AI applications and prototyping
- **Milvus**: Open-source vector database designed for scalable similarity search

Vector databases solve the challenge of finding semantically similar content, enabling applications like document search, image recognition, and recommendation systems.

## Applications of Modern Data Analytics

### Healthcare Analytics

In healthcare, data analytics helps improve patient outcomes through predictive modeling, drug discovery, and personalized treatment plans. Electronic health records (EHR) analysis can identify treatment patterns and potential health risks.

Applications include predicting patient readmissions, optimizing treatment protocols, drug interaction analysis, and population health management.

### Financial Services

Financial institutions use analytics for fraud detection, risk assessment, algorithmic trading, and customer behavior analysis. Real-time transaction monitoring helps prevent fraudulent activities.

Advanced applications include credit scoring, regulatory compliance monitoring, market risk analysis, and robo-advisory services for investment management.

### E-commerce and Retail

Retail analytics enables personalized recommendations, inventory optimization, price optimization, and customer segmentation. Understanding customer behavior helps improve conversion rates and customer satisfaction.

Modern retail analytics incorporates omnichannel data, real-time personalization, demand forecasting, and supply chain optimization.

### Manufacturing and IoT

In manufacturing, analytics help optimize production processes, predict equipment failures, and improve quality control. Internet of Things (IoT) sensors generate vast amounts of data that can be analyzed for operational insights.

Industry 4.0 initiatives leverage analytics for predictive maintenance, digital twins, supply chain visibility, and autonomous quality control systems.

## Best Practices for Data Analytics

### 1. Data Quality Management

Ensure data accuracy, completeness, and consistency through proper validation and cleaning processes. Poor data quality leads to unreliable insights and poor decision-making.

Implement data quality frameworks with automated monitoring, profiling, and cleansing processes throughout the data pipeline.

### 2. Security and Privacy

Implement robust security measures to protect sensitive data. Comply with regulations like GDPR, CCPA, and industry-specific requirements. Use encryption, access controls, and audit trails.

Privacy-preserving techniques like differential privacy, federated learning, and synthetic data generation are becoming increasingly important.

### 3. Scalability and Performance

Design systems that can scale with growing data volumes and user demands. Use distributed computing frameworks and cloud-native architectures for flexibility.

Consider serverless architectures, containerization, and microservices for elastic scaling and cost optimization.

### 4. Collaboration and Documentation

Foster collaboration between data scientists, analysts, and business stakeholders. Maintain proper documentation of data pipelines, models, and analytical processes.

Implement DataOps practices to improve collaboration, automate workflows, and ensure reproducible analytics.

## Future Trends in Data Analytics

The field of data analytics continues to evolve rapidly. Emerging trends include:

- **Automated machine learning (AutoML)** making AI more accessible to business users
- **Edge analytics** bringing computation closer to data sources for real-time processing
- **Explainable AI** providing transparency in model decisions and building trust
- **Quantum computing** potentially revolutionizing complex calculations and optimization
- **Real-time decision-making systems** powered by streaming analytics and AI
- **Augmented analytics** using AI to assist analysts in data preparation and insight discovery
- **Data mesh architectures** decentralizing data ownership and management
- **Synthetic data generation** for privacy-preserving analytics and model training

## Conclusion

Modern data analytics combines traditional statistical methods with cutting-edge technologies to extract value from data. Success requires the right combination of tools, processes, and skilled professionals. Organizations that effectively leverage data analytics gain competitive advantages through better decision-making, operational efficiency, and customer insights.

The future of data analytics lies in making advanced capabilities more accessible, automated, and integrated into business processes, while maintaining strong governance and ethical standards.
"""

# Print size statistics, a heading outline, and a short preview of
# SAMPLE_DOCUMENT.
print("📄 Enhanced Sample Document Loaded:")
print(f"📊 Document Length: {len(SAMPLE_DOCUMENT):,} characters")
print(f"📊 Word Count: ~{len(SAMPLE_DOCUMENT.split()):,} words")
print(f"📊 Lines: {len(SAMPLE_DOCUMENT.splitlines())}")

print("\n📖 Document Structure:")
lines = SAMPLE_DOCUMENT.split('\n')
headers = [line for line in lines if line.startswith('#')]
for header in headers[:10]:  # Show first 10 headers
    # Heading depth is the number of LEADING '#' characters only.
    # header.count('#') would also count a '#' inside the title (e.g. "C#")
    # and over-indent the entry; strip('#') would eat that trailing '#'.
    title = header.lstrip('#')
    level = len(header) - len(title)
    print(f"{'  ' * (level-1)}📌 {title.strip()}")

print("\n📖 Document Preview:")
print("="*80)
print(SAMPLE_DOCUMENT[:500] + "...")
print("="*80)

## 4. Document Processing with LangChain Text Splitters

LangChain provides sophisticated text splitters that preserve semantic meaning and context.

In [None]:
# Wrap the sample text in a LangChain Document, split it into overlapping
# chunks, and print chunk statistics plus a preview of the first three chunks.
# Result used by later cells: `documents` (list of chunk Documents).
print("🔧 Processing document with LangChain text splitters...")

# Create a Document object
doc = Document(
    page_content=SAMPLE_DOCUMENT,
    metadata={
        "source": "modern_data_analytics_guide.md",
        "title": "Modern Data Analytics: A Comprehensive Guide",
        "author": "DataLab Playground",
        "created_at": "2025-01-31",
        "document_type": "knowledge_base"
    }
)

# Initialize RecursiveCharacterTextSplitter for intelligent chunking
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # Target chunk size
    chunk_overlap=200,  # Overlap between chunks to preserve context
    length_function=len,
    separators=[
        "\n\n",  # Paragraph breaks (highest priority)
        "\n",    # Line breaks
        " ",     # Word breaks
        ""       # Character breaks (fallback)
    ],
    add_start_index=True  # Add start index to metadata
)

# Split the document
documents = text_splitter.split_documents([doc])

print("\n📊 Document Processing Results:")
print(f"Original Document Length: {len(SAMPLE_DOCUMENT):,} characters")
print(f"Total Chunks Created: {len(documents)}")

# Analyze chunk statistics. Guarded so an empty result reports itself instead
# of raising ZeroDivisionError / ValueError from the aggregate calls.
chunk_lengths = [len(chunk.page_content) for chunk in documents]
if chunk_lengths:
    print(f"Average Chunk Size: {sum(chunk_lengths) / len(chunk_lengths):.0f} characters")
    print(f"Min Chunk Size: {min(chunk_lengths)} characters")
    print(f"Max Chunk Size: {max(chunk_lengths)} characters")
else:
    print("⚠️ No chunks were produced - check the source text and splitter settings")

# Display sample chunks with metadata.
# The loop variable is `chunk`, not `doc`, so the source Document bound above
# is not overwritten by the last previewed chunk.
print("\n📖 Sample Chunks with Metadata:")
for i, chunk in enumerate(documents[:3]):
    print(f"\n--- Chunk {i+1} ---")
    print(f"Length: {len(chunk.page_content)} characters")
    print(f"Metadata: {chunk.metadata}")
    print(f"Content Preview: {chunk.page_content[:200]}...")

    # Show overlap with previous chunk
    if i > 0:
        prev_content = documents[i-1].page_content
        current_content = chunk.page_content

        # Longest-first search: the first suffix of the previous chunk (up to
        # 300 chars) found near the start of the current chunk is reported.
        overlap_found = False
        for j in range(min(len(prev_content), 300), 0, -1):
            if prev_content[-j:] in current_content[:j+50]:
                print(f"Overlap detected: '{prev_content[-j:]}...'")
                overlap_found = True
                break

        if not overlap_found:
            print("No overlap detected")

print("\n✅ Document processing complete! Ready for vectorization.")

## 5. Vector Store Setup with LangChain-Qdrant Integration

We'll use LangChain's Qdrant integration for seamless vector operations.

In [None]:
# Initialize Ollama Embeddings with Phoenix Tracing
# Creates the `embeddings` client and `tracer` used by later cells, then runs
# one embed_query() call as a smoke test. A failed smoke test is reported but
# does not stop the notebook; `embeddings` stays bound either way.
print("🔢 Setting up Ollama Embeddings...")

# Get tracer for embedding operations
tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("initialize_embeddings") as span:
    # NOTE(review): the span attribute records "mxbai-embed-large" while the
    # client below is configured with "mxbai-embed-large:latest" - confirm
    # whether the tag was dropped on purpose.
    span.set_attribute("embedding.model", "mxbai-embed-large")
    span.set_attribute("embedding.base_url", "http://ollama:11434")

    embeddings = OllamaEmbeddings(
        base_url="http://ollama:11434",
        model="mxbai-embed-large:latest",
    )

    # Test embeddings
    try:
        print("🧪 Testing embeddings...")
        # Child span: success attributes are written to `test_span`.
        with tracer.start_as_current_span("test_embeddings") as test_span:
            test_text = "This is a test for embeddings"
            test_span.set_attribute("test.text", test_text)

            test_embedding = embeddings.embed_query(test_text)

            test_span.set_attribute("embedding.dimension", len(test_embedding))
            test_span.set_attribute("embedding.success", True)

            print(f"✅ Embeddings working! Dimension: {len(test_embedding)}")
            print(f"📊 Sample values: {test_embedding[:5]}...")

    except Exception as e:
        # Failure attributes go on the OUTER span (`span`), not `test_span`.
        # `test_embedding` is left unbound when this branch runs.
        span.set_attribute("embedding.error", str(e))
        span.set_attribute("embedding.success", False)
        print(f"❌ Embeddings test failed: {e}")
        print("💡 Make sure Ollama service is running with mxbai-embed-large model")

# Connect to the Qdrant collection, creating it when it cannot be opened.
# Result used by later cells: `vector_store` (None when both paths fail) and
# `collection_name`.
print("🗂️ Setting up Qdrant vector store with LangChain...")

collection_name = "langchain_data_analytics"
vector_store = None  # stays None unless one of the paths below succeeds

try:
    with tracer.start_as_current_span("connect_vector_store") as span:
        span.set_attribute("vector_store.type", "qdrant")
        span.set_attribute("vector_store.collection", collection_name)
        span.set_attribute("vector_store.url", "http://qdrant:6333")

        try:
            # Attach to a collection that is already present. This call does
            # not create anything; any failure falls through to creation.
            vector_store = QdrantVectorStore.from_existing_collection(
                embedding=embeddings,
                collection_name=collection_name,
                url="http://qdrant:6333"
            )

            span.set_attribute("vector_store.connection", "existing")
            span.set_attribute("vector_store.success", True)
            print(f"✅ Connected to existing collection '{collection_name}'")

        except Exception as connect_error:
            # Every failure is treated as "collection missing". The reason is
            # kept on the span and printed so a real outage is not mistaken
            # for a first run.
            span.set_attribute("vector_store.connection", "new")
            span.set_attribute("vector_store.connect_error", str(connect_error))
            print(f"🆕 Creating new collection '{collection_name}' from documents...")
            print(f"💡 Existing collection could not be opened: {connect_error}")

            try:
                # An empty document list only sets up the collection; the
                # chunks are added by the indexing step.
                vector_store = QdrantVectorStore.from_documents(
                    documents=[],  # Empty for now, we'll add documents later
                    embedding=embeddings,
                    url="http://qdrant:6333",
                    collection_name=collection_name,
                    prefer_grpc=False
                )
                span.set_attribute("vector_store.success", True)
                print(f"✅ Collection '{collection_name}' created successfully")

            except Exception as e:
                span.set_attribute("vector_store.error", str(e))
                span.set_attribute("vector_store.success", False)
                print(f"❌ Error creating vector store: {e}")
                print("💡 Make sure Qdrant service is running and accessible")
                vector_store = None

except Exception as e:
    print(f"❌ Error setting up vector store: {e}")
    vector_store = None

# Explicit None check, matching how the retriever cell tests the store.
# NOTE(review): the dimension and metric printed below are literals, not
# values read back from the collection.
if vector_store is not None:
    print("\n📋 Vector Store Ready:")
    print(f"  Collection: {collection_name}")
    print("  Embedding Model: mxbai-embed-large (1024 dims)")
    print("  Distance Metric: Cosine")
    print("  Status: ✅ Connected")


## 6. Document Indexing and Vector Storage

Add all document chunks to the vector store with LangChain's batch operations.

In [None]:
# Add documents to vector store.
# Flow: probe the collection for existing points -> optionally ask whether to
# re-index -> on re-index recreate the collection so chunks are replaced, not
# duplicated -> add chunks in small batches -> verify with a test search.
# Any failure sets `vector_store` to None so later cells skip RAG setup.

if vector_store is not None:
    print("📚 Indexing documents in vector store...")

    try:
        # Check if documents are already in the vector store
        # Use similarity search to test if there are existing documents
        try:
            test_results = vector_store.similarity_search("test", k=1)
            current_count = len(test_results) if test_results else 0
            has_documents = current_count > 0
        except Exception:
            # If similarity search fails, assume empty collection
            has_documents = False
            current_count = 0

        if has_documents:
            print("📊 Found existing documents in collection")
            reindex = input("🔄 Re-index documents? (y/N): ").lower().strip() == 'y'
            if not reindex:
                print("⏭️ Skipping indexing - using existing documents")
            else:
                print("🗑️ Will overwrite with new documents")
                # Previously this branch only appended, so each re-index
                # stored every chunk again. Recreating the collection makes
                # "overwrite" true.
                vector_store = QdrantVectorStore.from_documents(
                    documents=[],
                    embedding=embeddings,
                    url="http://qdrant:6333",
                    collection_name=collection_name,
                    prefer_grpc=False,
                    force_recreate=True,
                )
        else:
            reindex = True

        # `reindex` is always True when the collection was empty.
        if reindex:
            print(f"🚀 Indexing {len(documents)} document chunks...")

            # Add documents to vector store in batches
            batch_size = 5  # Process in small batches to avoid overwhelming the service
            total_batches = (len(documents) + batch_size - 1) // batch_size
            for i in range(0, len(documents), batch_size):
                batch = documents[i:i + batch_size]

                print(f"📥 Processing batch {i//batch_size + 1}/{total_batches}")

                # Add batch to vector store
                vector_store.add_documents(batch)

                # Small delay between batches
                time.sleep(1)

            # Verify indexing with a test search
            test_results = vector_store.similarity_search("data analytics", k=3)
            print("\n✅ Indexing complete!")
            print(f"📊 Test search returned {len(test_results)} results")

            # Show sample of what was indexed
            if test_results:
                print("\n📖 Sample indexed documents:")
                for i, doc in enumerate(test_results[:2]):
                    print(f"\n--- Document {i+1} ---")
                    print(f"Content: {doc.page_content[:150]}...")
                    print(f"Metadata: {doc.metadata}")

    except Exception as e:
        print(f"❌ Error during indexing: {e}")
        vector_store = None
else:
    print("❌ Vector store not available - skipping indexing")

print(f"\n🎯 Vector store ready: {'✅ Yes' if vector_store else '❌ No'}")

## 7. LangChain RAG Components - Retriever and Chains

Now we'll build the core RAG components using LangChain's modular architecture.

In [None]:
# Build the similarity retriever on top of the vector store and run one
# sample query through it. `retriever` is None when no vector store exists.
if vector_store is None:
    print("❌ Vector store not available - cannot create retriever")
    retriever = None
else:
    print("🔗 Creating LangChain RAG components...")

    # Plain similarity search returning the 4 closest chunks per query.
    retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 4},
    )

    print("✅ Retriever created successfully")

    # Smoke test; a failure is reported but leaves `retriever` bound.
    test_query = "What are vector databases used for?"
    try:
        retrieved_docs = retriever.invoke(test_query)

        print("\n🧪 Retriever Test:")
        print(f"Query: '{test_query}'")
        print(f"Retrieved {len(retrieved_docs)} documents")

        # Preview only the first two hits.
        for i, doc in enumerate(retrieved_docs[:2]):
            source = doc.metadata.get('source', 'Unknown')
            print(f"\n--- Document {i+1} ---")
            print(f"Content: {doc.page_content[:200]}...")
            print(f"Source: {source}")

    except Exception as e:
        print(f"❌ Retriever test failed: {e}")
        print("💡 This might be normal if no documents are indexed yet")

In [None]:
# Single-turn RAG: Ollama LLM + custom prompt + RetrievalQA chain.
# Leaves `qa_chain` bound to the chain, or to None when it cannot be built.

from langchain.prompts import PromptTemplate
from langchain_ollama import OllamaLLM

# Low temperature keeps answers close to the retrieved context.
llm = OllamaLLM(
    base_url="http://ollama:11434",
    model="gemma3:4b",
    temperature=0.1
)

# Prompt with two slots: {context} for retrieved chunks, {question} for the query.
qa_prompt_template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer based on the context, just say that you don't know, don't try to make up an answer.

Context:
{context}

Question: {question}

Answer: Let me help you based on the information provided. """

qa_prompt = PromptTemplate(
    template=qa_prompt_template,
    input_variables=["context", "question"]
)

print("✅ Custom prompt template created")

# Guard first. The `llm` membership test is always satisfied here because
# `llm` is assigned a few lines above; it is kept for parity.
if retriever is None or 'llm' not in locals():
    print("❌ Cannot create QA chain - missing retriever or LLM")
    print("💡 Make sure both retriever and llm variables are defined")
    qa_chain = None
else:
    print("🔗 Creating RetrievalQA chain...")

    try:
        # "stuff" puts all retrieved chunks into one prompt.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": qa_prompt}
        )
    except Exception as e:
        print(f"❌ Failed to create QA chain: {e}")
        qa_chain = None
    else:
        print("✅ RetrievalQA chain created successfully")

        # One end-to-end question; a failing test leaves the chain in place.
        test_question = "What are vector databases used for and which one is the best in your opinion?"

        print("\n🧪 Testing RetrievalQA chain...")
        print(f"Question: '{test_question}'")

        try:
            result = qa_chain.invoke({"query": test_question})

            print("\n🤖 Answer:")
            print(result["result"])

            print(f"\n📚 Source Documents ({len(result['source_documents'])}):")
            for i, doc in enumerate(result["source_documents"]):
                print(f"\n--- Source {i+1} ---")
                print(f"Content: {doc.page_content[:200]}...")
                print(f"Metadata: {doc.metadata}")

        except Exception as e:
            print(f"❌ QA chain test failed: {e}")
            print("💡 Check if LLM is properly initialized and accessible")

## 8. Conversational RAG with Memory

Add conversation memory for multi-turn interactions.

In [None]:
# Multi-turn RAG: rebuilds the LLM with explicit generation settings and wires
# it to the retriever through a ConversationalRetrievalChain with buffer memory.
# Leaves `conversational_qa` bound to the chain, or to None on failure.

from langchain_ollama import OllamaLLM

# NOTE(review): num_predict (8096) is larger than num_ctx (4096) - confirm
# the intended output limit; 8096 may be a typo for 8192.
llm = OllamaLLM(
    base_url="http://ollama:11434",
    model="gemma3:4b",
    temperature=0.1,
    num_predict=8096,    # maximum output tokens
    num_ctx=4096,        # context window size
    top_p=0.9,           # nucleus sampling
    top_k=40,            # top-k sampling
    repeat_penalty=1.1,  # reduce repetition
)

print("✅ Ollama LLM initialized")

# Guard first. The `llm` membership test is always satisfied here because
# `llm` is assigned just above; it is kept for parity.
if retriever is None or 'llm' not in locals():
    print("❌ Cannot create conversational chain - missing components")
    print("💡 Make sure both retriever and llm variables are defined")
    conversational_qa = None
else:
    print("💭 Creating Conversational RAG chain with memory...")

    try:
        # output_key tells the memory which chain output to store, since the
        # chain also returns source documents.
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key="answer"
        )

        conversational_qa = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=retriever,
            memory=memory,
            return_source_documents=True
        )
    except Exception as e:
        print(f"❌ Failed to create conversational chain: {e}")
        conversational_qa = None
    else:
        print("✅ Conversational RAG chain created successfully")

        print("\n🧪 Testing Conversational RAG...")

        try:
            # Turn 1: standalone question.
            q1 = "What are vector databases?"
            print(f"\n👤 Question 1: {q1}")

            result1 = conversational_qa.invoke({"question": q1})
            print(f"🤖 Answer 1: {result1['answer']}")

            # Turn 2: "they" only resolves through the stored history.
            q2 = "How are they used in AI applications?"
            print(f"\n👤 Question 2: {q2}")

            result2 = conversational_qa.invoke({"question": q2})
            print(f"🤖 Answer 2: {result2['answer']}")

            print("\n💭 Conversation Memory:")
            print(f"Messages in memory: {len(memory.chat_memory.messages)}")

            # Sources for the follow-up answer (first two, printed in full).
            if 'source_documents' in result2:
                print(f"\n📚 Sources for last answer ({len(result2['source_documents'])}):")
                for i, doc in enumerate(result2['source_documents'][:2]):
                    print(f"  📄 Source {i+1}: {doc.page_content}")

        except Exception as e:
            print(f"❌ Conversational RAG test failed: {e}")
            print("💡 Check if LLM is responding properly")

## 9. Advanced RAG Demo Functions

Interactive functions for testing the complete RAG system.

In [None]:
def ask_question(question: str, use_conversational: bool = False) -> None:
    """
    Ask a question using either simple QA or conversational RAG and print the
    answer with its source chunks.

    Args:
        question: The user question, passed to the chain unchanged.
        use_conversational: When True and the module-level `conversational_qa`
            chain exists, use it. Otherwise fall back to the module-level
            `qa_chain`.

    Returns:
        None. Output is printed. If neither chain is available a notice is
        printed and the function returns early.
    """
    separator = "=" * 80
    print(f"\n{separator}")
    print(f"❓ Question: {question}")
    print(separator)

    # Chains are run through .invoke(), the same call style the setup cells
    # use, instead of the legacy `chain({...})` form.
    if use_conversational and conversational_qa is not None:
        print("🗣️ Using Conversational RAG...")
        result = conversational_qa.invoke({"question": question})
        answer = result["answer"]
    elif qa_chain is not None:
        print("🔍 Using Simple RAG...")
        result = qa_chain.invoke({"query": question})
        answer = result["result"]
    else:
        print("❌ RAG system not available")
        return

    sources = result.get("source_documents", [])

    print("\n🤖 Answer:")
    print(f"{answer}")

    if sources:
        print(f"\n📚 Sources ({len(sources)} documents):")
        for i, doc in enumerate(sources):
            print(f"\n--- Source {i+1} ---")
            print(f"Content: {doc.page_content[:250]}...")
            # Metadata is optional; skip the line when absent or empty.
            metadata = getattr(doc, 'metadata', None)
            if metadata:
                print(f"Metadata: {metadata}")

def test_rag_capabilities():
    """
    Run a fixed set of five sample questions through ask_question().

    Each question is announced with its category and answered with
    ask_question()'s default arguments. A two-second pause separates
    consecutive questions; there is no pause after the last one.
    """
    print("🎯 Testing RAG System Capabilities")
    print("="*60)

    test_questions = [
        {"question": "What is data analytics?", "category": "Definition"},
        {"question": "What are the key components of modern data analytics?", "category": "Components"},
        {"question": "How do vector databases work with embeddings?", "category": "Technical"},
        {"question": "What are some applications in healthcare?", "category": "Application"},
        {"question": "What are the best practices for data quality?", "category": "Best Practices"},
    ]

    remaining = len(test_questions)
    for number, case in enumerate(test_questions, start=1):
        remaining -= 1
        print(f"\n🧪 Test {number}: {case['category']}")
        ask_question(case["question"])

        # Pause only between questions, never after the final one.
        if remaining:
            print("\n" + "⏳ Moving to next test...")
            time.sleep(2)

def interactive_chat():
    """
    Interactive chat session with the RAG system.

    Reads questions from standard input in a loop and answers them through
    ask_question(..., use_conversational=True).

    Commands:
        quit   - end the session
        memory - print up to the last six messages held by the module-level
                 `conversational_qa` chain's memory

    The session also ends on Ctrl-C (KeyboardInterrupt) or when the input
    stream is closed (EOFError). Any other error is printed and the loop
    continues.
    """
    print("💬 Interactive RAG Chat Session")
    print("="*60)
    print("Ask questions about data analytics!")
    print("Commands: 'quit' to exit, 'memory' to see conversation history")
    print("="*60)

    while True:
        try:
            question = input("\n👤 Your question: ").strip()
            command = question.lower()

            if command == 'quit':
                print("👋 Goodbye! Thanks for using the RAG system.")
                break

            if command == 'memory':
                if conversational_qa and hasattr(conversational_qa, 'memory'):
                    print("\n💭 Conversation History:")
                    messages = conversational_qa.memory.chat_memory.messages
                    for i, msg in enumerate(messages[-6:]):  # Show last 6 messages
                        print(f"{i+1}. {type(msg).__name__}: {str(msg)[:100]}...")
                else:
                    print("💭 No conversation memory available")
                continue

            if not question:
                print("❓ Please enter a question")
                continue

            ask_question(question, use_conversational=True)

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the session. Left to the generic handler,
            # a closed stdin makes input() fail on every iteration and the
            # loop prints errors forever.
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            print(f"❌ Error: {e}")

# Closing banner: list the helper functions, then run one sample question if
# the simple QA chain is available.
print("\n".join([
    "🎉 LangChain RAG System Ready!",
    "\n🚀 Available functions:",
    "1. ask_question('Your question here')",
    "2. test_rag_capabilities()",
    "3. interactive_chat()",
]))

# Run a quick demonstration
print("\n🧪 Quick Demo:")
if not qa_chain:
    print("❌ RAG system not properly initialized")
else:
    ask_question("What are the future trends in data analytics?")