In [7]:
"""
Complete RAG (Retrieval-Augmented Generation) System
Updated with LCEL (LangChain Expression Language) - Modern Approach
"""

from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_groq import ChatGroq
from langchain_community.chat_models import ChatOllama
import os
from dotenv import load_dotenv

load_dotenv()

def get_llm():
    """Initialize LLM based on environment configuration"""
    llm_type = os.getenv("LLM_TYPE", "groq")
    if llm_type == "groq":
        return ChatGroq(
            model="llama-3.1-8b-instant",
            temperature=0.3,
            max_tokens=500,
            groq_api_key=os.getenv("GROQ_API_KEY")
        )
    else:
        return ChatOllama(
            model="llama3.1",
            temperature=0.3
        )

# ==============================================================================
# STEP 1: Prepare Documents and Create Knowledge Base
# ==============================================================================

# Sample knowledge base about AI concepts
knowledge_base = [
    """
    Vector Databases and Embeddings:
    Vector databases store information as numerical vectors (embeddings) that 
    capture semantic meaning. When you search, your query is converted to a 
    vector and compared to stored vectors using similarity metrics like cosine 
    similarity. This enables semantic search - finding relevant information 
    based on meaning rather than exact keyword matches.
    """,
    """
    Retrieval-Augmented Generation (RAG):
    RAG enhances Large Language Models by retrieving relevant information from 
    a knowledge base before generating responses. The process involves three steps:
    1. Retrieve: Search for relevant documents using vector similarity
    2. Augment: Add retrieved context to the prompt
    3. Generate: The LLM creates an answer grounded in the retrieved information
    This approach reduces hallucinations and provides up-to-date, specific knowledge.
    """,
    """
    Chunking Strategies:
    Documents are split into chunks to fit within LLM context windows and improve 
    retrieval precision. Key considerations include:
    - Chunk size: Smaller chunks (100-500 tokens) for precise retrieval
    - Overlap: 10-20% overlap prevents losing context at boundaries
    - Separators: Use natural boundaries (paragraphs, sentences)
    - Metadata: Add source info, page numbers, timestamps for tracking
    """,
    """
    Embedding Models:
    Embeddings are dense vector representations of text that capture semantic meaning.
    Popular models include:
    - OpenAI text-embedding-ada-002: High quality, 1536 dimensions
    - Sentence-BERT (all-MiniLM-L6-v2): Fast, efficient, 384 dimensions
    - Instructor embeddings: Task-specific embeddings
    The choice depends on accuracy needs, speed requirements, and cost constraints.
    """,
    """
    Agentic AI Systems:
    AI agents are autonomous systems that can:
    - Perceive their environment through sensors or data
    - Make decisions using reasoning and planning
    - Take actions using tools (APIs, databases, search engines)
    - Learn from feedback to improve performance
    Agents can use RAG to access knowledge dynamically during task execution.
    """
]

# Create Document objects
documents = [
    Document(
        page_content=text.strip(),
        metadata={"source": f"ai_guide_{i}.txt", "section": i}
    )
    for i, text in enumerate(knowledge_base)
]

print("=" * 80)
print("STEP 1: Document Preparation")
print("=" * 80)
print(f"Created {len(documents)} documents")

# ==============================================================================
# STEP 2: Split Documents into Chunks
# ==============================================================================

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,      # Smaller chunks for more precise retrieval
    chunk_overlap=50,
    length_function=len,
    separators=["\n\n", "\n", ". ", " "]
)

split_documents = text_splitter.split_documents(documents)

print("\n" + "=" * 80)
print("STEP 2: Document Chunking")
print("=" * 80)
print(f"Split into {len(split_documents)} chunks")
for i, doc in enumerate(split_documents[:3]):
    print(f"\nChunk {i+1}: {doc.page_content[:100]}...")

# ==============================================================================
# STEP 3: Create Embeddings and Vector Store
# ==============================================================================
"""
We use HuggingFace embeddings (free, runs locally)
For production, consider: OpenAI embeddings (paid but higher quality)
"""

print("\n" + "=" * 80)
print("STEP 3: Creating Vector Store")
print("=" * 80)
print("Loading embedding model (this may take a moment)...")

# Initialize embedding model
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},  # Use 'cuda' for GPU
    encode_kwargs={'normalize_embeddings': True}  # Important for cosine similarity
)

# Create vector store from documents
vectorstore = Chroma.from_documents(
    documents=split_documents,
    embedding=embeddings,
    collection_name="rag_knowledge_base",
    persist_directory="./chroma_rag_db"  # Save to disk
)

print("Vector store created successfully!")
print(f"Total vectors stored: {vectorstore._collection.count()}")

# ==============================================================================
# STEP 4: Test Retrieval
# ==============================================================================

print("\n" + "=" * 80)
print("STEP 4: Testing Retrieval")
print("=" * 80)

# Create a retriever with specific search parameters
retriever = vectorstore.as_retriever(
    search_type="similarity",  # or "mmr" for diverse results
    search_kwargs={"k": 3}     # Return top 3 results
)

# Test query
test_query = "How does RAG work?"
retrieved_docs = retriever.invoke(test_query)

print(f"\nQuery: '{test_query}'")
print(f"Retrieved {len(retrieved_docs)} documents:\n")

for i, doc in enumerate(retrieved_docs):
    print(f"{i+1}. {doc.page_content[:150]}...")
    print(f"   Source: {doc.metadata.get('source', 'unknown')}\n")

# ==============================================================================
# STEP 5: Build RAG Chain Using LCEL (Modern Approach)
# ==============================================================================
"""
LCEL (LangChain Expression Language) uses the | operator to chain components
This replaces the deprecated RetrievalQA.from_chain_type() method
"""

print("=" * 80)
print("STEP 5: Building RAG Chain with LCEL")
print("=" * 80)

# Define a custom prompt template
template = """You are a helpful AI assistant. Use the following context to answer the question.
If you cannot answer based on the context, say so.

Context:
{context}

Question: {question}

Answer: Let me help you understand this based on the provided information."""

prompt = ChatPromptTemplate.from_template(template)

print("\nPrompt template created:")
print(template)

# Helper function to format documents
def format_docs(docs):
    """Format retrieved documents into a single context string"""
    return "\n\n".join(doc.page_content for doc in docs)

# Build the RAG chain using LCEL (pipe operator |)
# This is the modern, recommended approach
rag_chain = (
    {
        "context": retriever | format_docs,  # Retrieve docs and format
        "question": RunnablePassthrough()     # Pass question through
    }
    | prompt                                  # Format into prompt
    | get_llm()                              # Send to LLM
    | StrOutputParser()                      # Parse output to string
)

print("\n✓ RAG chain created successfully using LCEL")
print("\nChain structure:")
print("  Input → {context: retriever → format, question: passthrough}")
print("       → Prompt → LLM → Output Parser → Final Answer")

# ==============================================================================
# STEP 6: Test the RAG Chain
# ==============================================================================

print("\n" + "=" * 80)
print("STEP 6: Testing RAG Chain")
print("=" * 80)

test_questions = [
    "What is RAG?",
    "What are the key considerations for chunking?",
    "What embedding models are mentioned?"
]

for question in test_questions:
    print(f"\nQuestion: {question}")
    print("-" * 80)
    try:
        answer = rag_chain.invoke(question)
        print(f"Answer: {answer}\n")
    except Exception as e:
        print(f"Error: {e}")
        print("Note: Make sure GROQ_API_KEY is set in your .env file\n")

# ==============================================================================
# STEP 7: RAG Chain with Source Documents
# ==============================================================================

print("=" * 80)
print("STEP 7: RAG Chain with Source Attribution")
print("=" * 80)

def rag_with_sources(question: str):
    """
    Enhanced RAG function that returns both answer and source documents
    This allows you to cite sources for the generated answer
    """
    # Step 1: Retrieve relevant documents
    docs = retriever.invoke(question)
    
    # Step 2: Format context from documents
    context = format_docs(docs)
    
    # Step 3: Create the formatted prompt
    formatted_prompt = prompt.format(context=context, question=question)
    
    # Step 4: Get answer from LLM
    llm = get_llm()
    response = llm.invoke(formatted_prompt)
    answer = response.content if hasattr(response, 'content') else str(response)
    
    return {
        "question": question,
        "answer": answer,
        "sources": docs,
        "num_sources": len(docs)
    }

# Test with source attribution
test_question = "Explain what AI agents can do"
print(f"\nQuestion: {test_question}")
print("-" * 80)

try:
    result = rag_with_sources(test_question)
    
    print(f"\nAnswer:\n{result['answer']}\n")
    print(f"Sources used ({result['num_sources']}):")
    print("-" * 80)
    for i, doc in enumerate(result['sources'], 1):
        print(f"\n[{i}] {doc.page_content[:150]}...")
        print(f"    From: {doc.metadata.get('source', 'unknown')}")
        print(f"    Section: {doc.metadata.get('section', 'N/A')}")
except Exception as e:
    print(f"Error: {e}")
    print("Make sure GROQ_API_KEY is set in your .env file")

# ==============================================================================
# STEP 8: Advanced RAG Chain with Streaming
# ==============================================================================

print("\n" + "=" * 80)
print("STEP 8: Streaming RAG Chain")
print("=" * 80)

# Streaming allows you to see the response as it's generated
def streaming_rag(question: str):
    """RAG with streaming output for better UX"""
    print(f"\nQuestion: {question}")
    print("-" * 80)
    print("Answer (streaming): ", end="", flush=True)
    
    try:
        # Stream the response token by token
        for chunk in rag_chain.stream(question):
            print(chunk, end="", flush=True)
        print("\n")
    except Exception as e:
        print(f"\nError: {e}")

# Test streaming
streaming_rag("What is the purpose of chunking in RAG systems?")

# ==============================================================================
# STEP 9: Advanced Retrieval Techniques
# ==============================================================================

print("\n" + "=" * 80)
print("STEP 9: Advanced Retrieval Options")
print("=" * 80)

# Option 1: MMR (Maximal Marginal Relevance) - diverse results
mmr_retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={
        "k": 3,              # Number of documents to return
        "fetch_k": 10,       # Number of candidates to consider
        "lambda_mult": 0.5   # Diversity (0=max diversity, 1=max relevance)
    }
)

# Create MMR-based RAG chain
rag_chain_mmr = (
    {"context": mmr_retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | get_llm()
    | StrOutputParser()
)

print("Created MMR-based RAG chain for diverse results")

# Option 2: Similarity threshold - only return docs above threshold
threshold_retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={
        "score_threshold": 0.7,  # Only return docs with similarity > 0.7
        "k": 5
    }
)

# Create threshold-based RAG chain
rag_chain_threshold = (
    {"context": threshold_retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | get_llm()
    | StrOutputParser()
)

print("Created threshold-based RAG chain for high-quality matches only")

# Test different retrieval strategies
print("\n" + "=" * 80)
print("Comparing Retrieval Strategies")
print("=" * 80)

comparison_query = "Tell me about embeddings"

print(f"\nQuery: {comparison_query}\n")

# Standard similarity search
print("1. Standard Similarity Search:")
print("-" * 80)
standard_docs = retriever.invoke(comparison_query)
for i, doc in enumerate(standard_docs, 1):
    print(f"{i}. {doc.page_content[:80]}...")

# MMR search (more diverse)
print("\n2. MMR Search (Diverse Results):")
print("-" * 80)
mmr_docs = mmr_retriever.invoke(comparison_query)
for i, doc in enumerate(mmr_docs, 1):
    print(f"{i}. {doc.page_content[:80]}...")

# ==============================================================================
# Summary
# ==============================================================================

print("\n" + "=" * 80)
print("RAG SYSTEM SUMMARY")
print("=" * 80)
print(f"""
✓ Knowledge Base: {len(documents)} documents
✓ Chunks: {len(split_documents)} chunks
✓ Embedding Model: sentence-transformers/all-MiniLM-L6-v2
✓ Vector Store: ChromaDB (persistent)
✓ Retrieval Methods: Similarity, MMR, Threshold
✓ RAG Chain: Built with LCEL (modern approach)
✓ Features: Streaming, Source Attribution, Multiple Retrieval Strategies

Key LCEL Benefits:
• More composable and flexible than old chains
• Better streaming support for real-time responses
• Easier to debug and modify components
• Cleaner syntax with | operator
• Built-in async support for performance

Usage:
1. Set GROQ_API_KEY in .env file (or use Ollama)
2. Run: answer = rag_chain.invoke("Your question here")
3. For sources: result = rag_with_sources("Your question")
4. For streaming: rag_chain.stream("Your question")
""")

STEP 1: Document Preparation
Created 5 documents

STEP 2: Document Chunking
Split into 10 chunks

Chunk 1: Vector Databases and Embeddings:
    Vector databases store information as numerical vectors (embedd...

Chunk 2: similarity. This enables semantic search - finding relevant information 
    based on meaning rather...

Chunk 3: Retrieval-Augmented Generation (RAG):
    RAG enhances Large Language Models by retrieving relevant ...

STEP 3: Creating Vector Store
Loading embedding model (this may take a moment)...
Vector store created successfully!
Total vectors stored: 30

STEP 4: Testing Retrieval

Query: 'How does RAG work?'
Retrieved 3 documents:

1. - Learn from feedback to improve performance
    Agents can use RAG to access knowledge dynamically during task execution....
   Source: ai_guide_4.txt

2. - Learn from feedback to improve performance
    Agents can use RAG to access knowledge dynamically during task execution....
   Source: ai_guide_4.txt

3. - Learn from feedback t

  return ChatOllama(


Answer: Based on the context, it appears that RAG refers to a mechanism or system used by agents (likely artificial intelligence or machine learning models) to access knowledge dynamically during task execution. This allows them to learn from feedback and improve their performance over time.

In other words, RAG seems to be a way for intelligent systems to adapt and learn as they interact with their environment, using the information they gather to refine their understanding and behavior.


Question: What are the key considerations for chunking?
--------------------------------------------------------------------------------
Answer: According to the given context, the key considerations for chunking strategies include:

1. Chunk size: Smaller chunks (100-500 tokens) for precise retrieval.
2. Overlap: 10-20% overlap prevents losing context at boundaries.

These two factors are crucial in determining how documents should be split into chunks to fit within LLM (Large Language Model) conte

In [None]:
"""
Complete RAG (Retrieval-Augmented Generation) System
This demonstrates a full pipeline from documents to AI-generated answers
"""

from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
import os
from dotenv import load_dotenv

load_dotenv()

def get_llm():
    # We use Groq for logic/routing as it follows instructions well
    llm_type = os.getenv("LLM_TYPE", "groq")
    if llm_type == "groq":
        return ChatGroq(model="llama-3.1-8b-instant", temperature=0.3, max_tokens=500)
    else:
        return ChatOllama(model="llama3.1", temperature=0.3, max_tokens=500)

# ==============================================================================
# STEP 1: Prepare Documents and Create Knowledge Base
# ==============================================================================

# Sample knowledge base about AI concepts
knowledge_base = [
    """
    Vector Databases and Embeddings:
    Vector databases store information as numerical vectors (embeddings) that 
    capture semantic meaning. When you search, your query is converted to a 
    vector and compared to stored vectors using similarity metrics like cosine 
    similarity. This enables semantic search - finding relevant information 
    based on meaning rather than exact keyword matches.
    """,
    """
    Retrieval-Augmented Generation (RAG):
    RAG enhances Large Language Models by retrieving relevant information from 
    a knowledge base before generating responses. The process involves three steps:
    1. Retrieve: Search for relevant documents using vector similarity
    2. Augment: Add retrieved context to the prompt
    3. Generate: The LLM creates an answer grounded in the retrieved information
    This approach reduces hallucinations and provides up-to-date, specific knowledge.
    """,
    """
    Chunking Strategies:
    Documents are split into chunks to fit within LLM context windows and improve 
    retrieval precision. Key considerations include:
    - Chunk size: Smaller chunks (100-500 tokens) for precise retrieval
    - Overlap: 10-20% overlap prevents losing context at boundaries
    - Separators: Use natural boundaries (paragraphs, sentences)
    - Metadata: Add source info, page numbers, timestamps for tracking
    """,
    """
    Embedding Models:
    Embeddings are dense vector representations of text that capture semantic meaning.
    Popular models include:
    - OpenAI text-embedding-ada-002: High quality, 1536 dimensions
    - Sentence-BERT (all-MiniLM-L6-v2): Fast, efficient, 384 dimensions
    - Instructor embeddings: Task-specific embeddings
    The choice depends on accuracy needs, speed requirements, and cost constraints.
    """,
    """
    Agentic AI Systems:
    AI agents are autonomous systems that can:
    - Perceive their environment through sensors or data
    - Make decisions using reasoning and planning
    - Take actions using tools (APIs, databases, search engines)
    - Learn from feedback to improve performance
    Agents can use RAG to access knowledge dynamically during task execution.
    """
]

# Create Document objects
documents = [
    Document(
        page_content=text.strip(),
        metadata={"source": f"ai_guide_{i}.txt", "section": i}
    )
    for i, text in enumerate(knowledge_base)
]

print("=" * 80)
print("STEP 1: Document Preparation")
print("=" * 80)
print(f"Created {len(documents)} documents")

# ==============================================================================
# STEP 2: Split Documents into Chunks
# ==============================================================================

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,      # Smaller chunks for more precise retrieval
    chunk_overlap=50,
    length_function=len,
    separators=["\n\n", "\n", ". ", " "]
)

split_documents = text_splitter.split_documents(documents)

print("\n" + "=" * 80)
print("STEP 2: Document Chunking")
print("=" * 80)
print(f"Split into {len(split_documents)} chunks")
for i, doc in enumerate(split_documents[:3]):
    print(f"\nChunk {i+1}: {doc.page_content[:100]}...")

# ==============================================================================
# STEP 3: Create Embeddings and Vector Store
# ==============================================================================
"""
We use HuggingFace embeddings (free, runs locally)
For production, consider: OpenAI embeddings (paid but higher quality)
"""

print("\n" + "=" * 80)
print("STEP 3: Creating Vector Store")
print("=" * 80)
print("Loading embedding model (this may take a moment)...")

# Initialize embedding model
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},  # Use 'cuda' for GPU
    encode_kwargs={'normalize_embeddings': True}  # Important for cosine similarity
)

# Create vector store from documents
vectorstore = Chroma.from_documents(
    documents=split_documents,
    embedding=embeddings,
    collection_name="rag_knowledge_base",
    persist_directory="./chroma_rag_db"  # Save to disk
)

print("Vector store created successfully!")
print(f"Total vectors stored: {vectorstore._collection.count()}")

# ==============================================================================
# STEP 4: Test Retrieval
# ==============================================================================

print("\n" + "=" * 80)
print("STEP 4: Testing Retrieval")
print("=" * 80)

# Create a retriever with specific search parameters
retriever = vectorstore.as_retriever(
    search_type="similarity",  # or "mmr" for diverse results
    search_kwargs={"k": 3}     # Return top 3 results
)

# Test query
test_query = "How does RAG work?"
retrieved_docs = retriever.invoke(test_query)

print(f"\nQuery: '{test_query}'")
print(f"Retrieved {len(retrieved_docs)} documents:\n")

for i, doc in enumerate(retrieved_docs):
    print(f"{i+1}. {doc.page_content[:150]}...")
    print(f"   Source: {doc.metadata.get('source', 'unknown')}\n")

# ==============================================================================
# STEP 5: Build RAG Chain (Without LLM for demonstration)
# ==============================================================================
"""
The complete RAG chain would look like this:
"""

print("=" * 80)
print("STEP 5: RAG Chain Architecture")
print("=" * 80)

# Define a custom prompt template
template = """
You are a helpful AI assistant. Use the following context to answer the question.
If you cannot answer based on the context, say so.

Context:
{context}

Question: {question}

Answer: Let me help you understand this based on the provided information.
"""

prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"]
)

print("\nPrompt template created:")
print(prompt.template)

# Note: This is the structure for when you have an LLM API key

# Create RAG chain
qa_chain = RetrievalQA.from_chain_type(
    llm=get_llm(),
    chain_type="stuff",  # "stuff" puts all docs in context
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt}
)

# Use the chain
result = qa_chain({"query": "What is RAG?"})
print(f"Answer: {result['result']}")
print(f"Sources: {result['source_documents']}")


# ==============================================================================
# STEP 6: Manual RAG Simulation (Without LLM)
# ==============================================================================

print("\n" + "=" * 80)
print("STEP 6: Manual RAG Process Demonstration")
print("=" * 80)

def manual_rag(query: str, retriever, max_docs: int = 3):
    """
    Demonstrates the RAG process without calling an actual LLM
    Shows what context would be sent to the model
    """
    # Step 1: Retrieve relevant documents
    docs = retriever.get_relevant_documents(query)[:max_docs]
    
    # Step 2: Format context
    context = "\n\n".join([f"Document {i+1}:\n{doc.page_content}" 
                           for i, doc in enumerate(docs)])
    
    # Step 3: Create the full prompt
    full_prompt = f"""
Context Information:
{context}

Question: {query}

Instructions: Based on the context above, provide a comprehensive answer.
"""
    
    return {
        "query": query,
        "retrieved_docs": docs,
        "context": context,
        "full_prompt": full_prompt,
        "num_docs": len(docs)
    }

# Test the manual RAG
test_queries = [
    "What is the purpose of chunking in RAG?",
    "Explain what agents can do",
    "What embedding models are available?"
]

for query in test_queries:
    result = manual_rag(query, retriever)
    
    print(f"\n{'='*80}")
    print(f"Query: {result['query']}")
    print(f"{'='*80}")
    print(f"Retrieved {result['num_docs']} documents")
    print("\nThis prompt would be sent to the LLM:")
    print("-" * 80)
    print(result['full_prompt'][:500] + "...")
    print()

# ==============================================================================
# STEP 7: Advanced Retrieval Techniques
# ==============================================================================

print("\n" + "=" * 80)
print("STEP 7: Advanced Retrieval Options")
print("=" * 80)

# Option 1: MMR (Maximal Marginal Relevance) - diverse results
mmr_retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={
        "k": 3,              # Number of documents to return
        "fetch_k": 10,       # Number of candidates to consider
        "lambda_mult": 0.5   # Diversity (0=max diversity, 1=max relevance)
    }
)

# Option 2: Similarity threshold - only return docs above threshold
threshold_retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={
        "score_threshold": 0.7,  # Only return docs with similarity > 0.7
        "k": 5
    }
)

print("Created advanced retrievers:")
print("1. MMR retriever (for diverse results)")
print("2. Threshold retriever (for high-quality matches only)")

# ==============================================================================
# Summary
# ==============================================================================

print("\n" + "=" * 80)
print("RAG SYSTEM SUMMARY")
print("=" * 80)
print(f"""
✓ Knowledge Base: {len(documents)} documents
✓ Chunks: {len(split_documents)} chunks
✓ Embedding Model: sentence-transformers/all-MiniLM-L6-v2
✓ Vector Store: ChromaDB (persistent)
✓ Retrieval Methods: Similarity, MMR, Threshold

To complete the RAG system:
1. Add OpenAI API key
2. Initialize ChatOpenAI
3. Create RetrievalQA chain
4. Query and get AI-generated answers with sources
""")

ModuleNotFoundError: No module named 'langchain.chains'