In [1]:
## Make sure you load the API keys for cloud providers!

In [3]:
# Load the environment variables (the provider API keys) via python-dotenv
# so the clients created in later cells can pick them up.
from dotenv import load_dotenv
load_dotenv()

True

### Query Expansion

In [4]:
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# Prompt asking the model to reword the user's question three ways. The
# template ends with "1." so the model continues the numbered list directly.
expansion_template = (
    "Given the user question: {question}\n"
    "Generate three alternative versions that express the same information need but with different wording:\n"
    "1."
)

expansion_prompt = PromptTemplate(
    template=expansion_template,
    input_variables=["question"],
)

llm = ChatOpenAI(temperature=0.7)

# prompt -> chat model -> plain string
expansion_chain = expansion_prompt | llm | StrOutputParser()

# Run the chain on a sample question
original_query = "What are the benefits of using renewable energy sources?"
expanded_queries = expansion_chain.invoke(original_query)

for label, value in (
    ("Original Query:", original_query),
    ("Expanded Queries:", expanded_queries),
):
    print(label, value)

Original Query: What are the benefits of using renewable energy sources?
Expanded Queries: What advantages come from utilizing renewable energy sources?
2. How does using renewable energy sources benefit us?
3. What are the perks of relying on renewable energy sources for power?


### Hypothetical Document Embeddings (HyDE)

In [5]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import JSONLoader

# Load the knowledge base: the jq schema turns the `content` field of each
# array item into one document.
loader = JSONLoader(
    file_path="knowledge_base.json",
    jq_schema=".[].content",  # This extracts the content field from each array item
    text_content=True
)
documents = loader.load()

# Build the FAISS index. `from_documents` embeds the document texts itself, so
# a separate `embedder.embed_documents(...)` call is not needed: its result was
# never used and it only repeated the embedding API calls.
embedder = OpenAIEmbeddings()
vector_db = FAISS.from_documents(documents, embedder)

In [6]:
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Create prompt for generating hypothetical document
hyde_template = """Based on the question: {question}
Write a passage that could contain the answer to this question:"""

hyde_prompt = PromptTemplate(
    input_variables=["question"],
    template=hyde_template
)
llm = ChatOpenAI(temperature=0.2)
hyde_chain = hyde_prompt | llm | StrOutputParser()

# Generate hypothetical document
query = "What dietary changes can reduce carbon footprint?"
hypothetical_doc = hyde_chain.invoke(query)

# Use the hypothetical document for retrieval.
# The query vector is produced by `embedder`, the same object that built
# `vector_db`, so the query and the index are guaranteed to share one embedding
# model even if `embedder` is later configured with a non-default model.
embedded_query = embedder.embed_query(hypothetical_doc)
results = vector_db.similarity_search_by_vector(embedded_query, k=3)

In [7]:
# Show the documents (k=3) retrieved with the hypothetical-document embedding
print(results)

[Document(id='9b255915-fce3-4f56-ac1f-72972779109f', metadata={'source': '/Users/surajpandey/Documents/Python/agentic-ai-wrk/RAGS/knowledge_base.json', 'seq_num': 5}, page_content='Vector databases store high-dimensional vectors and efficiently perform similarity searches. Popular vector databases include Pinecone, Milvus, and FAISS. They use algorithms like HNSW (Hierarchical Navigable Small World) or IVF (Inverted File Index) to enable fast approximate nearest neighbor search. These databases are essential for scaling embedding-based retrieval systems to large document collections.'), Document(id='bac674b4-350d-4b87-bab8-6f12be93a0be', metadata={'source': '/Users/surajpandey/Documents/Python/agentic-ai-wrk/RAGS/knowledge_base.json', 'seq_num': 1}, page_content="Transformer models were introduced in the paper 'Attention Is All You Need' by Vaswani et al. in 2017. The architecture relies on self-attention mechanisms rather than recurrent or convolutional neural networks. This design al

### Contextual Compression

In [9]:
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers import ContextualCompressionRetriever
from langchain_openai import ChatOpenAI

# Plain similarity retriever over the existing index (3 documents per query)
base_retriever = vector_db.as_retriever(search_kwargs={"k": 3})

# LLM-backed compressor applied to the documents the base retriever returns
llm = ChatOpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(llm)

# Wrap the base retriever so its results pass through the compressor
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=base_retriever,
)
compressed_doc = compression_retriever.invoke("How do transformers work?")

In [10]:
# Display the documents returned by the compression retriever
compressed_doc

[Document(metadata={'source': '/Users/surajpandey/Documents/Python/agentic-ai-wrk/RAGS/knowledge_base.json', 'seq_num': 1}, page_content="Transformer models were introduced in the paper 'Attention Is All You Need' by Vaswani et al. in 2017. The architecture relies on self-attention mechanisms rather than recurrent or convolutional neural networks. This design allows for more parallelization during training and better handling of long-range dependencies in text."),
 Document(metadata={'source': '/Users/surajpandey/Documents/Python/agentic-ai-wrk/RAGS/knowledge_base.json', 'seq_num': 2}, page_content='BERT (Bidirectional Encoder Representations from Transformers) was developed by Google AI Language team in 2018.')]

### Maximum Marginal Relevance (MMR)

In [11]:
from langchain_community.vectorstores import FAISS

# Search the existing `vector_db` index directly. The previous version first
# built a second index (`vector_store`) from the same documents and embedder
# but never queried it, which only added embedding API calls and made this
# cell depend on whatever `documents` happened to be bound to at run time.
mmr_results = vector_db.max_marginal_relevance_search(
    query="What are transformer models?",
    k=5,              # number of documents returned
    fetch_k=20,       # candidates fetched before the MMR re-ranking
    lambda_mult=0.5   # trade-off between relevance and diversity
)

print(mmr_results)

[Document(id='bac674b4-350d-4b87-bab8-6f12be93a0be', metadata={'source': '/Users/surajpandey/Documents/Python/agentic-ai-wrk/RAGS/knowledge_base.json', 'seq_num': 1}, page_content="Transformer models were introduced in the paper 'Attention Is All You Need' by Vaswani et al. in 2017. The architecture relies on self-attention mechanisms rather than recurrent or convolutional neural networks. This design allows for more parallelization during training and better handling of long-range dependencies in text."), Document(id='fc492055-f866-4ebb-9f23-0030caad05a8', metadata={'source': '/Users/surajpandey/Documents/Python/agentic-ai-wrk/RAGS/knowledge_base.json', 'seq_num': 3}, page_content='GPT (Generative Pre-trained Transformer) models are autoregressive language models that use transformer-based neural networks. Unlike BERT, which is bidirectional, GPT models are unidirectional and predict the next token based on previous tokens. The original GPT was introduced by OpenAI in 2018, followed b

### Source Attribution

In [12]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document

# Example documents
# Each record is (page_content, source title, page number); the source and
# page end up in the document metadata used for citations.
_example_records = [
    (
        "The transformer architecture was introduced in the paper 'Attention is All You Need' by Vaswani et al. in 2017.",
        "Neural Network Review 2021",
        42,
    ),
    (
        "BERT uses bidirectional training of the Transformer, masked language modeling, and next sentence prediction tasks.",
        "Introduction to NLP",
        137,
    ),
    (
        "GPT models are autoregressive transformers that predict the next token based on previous tokens.",
        "Large Language Models Survey",
        89,
    ),
]

documents = [
    Document(page_content=text, metadata={"source": source, "page": page})
    for text, source, page in _example_records
]

In [13]:
# Create a vector store and retriever
# Index the current `documents` in FAISS using OpenAI embeddings.
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(documents, embeddings)
# Retriever configured with k=3 documents per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

In [14]:
# Source attribution prompt template
# The template tells the model to answer only from the supplied sources, cite
# each claim with [n] markers, and finish with a numbered reference list.
# Placeholders: {question} and {sources}.
ATTRIBUTION_TEMPLATE = """
    You are a precise AI assistant that provides well-sourced information.
    Please ensure to cite your sources accurately.
    Answer the following question based ONLY on the provided sources. for each fact or claim in your answer,
    include citation using [1], [2], etc. that refers to the source. Include a numbered reference list at the end.

    Question: {question}

    Sources:
    {sources}

    Your Answer:
    """

attribution_prompt = ChatPromptTemplate.from_template(ATTRIBUTION_TEMPLATE)

In [15]:
# Create a source-formatted string from documents
def format_sources_with_citations(docs):
    """Render documents as a numbered source list for the attribution prompt.

    Each entry is ``[n] <source>[, page <page>]`` on one line, followed by the
    document text on the next line. Entries are numbered from 1 and separated
    by a blank line.

    Args:
        docs: Iterable of objects exposing ``metadata`` (a dict) and
            ``page_content`` (the text of the document).

    Returns:
        The formatted sources as a single string ("" when ``docs`` is empty).
    """
    formatted_sources = []
    for i, doc in enumerate(docs, 1):
        source_info = f"[{i}] {doc.metadata.get('source', 'Unknown source')}"
        page = doc.metadata.get('page')
        # Test for "missing" explicitly instead of relying on truthiness:
        # page 0 is a valid page number (some loaders count pages from zero)
        # and a truthiness check would silently drop it from the citation.
        if page is not None and page != "":
            source_info += f", page {page}"
        formatted_sources.append(f"{source_info}\n{doc.page_content}")
    return "\n\n".join(formatted_sources)

# Build the RAG chain with source attribution
def generate_attributed_response(question):
    """Answer ``question`` from retrieved sources, with inline citations.

    Retrieves documents through the notebook-level ``retriever``, numbers them
    with ``format_sources_with_citations``, and sends the question plus the
    numbered sources through ``attribution_prompt`` to a chat model.

    Args:
        question: The user question to answer.

    Returns:
        The model's answer as a string.
    """
    # Retrieve the documents and turn them into a numbered source list
    numbered_sources = format_sources_with_citations(retriever.invoke(question))

    # LCEL pipeline: prompt -> chat model (temperature 0) -> plain string
    chain = attribution_prompt | ChatOpenAI(temperature=0) | StrOutputParser()

    return chain.invoke({"question": question, "sources": numbered_sources})

In [16]:
# Example usage: ask a question and print the answer, which the prompt
# instructs the model to write with [n] citations and a reference list.
question = "How do transformer models work and what are some examples?"
attributed_answer = generate_attributed_response(question)
print(attributed_answer)

Transformer models work by utilizing self-attention mechanisms to weigh the importance of different input tokens when making predictions. This allows them to capture long-range dependencies in the data more effectively compared to traditional recurrent neural networks [1].

One example of a transformer model is BERT (Bidirectional Encoder Representations from Transformers), which incorporates bidirectional training of the Transformer, masked language modeling, and next sentence prediction tasks to achieve state-of-the-art performance in various natural language processing tasks [2].

Another example is the GPT (Generative Pre-trained Transformer) series of models, which are autoregressive transformers that predict the next token in a sequence based on the preceding tokens. This approach allows GPT models to generate coherent and contextually relevant text [3].

Reference List:
[1] Neural Network Review 2021, page 42
[2] Introduction to NLP, page 137
[3] Large Language Models Survey, pa

### Self-consistency Checking

In [17]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from typing import List, Dict
from langchain_core.documents import Document

def verify_response_accuracy(
    retrieved_docs: List[Document],
    generated_answer: str,
    llm: ChatOpenAI = None
) -> str:
    """
    Ask an LLM to check whether a generated answer is supported by the retrieved documents.

    The documents' text is joined into one context block and sent, together
    with the answer, through a fact-checking prompt that requests a JSON report.

    Args:
        retrieved_docs: List of documents used to generate the answer
        generated_answer: The answer produced by the RAG system
        llm: Language model to use for verification. When None, a
            ChatOpenAI(model="gpt-3.5-turbo", temperature=0) instance is created.
    Returns:
        The model's raw text output. The chain ends in StrOutputParser, so this
        is a string and NOT a parsed dictionary; the prompt asks for JSON with
        the keys "claims", "fully_grounded" and "issues_identified", but the
        output is not validated here.
        NOTE(review): callers that need a dict must parse the string themselves.
    """
    if llm is None:
        llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
        
    # Create context from retrieved documents (texts separated by blank lines)
    context = "\n\n".join([doc.page_content for doc in retrieved_docs])
    
    # Define verification prompt. The literal braces of the JSON example are
    # doubled ({{ }}) so the template treats them as text, leaving {context}
    # and {answer} as the only placeholders.
    verification_prompt = ChatPromptTemplate.from_template("""
    As a fact-checking assistant, verify whether the following answer is fully supported
    by the provided context. Identify any statements that are not supported or contradict the context.
    
    Context:
    {context}
    
    Answer to verify:
    {answer}
    
    Perform a detailed analysis with the following structure:
    1. List any factual claims in the answer
    2. For each claim, indicate whether it is:
       - Fully supported (provide the supporting text from context)
       - Partially supported (explain what parts lack support)
       - Contradicted (identify the contradiction)
       - Not mentioned in context
    3. Overall assessment: Is the answer fully grounded in the context?
    
    Return your analysis in JSON format with the following structure:
    {{
      "claims": [
        {{
          "claim": "The factual claim",
          "status": "fully_supported|partially_supported|contradicted|not_mentioned",
          "evidence": "Supporting or contradicting text from context",
          "explanation": "Your explanation"
        }}
      ],
      "fully_grounded": true|false,
      "issues_identified": ["List any specific issues"]
    }}
    """)
    
    # Create verification chain using LCEL: prompt -> model -> plain string
    verification_chain = (
        verification_prompt
        | llm
        | StrOutputParser()
    )
    
    # Run verification
    result = verification_chain.invoke({
        "context": context,
        "answer": generated_answer
    })
    
    return result

In [18]:
# Example usage
# Two context passages, and an answer that deliberately contradicts the first
# one (wrong author/year, wrong architecture) while agreeing with the second.
retrieved_docs = [
    Document(page_content=passage)
    for passage in (
        "The transformer architecture was introduced in the paper 'Attention Is All You Need' by Vaswani et al. in 2017. It relies on self-attention mechanisms instead of recurrent or convolutional neural networks.",
        "BERT is a transformer-based model developed by Google that uses masked language modeling and next sentence prediction as pre-training objectives.",
    )
]

generated_answer = (
    "The transformer architecture was introduced by OpenAI in 2018 and uses recurrent neural networks. "
    "BERT is a transformer model developed by Google."
)

verification_result = verify_response_accuracy(
    retrieved_docs=retrieved_docs,
    generated_answer=generated_answer,
)
print(verification_result)

{
    "claims": [
        {
            "claim": "The transformer architecture was introduced by OpenAI in 2018",
            "status": "contradicted",
            "evidence": "The transformer architecture was introduced in the paper 'Attention Is All You Need' by Vaswani et al. in 2017.",
            "explanation": "The claim is contradicted by the fact that the transformer architecture was actually introduced in 2017 by Vaswani et al., not by OpenAI in 2018."
        },
        {
            "claim": "The transformer architecture uses recurrent neural networks",
            "status": "contradicted",
            "evidence": "It relies on self-attention mechanisms instead of recurrent or convolutional neural networks.",
            "explanation": "The claim is contradicted by the fact that the transformer architecture does not use recurrent neural networks but relies on self-attention mechanisms."
        },
        {
            "claim": "BERT is a transformer model developed by Googl

### Corrective RAG

In [19]:
from pydantic import BaseModel, Field
# Structured-output schema used by `evaluate_document`, which passes it to
# `llm.with_structured_output(...)`.
# NOTE(review): the docstring and Field descriptions below are presumably
# forwarded to the model as part of the schema, so treat them as prompt text
# rather than plain comments - confirm before rewording them.
class DocumentRelevanceScore(BaseModel):
    """Binary relevance score for document evaluation."""
    # True when the document is judged relevant to the query
    is_relevant: bool = Field(description="Whether the document contains information relevant to the query")
    # Free-text justification for the decision
    reasoning: str = Field(description="Explanation for the relevance decision")

def evaluate_document(document, query, llm):
    """Grade whether ``document`` is relevant to ``query`` using ``llm``.

    Args:
        document: Object exposing ``page_content`` (the text to grade).
        query: The query the document is graded against.
        llm: Model object providing ``with_structured_output``.

    Returns:
        Whatever the structured-output model returns for the
        ``DocumentRelevanceScore`` schema.
    """
    grading_prompt = f""" You are an expert document evaluator. Your task is to
    determine if the following document contains information relevant to the
    given query.
    Query: {query}
    Document content:
    {document.page_content}
    Analyze whether this document contains information that helps answer the
    query.
    """
    # Constrain the model's reply to the DocumentRelevanceScore schema
    grader = llm.with_structured_output(DocumentRelevanceScore)
    return grader.invoke(grading_prompt)