In [1]:
import os
import langchain_community
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain_community.chat_models import ChatOpenAI
from langchain.prompts import FewShotPromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from sentence_transformers import SentenceTransformer
from langchain.evaluation.qa import QAEvalChain
from langchain.schema import OutputParserException
import json
import numpy as np
import torch

  from .autonotebook import tqdm as notebook_tqdm


# Title: Advanced RAG & LangChain Fundamentals

This notebook explores:
1. **Few-Shot Prompting**
2. **Re-Ranking with Cross-Encoders**
3. **Conversational Retrieval with Memory**
4. **RAG Evaluation**
5. **Output Parsing**



In [3]:
def pretty_print(title, content):
    """Print ``content`` beneath a ``=== title ===`` banner, then a closing rule and a blank line."""
    banner = f"=== {title} ==="
    # The trailing "\n" inside the rule plus print's own newline yields the blank spacer line.
    closing_rule = "================\n"
    print(banner, content, closing_rule, sep="\n")

from dotenv import load_dotenv

# Load variables from a .env file into the process environment before reading the key.
load_dotenv()

# The key is read from the environment so it never appears in the notebook itself;
# this is None when OPENAI_API_KEY is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Chat model shared by the examples in this notebook.
llm = ChatOpenAI(
    model_name="gpt-4o",
    temperature=0,  # Low temperature for deterministic output
    openai_api_key=openai_api_key,
)

  llm = ChatOpenAI(temperature=0, openai_api_key=openai_api_key,model_name="gpt-4o")  # Low temperature for deterministic output


## Section 1: Few-Shot Prompting

Few-Shot Prompting helps demonstrate a task to the LLM using a small set of examples.  
Below, we create a function that:
1. Builds a prompt from a list of Q&A examples.
2. Asks **you** for a question, then appends it to the final prompt.
3. Sends the prompt to the LLM (OpenAI by default), returning the model’s response.


In [None]:
def few_shot_prompt_example():
    """
    Answer one user-supplied question using a hand-built few-shot prompt.

    Four worked capital-city Q&A pairs are rendered as ``Q: ... / A: ...``
    text and placed ahead of a question read from ``input()``. The finished
    prompt is sent to the module-level ``llm``.

    Returns:
        The ``content`` attribute of the object returned by ``llm.invoke``.
    """
    # Demonstrations shown to the model before the real question.
    examples = [
        {"input": "What is the capital of France?", "output": "Paris."},
        {"input": "What is the capital of Spain?", "output": "Madrid."},
        {"input": "Whats the capital of Italy?", "output": "Rome."},
        {"input": "Whats denmarks capital city?", "output": "Copenhagen."},
    ]

    # Each pair becomes a "Q/A" stanza followed by a blank line.
    rendered_shots = "".join(
        f"Q: {pair['input']}\nA: {pair['output']}\n\n" for pair in examples
    )

    # Skeleton of the final prompt: instructions, the stanzas, then the open question.
    template = (
        "Use the examples below to answer the question.\n\n"
        "{examples}\n"
        "Q: {question}\nA:"
    )

    asked = input("Enter a question ('What is the capital of ___?'): ")

    return llm.invoke(template.format(examples=rendered_shots, question=asked)).content

# Run the few-shot demo (it prompts for a question on stdin) and show the model's answer.
response_few_shot = few_shot_prompt_example()
pretty_print("Few-Shot Prompt Response", response_few_shot)



## Section 2: Re-Ranking with a Cross-Encoder
**Why Re-Rank?** Approximate nearest neighbor (ANN) searches may return decent but not always perfectly ranked results.

A **Cross-Encoder** re-evaluates each (query, document) pair to produce a more accurate relevance score.

**Steps**:
1. Create a small set of dummy documents.
2. Store them in a FAISS vector store with embeddings.
3. Retrieve the top-k documents.
4. Re-rank those documents using a cross-encoder model.
5. Return the highest-scoring document.

In [None]:
from sentence_transformers import CrossEncoder

def advanced_rag_rerank_example():
    """
    Retrieve candidate passages from FAISS and re-order them with a cross-encoder.

    Flow:
        * index a fixed set of short city/landmark passages in a FAISS store
          built with OpenAI embeddings,
        * read a query from ``input()`` and run a similarity search with ``k=5``,
        * score every (query, passage) pair with the
          ``cross-encoder/ms-marco-MiniLM-L-6-v2`` model,
        * print the passages from highest to lowest score.

    Returns:
        ``(best_doc, ranked_docs)`` where ``ranked_docs`` is a list of
        ``(document, score)`` tuples in descending score order and
        ``best_doc`` is the document of its first entry. When the search
        returns nothing, ``(None, [])`` is returned instead.
    """
    # Fixed demo corpus: one sentence per passage.
    passages = [
        "Paris is the capital of France, known for its iconic Eiffel Tower, rich art, and the Louvre Museum.",
        "Berlin, the capital of Germany, is famous for its history, culture, and landmarks such as the Brandenburg Gate.",
        "Madrid, the capital of Spain, is a vibrant city known for its Royal Palace and Plaza Mayor.",
        "The Eiffel Tower, located in Paris, is one of the most visited monuments in the world.",
        "New York City is known for the Statue of Liberty, Times Square, and as a global financial hub.",
        "Tokyo is the capital of Japan, a metropolis blending modern skyscrapers with historic temples.",
        "The Colosseum, an ancient Roman amphitheater, is located in Rome, Italy.",
        "London, the capital of the United Kingdom, is home to Big Ben, Buckingham Palace, and the River Thames.",
        "The Seine River runs through Paris, offering scenic views of the city's famous landmarks.",
        "Barcelona, Spain, is renowned for its stunning architecture, including the Sagrada Familia and Park Güell.",
        "The Louvre Museum in Paris houses thousands of artworks, including the Mona Lisa.",
        "The Champs-Élysées in Paris is a famous avenue lined with luxury shops, theaters, and cafés.",
        "The Golden Gate Bridge in San Francisco is an iconic suspension bridge known for its striking red color.",
        "The Pyramids of Giza in Egypt are among the world's most famous ancient wonders.",
        "Sydney, Australia, is known for its Opera House, Harbour Bridge, and beautiful beaches.",
        "Mount Everest, the tallest mountain in the world, is located on the border between Nepal and Tibet."
    ]
    corpus = [Document(page_content=passage) for passage in passages]

    # Embed the corpus and build the vector index used for first-stage retrieval.
    embedder = OpenAIEmbeddings(openai_api_key=openai_api_key)
    index = FAISS.from_documents(corpus, embedder)

    # First stage: similarity search for the user's query.
    query = input("\nEnter a query (e.g., Ask about opera houses or fine arts): ")
    candidates = index.similarity_search(query, k=5)
    if not candidates:
        print("\nNo relevant documents found.")
        return None, []

    # Second stage: the cross-encoder scores each (query, passage) pair jointly.
    reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
    relevance = reranker.predict([(query, cand.page_content) for cand in candidates])

    # Highest score first; the top entry's document is the "best" result.
    scored = list(zip(candidates, relevance))
    scored.sort(key=lambda pair: pair[1], reverse=True)
    top_doc = scored[0][0]

    print("\n=== Re-Ranked Results ===")
    for position, (cand, value) in enumerate(scored, start=1):
        print(f"{position}. Score: {value:.4f} | Content: {cand.page_content}")
    print("=========================")

    return top_doc, scored

# Run the re-ranking demo (prompts for a query on stdin).
# best_doc is None and ranked_docs is empty when retrieval returned nothing.
best_doc, ranked_docs = advanced_rag_rerank_example()

# Display the best result separately; skipped when nothing was retrieved.
if best_doc:
    print("\n=== Best Retrieved Document ===")
    print(best_doc.page_content)
    print("==============================")

## Section 3: Conversational Retrieval with Memory

LangChain’s **Memory** component preserves conversation history.  
Below, we use a **ConversationBufferMemory** that keeps all prior messages in a buffer.

We'll simulate:
1. A user greeting.
2. AI greeting response.
3. Another user question.
4. AI response.

In a real pipeline, this memory would be fed into a retrieval step so the system can reference the conversation context.



In [None]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
# Memory object shared with the chain below; the history printout later in this
# section reads its chat_memory.messages list.
memory = ConversationBufferMemory(memory_key="history", return_messages=True)

# Conversation chain wired to the shared llm and the memory above.
# verbose=True is passed through to the chain (presumably it logs the prompts it sends).
conversation = ConversationChain(
    llm=llm,
    memory=memory,
    verbose=True
)

def conversational_retrieval_example():
    """
    Run an interactive chat loop backed by the module-level ``conversation`` chain.

    Each non-empty line read from ``input()`` is sent to
    ``conversation.predict`` and the reply is printed. The loop ends when the
    user types ``exit``, ``quit`` or ``stop``; the command is matched
    case-insensitively and ignores surrounding whitespace. Blank lines are
    skipped instead of being sent to the model.

    Returns:
        The module-level ``memory`` object, returned as-is so the caller can
        inspect the accumulated history.
    """
    exit_commands = {"exit", "quit", "stop"}

    print("\nAI: Hello! I can remember details from our conversation. Ask me about niche topics.")

    while True:
        # Strip first so "exit " or " Quit" still ends the session.
        user_input = input("\nYou: ").strip()

        if user_input.lower() in exit_commands:
            print("\nExiting conversation...")
            break

        if not user_input:
            # Nothing typed: avoid a pointless model call and an empty history entry.
            continue

        # Generate response
        response = conversation.predict(input=user_input)

        print(f"\nAI: {response}")

    return memory

# Run the interactive conversation; it returns the memory object holding the chat.
conv_memory = conversational_retrieval_example()

# Display conversation history
print("\n=== Conversation History ===")
for msg in conv_memory.chat_memory.messages:
    # LangChain tags human turns with type "human" (not "user"), so comparing
    # against "user" labelled every message as "AI". Any non-human message in
    # this buffer is treated as an AI turn.
    role = "User" if msg.type == "human" else "AI"
    print(f"{role}: {msg.content}")
print("============================")

## Section 4: RAG Evaluation

To measure how well a RAG system performs, we can:
- Compare retrieved documents to a “gold standard” set.
- Compare final answers to reference answers.

**QAEvalChain** (from LangChain) is a convenient helper to assess the correctness of QA outputs.

In [None]:

def rag_evaluation_example():
    """
    Grade one hard-coded prediction against its reference answer.

    The fixed example asks "Where is the Eiffel Tower located?", with the
    reference "The Eiffel Tower is located in Paris." and the prediction
    "It's located in central Paris.". Grading is delegated to a
    ``QAEvalChain`` built from the module-level ``llm``.

    Returns:
        Whatever ``QAEvalChain.evaluate`` returns for this single example.
    """
    question = "Where is the Eiffel Tower located?"

    # One reference example and the matching model prediction, paired by position.
    references = [{"query": question, "answer": "The Eiffel Tower is located in Paris."}]
    candidates = [{"query": question, "result": "It's located in central Paris."}]

    grader = QAEvalChain.from_llm(llm)
    return grader.evaluate(examples=references, predictions=candidates)

In [None]:
# Call the function and print the evaluation result.
# Try adjusting the question, reference answer, or predicted answer inside
# rag_evaluation_example to see whether the grader classifies them correctly.
eval_result = rag_evaluation_example()
print(eval_result)


## Section 5: Output Parsing

Often, we need structured output (JSON, CSV, etc.).

**Output Parsing** ensures the LLM's output can be safely converted into a machine-readable format.  Below, we ask the LLM to respond in JSON. If it fails, we raise an exception.

In [4]:
def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence (bare or tagged ``json``)."""
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned
    # Remove the fences as exact prefixes/suffixes. str.strip("```json") would
    # instead treat its argument as a *set of characters* to trim.
    cleaned = cleaned[3:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    if cleaned[:4].lower() == "json":
        cleaned = cleaned[4:]
    return cleaned.strip()


def output_parsing_example():
    """
    Ask the LLM for a JSON answer to a user question and parse it.

    1. Read a question from ``input()``.
    2. Send a prompt to the module-level ``llm`` asking for JSON only, with
       the expected ``{"question": ..., "answer": ...}`` shape shown as an example.
    3. Strip an optional Markdown code fence from the reply and parse it
       with ``json.loads``.

    Returns:
        The parsed JSON value (a dict when the model follows the requested shape).

    Raises:
        OutputParserException: if the reply is not valid JSON. The underlying
            ``json.JSONDecodeError`` is attached as ``__cause__``.
    """
    # Step 1: Get user input
    user_query = input("\nEnter a question for JSON-formatted response (e.g., 'What is the capital of Germany?'): ")

    # Step 2: Generate response.
    # The example shape is produced with json.dumps so it is itself valid JSON
    # (double quotes) and stays well-formed when the question contains quotes
    # or apostrophes.
    expected_shape = json.dumps(
        {"question": user_query, "answer": "your response here"},
        ensure_ascii=False,
    )
    prompt = (
        "Answer the following question in **valid JSON format only** "
        "(without markdown formatting): " + expected_shape
    )
    response = llm.invoke(prompt)

    # Step 3: Extract text from the message object (fall back to str() for plain-string replies)
    response_text = response.content if hasattr(response, "content") else str(response)

    # Step 4: Remove Markdown fencing if the model added it anyway
    response_text = _strip_code_fence(response_text)

    # Step 5: Parse JSON
    try:
        parsed_output = json.loads(response_text)
    except json.JSONDecodeError as err:
        raise OutputParserException(
            f"Failed to parse LLM output as JSON. Raw output: {response_text}"
        ) from err

    return parsed_output

# Run the function (prompts for a question on stdin; raises OutputParserException
# when the model's reply cannot be parsed as JSON).
parsed_output = output_parsing_example()

# Print the parsed result re-serialized with 4-space indentation
print("\n=== Parsed JSON Output ===")
print(json.dumps(parsed_output, indent=4))
print("==========================")


=== Parsed JSON Output ===
{
    "question": "What are options?",
    "answer": "Options are financial derivatives that give the buyer the right, but not the obligation, to buy or sell an underlying asset at a specified price on or before a certain date. They are used for hedging, speculation, and to leverage positions in the financial markets."
}


# Conclusion & Next Steps

You have now seen:

1. **Few-Shot Prompting**  
2. **Re-Ranking** with a cross-encoder  
3. **Conversational Memory**  
4. **RAG Evaluation**  
5. **Output Parsing**


**Thank you**
