# Mounting Drive

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Creating Virtual Environment

In [3]:
!pip install -q virtualenv

In [4]:
!python -m virtualenv venv

created virtual environment CPython3.11.12.final.0-64 in 646ms
  creator CPython3Posix(dest=/content/venv, clear=False, no_vcs_ignore=False, global=False)
  seeder FromAppData(download=False, pip=bundle, setuptools=bundle, wheel=bundle, via=copy, app_data_dir=/root/.local/share/virtualenv)
    added seed packages: pip==25.0.1, setuptools==78.1.1, wheel==0.45.1
  activators BashActivator,CShellActivator,FishActivator,NushellActivator,PowerShellActivator,PythonActivator


# Virtual Environment Activation

In [5]:
!source venv/bin/activate

In [6]:
!which python

/usr/local/bin/python


# Write File requirements.txt

In [7]:
%%writefile requirements.txt
flask
flask-cors
werkzeug
faiss-cpu
python-dotenv
assemblyai
google-generativeai
pypdf2
python-docx
sentence-transformers
yt-dlp
langchain
langchain-community
langchain-google-genai
langchain-text-splitters
google-ai-generativelanguage
langchain_huggingface
pypdf
gradio


Overwriting requirements.txt


In [8]:
!pip install -q -r requirements.txt

In [None]:
!pip list

Package                               Version
------------------------------------- -------------------
absl-py                               1.4.0
accelerate                            1.6.0
aiofiles                              24.1.0
aiohappyeyeballs                      2.6.1
aiohttp                               3.11.15
aiosignal                             1.3.2
alabaster                             1.0.0
albucore                              0.0.24
albumentations                        2.0.6
ale-py                                0.11.0
altair                                5.5.0
annotated-types                       0.7.0
anyio                                 4.9.0
argon2-cffi                           23.1.0
argon2-cffi-bindings                  21.2.0
array_record                          0.7.2
arviz                                 0.21.0
assemblyai                            0.40.2
astropy                               7.0.1
astropy-iers-data                     0.2025.4.28.0

In [9]:
try:
    import flask
    from langchain_google_genai import ChatGoogleGenerativeAI
    import faiss
    print("✅ All packages installed successfully!")
except ImportError as e:
    print(f"❌ Error: {e}")
    print("Trying to fix...")
    !pip install -q -r requirements.txt

✅ All packages installed successfully!


# Import Google AI & Embedding model

In [10]:
from google.colab import userdata
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

# Instantiate LLM & Embeddings

In [11]:
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=GOOGLE_API_KEY)
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)

# Import PDF Loader & Recursive Character Text Splitter

In [12]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [13]:
# Define your PDF path (use raw string or forward slashes)
pdf_path = "/content/drive/MyDrive/RAG-Bot/React-for-developers.pdf"

In [14]:
# Load the PDF
loader = PyPDFLoader(pdf_path)
documents = loader.load()

In [15]:
print(f"Loaded {len(documents)} pages from the PDF")

Loaded 488 pages from the PDF


In [16]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,  # Adjust based on your needs
    chunk_overlap=150,  # Helps maintain context
    length_function=len,
    separators=["\n\n", "\n", "(?<=\.)", " ", ""],
)
docs = text_splitter.split_documents(documents)

# Add chunk number to metadata
for i, doc in enumerate(docs):
    doc.metadata["chunk"] = i

print(f"Split into {len(docs)} chunks")

Split into 946 chunks


# Saving documents to FAISS Vectore Store

In [17]:
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Initialize embeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)

# Create FAISS index
db = FAISS.from_documents(docs, embeddings)

# Save to Google Drive
save_path = "/content/drive/MyDrive/RAG-Bot/faiss_index"
db.save_local(save_path)

print(f"Vector store saved to {save_path}")

Vector store saved to /content/drive/MyDrive/RAG-Bot/faiss_index


# Create Retriever with MMR (Maximum Marginal Relevance) Algorithem

In [18]:
# Create retriever
base_retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 10, "lambda_mult": 0.5})  # Retrieve top 10 most similar chunks

# Import Contextual Compression Retriever


*   To reduce the size and noise of retrieved documents
*   To increase the reponse time with relevant information



In [19]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# Use Gemini to compress retrieved docs
compressor = LLMChainExtractor.from_llm(llm)

# Wrap the base retriever with compression
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=base_retriever
)

# Import RetrievalQA with PromptTemplate

In [20]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Initial question prompt
question_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a senior React developer helping junior developers understand advanced concepts from the book "Learning React Modern Patterns" by Eve Porcello & Alex Banks.

Use the following context to answer the question. Please:
- Respond in the tone of a React developer
- Use bullet points for clarity
- Keep explanations concise but insightful
- Cite the page number and source if available
- If you don’t know the answer, say so instead of making it up

Context:
{context}

Question:
{question}

Answer:
"""
)

# Refinement prompt
refine_prompt = PromptTemplate(
    input_variables=["context", "existing_answer", "question"],
    template="""
You are a senior React developer. Here's an existing answer to the question, but you have more context now.

Please refine the original answer if necessary, keeping it:
- Technical but clear
- In bullet points
- Cited (if possible)

Original Answer:
{existing_answer}

Additional Context:
{context}

Question:
{question}

Refined Answer:
"""
)

# Final QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",
    retriever=compression_retriever,
    chain_type_kwargs={
        "question_prompt": question_prompt,
        "refine_prompt": refine_prompt,
        "document_variable_name": "context"
    },
    return_source_documents=True,
    verbose=True,
    output_key="answer"
)


# Matlab Plotting Retreival Relevancy & Distraction

### All Function Definitions

In [21]:
import numpy as np
import umap
import plotly.express as px
import pandas as pd
from langchain.vectorstores import FAISS
from scipy.spatial.distance import cdist
import plotly.graph_objects as go
import os

def generate_3d_plot(query):
    # Get the query embedding
    query_embedding = embeddings.embed_documents([query])[0]

    # Get all vectors from FAISS
    all_vectors = [db.index.reconstruct(i) for i in range(db.index.ntotal)]
    all_vectors = np.array(all_vectors)

    # Get metadata for hover labels
    metadatas = [doc.metadata for doc in db.docstore._dict.values()]
    labels = [
    f"S: {os.path.splitext(os.path.basename(m.get('source', 'N/A')))[0]}, P: {m.get('page', 'N/A')}, C: {m.get('chunk', 'N/A')}"
    for m in metadatas
    ]

    # UMAP to 3D for all vectors
    reducer = umap.UMAP(n_components=3, random_state=42)
    embedding_3d = reducer.fit_transform(all_vectors)

    # DataFrame for all
    df = pd.DataFrame({
      "x": embedding_3d[:, 0],
      "y": embedding_3d[:, 1],
      "z": embedding_3d[:, 2],
      "label": labels
    })

    # Transform query vector to 3D
    query_3d = reducer.transform([query_embedding])
    distances = cdist(query_3d, embedding_3d).flatten()
    df['distance_to_query'] = distances
    df_sorted = df.sort_values(by="distance_to_query")

    # Get top K retrieved documents & re-embed
    top_docs = compression_retriever.get_relevant_documents(query)
    retrieved_texts = [doc.page_content for doc in top_docs]
    top_vectors = embeddings.embed_documents(retrieved_texts)
    top_vectors_3d = reducer.transform(top_vectors)

    # Plot base + retrieved + query
    fig = go.Figure()
    # Add all embeddings with smaller blue markers
    fig.add_trace(go.Scatter3d(
      x=df_sorted['x'],
      y=df_sorted['y'],
      z=df_sorted['z'],
      mode='markers',
      marker=dict(size=4, color='blue', opacity=0.3),
      text=df_sorted['label'],
      name='All Embeddings'
    ))

    # Add retrieved embeddings
    fig.add_scatter3d(
      x=top_vectors_3d[:, 0],
      y=top_vectors_3d[:, 1],
      z=top_vectors_3d[:, 2],
      mode='markers',
      marker=dict(size=4, color='green', symbol='circle'),
      name='Retrieved Docs'
    )

    # Add query point
    fig.add_scatter3d(
      x=query_3d[:, 0],
      y=query_3d[:, 1],
      z=query_3d[:, 2],
      mode='markers',
      marker=dict(size=4, color='red', symbol='circle'),
      name='Query Point'
    )

    # Optional: Add lines showing distances (dotted)
    for i in range(len(top_vectors_3d)):
      fig.add_trace(
          px.line_3d(
              x=[query_3d[0][0], top_vectors_3d[i][0]],
              y=[query_3d[0][1], top_vectors_3d[i][1]],
              z=[query_3d[0][2], top_vectors_3d[i][2]],
          ).update_traces(line=dict(dash='dash', color='gray', width=5), showlegend=False).data[0]
      )

    # Display
    fig.show()

    # Save or display
    fig.write_html("/content/test_3d_plot.html")


In [22]:
from sentence_transformers import CrossEncoder
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

def rerank_documents(original_query, retrieved_documents):
    """
    Rerank retrieved Document objects based on relevance using a cross-encoder model.

    Args:
        original_query (str): The original user query.
        retrieved_documents (list of Document): Retrieved documents.

    Returns:
        list: List of suggested queries including the original query.
    """
    # Flatten and deduplicate based on page_content
    seen = set()
    unique_docs = []
    for doc in retrieved_documents:
        if doc.page_content not in seen:
            seen.add(doc.page_content)
            unique_docs.append(doc)

    # Form query-document pairs for reranking
    pairs = [[original_query, doc.page_content] for doc in unique_docs]

    # Rerank using cross-encoder
    cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
    scores = cross_encoder.predict(pairs)
    reranked = sorted(zip(unique_docs, scores), key=lambda x: x[1], reverse=True)

    # Format references for prompt
    references = []
    for idx, (doc, _) in enumerate(reranked, start=1):
        meta = doc.metadata
        ref_id = f"[{idx}]"
        source_file = meta.get('source', 'document') or "Unknown"
        page_number = meta.get("page", "N/A")
        reference_line = f"{ref_id} -> Document: {source_file}, Page: {page_number}, Chunk: {meta.get('chunk', idx)}"
        references.append(reference_line)

    references_text = "\n".join(references)

    #print("New Ordering:")
    #for o in np.argsort(scores)[::-1]:
    #print(o+1)

    # Prompt for related questions
    prompt = f"""
    Given the following question:
    {original_query}

    Suggest up to five additional related questions to help find relevant information.
    Only suggest short, complete questions. Each question should focus on a different aspect of the topic.
    Return one question per line. Do not number them.

    Here are the references you should use:
    {references_text}
    """

    response = llm.invoke(prompt)
    reranked_queries = [original_query] + [response.content]

    return reranked_queries


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [23]:
def generate_3d_plot_with_queries(original_query, augmented_queries, embeddings, db):
    from scipy.spatial.distance import cdist
    import umap
    import pandas as pd
    import plotly.graph_objects as go
    import os
    from textwrap import fill as word_wrap

    # Get embeddings
    original_query_embedding = embeddings.embed_documents([original_query])[0]
    augmented_query_embeddings = embeddings.embed_documents(augmented_queries)
    all_vectors = [db.index.reconstruct(i) for i in range(db.index.ntotal)]
    all_vectors = np.array(all_vectors)

    # Metadata
    metadatas = [doc.metadata for doc in db.docstore._dict.values()]
    labels = [
        f"S: {os.path.splitext(os.path.basename(m.get('source', 'N/A')))[0]}, P: {m.get('page', 'N/A')}, C: {m.get('chunk', 'N/A')}"
        for m in metadatas
    ]

    # UMAP to 3D
    reducer = umap.UMAP(n_components=3, random_state=42)
    embedding_3d = reducer.fit_transform(all_vectors)

    # DataFrame for all points
    df = pd.DataFrame({
        "x": embedding_3d[:, 0],
        "y": embedding_3d[:, 1],
        "z": embedding_3d[:, 2],
        "label": labels
    })

    # Distance sorting
    original_query_3d = reducer.transform([original_query_embedding])
    distances_to_query = cdist(original_query_3d, embedding_3d).flatten()
    df['distance_to_query'] = distances_to_query
    df_sorted = df.sort_values(by="distance_to_query")

    # Transform augmented queries
    augmented_queries_3d = reducer.transform(augmented_query_embeddings)

    # Retrieved documents (you must pass or define `unique_documents`)
    retrieved_texts = list(unique_documents)
    retrieved_embeddings = embeddings.embed_documents(retrieved_texts)
    retrieved_3d = reducer.transform(retrieved_embeddings)

    # Start 3D Plot
    fig = go.Figure()

    # All embeddings
    fig.add_trace(go.Scatter3d(
        x=df_sorted["x"], y=df_sorted["y"], z=df_sorted["z"],
        mode='markers',
        marker=dict(size=4, color='blue', opacity=0.3),
        text=df_sorted["label"],
        name="All Embeddings"
    ))

    # Original query
    fig.add_trace(go.Scatter3d(
        x=original_query_3d[:, 0], y=original_query_3d[:, 1], z=original_query_3d[:, 2],
        mode='markers',
        marker=dict(size=8, color='red', symbol='circle'),
        name="Original Query"
    ))

    # Augmented queries
    fig.add_trace(go.Scatter3d(
        x=augmented_queries_3d[:, 0], y=augmented_queries_3d[:, 1], z=augmented_queries_3d[:, 2],
        mode='markers',
        marker=dict(size=8, color='green', symbol='cross'),
        name="Augmented Queries"
    ))

    # Retrieved documents
    fig.add_trace(go.Scatter3d(
        x=retrieved_3d[:, 0], y=retrieved_3d[:, 1], z=retrieved_3d[:, 2],
        mode='markers',
        marker=dict(size=8, color='purple', symbol='diamond'),
        text=[word_wrap(doc, width=80) for doc in retrieved_texts],
        name="Retrieved Docs"
    ))

    # Optional: lines from original to augmented
    for i in range(len(augmented_queries_3d)):
        fig.add_trace(go.Scatter3d(
            x=[original_query_3d[0][0], augmented_queries_3d[i][0]],
            y=[original_query_3d[0][1], augmented_queries_3d[i][1]],
            z=[original_query_3d[0][2], augmented_queries_3d[i][2]],
            mode="lines",
            line=dict(color="gray", width=2, dash="dash"),
            showlegend=False
        ))

    # Layout
    fig.update_layout(
        title="3D Embedding Visualization",
        scene=dict(
            xaxis_title="UMAP X",
            yaxis_title="UMAP Y",
            zaxis_title="UMAP Z"
        ),
        height=800
    )

    # Display
    fig.show()

    # Save to file
    fig.write_html("/content/augmented_queries_and_retrieved_docs_3d_plot.html")


In [24]:
from textwrap import fill as word_wrap
from typing import List, Set

def retrieve_and_print_documents(
    db,
    original_query: str,
    expanded_queries: List[str],
    k: int = 5,
    fetch_k: int = 10,
) -> tuple[list[list], set[str]]:
    """
    Perform retrieval and print results from original and expanded queries.

    Args:
        db: The vector database with a similarity search method.
        original_query (str): The main query.
        expanded_queries (List[str]): List of additional queries for expansion.
        k (int): Number of results to retrieve.
        fetch_k (int): Number of candidates to fetch internally.

    Returns:
        Tuple: (retrieved_documents, unique_documents)
    """
    retrieved_documents = []
    unique_documents: Set[str] = set()

    # Combine and deduplicate queries
    queries = list(set([original_query] + expanded_queries))

    for query in queries:
        docs = db.max_marginal_relevance_search(query, k=k, fetch_k=fetch_k)
        retrieved_documents.append(docs)

        for doc in docs:
            unique_documents.add(doc.page_content)

    # Print results
    for i, documents in enumerate(retrieved_documents):
        print(f"Query: {queries[i]}")
        print("\nResults:")
        for doc in documents:
            print(word_wrap(doc.page_content, width=80))
            print()
        print('-' * 100)

    return retrieved_documents, unique_documents


In [25]:
from functools import partial

# Generic respond function
def respond(message, chat_history, use_query_augmentation=False, use_refine_chain=False,  plot_func=None, is_reranked_queries=False):
    original_query = message

    # Optionally expand query
    expanded_queries = augment_multiple_query(original_query) if use_query_augmentation else None

    # Retrieve documents
    retrieved_docs = retrieve_documents(db, original_query, expanded_queries)
    #print(retrieved_docs)

    # Call the function
    reranked_queries = rerank_documents(original_query, retrieved_docs)

    if not retrieved_docs:
        response = "❌ Sorry, I couldn’t find any relevant information."
    else:
        if use_refine_chain:
            # Use refine chain
            response = qa_chain.combine_documents_chain.run(
                input_documents=retrieved_docs,
                question=original_query
            )
        else:
            # Basic chain
            result = qa_chain({"query": message})
            response = result["answer"]

        # Extract sources
        sources = []
        for doc in retrieved_docs:
            src = f"Page {doc.metadata.get('page', 'N/A')} from {doc.metadata.get('source', 'document')}"
            if src not in sources:
                sources.append(src)
        if sources:
            response += "\n\n📚 Sources:\n" + "\n".join(f"• {s}" for s in sources)

    # Optional visualization
    # Use the provided plotting function, or default to generate_3d_plot
    if plot_func is None:
        generate_3d_plot(original_query)
    elif expanded_queries != [] and not is_reranked_queries:
      plot_func(original_query, expanded_queries, embeddings, db)
    else:
      # Call the function
      plot_func(original_query, reranked_queries, embeddings, db)

    chat_history.append((message, response))
    return "", chat_history


In [26]:
def basic_respond(message, chat_history):
    original_query = message

    # Use the basic QA chain
    result = qa_chain({"query": message})
    response = result["answer"]
    #print(result);

    # Include sources with page numbers if available
    if 'source_documents' in result:
        sources = []
        for doc in result['source_documents']:
            source = f"Page {doc.metadata.get('page', 'N/A')} from {doc.metadata.get('source', 'document')}"
            if source not in sources:
                sources.append(source)

        # Format sources as a bullet-pointed list
        sources_list = "\n".join([f"• {source}" for source in sources])
        response += f"\n\n📚 Sources:\n{sources_list}"

    # Always call the default plot for basic
    generate_3d_plot(original_query)

    chat_history.append((message, response))
    return "", chat_history


In [27]:
def retrieve_documents(db, original_query, expanded_queries=None):
    from textwrap import fill as word_wrap

    retrieved_documents = []
    unique_documents = {}

    if expanded_queries is None:
        expanded_queries = []
    queries = list(set([original_query] + expanded_queries))

    for query in queries:
        docs = db.max_marginal_relevance_search(query, k=5, fetch_k=10)
        for doc in docs:
            unique_documents[doc.page_content] = doc  # deduplicate by content

    return list(unique_documents.values())


# Import Gradio for UI - MMR Technique

In [28]:
import gradio as gr
from functools import partial

# Preconfigured handlers (choose one below)
#basic_respond = partial(respond, use_query_augmentation=False, use_refine_chain=False, plot_func=None, is_reranked_queries=False)

# Custom CSS for better appearance
css = """
.title {
    text-align: center;
    font-weight: bold;
    font-size: 24px;
    margin-bottom: 20px;
}
.description {
    text-align: center;
    margin-bottom: 20px;
}
"""

with gr.Blocks(css=css) as demo:
    # Title and Description
    gr.Markdown("""
    <div class="title">React Patterns RAG Assistant</div>
    <div class="description">
    Ask questions about "Learning React Modern Patterns" by Eve Porcello & Alex Banks
    </div>
    """)

    # Chat Interface
    chatbot = gr.Chatbot(
        height=500,
        bubble_full_width=False,
        show_copy_button=True
    )

    # Input and Controls
    with gr.Row():
        msg = gr.Textbox(
            label="Your question",
            placeholder="Ask about React patterns...",
            scale=4
        )
        clear = gr.Button("🧹 Clear Memory", scale=1)

    # 3D Plot Image
    plot_image = gr.Image("/content/query_3d_plot.png", label="3D Plot of Query Relevancy")

    # Interaction Handlers — choose ONE of the following:
    #msg.submit(refine_respond, [msg, chatbot], [msg, chatbot])   # <- Block 3 behavior
    # msg.submit(augmented_respond, [msg, chatbot], [msg, chatbot])  # <- Block 2 behavior
    msg.submit(basic_respond, [msg, chatbot], [msg, chatbot])    # <- Block 1 behavior

    clear.click(lambda: None, None, chatbot, queue=False)

# Launch
demo.launch(
    share=True,
    debug=True
)


  chatbot = gr.Chatbot(
  chatbot = gr.Chatbot(


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://5b388045dd2fd7554a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


  result = qa_chain({"query": message})




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


  warn(
  top_docs = compression_retriever.get_relevant_documents(query)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://5b388045dd2fd7554a.gradio.live




# Expansion with Multiple Queries

In [30]:
# Initial question prompt
question_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a senior React developer helping junior developers understand advanced concepts from the book "Learning React Modern Patterns" by Eve Porcello & Alex Banks.

Use the following context to answer the question. Please:
- Respond in the tone of a React developer
- Use bullet points for clarity
- Keep explanations concise but insightful
- Cite the page number and source if available
- If you don’t know the answer, say so instead of making it up

Context:
{context}

Question:
{question}

Answer:
"""
)

In [31]:
def augment_multiple_query(query):
    prompt = (
        "Given the following question:\n"
        f"{query}\n\n"
        "Suggest up to five additional related questions to help find relevant information. "
        "Only suggest short, complete questions (no compound sentences). "
        "Each question should focus on a different aspect of the topic. "
        "Return one question per line. Do not number them."
    )

    try:
        response = llm.invoke(prompt)  # This returns an AIMessage object
        print(response)  # Shows full object for debugging

        # Extract the actual text content from the AIMessage
        raw_text = response.content

        # Split and clean the lines
        content = raw_text.strip().split("\n")
        expanded_questions = [query] + [q.strip() for q in content if q.strip()]

        return expanded_questions
    except Exception as e:
        print(f"Error during query generation: {e}")
        return []


In [32]:
# For Testing Purpose
original_query = "What are closures?"
augmented_queries = augment_multiple_query(original_query)
for q in augmented_queries:
    print("-", q)

queries = [original_query] + augmented_queries
print(queries)

content='How do closures work?\nWhat is closure scope?\nWhat are common uses for closures?\nWhat languages support closures?\nAre closures the same as lambda functions?' additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []} id='run-4cfee1ba-492f-470e-9401-f14478f2d1c4-0' usage_metadata={'input_tokens': 58, 'output_tokens': 35, 'total_tokens': 93, 'input_token_details': {'cache_read': 0}}
- What are closures?
- How do closures work?
- What is closure scope?
- What are common uses for closures?
- What languages support closures?
- Are closures the same as lambda functions?
['What are closures?', 'What are closures?', 'How do closures work?', 'What is closure scope?', 'What are common uses for closures?', 'What languages support closures?', 'Are closures the same as lambda functions?']


In [33]:
retrieved_documents, unique_documents = retrieve_and_print_documents(
    db=db,
    original_query=original_query,
    expanded_queries=augmented_queries
)

Query: How do closures work?

Results:
not collide with other requests. The solution is to define  loadStatus using a
closure: const   loadStatus   =   ( function ()   {    let   error ,   promise ,
response ;    return   function ()   {      if   ( error )   throw   error ;
if   ( response )   return   response ;      throw   promise ;    }; })(); This
is a closure. The scope of the  error ,  promise , and  response  are closed off
from any code outside of the function where they’re defined. When we declare
loadStatus , an anonymous function is declared and

Breeding lasts from around November to January. Males go through several bodily
changes in preparation of mating, including the development of a subcutaneous
armor that helps during confrontations with rivals; they travel long distances,
eating very little on the way, to locate a sow. Average litters contain four to
six piglets.

Hooks:  useInput  and  useColors . There’s more where that came from, though.
React comes with more Ho

In [34]:
# Get embedding for the original query (list of one string)
original_query_embedding = embeddings.embed_documents([original_query])[0]

# Get embeddings for augmented queries (list of strings)
augmented_query_embeddings = embeddings.embed_documents(augmented_queries)


# Gradio UI - Augmented Response

In [35]:
import gradio as gr
from functools import partial

# Preconfigured handlers (choose one below)
augmented_respond = partial(respond, use_query_augmentation=True, use_refine_chain=False, plot_func=generate_3d_plot_with_queries, is_reranked_queries=False)


# Custom CSS for better appearance
css = """
.title {
    text-align: center;
    font-weight: bold;
    font-size: 24px;
    margin-bottom: 20px;
}
.description {
    text-align: center;
    margin-bottom: 20px;
}
"""

with gr.Blocks(css=css) as demo:
    # Title and Description
    gr.Markdown("""
    <div class="title">React Patterns RAG Assistant</div>
    <div class="description">
    Ask questions about "Learning React Modern Patterns" by Eve Porcello & Alex Banks
    </div>
    """)

    # Chat Interface
    chatbot = gr.Chatbot(
        height=500,
        bubble_full_width=False,
        show_copy_button=True
    )

    # Input and Controls
    with gr.Row():
        msg = gr.Textbox(
            label="Your question",
            placeholder="Ask about React patterns...",
            scale=4
        )
        clear = gr.Button("🧹 Clear Memory", scale=1)

    # 3D Plot Image
    plot_image = gr.Image("/content/query_3d_plot.png", label="3D Plot of Query Relevancy")

    # Interaction Handlers — choose ONE of the following:
    #msg.submit(refine_respond, [msg, chatbot], [msg, chatbot])   # <- Block 3 behavior
    msg.submit(augmented_respond, [msg, chatbot], [msg, chatbot])  # <- Block 2 behavior
    # msg.submit(basic_respond, [msg, chatbot], [msg, chatbot])      # <- Block 1 behavior

    clear.click(lambda: None, None, chatbot, queue=False)

# Launch
demo.launch(
    share=True,
    debug=True
)



You have not specified a value for the `type` parameter. Defaulting to the 'tuples' format for chatbot messages, but this is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style dictionaries with 'role' and 'content' keys.


The 'bubble_full_width' parameter is deprecated and will be removed in a future version. This parameter no longer has any effect.



Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://da6cdca58439fddf88.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


content='How do closures work?\nWhat are closures used for?\nWhat languages support closures?\nWhat is a lexical environment?\nHow do closures relate to scope?' additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []} id='run-69f8d686-6b44-41b2-ab28-b0bbb72f2748-0' usage_metadata={'input_tokens': 58, 'output_tokens': 34, 'total_tokens': 92, 'input_token_details': {'cache_read': 0}}


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m



'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://da6cdca58439fddf88.gradio.live




# Cross Encoder Re-Ranking

# Re-Ranking with Query Expansion

In [None]:
# Get embedding for the original query (list of one string)
#original_query_embedding = embeddings.embed_documents([original_query])[0]

# Get embeddings for reranked queries (list of strings)
#reranked_query_embeddings = embeddings.embed_documents(reranked_queries)


# Gradio UI - Refine-Reranked Response

In [None]:
import gradio as gr
from functools import partial

# Preconfigured handlers (choose one below)
refine_respond = partial(respond, use_query_augmentation=True, use_refine_chain=True,  plot_func=generate_3d_plot_with_queries, is_reranked_queries=True)

# Custom CSS for better appearance
css = """
.title {
    text-align: center;
    font-weight: bold;
    font-size: 24px;
    margin-bottom: 20px;
}
.description {
    text-align: center;
    margin-bottom: 20px;
}
"""

with gr.Blocks(css=css) as demo:
    # Title and Description
    gr.Markdown("""
    <div class="title">React Patterns RAG Assistant</div>
    <div class="description">
    Ask questions about "Learning React Modern Patterns" by Eve Porcello & Alex Banks
    </div>
    """)

    # Chat Interface
    chatbot = gr.Chatbot(
        height=500,
        bubble_full_width=False,
        show_copy_button=True
    )

    # Input and Controls
    with gr.Row():
        msg = gr.Textbox(
            label="Your question",
            placeholder="Ask about React patterns...",
            scale=4
        )
        clear = gr.Button("🧹 Clear Memory", scale=1)

    # 3D Plot Image
    plot_image = gr.Image("/content/query_3d_plot.png", label="3D Plot of Query Relevancy")

    # Interaction Handlers — choose ONE of the following:
    msg.submit(refine_respond, [msg, chatbot], [msg, chatbot])   # <- Block 3 behavior
    # msg.submit(augmented_respond, [msg, chatbot], [msg, chatbot])  # <- Block 2 behavior
    # msg.submit(basic_respond, [msg, chatbot], [msg, chatbot])      # <- Block 1 behavior

    clear.click(lambda: None, None, chatbot, queue=False)

# Launch
demo.launch(
    share=True,
    debug=True
)



You have not specified a value for the `type` parameter. Defaulting to the 'tuples' format for chatbot messages, but this is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style dictionaries with 'role' and 'content' keys.


The 'bubble_full_width' parameter is deprecated and will be removed in a future version. This parameter no longer has any effect.



Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://3ee389a083b1244200.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


content='How do closures work?\nWhat are common uses for closures?\nWhat languages support closures?\nHow do closures affect memory management?\nWhat are the advantages of using closures?' additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []} id='run-05c2625f-8b00-44ae-b1a0-7ef80ddb1057-0' usage_metadata={'input_tokens': 58, 'output_tokens': 37, 'total_tokens': 95, 'input_token_details': {'cache_read': 0}}



The method `Chain.run` was deprecated in langchain 0.1.0 and will be removed in 1.0. Use :meth:`~invoke` instead.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.

