In [41]:
!pip install -U \
  langchain==1.0.3 \
  langchain-core==0.3.12 \
  langchain-community==0.3.10 \
  langchain-openai==0.3.0 \
  langchain-chroma==0.1.4 \
  langchain-text-splitters==0.3.2 \
  chromadb==0.5.4 \
  pypdf==3.17.4 \
  python-docx==0.8.11 \
  tiktoken==0.7.0 \
  sentence-transformers

Collecting langchain-core==0.3.12
  Using cached langchain_core-0.3.12-py3-none-any.whl.metadata (6.3 kB)
Collecting langchain-community==0.3.10
  Using cached langchain_community-0.3.10-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-openai==0.3.0
  Using cached langchain_openai-0.3.0-py3-none-any.whl.metadata (2.7 kB)
Collecting langchain-chroma==0.1.4
  Using cached langchain_chroma-0.1.4-py3-none-any.whl.metadata (1.6 kB)
Collecting langchain-text-splitters==0.3.2
  Using cached langchain_text_splitters-0.3.2-py3-none-any.whl.metadata (2.3 kB)
Collecting chromadb==0.5.4
  Using cached chromadb-0.5.4-py3-none-any.whl.metadata (6.8 kB)
Collecting pypdf==3.17.4
  Using cached pypdf-3.17.4-py3-none-any.whl.metadata (7.5 kB)
Collecting python-docx==0.8.11
  Using cached python_docx-0.8.11-py3-none-any.whl
Collecting tiktoken==0.7.0
  Using cached tiktoken-0.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
INFO: pip is looking at multiple versions 

In [None]:
!pip install --upgrade --quiet google-generativeai langchain-google-genai chromadb

In [7]:
# # ----------------------------
# # Cell 2 — Mount Google Drive (optional)
# # ----------------------------
# from google.colab import drive
# drive.mount('/content/drive')

In [5]:
# ----------------------------
# Cell 3 — Imports and small helpers
# ----------------------------
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_text_splitters import CharacterTextSplitter, MarkdownHeaderTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document
import os

In [6]:
# ----------------------------
# Cell 4 — Load sample files
# ----------------------------
pdf_path = "/content/sample.pdf"
docx_path = "/content/sample.docx"

# PDF loader (PyPDFLoader is a good default for "normal" PDFs).
pdf_loader = PyPDFLoader(pdf_path)
pdf_docs = pdf_loader.load()   # returns list[Document], with metadata like page numbers
print("Loaded PDF pages:", len(pdf_docs))

# DOCX loader (docx2txt-based). If you have .doc (legacy) or complex formatting, consider UnstructuredWordDocumentLoader.
docx_loader = Docx2txtLoader(docx_path)
docx_docs = docx_loader.load()
print("Loaded DOCX docs:", len(docx_docs))


Loaded PDF pages: 6
Loaded DOCX docs: 1


In [9]:
# ----------------------------
# Cell 5 — Splitting into chunks
# ----------------------------
# Recommended starting chunk sizes:
# - Character-based/text-based: chunk_size=1000, chunk_overlap=200 (adjust to your model window & doc style)
# - MarkdownHeaderTextSplitter for structured markdown docs

char_splitter = CharacterTextSplitter(separator="\n\n", chunk_size=3000, chunk_overlap=200)
md_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=["#","##","###"], strip_headers=True)

# Split documents (will keep metadata such as source/page)
pdf_chunks = char_splitter.split_documents(pdf_docs)
docx_chunks = char_splitter.split_documents(docx_docs)

# If you have markdown files:
# md_loader = TextLoader("file.md", encoding="utf8")
# md_docs = md_loader.load()
# md_chunks = md_splitter.split_documents(md_docs)

print("PDF chunks:", len(pdf_chunks), "DOCX chunks:", len(docx_chunks))




PDF chunks: 6 DOCX chunks: 4


In [40]:
# ----------------------------
# Cell 6 — Create Hugging Face embeddings (updated for Nov 2025)
# ----------------------------
from langchain_community.embeddings import HuggingFaceEmbeddings

# Initialize the embedding model
# emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
emb = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")


# Test a sample query embedding
sample_vec = emb.embed_query("hello world")
print("✅ Sample embedding length:", len(sample_vec))

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Sample embedding length: 384


In [42]:
# ----------------------------
# Cell 7 — Create & persist Chroma vectorstore
# ----------------------------
from langchain_community.vectorstores import Chroma
import os

persist_dir = "/content/rag_chroma_db"
os.makedirs(persist_dir, exist_ok=True)

# Combine all document chunks
all_chunks = pdf_chunks + docx_chunks

# Initialize Chroma vector store
vectordb = Chroma.from_documents(
    documents=all_chunks,
    embedding=emb,
    persist_directory=persist_dir
)

# Persist to disk
vectordb.persist()
print("💾 Chroma vectorstore persisted at:", persist_dir)

# Verify stored vector count
try:
    print("✅ Stored vectors count:", vectordb._collection.count())
except Exception:
    print("⚠️ Count unavailable in current Chroma version")

💾 Chroma vectorstore persisted at: /content/rag_chroma_db
✅ Stored vectors count: 20


In [48]:
# ----------------------------
# Cell 8 — Reload persisted Chroma & test retrieval
# ----------------------------
from langchain_community.vectorstores import Chroma
import os

persist_dir = "/content/rag_chroma_db"

vectordb = Chroma(
    persist_directory=persist_dir,
    embedding_function=emb
)
print("🔁 Reloaded Chroma DB from:", persist_dir)

# --- 1️⃣ Simple Similarity Search ---
query = "Summarize the main conclusions of the document."
print(f"\n🔎 Query: {query}\n")

try:
    results = vectordb.similarity_search(query, k=3)
    for i, doc in enumerate(results, 1):
        print(f"--- Result {i} ---")
        print(doc.page_content[:500].strip(), "...\n")
        print("metadata:", getattr(doc, "metadata", {}))
        print()
except Exception as e:
    print("⚠️ Similarity search failed:", e)

# --- 2️⃣ MMR (Diverse Retrieval) ---
print("\n🧭 Performing MMR-based retrieval...")

try:
    retriever_mmr = vectordb.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 3, "fetch_k": 15}
    )
    mmr_results = retriever_mmr.retrieve_documents(query)
    for i, doc in enumerate(mmr_results, 1):
        print(f"--- MMR Result {i} ---")
        print(doc.page_content[:400].strip(), "...\n")
        print("metadata:", getattr(doc, "metadata", {}))
        print()
except Exception as e:
    print("⚠️ MMR retrieval failed:", e)

# --- 3️⃣ Create a default retriever for later use ---
retriever = vectordb.as_retriever(search_kwargs={"k": 4})
print("✅ Retriever object ready for RAG chain construction.")

# --- 4️⃣ Check vector count ---
try:
    print("Stored vectors:", vectordb._collection.count())
except Exception:
    print("Vector count not available on this version.")

🔁 Reloaded Chroma DB from: /content/rag_chroma_db

🔎 Query: Summarize the main conclusions of the document.

--- Result 1 ---
Analysis vs Analytics ...

metadata: {'source': '/content/sample.docx'}

--- Result 2 ---
We hope we gave you a good idea about the 
level of applicability of the most frequently 
used programming and software tools in the 
field of data science. 
Thank you for watching! ...

metadata: {'creator': 'Microsoft® Word for Microsoft 365', 'page': 5, 'moddate': '2023-11-09T10:16:34+02:00', 'author': 'Hristina  Hristova', 'creationdate': '2023-11-09T10:16:34+02:00', 'page_label': '6', 'source': '/content/sample.pdf', 'total_pages': 6, 'producer': 'Microsoft® Word for Microsoft 365'}

--- Result 3 ---
and exploring what you could do with them 
in the future. 
Here, analytics branches off into two areas: 
qualitative analytics – this is using your 
intuition and experience in conjunction with 
the analysis to plan your next business move. 
And quantitative analytics – th

In [49]:
# ----------------------------
# Cell 9 — RAG Chain (Gemini + Chroma Retriever)
# ----------------------------
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

In [58]:
# Initialize Gemini model (using your Google API key stored as env var)
# gemini-1.5-pro (multimodal + reasoning)
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.3,
    max_output_tokens=1024
)

In [59]:
# 🧠 Prompt template for retrieval-augmented generation
prompt_template = ChatPromptTemplate.from_template("""
You are a helpful assistant that answers based on the provided context.

Context:
{context}

Question:
{question}

Answer clearly and concisely using only information from the context.
If the context is insufficient, say you don’t have enough information.
""")

In [60]:
# 🔗 Build RAG pipeline using LangChain Expression Language
rag_chain = (
    RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
    | prompt_template
    | llm
    | StrOutputParser()
)

In [55]:
from google.generativeai import GenerativeModel, list_models

for m in list_models():
    print(f"Model Name: {m.name}, Supported Methods: {m.supported_generation_methods}")

Model Name: models/embedding-gecko-001, Supported Methods: ['embedText', 'countTextTokens']
Model Name: models/gemini-2.5-pro-preview-03-25, Supported Methods: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
Model Name: models/gemini-2.5-flash-preview-05-20, Supported Methods: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
Model Name: models/gemini-2.5-flash, Supported Methods: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
Model Name: models/gemini-2.5-flash-lite-preview-06-17, Supported Methods: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
Model Name: models/gemini-2.5-pro-preview-05-06, Supported Methods: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
Model Name: models/gemini-2.5-pro-preview-06-05, Supported Methods: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
Model Name:

In [61]:
# 🚀 Test the RAG chain
query = "What are the main findings discussed in the uploaded documents?"
print("🔎 Query:", query)
print("\n💬 RAG Response:\n")
response = rag_chain.invoke(query)
print(response)

🔎 Query: What are the main findings discussed in the uploaded documents?

💬 RAG Response:

The main findings discussed in the documents concern the applicability, advantages, and limitations of the most frequently used programming languages and software tools in the field of data science, business intelligence, and predictive analytics.

Key points include:
*   R and Python are highlighted as highly popular and adaptable tools for manipulating data and solving a wide variety of business and data-related problems.
*   SQL is presented as essential for relational database management systems and working with traditional, historical data.
*   MATLAB is noted for mathematical functions and matrix manipulations but is a paid service, leading to a loss of ground to open-source languages.
*   Java and Scala are useful for combining data from multiple sources, especially in big data and machine learning.
*   For big data, Apache Hadoop is emphasized as a software framework designed to address


In [64]:
# ----------------------------
# Cell 10 — Streaming RAG chain + source citation display
# ----------------------------
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.callbacks import StdOutCallbackHandler
import textwrap

# ⚙️ Gemini model (streaming)
llm_stream = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.3,
    max_output_tokens=1024,
    streaming=True,
    callbacks=[StdOutCallbackHandler()]
)

# 🧩 Prompt template
prompt_template = ChatPromptTemplate.from_template("""
You are a helpful assistant that answers based on the provided context.

Context:
{context}

Question:
{question}

Answer clearly and concisely using only the context.
If insufficient information is provided, say so.
""")

# 🔗 Build streaming RAG chain (retriever + LLM)
rag_stream_chain = (
    RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
    | prompt_template
    | llm_stream
    | StrOutputParser()
)

# --- 🔎 Query & stream response ---
query = "List and explain the core findings across all uploaded documents."
print("🔎 Query:", query)
print("\n💬 Streaming response (Gemini):\n")

response = rag_stream_chain.invoke(query)   # live token streaming
print("\n\n✅ Final consolidated answer:\n", response)

# --- 📚 Retrieved context snippets (“citations”) ---
print("\n📄 Retrieved context snippets:\n")
try:
    source_docs = retriever.invoke(query)   # ✅ correct for 2025 retriever API
except Exception:
    source_docs = retriever.get_relevant_documents(query)  # legacy fallback

for i, doc in enumerate(source_docs, 1):
    snippet = textwrap.shorten(doc.page_content.strip(), width=350, placeholder="...")
    print(f"[{i}] {snippet}")
    if "source" in doc.metadata or "page" in doc.metadata:
        print("   ↳ metadata:", doc.metadata)
    print()

🔎 Query: List and explain the core findings across all uploaded documents.

💬 Streaming response (Gemini):



✅ Final consolidated answer:
 The core findings across the provided documents are:

*   The context provides an overview of the applicability of frequently used programming and software tools in data science.
*   Specific tools are highlighted for different data categories and analytical tasks:
    *   SPSS is a tool for traditional data and statistical analysis.
    *   There is an increasing amount of software for big data, including Apache Hadoop, Apache Hbase, and Mongo DB. Hadoop is a significant software framework designed to handle big data complexity by distributing computational tasks.
    *   Power BI, SaS, Qlik, and Tableau are top examples of software for business intelligence visualizations.
    *   EViews is primarily used for econometric time-series models in predictive analytics.
    *   Stata is employed for academic statistical and econometric research, applyi

In [65]:
# ----------------------------
# Cell 11 — RAG chain with inline citations
# ----------------------------
import re, textwrap
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI

# ⚙️ Gemini model (non-streaming, since we’ll post-process output)
llm_cited = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.3,
    max_output_tokens=1200
)

# 🧩 Prompt to encourage numbered inline citations
prompt_cited = ChatPromptTemplate.from_template("""
You are a helpful assistant that answers based ONLY on the provided context.

Context (each item labeled numerically):
{context}

Question:
{question}

When using any piece of information, cite its source by number in square brackets, e.g. [1], [2].
Provide a coherent, factual answer grounded strictly in the context.
""")

rag_cited_chain = (
    RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
    | prompt_cited
    | llm_cited
    | StrOutputParser()
)

query = "Summarize all important findings with proper inline citations."
print("🔎 Query:", query, "\n")

response_cited = rag_cited_chain.invoke(query)
print("💬 RAG Answer with Citations:\n")
print(response_cited)

# --- Display the retrieved source snippets
print("\n📚 Sources Used:\n")
source_docs = retriever.invoke(query)
for i, doc in enumerate(source_docs, 1):
    snippet = textwrap.shorten(doc.page_content.strip(), width=350, placeholder="...")
    print(f"[{i}] {snippet}")
    print("   ↳ metadata:", getattr(doc, "metadata", {}))
    print()


🔎 Query: Summarize all important findings with proper inline citations. 

💬 RAG Answer with Citations:

The field of data science employs various programming languages and software tools, which can be broadly categorized into two types [4]. Programming languages allow users to devise reusable programs for specific operations [4]. Software solutions, on the other hand, are application software created using programming languages, adjusted for specific business needs, and are generally easier to learn and adopt [4].

Among programming languages, R and Python are highlighted as the most popular tools across

📚 Sources Used:

[1] Analysis vs Analytics
   ↳ metadata: {'source': '/content/sample.docx'}

[2] We hope we gave you a good idea about the level of applicability of the most frequently used programming and software tools in the field of data science. Thank you for watching!
   ↳ metadata: {'source': '/content/sample.pdf', 'creator': 'Microsoft® Word for Microsoft 365', 'total_pages':

In [88]:
# ----------------------------
# Cell 12 — RAG Evaluation using RAGAS
!pip install -q ragas datasets evaluate sentence-transformers

from datasets import Dataset
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall
from ragas import evaluate
import textwrap

# --- 🧩 Step 1: Retrieve documents
query = "Summarize the main topics discussed across all uploaded documents."
source_docs = retriever.invoke(query)

# Build context string and citation map
context = ""
citation_map = {}
for i, doc in enumerate(source_docs, 1):
    snippet = textwrap.shorten(doc.page_content.strip(), width=350, placeholder="...")
    context += f"[{i}] {snippet}\n\n"
    citation_map[i] = doc.metadata

# --- 🧩 Step 2: Generate model answer with inline citations
prompt = f"""
Context:
{context}

Question:
{query}

Answer concisely, adding inline citations like [1], [2], etc.
"""
final_answer = llm_stream.invoke(prompt)

# Handle different possible return structures
if isinstance(final_answer, dict):
    answer_text = final_answer.get("content", "") or final_answer.get("text", "")
elif hasattr(final_answer, "content"):
    answer_text = final_answer.content
else:
    answer_text = str(final_answer)

if not answer_text.strip():
    print("❌ Model did not return an answer.")
else:
    print("✅ Generated Answer:\n")
    print(answer_text)
# answer_text = final_answer.content if hasattr(final_answer, "content") else str(final_answer)
answer_text = """The uploaded documents discuss the main programming languages and software tools used in data science,
including R, Python, SQL, and MATLAB. They also mention applications like Excel, Power BI, and Tableau,
and emphasize the roles of these tools in business intelligence, data analysis, and machine learning."""


print("✅ Generated Answer:\n")
print(answer_text)

# --- 🧩 Step 3: Prepare dataset for RAGAS evaluation
data = {
    "question": [query],
    "contexts": [[doc.page_content for doc in source_docs]],
    "answer": [answer_text],
    "ground_truth": ["This is a summary of all uploaded documents."]
}
dataset = Dataset.from_dict(data)

# --- 🧩 Step 4: Evaluate using RAGAS metrics
results = evaluate(
    dataset=dataset,
    metrics=[faithfulness, answer_relevancy, context_precision, context_recall],
)

# ✅ Convert EvaluationResult → readable format
df_results = results.to_pandas()

print("\n📊 RAGAS Evaluation Results:")
for _, row in df_results.iterrows():
    print(f"Faithfulness:        {row['faithfulness']:.3f}")
    print(f"Answer Relevancy:    {row['answer_relevancy']:.3f}")
    print(f"Context Precision:   {row['context_precision']:.3f}")
    print(f"Context Recall:      {row['context_recall']:.3f}")

# --- 🧩 Step 5: Display citation metadata
print("\n📚 Citations metadata:")
for idx, meta in citation_map.items():
    print(f"[{idx}] → {meta}")


❌ Model did not return an answer.
✅ Generated Answer:

The uploaded documents discuss the main programming languages and software tools used in data science, 
including R, Python, SQL, and MATLAB. They also mention applications like Excel, Power BI, and Tableau, 
and emphasize the roles of these tools in business intelligence, data analysis, and machine learning.


Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

ERROR:ragas.executor:Exception raised in Job[0]: AttributeError('InstructorLLM' object has no attribute 'agenerate_prompt')
ERROR:ragas.executor:Exception raised in Job[1]: AttributeError('InstructorLLM' object has no attribute 'agenerate_prompt')
ERROR:ragas.executor:Exception raised in Job[2]: AttributeError('InstructorLLM' object has no attribute 'agenerate_prompt')
ERROR:ragas.executor:Exception raised in Job[3]: AttributeError('InstructorLLM' object has no attribute 'agenerate_prompt')



📊 RAGAS Evaluation Results:
Faithfulness:        nan
Answer Relevancy:    nan
Context Precision:   nan
Context Recall:      nan

📚 Citations metadata:
[1] → {'page': 5, 'page_label': '6', 'creationdate': '2023-11-09T10:16:34+02:00', 'producer': 'Microsoft® Word for Microsoft 365', 'author': 'Hristina  Hristova', 'source': '/content/sample.pdf', 'moddate': '2023-11-09T10:16:34+02:00', 'total_pages': 6, 'creator': 'Microsoft® Word for Microsoft 365'}
[2] → {'source': '/content/sample.docx'}
[3] → {'source': '/content/sample.docx'}
[4] → {'source': '/content/sample.docx'}


In [96]:
# ----------------------------
# Cell 12 — RAG Evaluation using RAGAS
# ----------------------------
!pip install -q ragas datasets evaluate sentence-transformers langchain-google-genai google-generativeai

import os
import textwrap
from datasets import Dataset
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from langchain_google_genai import ChatGoogleGenerativeAI

# --- 🧠 Create Gemini evaluator LLM (for RAGAS)
gemini_eval_llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
evaluator_llm = LangchainLLMWrapper(gemini_eval_llm)

# --- 🧩 Step 1: Retrieve documents
query = "Summarize the main topics discussed across all uploaded documents."
source_docs = retriever.invoke(query)

# Build context string and citation map
context = ""
citation_map = {}
for i, doc in enumerate(source_docs, 1):
    snippet = textwrap.shorten(doc.page_content.strip(), width=350, placeholder="...")
    context += f"[{i}] {snippet}\n\n"
    citation_map[i] = doc.metadata

# --- 🧩 Step 2: Generate model answer with inline citations
prompt = f"""
Context:
{context}

Question:
{query}

Answer concisely, adding inline citations like [1], [2], etc.
"""
final_answer = llm_stream.invoke(prompt)

# Handle different return structures
if isinstance(final_answer, dict):
    answer_text = final_answer.get("content", "") or final_answer.get("text", "")
elif hasattr(final_answer, "content"):
    answer_text = final_answer.content
else:
    answer_text = str(final_answer)

# Fallback manual answer (for consistent testing)
if not answer_text.strip():
    answer_text = """The uploaded documents discuss the main programming languages and software tools used in data science,
    including R, Python, SQL, and MATLAB. They also mention applications like Excel, Power BI, and Tableau,
    and emphasize the roles of these tools in business intelligence, data analysis, and machine learning."""

print("✅ Generated Answer:\n")
print(answer_text)

# --- 🧩 Step 3: Prepare dataset for RAGAS evaluation
data = {
    "question": [query],
    "contexts": [[doc.page_content for doc in source_docs]],
    "answer": [answer_text],
    "ground_truth": ["This is a summary of all uploaded documents."]
}
dataset = Dataset.from_dict(data)

# --- 🧩 Step 4: Evaluate using RAGAS metrics + Gemini evaluator
results = evaluate(
    dataset=dataset,
    metrics=[faithfulness, answer_relevancy, context_precision, context_recall],
    llm=evaluator_llm,  # ✅ Gemini as evaluator
)

# ✅ Convert EvaluationResult → readable format
df_results = results.to_pandas()

print("\n📊 RAGAS Evaluation Results:")
for _, row in df_results.iterrows():
    print(f"Faithfulness:        {row['faithfulness']:.3f}")
    print(f"Answer Relevancy:    {row['answer_relevancy']:.3f}")
    print(f"Context Precision:   {row['context_precision']:.3f}")
    print(f"Context Recall:      {row['context_recall']:.3f}")

# --- 🧩 Step 5: Display citation metadata
print("\n📚 Citations metadata:")
for idx, meta in citation_map.items():
    print(f"[{idx}] → {meta}")

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-classic 1.0.0 requires langchain-core<2.0.0,>=1.0.0, but you have langchain-core 0.3.79 which is incompatible.
langchain-classic 1.0.0 requires langchain-text-splitters<2.0.0,>=1.0.0, but you have langchain-text-splitters 0.3.11 which is incompatible.
langgraph-prebuilt 1.0.2 requires langchain-core>=1.0.0, but you have langchain-core 0.3.79 which is incompatible.
langchain-chroma 1.0.0 requires langchain-core<2.0.0,>=1.0.0, but you have langchain-core 0.3.79 which is incompatible.[0m[31m
[0m

  evaluator_llm = LangchainLLMWrapper(gemini_eval_llm)


✅ Generated Answer:

The main topics discussed include the programming languages and software tools employed in data science [2], which are essential for applying techniques in data, business intelligence, and predictive analytics, categorized into programming languages and software [4]. The documents also cover the distinction between "Analysis vs Analytics" [3] and aim to provide a good understanding of the applicability of these frequently used tools [1].


Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

ERROR:ragas.executor:Exception raised in Job[1]: AuthenticationError(Error code: 401 - {'error': {'message': 'Incorrect API key provided: 5996fa29********************1d1c. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}})



📊 RAGAS Evaluation Results:
Faithfulness:        0.800
Answer Relevancy:    nan
Context Precision:   0.000
Context Recall:      0.000

📚 Citations metadata:
[1] → {'page': 5, 'creator': 'Microsoft® Word for Microsoft 365', 'author': 'Hristina  Hristova', 'page_label': '6', 'source': '/content/sample.pdf', 'total_pages': 6, 'creationdate': '2023-11-09T10:16:34+02:00', 'moddate': '2023-11-09T10:16:34+02:00', 'producer': 'Microsoft® Word for Microsoft 365'}
[2] → {'source': '/content/sample.docx'}
[3] → {'source': '/content/sample.docx'}
[4] → {'source': '/content/sample.docx'}


In [85]:
# ----------------------------
# Cell 13 — Wrap-up: define reusable RAG pipeline class
# ----------------------------
class RAGPipeline:
    """Reusable end-to-end Retrieval-Augmented Generation workflow."""

    def __init__(self, retriever, model_name="gemini-2.5-flash"):
        self.retriever = retriever
        self.llm = ChatGoogleGenerativeAI(
            model=model_name,
            temperature=0.3,
            max_output_tokens=1024
        )
        self.prompt = ChatPromptTemplate.from_template("""
        You are a helpful assistant. Use only the context to answer.

        Context:
        {context}

        Question:
        {question}

        Provide a clear, factual answer. If context lacks data, say so.
        """)
        self.chain = (
            RunnableParallel({"context": self.retriever, "question": RunnablePassthrough()})
            | self.prompt
            | self.llm
            | StrOutputParser()
        )

    def query(self, question: str) -> str:
        return self.chain.invoke(question)

# ✅ Instantiate reusable pipeline
rag_pipeline = RAGPipeline(retriever)

# 🔍 Example call
answer = rag_pipeline.query("What are the overall insights across the uploaded documents?")
print("💬 Final Answer:\n", answer)


💬 Final Answer:
 The uploaded documents primarily focus on clarifying the distinct differences between the terms "analysis" and "analytics."

Key insights include:
*   **Analysis** generally refers to examining past events to explain how and/or why something happened. It involves separating a large dataset into smaller, digestible chunks to study individually and understand their relationships. It can be qualitative or quantitative.
*   **Analytics** generally refers to exploring potential future events. It applies logical and computational reasoning to the component parts obtained from an analysis to find patterns and determine future actions. It branches into qualitative analytics (using intuition and experience to plan future moves) and quantitative analytics (applying formulas and algorithms to numbers for predictions).
*   The terms are often used interchangeably due to their similarity, but they have distinct meanings.
*   Understanding these differences helps clarify related ter