In [136]:
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader,TextLoader,UnstructuredMarkdownLoader

In [137]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_experimental.text_splitter import SemanticChunker
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.embeddings import Embeddings
from langchain_community.retrievers import BM25Retriever
from langchain_chroma import Chroma


In [138]:
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate,ChatPromptTemplate
from langsmith.evaluation import StringEvaluator
import sys
import langchain_classic as _lc
sys.modules["langchain"] = _lc
from ragas.integrations.langchain import EvaluatorChain
from ragas.metrics import faithfulness
from ragas.metrics import _context_precision

  from ragas.metrics import faithfulness


In [139]:
#creating the embedding model
embedding_model=HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device':'cpu'},
    encode_kwargs={"normalize_embeddings": True}#doing the l2 noramlization
)


In [140]:
# Load MySQL Handbook PDF
loader = PyPDFLoader(r'C:\Users\Acer\OneDrive\Documents\books\python\MySQL Handbook.pdf')

documents = loader.load()
print(f"Loaded {len(documents)} pages from MySQL Handbook")

Loaded 72 pages from MySQL Handbook


In [141]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False
)

chunks = text_splitter.split_documents(documents)
print(f"Split into {len(chunks)} chunks")
print(f"\nFirst chunk preview:")
print(chunks[0].page_content[:200] if chunks else "No chunks created")

Split into 71 chunks

First chunk preview:
Installing MySQL
What is MySQL workbench?
MySQL Workbench is a visual tool for database architects, developers, and DBAs. It
provides data modeling, SQL development, and comprehensive administration
t


In [142]:
# Initialize the ChromaDB vector store
vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model)

In [143]:
# Perform similarity search
query = "What are python variables?"  # Change this to your query
k = 3  # Number of results to return

results = vector_store.similarity_search(query=query, k=k)

print(f"Query: {query}")
print(f"\nFound {len(results)} relevant documents:\n")
for i, doc in enumerate(results, 1):
    print(f"--- Document {i} ---")
    print(doc.page_content[:300])
    print(f"Metadata: {doc.metadata}")
    print()

Query: What are python variables?

Found 3 relevant documents:

--- Document 1 ---
might also see the keyword let, indicating a variable that
may be reassigned. These were also introduced in ES6.
Passing around functions as variables is also extremely
useful when it comes to handling user actions and
callbacks, and it is worth getting comfortable with this style
of programming whe
Metadata: {'page_label': '324', 'page': 323, 'total_pages': 539, 'creator': 'calibre 6.28.1', 'moddate': '2024-02-24T08:23:02+00:00', 'creationdate': '2024-02-24T08:23:02+00:00', 'producer': 'calibre 6.28.1', 'source': 'C:\\Users\\Acer\\OneDrive\\Documents\\books\\python\\Mitchell R. Web Scraping with Python. Data Extraction...the Modern Web 3ed 2024Mitchell R. Web Scraping with Python. Data Extraction...the Modern Web 3ed 2024.pdf', 'author': 'Ryan Mitchell', 'title': 'Web Scraping with Python'}

--- Document 2 ---
might also see the keyword let, indicating a variable that
may be reassigned. These were also 

In [144]:
# Similarity search with scores
results_with_scores = vector_store.similarity_search_with_score(query=query, k=k)

print(f"Query: {query}")
print(f"\nResults with relevance scores:\n")
for i, (doc, score) in enumerate(results_with_scores, 1):
    print(f"--- Document {i} (Score: {score:.4f}) ---")
    print(doc.page_content[:250])
    print()

Query: What are python variables?

Results with relevance scores:

--- Document 1 (Score: 0.9694) ---
might also see the keyword let, indicating a variable that
may be reassigned. These were also introduced in ES6.
Passing around functions as variables is also extremely
useful when it comes to handling user actions and
callbacks, and it is worth gett

--- Document 2 (Score: 0.9694) ---
might also see the keyword let, indicating a variable that
may be reassigned. These were also introduced in ES6.
Passing around functions as variables is also extremely
useful when it comes to handling user actions and
callbacks, and it is worth gett

--- Document 3 (Score: 0.9694) ---
might also see the keyword let, indicating a variable that
may be reassigned. These were also introduced in ES6.
Passing around functions as variables is also extremely
useful when it comes to handling user actions and
callbacks, and it is worth gett



In [145]:
# Create a retriever from the vector store
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3}
)

# Test the retriever
retrieved_docs = retriever.invoke(query)
print(f"Retrieved {len(retrieved_docs)} documents using the retriever")
for i, doc in enumerate(retrieved_docs, 1):
    print(f"\nDocument {i}:")
    print(doc.page_content[:200])

Retrieved 3 documents using the retriever

Document 1:
might also see the keyword let, indicating a variable that
may be reassigned. These were also introduced in ES6.
Passing around functions as variables is also extremely
useful when it comes to handlin

Document 2:
might also see the keyword let, indicating a variable that
may be reassigned. These were also introduced in ES6.
Passing around functions as variables is also extremely
useful when it comes to handlin

Document 3:
might also see the keyword let, indicating a variable that
may be reassigned. These were also introduced in ES6.
Passing around functions as variables is also extremely
useful when it comes to handlin


In [146]:
# Initialize LLM (using Groq - you can change the provider)
from langchain_core.output_parsers import StrOutputParser

llm = ChatGroq(
    api_key="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",  # or replace with your actual key
    model="llama-3.3-70b-versatile",  # or "mixtral-8x7b-32768", "gemma-7b-it"
    temperature=0.3
)

output_parser = StrOutputParser()
print("LLM initialized successfully!")

LLM initialized successfully!


In [147]:
# Define QA Prompt Template with Page Number Citations
QA_SYSTEM_PROMPT = """You are an AI Study Assistant designed to help students understand their study materials. 
Your role is to provide clear, accurate, and helpful explanations based ONLY on the provided context.

CRITICAL INSTRUCTIONS FOR PREVENTING HALLUCINATIONS:
1. Answer ONLY based on the provided context from the student's uploaded materials.
2. ALWAYS cite the page number(s) when stating facts or information (e.g., "According to page 33..." or "[Page 35]").
3. If the answer is not in the context, clearly say "I couldn't find information about this in your study materials."
4. NEVER make up information or add details not present in the provided context.
5. If you're unsure about something, explicitly state your uncertainty.
6. When referencing multiple sources, cite each page number separately.
7. Provide detailed explanations that help students understand concepts.
8. Use simple language and examples ONLY from the provided context.
9. Structure your answers with headings and bullet points when helpful.

CITATION FORMAT:
- For single facts: "According to page X, [fact]..."
- For multiple pages: "Based on pages X and Y..."
- At the end: "References: Pages X, Y, Z"

Context from your study materials (each source includes page numbers):
{context}
"""

QA_HUMAN_PROMPT = """Question: {question}

Please provide a clear and helpful answer based on the study materials above."""

QA_PROMPT = ChatPromptTemplate.from_messages([
    ("system", QA_SYSTEM_PROMPT),
    ("human", QA_HUMAN_PROMPT)
])

In [148]:
def format_docs(docs):
    """Format retrieved documents into a single context string with explicit page numbers."""
    formatted_parts = []
    for i, doc in enumerate(docs, 1):
        source = doc.metadata.get("source", "Unknown")
        page = doc.metadata.get("page", "")
        page_info = f" (Page {page + 1})" if page != "" else ""  # page +1 because pdf stores page number as 0 indexed
        
        # Add clear page number markers for hallucination detection
        page_label = f"PAGE {page + 1}" if page != "" else "PAGE UNKNOWN"
        
        formatted_parts.append(
            f"[Source {i}: {source}{page_info}]\n[{page_label}]\n{doc.page_content}"
        )
    return "\n\n---\n\n".join(formatted_parts)

In [149]:

from langchain_core.runnables import RunnablePassthrough#LCEL pipeline

# Build the chain: retrieve -> format -> prompt -> llm -> parse
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | QA_PROMPT
    | llm
    | output_parser
)

print("RAG Pipeline created successfully!")

RAG Pipeline created successfully!


### Query the Pipeline
Now you can ask questions about your documents

In [150]:
# Example 1: Simple query
question = "What is scrapy?"

print(f"Question: {question}\n")
print("Generating answer...\n")

answer = rag_chain.invoke(question)
print(f"Answer:\n{answer}")

Question: What is scrapy?

Generating answer...

Answer:
## Introduction to Scrapy
According to page 198, Scrapy is described as "an extremely large, sprawling library with many features." 

## Key Characteristics of Scrapy
The key points about Scrapy are:
* It has many features that work together seamlessly.
* Its features have many areas of overlap, allowing users to develop their own style within the library.
* If there's something you'd like to do with Scrapy that hasn't been mentioned, there is likely a way (or several) to do it.

## Additional Resources
According to page 198, for a comprehensive discourse on the Scrapy framework, you can also refer to Scrapy's official tutorial pages.

References: Pages 198


### Hybrid Retrieval (BM25 + Vector)
Combine keyword-based (BM25) and semantic (vector) search for better results

In [151]:
# Initialize BM25 retriever
bm25_retriever = BM25Retriever.from_documents(chunks, k=3)

def hybrid_retrieve(query, k=3):
    bm25_docs = bm25_retriever.invoke(query)
    vector_docs = retriever.invoke(query)
    seen_content = set()
    combined_docs = []
    for doc in vector_docs + bm25_docs:
        content_hash = hash(doc.page_content[:200])
        if content_hash not in seen_content:
            seen_content.add(content_hash)
            combined_docs.append(doc)
            if len(combined_docs) >= k:
                break
    
    return combined_docs

test_query = "What are Python variables?"
hybrid_docs = hybrid_retrieve(test_query, k=3)

print(f"Hybrid retrieval found {len(hybrid_docs)} unique documents\n")
for i, doc in enumerate(hybrid_docs, 1):
    print(f"Document {i}:")
    print(doc.page_content[:200])
    print()

Hybrid retrieval found 3 unique documents

Document 1:
might also see the keyword let, indicating a variable that
may be reassigned. These were also introduced in ES6.
Passing around functions as variables is also extremely
useful when it comes to handlin

Document 2:
Quick Quiz
What does the following queries do?
above)
SELECT * FROM users ORDER BY created_at DESC LIMIT 10;
SELECT * FROM users WHERE salary >60000 ORDER BY created_at DESC LIMIT 5;
SELECT * FROM use

Document 3:
Installing MySQL
What is MySQL workbench?
MySQL Workbench is a visual tool for database architects, developers, and DBAs. It
provides data modeling, SQL development, and comprehensive administration
t



In [152]:
hybird_rag_chain=(
    QA_PROMPT|
    llm|
    StrOutputParser()
)

In [153]:
context=hybrid_retrieve(question)
context=format_docs(context)
answer=hybird_rag_chain.invoke({
    'context':context,
    'question':question
})
print(answer)

## Introduction to Scrapy
According to page 198, Scrapy is an extremely large, sprawling library with many features. It is a framework that allows users to develop their own particular style within it.

## Key Features of Scrapy
The provided context does not give a detailed description of Scrapy's features, but it mentions that:
* Scrapy has many areas of overlap that allow users to easily develop their own particular style within it.
* If there’s something you’d like to do with Scrapy that has not been mentioned, there is likely a way (or several) to do it!

## Additional Resources
For a comprehensive discourse on the Scrapy framework, it is recommended to refer to Scrapy’s official tutorial pages, as mentioned on page 198.

References: Page 198


## Evaluate Hybrid RAG Model
Test faithfulness and context precision metrics

In [154]:
# Import evaluation metrics
from ragas.metrics import faithfulness, context_precision
from ragas import evaluate
from datasets import Dataset

print("Evaluation metrics imported successfully!")

Evaluation metrics imported successfully!


  from ragas.metrics import faithfulness, context_precision
  from ragas.metrics import faithfulness, context_precision


In [155]:
# Prepare MySQL-specific test questions and ground truth answers
test_questions = [
    "What is a PRIMARY KEY in MySQL?",
    "What is the difference between CHAR and VARCHAR data types?",
    "How do you create a database in MySQL?",
    "What is a foreign key constraint?",
    "What is the purpose of the SELECT statement?",
    "What is an INDEX in MySQL and why is it used?",
    "How do you perform a JOIN operation in MySQL?"
]

# Ground truth answers (reference answers for evaluation)
ground_truths = [
    "A PRIMARY KEY is a column or set of columns that uniquely identifies each row in a table. It must contain unique values and cannot contain NULL values.",
    "CHAR is a fixed-length string data type that always uses the specified length, while VARCHAR is a variable-length string that only uses as much space as needed up to the maximum length specified.",
    "To create a database in MySQL, you use the CREATE DATABASE statement followed by the database name, for example: CREATE DATABASE database_name;",
    "A foreign key is a constraint that establishes a relationship between two tables by referencing the primary key of another table, ensuring referential integrity.",
    "The SELECT statement is used to query and retrieve data from one or more tables in a database. It allows you to specify which columns to retrieve and filter the results.",
    "An INDEX is a database structure that improves the speed of data retrieval operations. It creates a sorted reference to data in a table, allowing faster lookups but requiring additional storage space.",
    "A JOIN operation combines rows from two or more tables based on a related column between them. Common types include INNER JOIN, LEFT JOIN, RIGHT JOIN, and FULL OUTER JOIN."
]

print(f"Created {len(test_questions)} MySQL-specific test questions with ground truth answers")

Created 7 MySQL-specific test questions with ground truth answers


In [156]:
# Test retrieval with one question to see context and page numbers
test_question = "What is a PRIMARY KEY in MySQL?"
print(f"Test Question: {test_question}\n")
print("="*80)

# Retrieve documents
test_docs = hybrid_retrieve(test_question, k=3)

print(f"\nRetrieved {len(test_docs)} documents:\n")
for i, doc in enumerate(test_docs, 1):
    page_num = doc.metadata.get('page', 'N/A')
    source = doc.metadata.get('source', 'Unknown')
    print(f"Document {i} (Page {page_num}):")
    print(f"Content preview: {doc.page_content[:200]}...")
    print(f"Full metadata: {doc.metadata}")
    print("-"*80)

# Show formatted context as it will be sent to LLM
formatted = format_docs(test_docs)
print("\nFormatted context with page numbers:")
print(formatted[:500] + "...")
print("\n" + "="*80)

Test Question: What is a PRIMARY KEY in MySQL?


Retrieved 3 documents:

Document 1 (Page 32):
Content preview: Understanding PRIMARY KEY in MySQL
A PRIMARY KEY is a constraint in SQL that uniquely identifies each row in a table.
It is one of the most important concepts in database design.
What is a Primary Key...
Full metadata: {'creationdate': '2025-07-13T11:32:27+00:00', 'page_label': '33', 'producer': 'pdf-lib (https://github.com/Hopding/pdf-lib)', 'source': 'C:\\Users\\Acer\\OneDrive\\Documents\\books\\python\\MySQL Handbook.pdf', 'total_pages': 72, 'creator': 'pdf-lib (https://github.com/Hopding/pdf-lib)', 'moddate': '2025-07-13T11:32:27+00:00', 'page': 32}
--------------------------------------------------------------------------------
Document 2 (Page 34):
Content preview: This may fail if the primary key is being used elsewhere (like in a foreign key
or auto_increment column).
To drop a UNIQUE constraint:
Auto Increment
In MySQL, a PRIMARY KEY is often used with the AU...
Full

In [157]:
# Test answer generation with page citations
test_question = "What is a PRIMARY KEY in MySQL?"
print(f"Question: {test_question}\n")
print("="*80)

# Retrieve and format context
test_docs = hybrid_retrieve(test_question, k=3)
formatted_context = format_docs(test_docs)

# Generate answer
print("\nGenerating answer with page citations...\n")
try:
    answer = hybird_rag_chain.invoke({
        'context': formatted_context,
        'question': test_question
    })
    
    print("ANSWER WITH PAGE CITATIONS:")
    print("="*80)
    print(answer)
    print("="*80)
    
    # Show which pages were in the context
    pages = [doc.metadata.get('page', 'N/A') for doc in test_docs]
    print(f"\nContext pages available: {[p+1 if isinstance(p, int) else p for p in pages]}")
    print("\nNOTE: Check if the answer cites specific page numbers to prevent hallucinations!")
    
except Exception as e:
    print(f"Error: {e}")
    print("You may need to wait for rate limit to reset (~5 minutes)")

Question: What is a PRIMARY KEY in MySQL?


Generating answer with page citations...

ANSWER WITH PAGE CITATIONS:
## Definition of PRIMARY KEY
According to page 33, a PRIMARY KEY is a constraint in SQL that uniquely identifies each row in a table. It is one of the most important concepts in database design.

## Characteristics of PRIMARY KEY
Based on page 33, a PRIMARY KEY:
* Must be unique
* Cannot be NULL
* Is used to identify rows in a table
* Can be a single column or a combination of columns
* Each table can have only one primary key

## Example of PRIMARY KEY
As shown on pages 33 and 35, an example of a PRIMARY KEY is:
```sql
CREATE TABLE users (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(100)
);
```
In this example, the `id` column is the PRIMARY KEY, which uniquely identifies each row in the `users` table.

## Key Takeaways
According to page 35, the key takeaways for PRIMARY KEY are:
* Use PRIMARY KEY for the main identifier of a row.
* Use UNIQUE for enforcing non-duplica

In [158]:
# Run hybrid RAG on test questions and collect results with rate limiting
import time

evaluation_data = {
    "question": [],
    "answer": [],
    "contexts": [],
    "ground_truth": []
}

print("Generating answers using Hybrid RAG with rate limiting...\n")
print("Note: Adding delays between requests to avoid rate limits\n")

for i, question in enumerate(test_questions):
    print(f"Processing question {i+1}/{len(test_questions)}: {question}")
    
    # Retrieve context using hybrid retrieval
    retrieved_docs = hybrid_retrieve(question, k=3)
    
    # Extract context strings (RAGAS expects list of strings)
    contexts = [doc.page_content for doc in retrieved_docs]
    
    # Format context for the RAG chain (with page numbers)
    formatted_context = format_docs(retrieved_docs)
    
    # Generate answer
    try:
        answer = hybird_rag_chain.invoke({
            'context': formatted_context,
            'question': question
        })
        
        # Store evaluation data
        evaluation_data["question"].append(question)
        evaluation_data["answer"].append(answer)
        evaluation_data["contexts"].append(contexts)
        evaluation_data["ground_truth"].append(ground_truths[i])
        
        print(f"  Answer: {answer[:100]}...")
        print(f"  Retrieved {len(contexts)} context documents")
        
        # Show page numbers from retrieved documents
        pages = [doc.metadata.get('page', 'N/A') for doc in retrieved_docs]
        print(f"  Pages referenced: {pages}")
        print()
        
        # Add delay to avoid rate limits (wait 10 seconds between requests)
        if i < len(test_questions) - 1:
            print(f"  Waiting 10 seconds before next request...")
            time.sleep(10)
            print()
        
    except Exception as e:
        print(f"  Error processing question: {str(e)}")
        print(f"  Skipping this question and continuing...\n")
        continue

print(f"\nSuccessfully processed {len(evaluation_data['question'])} out of {len(test_questions)} questions!")

Generating answers using Hybrid RAG with rate limiting...

Note: Adding delays between requests to avoid rate limits

Processing question 1/7: What is a PRIMARY KEY in MySQL?
  Answer: ## Definition of PRIMARY KEY
According to page 33, a PRIMARY KEY is a constraint in SQL that uniquel...
  Retrieved 3 context documents
  Pages referenced: [32, 34, 23]

  Waiting 10 seconds before next request...

Processing question 2/7: What is the difference between CHAR and VARCHAR data types?
  Answer: ## Difference between CHAR and VARCHAR Data Types

Unfortunately, the provided study materials do no...
  Retrieved 3 context documents
  Pages referenced: [211, 35, 1]

  Waiting 10 seconds before next request...

Processing question 3/7: How do you create a database in MySQL?
  Answer: ## Creating a Database in MySQL
To create a database in MySQL, you can follow these steps:

### Meth...
  Retrieved 3 context documents
  Pages referenced: [4, 210, 32]

  Waiting 10 seconds before next request...

P

In [159]:
# Display evaluation data with page number tracking
import pandas as pd

if len(evaluation_data['question']) > 0:
    print("\n" + "="*100)
    print("EVALUATION DATA SUMMARY WITH PAGE REFERENCES")
    print("="*100 + "\n")
    
    for i in range(len(evaluation_data['question'])):
        print(f"Question {i+1}: {evaluation_data['question'][i]}")
        print(f"-" * 100)
        
        # Show answer preview
        answer_preview = evaluation_data['answer'][i][:200] + "..." if len(evaluation_data['answer'][i]) > 200 else evaluation_data['answer'][i]
        print(f"Answer Preview: {answer_preview}\n")
        
        # Show contexts with page info
        print(f"Retrieved Contexts ({len(evaluation_data['contexts'][i])} documents):")
        for j, ctx in enumerate(evaluation_data['contexts'][i], 1):
            ctx_preview = ctx[:150] + "..." if len(ctx) > 150 else ctx
            print(f"  Context {j}: {ctx_preview}")
        
        print(f"\nGround Truth: {evaluation_data['ground_truth'][i]}")
        print("\n" + "="*100 + "\n")
else:
    print("No evaluation data available yet. Run the cell above to generate answers first.")


EVALUATION DATA SUMMARY WITH PAGE REFERENCES

Question 1: What is a PRIMARY KEY in MySQL?
----------------------------------------------------------------------------------------------------
Answer Preview: ## Definition of PRIMARY KEY
According to page 33, a PRIMARY KEY is a constraint in SQL that uniquely identifies each row in a table. It is one of the most important concepts in database design.

## C...

Retrieved Contexts (3 documents):
  Context 1: Understanding PRIMARY KEY in MySQL
A PRIMARY KEY is a constraint in SQL that uniquely identifies each row in a table.
It is one of the most important ...
  Context 2: This may fail if the primary key is being used elsewhere (like in a foreign key
or auto_increment column).
To drop a UNIQUE constraint:
Auto Increment...
  Context 3: Example:
Each new row gets the next available integer value in id.
Summary Table
Constraint Purpose
UNIQUE Prevents duplicate values
NOT NULL Ensures ...

Ground Truth: A PRIMARY KEY is a column or set of c

In [160]:
# Create dataset for evaluation
eval_dataset = Dataset.from_dict(evaluation_data)

print("Evaluation dataset created:")
print(f"  - {len(eval_dataset)} samples")
print(f"  - Columns: {eval_dataset.column_names}")
print("\nDataset preview:")
print(eval_dataset)

Evaluation dataset created:
  - 7 samples
  - Columns: ['question', 'answer', 'contexts', 'ground_truth']

Dataset preview:
Dataset({
    features: ['question', 'answer', 'contexts', 'ground_truth'],
    num_rows: 7
})


In [161]:
# Evaluate Faithfulness
# Faithfulness measures how factually accurate the generated answer is based on the given context
print("Evaluating Faithfulness...")
print("=" * 60)

from ragas import evaluate
import numpy as np

faithfulness_score = evaluate(
    eval_dataset,
    metrics=[faithfulness],
    llm=llm,
    embeddings=embedding_model
)

print("\nFaithfulness Evaluation Results:")
print(f"Overall Faithfulness Score: {faithfulness_score['faithfulness']}")
print("\nInterpretation:")
print("  - Score range: 0.0 to 1.0")
print("  - Higher is better")
print("  - Measures if the answer is grounded in the retrieved context")
print("  - Score > 0.8: Excellent faithfulness")
print("  - Score 0.6-0.8: Good faithfulness")
print("  - Score < 0.6: Poor faithfulness, answers may contain hallucinations")

Evaluating Faithfulness...


Evaluating: 100%|██████████| 7/7 [00:55<00:00,  7.97s/it]



Faithfulness Evaluation Results:
Overall Faithfulness Score: [1.0, 0.75, 1.0, 1.0, 0.8333333333333334, 1.0, 0.6153846153846154]

Interpretation:
  - Score range: 0.0 to 1.0
  - Higher is better
  - Measures if the answer is grounded in the retrieved context
  - Score > 0.8: Excellent faithfulness
  - Score 0.6-0.8: Good faithfulness
  - Score < 0.6: Poor faithfulness, answers may contain hallucinations


In [162]:
# Evaluate Context Precision
# Context Precision measures how relevant the retrieved contexts are to the question
print("\nEvaluating Context Precision...")
print("=" * 60)

context_precision_score = evaluate(
    eval_dataset,
    metrics=[context_precision],
    llm=llm,
    embeddings=embedding_model
)

print(f"Overall Context Precision Score: {context_precision_score['context_precision']}")


Evaluating Context Precision...


Evaluating: 100%|██████████| 7/7 [01:34<00:00, 13.52s/it]


Overall Context Precision Score: [0.9999999999666667, 0.0, 0.99999999995, 0.5833333333041666, 0.99999999995, 0.99999999995, 0.8333333332916666]


In [None]:
# Semantic Chunking + Hybrid RAG + Evaluation (All in One)
import time
from ragas import evaluate
from datasets import Dataset

# Step 1: Create semantic chunks
print("Step 1: Creating semantic chunks...")
semantic_chunker = SemanticChunker(embeddings=embedding_model, breakpoint_threshold_type='gradient', breakpoint_threshold_amount=0.8)
semantic_chunks = semantic_chunker.split_documents(documents)
print(f"  Created {len(semantic_chunks)} semantic chunks")

# Step 2: Create vector store and retrievers
print("\nStep 2: Building hybrid retrieval...")
vs_semantic = Chroma.from_documents(semantic_chunks, embedding_model, collection_name="semantic_eval")
retriever_sem = vs_semantic.as_retriever(search_kwargs={"k": 3})
bm25_sem = BM25Retriever.from_documents(semantic_chunks, k=3)

def hybrid_retrieve_sem(query, k=3):
    seen, combined = set(), []
    for doc in retriever_sem.invoke(query) + bm25_sem.invoke(query):
        h = hash(doc.page_content[:200])
        if h not in seen:
            seen.add(h)
            combined.append(doc)
            if len(combined) >= k: break
    return combined

# Step 3: Generate answers
print("\nStep 3: Generating answers...")
rag_chain = QA_PROMPT | llm | StrOutputParser()
eval_data = {"question": [], "answer": [], "contexts": [], "ground_truth": []}

for i, q in enumerate(test_questions):
    print(f"  Q{i+1}: {q[:50]}...")
    docs = hybrid_retrieve_sem(q, k=3)
    ctx = [d.page_content for d in docs]
    ctx_fmt = "\n---\n".join([f"[Page {d.metadata.get('page',0)+1}]\n{d.page_content}" for d in docs])
    
    try:
        ans = rag_chain.invoke({"context": ctx_fmt, "question": q})
        eval_data["question"].append(q)
        eval_data["answer"].append(ans)
        eval_data["contexts"].append(ctx)
        eval_data["ground_truth"].append(ground_truths[i])
        print(f"     Done")
        if i < len(test_questions) - 1: time.sleep(8)
    except Exception as e:
        print(f"     Error: {str(e)[:50]}")

# Step 4: Evaluate
print(f"\nStep 4: Evaluating {len(eval_data['question'])} samples...")
ds = Dataset.from_dict(eval_data)
results = evaluate(ds, metrics=[faithfulness, context_precision], llm=llm, embeddings=embedding_model)

# Results
print("\n" + "="*60)
print("SEMANTIC HYBRID RAG - EVALUATION RESULTS")
print("="*60)


Step 1: Creating semantic chunks...
  Created 293 semantic chunks

Step 2: Building hybrid retrieval...

Step 3: Generating answers...
  Q1: What is a PRIMARY KEY in MySQL?...
     ✓ Done
  Q2: What is the difference between CHAR and VARCHAR da...
     ✓ Done
  Q3: How do you create a database in MySQL?...
     ✓ Done
  Q4: What is a foreign key constraint?...
     ✓ Done
  Q5: What is the purpose of the SELECT statement?...
     ✓ Done
  Q6: What is an INDEX in MySQL and why is it used?...
     ✓ Done
  Q7: How do you perform a JOIN operation in MySQL?...
     ✓ Done

Step 4: Evaluating 7 samples...


Evaluating: 100%|██████████| 14/14 [02:03<00:00,  8.82s/it]



SEMANTIC HYBRID RAG - EVALUATION RESULTS


TypeError: unsupported format string passed to list.__format__

In [169]:
print(f"Faithfulness:      {results['faithfulness']}")
print(f"Context Precision: {results['context_precision']}")

Faithfulness:      [0.875, 0.3333333333333333, 1.0, 0.8, 1.0, 1.0, 1.0]
Context Precision: [0.5833333333041666, 0.0, 0.49999999995, 0.49999999995, 0.9999999999, 0.99999999995, 0.9999999999]
