In [10]:
import os
from getpass import getpass
os.environ["OPENAI_API_KEY"] = getpass("Please enter your OpenAI API key!")

In [11]:
from langchain_community.document_loaders import PyMuPDFLoader

# Load only the meditation research PDF
loader = PyMuPDFLoader("data/MeditationResearchacomprehensivereview.pdf")
docs = loader.load()


In [13]:
# Generate Synthetic Test Data with RAGAS
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.testset import TestsetGenerator
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Setup generators
generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

# Generate test dataset
generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)
dataset = generator.generate_with_langchain_docs(docs, testset_size=10)

Applying HeadlinesExtractor:   0%|          | 0/7 [00:00<?, ?it/s]

Applying HeadlineSplitter:   0%|          | 0/8 [00:00<?, ?it/s]

unable to apply transformation: 'headlines' property not found in this node


Applying SummaryExtractor:   0%|          | 0/10 [00:00<?, ?it/s]

Property 'summary' already exists in node '39b640'. Skipping!
Property 'summary' already exists in node '227ab5'. Skipping!
Property 'summary' already exists in node '53b1bd'. Skipping!


Applying CustomNodeFilter:   0%|          | 0/8 [00:00<?, ?it/s]

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/24 [00:00<?, ?it/s]

Property 'summary_embedding' already exists in node '53b1bd'. Skipping!
Property 'summary_embedding' already exists in node '39b640'. Skipping!
Property 'summary_embedding' already exists in node '227ab5'. Skipping!


Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

Generating personas:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Scenarios:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/12 [00:00<?, ?it/s]

In [14]:
dataset.to_pandas()

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,What role does India play in the research of m...,[S Rajaraman et al Int. Journal of Engineering...,India is highlighted as a significant location...,single_hop_specifc_query_synthesizer
1,How does meditation affect the Autonomous Nerv...,[Meditation techniques I. INTRODUCTION Meditat...,Meditation involves a complicated form of rela...,single_hop_specifc_query_synthesizer
2,Wht are the benfits of Buddist meditaton techn...,[S Rajaraman et al Int. Journal of Engineering...,"Buddhist meditation techniques, such as Zen Me...",single_hop_specifc_query_synthesizer
3,Can you elaborate on the contributions of Leon...,[METHODOLOGY Opher Caspi et al. [13] suggested...,Leonaite A. Vainoras performed an analysis of ...,single_hop_specifc_query_synthesizer
4,What are the physiological benefits of Mindful...,[<1-hop>\n\nS Rajaraman et al Int. Journal of ...,Mindfulness Meditation is noted for its effect...,multi_hop_abstract_query_synthesizer
5,What are the physiological effects of meditati...,[<1-hop>\n\nS Rajaraman et al Int. Journal of ...,Research by S Rajaraman et al. indicates that ...,multi_hop_abstract_query_synthesizer
6,What are the effects of meditation techniques ...,[<1-hop>\n\nS Rajaraman et al Int. Journal of ...,Meditation techniques have been shown to provi...,multi_hop_abstract_query_synthesizer
7,How do meditation techniques contribute to car...,[<1-hop>\n\nS Rajaraman et al Int. Journal of ...,Meditation techniques have been shown to provi...,multi_hop_abstract_query_synthesizer
8,What were the findings of Atefeh Goshvarpour e...,[<1-hop>\n\nMETHODOLOGY Opher Caspi et al. [13...,Atefeh Goshvarpour et al. analyzed the lagged ...,multi_hop_specific_query_synthesizer
9,What were the findings of Damodar Prasad Goswa...,[<1-hop>\n\nMETHODOLOGY Opher Caspi et al. [13...,Damodar Prasad Goswami et al. analyzed heart r...,multi_hop_specific_query_synthesizer


 ### Baseline RAG with Traditional Chunking

In [15]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, END
from typing import TypedDict, List
from langchain_core.documents import Document

# Traditional chunking
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
traditional_docs = text_splitter.split_documents(docs)

# Vector store setup
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
client = QdrantClient(":memory:")
client.create_collection(
    collection_name="traditional_chunks",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)
vector_store = QdrantVectorStore(
    client=client,
    collection_name="traditional_chunks",
    embedding=embeddings,
)
_ = vector_store.add_documents(documents=traditional_docs)

# Naive retriever
naive_retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# RAG components
RAG_PROMPT = """You are a helpful assistant who answers questions based on provided context. You must only use the provided context, and cannot use your own knowledge.

### Question
{question}

### Context
{context}"""

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
llm = ChatOpenAI(model="gpt-4o-mini")

# LangGraph state
class RAGState(TypedDict):
    question: str
    context: List[Document]
    response: str

# Nodes
def retrieve_traditional(state: RAGState) -> RAGState:
    retrieved_docs = naive_retriever.invoke(state["question"])
    return {"context": retrieved_docs}

def generate(state: RAGState) -> RAGState:
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = rag_prompt.format_messages(question=state["question"], context=docs_content)
    response = llm.invoke(messages)
    return {"response": response.content}

# Build graph
workflow = StateGraph(RAGState)
workflow.add_node("retrieve", retrieve_traditional)
workflow.add_node("generate", generate)
workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "generate")
workflow.add_edge("generate", END)

traditional_graph = workflow.compile()

In [18]:
response = traditional_graph.invoke({"question" : "Who wrote the paper?"})

In [19]:
response['response']

'The paper "Meditation Research: A Comprehensive Review" was written by Sivaramakrishnan Rajaraman.'

###  Semantic Chunking RAG

In [None]:
from langchain_experimental.text_splitter import SemanticChunker

semantic_chunker = SemanticChunker(
    embeddings,
    breakpoint_threshold_type="interquartile",
    breakpoint_threshold_amount=95.0,
    min_chunk_size=50
)

semantic_docs = semantic_chunker.create_documents([doc.page_content for doc in docs])

semantic_documents = []
for i, doc in enumerate(semantic_docs):
    semantic_documents.append(Document(
        page_content=doc.page_content,
        metadata={"chunk_index": i, "chunking_method": "semantic"}
    ))

client_semantic = QdrantClient(":memory:")
client_semantic.create_collection(
    collection_name="semantic_chunks",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)
semantic_vector_store = QdrantVectorStore(
    client=client_semantic,
    collection_name="semantic_chunks",
    embedding=embeddings,
)
_ = semantic_vector_store.add_documents(documents=semantic_documents)

semantic_retriever = semantic_vector_store.as_retriever(search_kwargs={"k": 3})

def retrieve_semantic(state: RAGState) -> RAGState:
    retrieved_docs = semantic_retriever.invoke(state["question"])
    return {"context": retrieved_docs}

semantic_workflow = StateGraph(RAGState)
semantic_workflow.add_node("retrieve", retrieve_semantic)
semantic_workflow.add_node("generate", generate)
semantic_workflow.set_entry_point("retrieve")
semantic_workflow.add_edge("retrieve", "generate")
semantic_workflow.add_edge("generate", END)

semantic_graph = semantic_workflow.compile()

#### Running the queries from SDG in both chucking methods 

In [21]:
# Run traditional RAG
traditional_results = []
for test_row in dataset:
    response = traditional_graph.invoke({"question": test_row.eval_sample.user_input})
    test_row.eval_sample.response = response["response"]
    test_row.eval_sample.retrieved_contexts = [context.page_content for context in response["context"]]
    traditional_results.append(test_row)

# Run semantic RAG
semantic_results = []
for test_row in dataset:
    response = semantic_graph.invoke({"question": test_row.eval_sample.user_input})
    test_row.eval_sample.response = response["response"]
    test_row.eval_sample.retrieved_contexts = [context.page_content for context in response["context"]]
    semantic_results.append(test_row)

##### Evaluation of Rag System with text splitter chunking

In [22]:
from ragas import EvaluationDataset, evaluate
from ragas.metrics import Faithfulness, AnswerRelevancy, ContextPrecision, ContextRecall, AnswerCorrectness

# Prepare evaluation dataset for traditional RAG
traditional_eval_dataset = EvaluationDataset.from_pandas(dataset.to_pandas())

# Evaluate traditional RAG
traditional_evaluation = evaluate(
    dataset=traditional_eval_dataset,
    metrics=[
        Faithfulness(),
        AnswerRelevancy(), 
        ContextPrecision(),
        ContextRecall(),
        AnswerCorrectness()
    ],
    llm=generator_llm
)

traditional_evaluation

Evaluating:   0%|          | 0/60 [00:00<?, ?it/s]

{'faithfulness': 0.7587, 'answer_relevancy': 0.9655, 'context_precision': 0.9444, 'context_recall': 0.6194, 'answer_correctness': 0.6720}

##### Semantic evaluation

In [23]:
semantic_eval_dataset = EvaluationDataset.from_pandas(dataset.to_pandas())

# Evaluate semantic RAG
semantic_evaluation = evaluate(
    dataset=semantic_eval_dataset,
    metrics=[
        Faithfulness(),
        AnswerRelevancy(),
        ContextPrecision(), 
        ContextRecall(),
        AnswerCorrectness()
    ],
    llm=generator_llm
)
semantic_evaluation

Evaluating:   0%|          | 0/60 [00:00<?, ?it/s]

{'faithfulness': 0.7306, 'answer_relevancy': 0.9665, 'context_precision': 0.9444, 'context_recall': 0.5847, 'answer_correctness': 0.6622}

#### Evaluation Results Comparison

| Metric | Text Splitter Evaluation | Semantic Evaluation | Difference | Change |
|--------|------------------|-------------------|------------|---------|
| **Faithfulness** | 0.7587 | 0.7306 | -0.0281 | -3.7% |
| **Answer Relevancy** | 0.9655 | 0.9665 | +0.0010 | +0.1% |
| **Context Precision** | 0.9444 | 0.9444 | 0.0000 | 0.0% |
| **Context Recall** | 0.6194 | 0.5847 | -0.0347 | -5.6% |
| **Answer Correctness** | 0.6720 | 0.6622 | -0.0098 | -1.5% |

### Analysis of Differences

- The second evaluation shows a degradation in most metrics compared to the first
- Only Answer Relevancy improved slightly (+0.1%)
- Context Precision remained exactly the same (0.9444)

##### **Key Performance Declines**
**Context Recall (-5.6%)**:                                                                                     This is the largest decrease, indicating that the system is now retrieving less relevant information from the knowledge base. This suggests the retrieval mechanism may have become less effective at finding pertinent context.

**Faithfulness (-3.7%)**:The second evaluation shows the system is less faithful to the retrieved context, meaning it's either:
- Hallucinating more information not present in the context
- Misinterpreting the retrieved information
- Adding unsupported claims

**Answer Correctness (-1.5%)**: A modest decline, likely correlated with the faithfulness drop, as incorrect information affects overall answer quality.