In [3]:
import os
import json
import pandas as pd
import mlflow
from mlflow import log_params, log_metrics
from typing import List, Dict, TypedDict

# LangChain & AI Imports
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# LangGraph Imports
from langgraph.graph import START, END, StateGraph
from langgraph.graph.message import MessagesState

# Ragas Imports
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)
from datasets import Dataset

# Ragas Generation Imports (Necessary for Step 3)
from ragas.testset import TestsetGenerator
from ragas.testset.synthesizers import (
    SingleHopSpecificQuerySynthesizer,
    MultiHopAbstractQuerySynthesizer,
    MultiHopSpecificQuerySynthesizer,
)
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

In [6]:
from dotenv import load_dotenv
load_dotenv()

False

In [None]:
# ==========================================
# Configuration
# ==========================================
PDF_PATH = "xx.pdf"
CHROMA_PERSIST_DIR = "./chroma_db"
COLLECTION_NAME = "dspy_book_collection"

In [10]:
# Initialize Global Models
llm_engine = ChatOpenAI(model="gpt-4o-mini")
embedding_engine = OpenAIEmbeddings( model="text-embedding-3-large")

lm_judge = ChatOpenAI(
    model="gpt-4o",
    temperature=0.0,
)

In [11]:
# ==========================================
# Step 1 & 2: Extract & Vectorize (Chroma)
# ==========================================
def process_and_vectorize_pdf(file_path: str, persist_dir: str, collection_name: str = COLLECTION_NAME):
    """Loads PDF, splits, and saves to ChromaDB."""
    print(f"--- Loading {file_path} ---")
    loader = PyPDFLoader(file_path)
    documents = loader.load() # Defaults to page-by-page chunking

    print(f"--- Vectorizing {len(documents)} pages to ChromaDB ---")
    vectorstore = Chroma.from_documents(
        documents=documents,
        embedding=embedding_engine,
        collection_name = collection_name,
        persist_directory=persist_dir
    )
    return vectorstore, documents

In [12]:
chromaVectorStore, langchainDocLs = process_and_vectorize_pdf("complete-book.pdf", "./chroma_db")

--- Loading complete-book.pdf ---


Got invalid hex string: Odd-length string (b'1f5a5')
Got invalid hex string: Odd-length string (b'1f4e6')
Got invalid hex string: Odd-length string (b'1f517')
Got invalid hex string: Odd-length string (b'1f4da')
Got invalid hex string: Odd-length string (b'1f680')
Got invalid hex string: Odd-length string (b'1f3a7')
Got invalid hex string: Odd-length string (b'1f5a5')
Got invalid hex string: Odd-length string (b'1f4e6')
Got invalid hex string: Odd-length string (b'1f517')
Got invalid hex string: Odd-length string (b'1f4da')
Got invalid hex string: Odd-length string (b'1f680')
Got invalid hex string: Odd-length string (b'1f3a7')
Got invalid hex string: Odd-length string (b'1f5a5')
Got invalid hex string: Odd-length string (b'1f4e6')
Got invalid hex string: Odd-length string (b'1f517')
Got invalid hex string: Odd-length string (b'1f4da')
Got invalid hex string: Odd-length string (b'1f680')
Got invalid hex string: Odd-length string (b'1f3a7')
Got invalid hex string: Odd-length string (b'1

--- Vectorizing 254 pages to ChromaDB ---


In [13]:
len(langchainDocLs)

254

In [14]:
langchainDocLs[108]

Document(metadata={'producer': 'Asciidoctor PDF 2.3.20, based on Prawn 2.4.0', 'creator': '', 'creationdate': '2025-12-14T17:21:32+05:00', 'title': 'Untitled', 'moddate': '2025-12-14T17:21:29+05:00', 'source': 'complete-book.pdf', 'total_pages': 254, 'page': 108, 'page_label': '108'}, page_content='Integrating DSPy with MCP Server\nPlaywright is an open-source automation framework created by Microsoft for\nprogrammatic browser control. It allows you to automate actions in modern\nbrowsers like Chromium (Chrome, Edge), Firefox, and WebKit (Safari) across\nWindows, macOS, and Linux. The playwright-mcp package is an MCP server that\nexposes tools for browser control.\nPlaywright MCP Repository - https://github.com/microsoft/playwright-mcp\nLet us run the Playwright MCP Server.\nInstalling playwright MCP Server\n$ npx @playwright/mcp@latest --port 8931\nListening on http://localhost:8931\nPut this in your client config:\n{\n\xa0 "mcpServers": {\n\xa0   "playwright": {\n\xa0     "url": "htt

In [15]:
# ==========================================
# Step 3: Generate Synthetic Test Sets
# ==========================================
def save_for_dspy(testset_df: pd.DataFrame, filename: str):
    """Saves Ragas testset in DSPy-compatible JSON format."""
    dspy_data = []
    for _, row in testset_df.iterrows():
        entry = {
            "question": row['user_input'],
            "answer": row['reference'],
            "gold_context": row['reference_contexts'],
            "metadata": {"synthesizer": row.get('synthesizer_name', 'unknown')}
        }
        dspy_data.append(entry)

    with open(filename, 'w') as f:
        json.dump(dspy_data, f, indent=4)
    print(f"Saved DSPy dataset: {filename}")

def generate_ragas_testsets(documents):
    """Generates Single, Multi-hop, and Mixed test sets."""
    print("--- Initializing Ragas Testset Generator ---")

    # Wrappers for Ragas
    generator_llm = LangchainLLMWrapper(llm_engine)
    generator_embeddings = LangchainEmbeddingsWrapper(embedding_engine)

    generator = TestsetGenerator(
        llm=generator_llm,
        embedding_model=generator_embeddings
    )

    # Define Synthesizer Distributions
    distributions = {
        "single_hop": [
            (SingleHopSpecificQuerySynthesizer(llm=generator_llm), 1.0)
        ],
        "multi_hop": [
            (MultiHopAbstractQuerySynthesizer(llm=generator_llm), 0.5),
            (MultiHopSpecificQuerySynthesizer(llm=generator_llm), 0.5)
        ],
        "mixed": [
            (SingleHopSpecificQuerySynthesizer(llm=generator_llm), 0.5),
            (MultiHopAbstractQuerySynthesizer(llm=generator_llm), 0.25),
            (MultiHopSpecificQuerySynthesizer(llm=generator_llm), 0.25)
        ]
    }

    test_size = 5  # Small size for demo; increase for production


    datasetLs = []
    dataFrameLs = []
    for name, dist in distributions.items():
        print(f"Generating {name} testset...")
        testset = generator.generate_with_langchain_docs(
            documents,
            testset_size=test_size,
            query_distribution=dist
        )

        df = testset.to_pandas()
        save_for_dspy(df, f"ragas_testset_{name}.json")
        datasetLs.append(testset)
        dataFrameLs.append(df)

    return datasetLs, dataFrameLs

In [None]:
synthethicDatasetLs, datasetDFLs = generate_ragas_testsets(langchainDocLs)

--- Initializing Ragas Testset Generator ---
Generating single_hop testset...


  generator_llm = LangchainLLMWrapper(llm_engine)
  generator_embeddings = LangchainEmbeddingsWrapper(embedding_engine)
Applying SummaryExtractor: 100%|██████████| 239/239 [16:43<00:00,  4.20s/it]
Applying CustomNodeFilter:   0%|          | 0/254 [00:00<?, ?it/s]Node acda5c8a-482f-424a-9c3b-3b1de4ca33f4 does not have a summary. Skipping filtering.
Node 75baf941-8f13-4777-a5e3-6ebd2f151b63 does not have a summary. Skipping filtering.
Applying CustomNodeFilter:   1%|          | 2/254 [00:03<08:08,  1.94s/it]Node 54d0e0fe-b396-4fb3-a48e-4ff2b493a536 does not have a summary. Skipping filtering.
Node 12c52ff6-be56-4e74-90f6-c8529827c531 does not have a summary. Skipping filtering.
Applying CustomNodeFilter:  22%|██▏       | 56/254 [02:28<06:37,  2.01s/it]Node 43e92e72-cf72-4e1c-a412-83fd22bb625a does not have a summary. Skipping filtering.
Applying CustomNodeFilter:  29%|██▉       | 74/254 [03:15<06:02,  2.01s/it]Node fc1bb5f6-ad32-4bb7-b9eb-7bd4c5bbd888 does not have a summary. Skipping fi

Saved DSPy dataset: ragas_testset_single_hop.json
Generating multi_hop testset...


Applying SummaryExtractor:  57%|█████▋    | 137/239 [05:23<02:57,  1.74s/it]

In [None]:


# ==========================================
# Step 4a: Define LangGraph RAG Agent
# ==========================================

# Define State
class GraphState(TypedDict):
    question: str
    context: List[Document]
    answer: str

def build_rag_graph(vectorstore):
    """Builds the compiled LangGraph workflow."""

    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # Node 1: Retrieve
    def retrieve(state: GraphState):
        print(f"Retrieving for: {state['question']}")
        docs = retriever.invoke(state["question"])
        return {"context": docs}

    # Node 2: Generate
    def generate(state: GraphState):
        print("Generating answer...")
        prompt = ChatPromptTemplate.from_template(
            """Answer the question based only on the following context:
            {context}

            Question: {question}
            """
        )

        # Format context into a single string for the prompt
        context_text = "\n\n".join([doc.page_content for doc in state["context"]])

        chain = prompt | llm_engine | StrOutputParser()
        response = chain.invoke({"context": context_text, "question": state["question"]})

        return {"answer": response}

    # Build Graph
    workflow = StateGraph(GraphState)
    workflow.add_node("retrieve", retrieve)
    workflow.add_node("generate", generate)

    workflow.add_edge(START, "retrieve")
    workflow.add_edge("retrieve", "generate")
    workflow.add_edge("generate", END)

    return workflow.compile()

# ==========================================
# Step 4b: Evaluate with Ragas & MLflow
# ==========================================
def evaluate_langgraph_agent(app, testsets_generator):
    """Evaluates the LangGraph app using Ragas and logs to MLflow."""

    # Ragas Metrics
    metrics = [faithfulness, answer_relevancy, context_precision, context_recall]

    # Iterate over generated testsets
    for name, test_df in testsets_generator:
        print(f"\n--- Evaluating Testset: {name} ---")

        # Start MLflow Run
        with mlflow.start_run(run_name=f"RAG_Eval_{name}"):
            log_params({"testset_type": name, "model": "gpt-4o", "vector_db": "Chroma"})

            questions = test_df['user_input'].tolist()
            ground_truths = test_df['reference'].tolist()

            answers = []
            contexts = []

            # Run Inference
            for q in questions:
                # Invoke LangGraph
                result = app.invoke({"question": q})

                answers.append(result["answer"])
                # Extract page content for Ragas
                retrieved_texts = [doc.page_content for doc in result["context"]]
                contexts.append(retrieved_texts)

            # Prepare Ragas Dataset
            eval_data = {
                "question": questions,
                "answer": answers,
                "contexts": contexts,
                "ground_truth": ground_truths
            }
            dataset = Dataset.from_dict(eval_data)

            # Run Evaluation
            results = evaluate(
                dataset,
                metrics=metrics,
                llm=LangchainLLMWrapper(llm_engine),
                embeddings=LangchainEmbeddingsWrapper(embedding_engine)
            )

            print(f"Results for {name}: {results}")

            # Log Metrics to MLflow
            for metric_name, score in results.items():
                log_metrics({metric_name: score})

            # Save CSV artifact
            csv_name = f"eval_results_{name}.csv"
            results.to_pandas().to_csv(csv_name, index=False)
            mlflow.log_artifact(csv_name)

# ==========================================
# Main Execution
# ==========================================
if __name__ == "__main__":
    if os.path.exists(PDF_PATH):
        # 1. Setup Data
        vectorstore, documents = process_and_vectorize_pdf(PDF_PATH, CHROMA_PERSIST_DIR)

        # 2. Build LangGraph Agent
        rag_app = build_rag_graph(vectorstore)

        # 3. Generate & Evaluate
        # Note: generate_ragas_testsets yields (name, df) tuples
        evaluate_langgraph_agent(rag_app, generate_ragas_testsets(documents))

    else:
        print(f"Error: File {PDF_PATH} not found.")