In [None]:
# Create a RAG pipeline using OpenAI embeddings and a Chroma vector store.
import os
from dotenv import load_dotenv
# Presumably loads the API keys used by the clients below from a local .env
# file into the process environment -- confirm which keys are required.
load_dotenv()

In [None]:
from langchain_openai import OpenAIEmbeddings

# Embedding model shared by both Chroma stores built later in this notebook.
openAIEmbeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [None]:
import os
# Set before the Excel loader runs; the author's note says this is needed to
# load complex Excel workbooks.
# NOTE(review): presumably silences tokenizer-library parallelism in the loader's dependencies -- confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [None]:
#load data
import os

from langchain_community.document_loaders import UnstructuredExcelLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Location of the standards workbook. Set the NA_STANDARDS_XLSX environment
# variable to run the notebook on another machine; the default is the
# original author's local path.
EXCEL_PATH = os.environ.get(
    "NA_STANDARDS_XLSX",
    "/Users/kuldeep/Documents/mriduladata/agenticAICourse/agentic2.0/data/SimplifiedPY2026-NA-Baseline-and-Alternative-T&D-Standards.xlsx",
)

# mode="elements" is passed straight through to the loader.
loader = UnstructuredExcelLoader(EXCEL_PATH, mode="elements")
docs = loader.load()

print(len(docs))

# Show only a small sample instead of dumping every loaded document.
docs[:5]

In [None]:
# Chunking settings for the Excel documents: size 500 with an overlap of 50.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=500)

In [None]:
# Split the loaded Excel documents into chunks for embedding.
new_docs = text_splitter.split_documents(docs)

In [None]:
#using chroma db
from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores.utils import filter_complex_metadata

# Store the Excel chunks in their own named collection. Without an explicit
# collection_name, Chroma.from_documents uses the default "langchain"
# collection, and the web store built later in this notebook would use that
# same default, so the two retrievers could return each other's documents.
# filter_complex_metadata is applied to the chunks before they are stored.
vectorstore = Chroma.from_documents(
    filter_complex_metadata(new_docs),
    openAIEmbeddings,
    collection_name="na_standards_2026",
)

In [None]:
# Retriever over the Excel store; k=3 results are requested per query.
retriever=vectorstore.as_retriever(search_kwargs={"k": 3})

In [None]:
# Retrieval spot check: single-county lookup.
retriever.invoke("what is FIPS code for Bacon county?")

In [None]:
# Retrieval spot check: the same question is used later as an evaluation example.
retriever.invoke("What are 2026 Network Adequacy Time and Distance Rules for Cardiothoracic Surgery for Rural Counties in GA? ")

In [None]:
from pydantic import BaseModel , Field

In [None]:
# Structured output expected from the supervisor LLM; handed to
# PydanticOutputParser in a later cell.
# NOTE(review): documented with comments on purpose -- a class docstring may be
# folded into the schema text sent to the model; confirm before adding one.
class TopicSelectionParser(BaseModel):
    # Category chosen for the user query.
    Topic: str=Field(description="Selected Topic")
    # The model's justification for that choice.
    Reasoning: str=Field(description="Reasoning behind topic selection")

In [None]:
from langchain.output_parsers import PydanticOutputParser

In [None]:
# Parser that turns the supervisor LLM's reply into a TopicSelectionParser;
# its format instructions are injected into the supervisor prompt.
parser=PydanticOutputParser(pydantic_object=TopicSelectionParser)

In [None]:
from typing import TypedDict
import operator
from typing import List
from pydantic import BaseModel , Field
from langchain.prompts import PromptTemplate
from typing import TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import HumanMessage, AIMessage
from langgraph.graph import StateGraph,END

In [None]:
# State schema passed to StateGraph and received by every node function.
class AgentState(TypedDict):
    # operator.add is attached as annotation metadata; presumably LangGraph uses
    # it as the reducer so each node's returned list is appended -- confirm.
    # NOTE(review): the nodes in this notebook store plain strings here, not
    # BaseMessage objects.
    messages: Annotated[Sequence[BaseMessage], operator.add]

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model used by the supervisor, RAG, LLM and WEB node functions.
model=ChatGoogleGenerativeAI(model='gemini-1.5-flash')
# Smoke test: send one trivial prompt and print the reply text.
output=model.invoke("hi")
print(output.content)

In [None]:
def supervisorFunc(state:AgentState):
    """Classify the latest message into one of three routing categories.

    Reads the last entry of ``state["messages"]``, asks ``model`` to choose a
    category, parses the reply with ``parser`` and returns the chosen topic as
    a one-element ``messages`` update.

    Args:
        state: Graph state; only ``state["messages"][-1]`` is read.

    Returns:
        ``{"messages": [<selected topic>]}``.
    """
    question = state["messages"][-1]
    print("Question", question)

    # routerFunc matches these category names by substring ("adequacy",
    # "internet"), so their wording must stay in sync with that function.
    # The instruction asks for the structured format only; telling the model to
    # reply with the bare category name would contradict the format
    # instructions and could make the parser fail.
    template = """
    Your task is to classify the given user query into one of the following categories: [2026 Network Adequacy Rules for GA,Something else but not real-time, Something else but I need to pull from internet to get latest information].
    Put exactly one category name, copied verbatim, in the Topic field and respond only in the format described below.

    User query: {question}
    {format_instructions}
    """

    prompt = PromptTemplate(
        template=template,
        # The field is `input_variables` (plural), as used by the other prompts
        # in this notebook.
        input_variables=["question"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )

    chain = prompt | model | parser
    response = chain.invoke({"question": question})
    print("Parsed response:", response)

    return {"messages": [response.Topic]}

In [None]:
# Supervisor smoke test: a question that needs real-time information.
# Note: this rebinds the notebook-level `state` variable.
state={"messages":["what is a today weather?"]}
supervisorFunc(state)

In [None]:
# Supervisor smoke test: a network-adequacy question.
# Note: this rebinds the notebook-level `state` variable.
state={"messages":["What are 2026 Network Adequacy Time and Distance Rules for Cardiothoracic Surgery for Rural Counties in GA? "]}
supervisorFunc(state)

In [None]:
# Supervisor smoke test: a general-knowledge question.
# Note: this rebinds the notebook-level `state` variable.
state={"messages":["who was first president of india?"]}
supervisorFunc(state)

In [None]:
def routerFunc(state:AgentState):
    """Choose the next graph branch from the last message in the state.

    Returns "RAG Call" when the message contains "adequacy", "WEB Call" when
    it contains "internet" (both matched case-insensitively, checked in that
    order), and "LLM Call" otherwise.
    """
    print("-> ROUTER ->")

    topic = state["messages"][-1]
    print("last_message:", topic)

    normalized = topic.lower()
    # Keyword order matters: "adequacy" takes precedence over "internet".
    for keyword, branch in (("adequacy", "RAG Call"), ("internet", "WEB Call")):
        if keyword in normalized:
            return branch
    return "LLM Call"

In [None]:
def format_docs(docs):
    """Concatenate each document's ``page_content``, separated by a blank line."""
    contents = [item.page_content for item in docs]
    return "\n\n".join(contents)

In [None]:
# RAG Function
def ragFunc(state:AgentState):
    """Answer the first message in the state using the notebook-level ``retriever``.

    The question is sent both to the retriever (whose results are joined by
    ``format_docs``) and, unchanged, into the prompt; ``model`` produces the
    answer, which is returned as a one-element ``messages`` update.
    """
    print("-> RAG Call ->")

    user_question = state["messages"][0]

    qa_template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"""
    qa_prompt = PromptTemplate(
        template=qa_template,
        input_variables=['context', 'question'],
    )

    # The same input feeds both keys: retrieved context and the raw question.
    chain_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}
    answer_chain = chain_inputs | qa_prompt | model | StrOutputParser()

    answer_text = answer_chain.invoke(user_question)
    return {"messages": [answer_text]}

In [None]:
# RAG smoke test with a loosely worded question.
# Note: this rebinds the notebook-level `state` variable.
state={"messages":["how many miles for Cardiothoracic Surgery in 2026 filling?"]}
ragFunc(state)

In [None]:
# RAG smoke test with the full time-and-distance question.
# Note: this rebinds the notebook-level `state` variable.
state={"messages":["What are 2026 Network Adequacy Time and Distance Rules for Cardiothoracic Surgery for Rural Counties in GA? "]}
ragFunc(state)

In [None]:
# LLM Function
def llmFunc(state:AgentState):
    """Answer the first message in the state directly with ``model`` (no retrieval).

    Returns the reply's ``content`` as a one-element ``messages`` update.
    """
    print("-> LLM Call ->")
    user_question = state["messages"][0]

    # Fixed instruction prefix followed by the user's question.
    # NOTE(review): the prompt text contains typos ("Anwer", "follow"); it is
    # reproduced unchanged here.
    instruction = "Anwer the follow question with your knowledge of the real world. Following is the user question: "
    reply = model.invoke(instruction + user_question)
    return {"messages": [reply.content]}

In [None]:
#WEB Function create a RAG
from langchain_community.document_loaders import SeleniumURLLoader
from langchain_community.vectorstores import Chroma

In [None]:
#load documents
# Fetch a single article page to act as the "web" knowledge source.
# NOTE(review): this rebinds `loader`, which earlier referred to the Excel loader.
loader=SeleniumURLLoader(
        urls=["https://www.nextgen.com/blog/industry-news/new-cms-regulation-establishes-maximum-appointment-wait-time-standards-for-medicaid"]
    )
documents=loader.load()
# Echo the loaded documents for inspection.
documents

In [None]:
#split documents
# NOTE(review): `text_splitter` and `new_docs` are rebound here. The Excel
# cells earlier in the notebook use the same names, so re-running those cells
# after this one would pick up this splitter (1000/200) and these web chunks.
text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True)

new_docs=text_splitter.split_documents(documents)
# Echo the chunks for inspection.
new_docs

In [None]:
# Store the web chunks in their own named collection. Without an explicit
# collection_name, Chroma.from_documents uses the default "langchain"
# collection, which the Excel store built earlier in this notebook would also
# use, so the two retrievers could return each other's documents.
webVectorStore = Chroma.from_documents(
    new_docs,
    openAIEmbeddings,
    collection_name="cms_web_article",
)
# k=3 results are requested per query.
webRetriever=webVectorStore.as_retriever(search_kwargs={"k": 3})

In [None]:
# Retrieval spot check against the web article store.
webRetriever.invoke("what is new rule starting 2027?")

In [None]:
# NOTE(review): this redefines the identical format_docs helper declared
# earlier in the notebook; the name is simply rebound to the same behaviour.
def format_docs(docs):
    """Return every document's ``page_content`` joined by blank lines."""
    pieces = []
    for entry in docs:
        pieces.append(entry.page_content)
    return "\n\n".join(pieces)

In [None]:
def webFunc(state:AgentState):
    """Answer the first message in the state using the notebook-level ``webRetriever``.

    The question is sent both to the retriever (whose results are joined by
    ``format_docs``) and, unchanged, into the prompt; ``model`` produces the
    answer, which is returned as a one-element ``messages`` update.
    """
    print("-> WEB Call ->")
    user_question = state["messages"][0]

    # The continuation lines keep their leading spaces: they are part of the prompt text.
    web_template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
        Question: {question}
        Context: {context}
        Answer:"""
    web_prompt = PromptTemplate(
        template=web_template,
        input_variables=['context', 'question'],
    )

    # The same input feeds both keys: retrieved context and the raw question.
    chain_inputs = {"context": webRetriever | format_docs, "question": RunnablePassthrough()}
    answer_chain = chain_inputs | web_prompt | model | StrOutputParser()

    answer_text = answer_chain.invoke(user_question)
    return {"messages": [answer_text]}

In [None]:
# WEB node smoke test.
# Note: this rebinds the notebook-level `state` variable.
state={"messages":["what is latest 2027 Network Adequacy requirements?"]}
webFunc(state)

In [None]:
from langgraph.graph import StateGraph,END
# Graph builder whose state schema is AgentState.
workflow=StateGraph(AgentState)

In [None]:
# Node that classifies the incoming question.
workflow.add_node("Supervisor",supervisorFunc)

In [None]:
# Node that answers from the Excel-backed retriever.
workflow.add_node("RAG",ragFunc)

In [None]:
# Node that answers with the chat model alone.
workflow.add_node("LLM",llmFunc)

In [None]:
# Node that answers from the web-article retriever.
workflow.add_node("WEB",webFunc)

In [None]:
# Every run starts at the Supervisor node.
workflow.set_entry_point("Supervisor")

In [None]:
# After the Supervisor runs, routerFunc returns one of the keys below and the
# mapping selects the node to run next. The routing function defined in this
# notebook is `routerFunc`; no `router` name exists.
workflow.add_conditional_edges(
    "Supervisor",
    routerFunc,
    {
        "RAG Call": "RAG",
        "LLM Call": "LLM",
        "WEB Call": "WEB",
    },
)

In [None]:
# Each answering node is terminal: it connects straight to END.
workflow.add_edge("RAG",END)
workflow.add_edge("LLM",END)
workflow.add_edge("WEB",END)

In [None]:
# Compile the graph definition into the runnable `app` used by the cells below.
app=workflow.compile()

In [None]:
from IPython.display import Image, display
# Render the compiled graph as a Mermaid PNG.
# NOTE(review): PNG rendering may require network access or an extra renderer -- confirm.
display(Image(app.get_graph().draw_mermaid_png()))

In [None]:
# End-to-end run through the compiled graph; the cell displays the value returned by app.invoke.
# Note: this rebinds the notebook-level `state` variable.
state={"messages":["How many miles for Cardiothoracic surgery in Network Adequacy?"]}
app.invoke(state)

In [None]:
# End-to-end run through the compiled graph; the cell displays the value returned by app.invoke.
# Note: this rebinds the notebook-level `state` variable.
state={"messages":["What are 2026 network adequacy facility type specialties"]}
app.invoke(state)

In [None]:
#validation
from langsmith import Client
# NOTE(review): rebinds Annotated/TypedDict, which were imported from `typing` earlier.
from typing_extensions import Annotated, TypedDict

# LangSmith client used for dataset creation and evaluation below.
client = Client()

# Define the examples for the dataset.
# Each example pairs an input question with the reference ("ground truth") answer.
# NOTE(review): the reference answers appear to contain typos ("Randolh",
# "Wikes", "Catherization") -- confirm against the source workbook, since the
# grader compares student answers to this text.
examples = [
    {
        "inputs": {"question": "What are 2026 Network Adequacy Time and Distance Rules for Cardiothoracic Surgery for Rural Counties in GA? "},
        "outputs": {"answer": "2026 Network Adequacy Time and Distance Rules for Cardiothoracic Surgery for Rural Counties in GA are Baseline Distance: 90 Miles, Baseline Time: 110 Minutes and Alternative Distance: 90 Miles"},
    },
    {
        "inputs": {"question": "What are 2026 Network Adequacy rural counties in GA?"},
        "outputs": {"answer": "GA Rural counties are Appling, Bacon, Calhoun, Charlton, Clay, Early, Glascock, Hancock, Jeff Davis, Lincoln, McIntosh, Marion, Meriwether, Miller, Mitchell, Quitman, Randolh, Stewart, Telfair, Warren, Wheeler and Wikes."},
    },
    {
        "inputs": {"question": "What are 2026 network adequacy facility type specialties?"},
        "outputs": {"answer": "2026 network adequacy facility type specialties are Acute Inpatient Hospitals(Must have emergency services available 24/7), Cardiac Catherization Services, Cardiac Surgery Program, Critical Care Services - Intensive Care Units (ICU), Diagnostic Radiology (Free-standing; hospital outpatient; ambulatory health facilities with Dx Radiology),Inpatient or Residential Behavioral Health Facility Services, Mammography, Outpatient Infusion/ Chemotherapy, Skilled Nursing Facilities, Surgical Services (Ambulatory Surgical Centers and Outpatient Hospital), Urgent Care."},
    },
]

In [None]:
# Create the dataset and examples in LangSmith.
# The has_dataset guard makes this cell safe to re-run: the dataset and its
# examples are only created when no dataset with this name exists yet.
# NOTE(review): as a consequence, later edits to `examples` are not pushed to
# an already existing dataset.
dataset_name = "2026 NetworkAdequcy Q&A"
if not client.has_dataset(dataset_name=dataset_name):
    dataset = client.create_dataset(dataset_name=dataset_name)
    client.create_examples(
        dataset_id=dataset.id,
        examples=examples
    )

In [None]:
# Grade output schema
from langchain_openai import ChatOpenAI
# Structured result requested from the grader LLM via with_structured_output.
class CorrectnessGrade(TypedDict):
    # Note that the order in which the fields are defined is the order in which the model will generate them.
    # It is useful to put explanations before responses because it forces the model to think through
    # its final response before generating it:
    explanation: Annotated[str, ..., "Explain your reasoning for the score"]
    correct: Annotated[bool, ..., "True if the answer is correct, False otherwise."]


# Grade prompt: system instructions sent to the grader LLM on every evaluation
correctness_instructions = """You are a teacher grading a quiz. 

You will be given a QUESTION, the GROUND TRUTH (correct) ANSWER, and the STUDENT ANSWER. 

Here is the grade criteria to follow:
(1) Grade the student answers based ONLY on their factual accuracy relative to the ground truth answer. 
(2) Ensure that the student answer does not contain any conflicting statements.
(3) It is OK if the student answer contains more information than the ground truth answer, as long as it is factually accurate relative to the  ground truth answer.

Correctness:
A correctness value of True means that the student's answer meets all of the criteria.
A correctness value of False means that the student's answer does not meet all of the criteria.

Explain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct. 

Avoid simply stating the correct answer at the outset."""

# Grader LLM: gpt-4o at temperature 0, wrapped so that its reply is returned as
# a CorrectnessGrade using the strict JSON-schema output method.
grader_llm = ChatOpenAI(model="gpt-4o", temperature=0).with_structured_output(
    CorrectnessGrade, method="json_schema", strict=True
)


def correctness(inputs: dict, outputs: dict, reference_outputs: dict) -> bool:
    """Grade one answer against its reference using the grader LLM.

    Args:
        inputs: Example inputs; ``inputs["question"]`` is read.
        outputs: Target outputs; ``outputs["answer"]`` is the answer under test.
        reference_outputs: Dataset outputs; ``reference_outputs["answer"]`` is the ground truth.

    Returns:
        The ``correct`` field of the structured grade.
    """
    graded_material = f"""\
QUESTION: {inputs['question']}
GROUND TRUTH ANSWER: {reference_outputs['answer']}
STUDENT ANSWER: {outputs['answer']}"""

    # System message carries the grading rubric; user message carries the material to grade.
    conversation = [
        {"role": "system", "content": correctness_instructions},
        {"role": "user", "content": graded_material},
    ]
    verdict = grader_llm.invoke(conversation)
    return verdict["correct"]


In [None]:
#call original RAG model with same questions
def target(inputs: dict) -> dict:
    """Run the compiled graph on one dataset example and return its answer.

    Args:
        inputs: Example inputs from the dataset; ``inputs["question"]`` is the
            question to ask.

    Returns:
        ``{"answer": <last message of the graph's final state>}``.
    """
    # Invoke the graph with this example's question. Reading the notebook-level
    # `state` variable instead would return whatever a previous cell left
    # there, regardless of the question being evaluated.
    final_state = app.invoke({"messages": [inputs["question"]]})
    return {"answer": final_state["messages"][-1]}

In [None]:
#connect to Application
def validator_node(state: AgentState):
    """Print the given state and its last message, then run the LangSmith evaluation.

    The evaluation calls ``target`` on every example of the named dataset and
    scores it with ``correctness``; the experiment results are printed.
    Nothing is returned.
    """
    print(f"state: {state}")
    latest_message = state["messages"][-1]
    print(f"answer: {latest_message}")

    eval_dataset = "2026 NetworkAdequcy Q&A"
    run_metadata = {"version": "LCEL context, gpt-4-0125-preview"}
    results = client.evaluate(
        target,  # replace with output from each node
        data=eval_dataset,
        evaluators=[correctness],
        experiment_prefix="rag-doc-relevance",
        metadata=run_metadata,
    )
    print(results)

In [None]:
# The graph's answer lives in the state returned by app.invoke. The input
# `state` dict still holds only the original question, so reading
# state["messages"][-1] would print the question, not the answer.
final_state = app.invoke(state)
answer = final_state["messages"][-1]
print(f"answer: {answer}")