## Load Necessary Dependencies

In [None]:
import json
from langchain_openai import AzureChatOpenAI
from langchain_core.tools import tool
from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph.message import add_messages
from langchain_core.messages import SystemMessage
from langgraph.graph import StateGraph, START, END
from langgraph.prebuilt import create_react_agent,ToolNode, tools_condition
from datetime import datetime
import csv
import pandas as pd

## Setup Authentication and LLM Client
The following code sets up the UAIS environment to authenticate the application and securely connect to the Azure OpenAI services. It retrieves the access token required for authorization and initializes the LLM client using Azure OpenAI authentication. (The embeddings deployment name is also read from the environment, but no embedding client is created in this cell.)

In [None]:
import os
from dotenv import load_dotenv
import httpx

def get_access_token():
    """Fetch an OAuth2 access token using the client-credentials grant.

    The client id and secret are read from the ``AIML_Training`` secret scope
    through ``dbutils.secrets``. ``dbutils`` is not defined in this file;
    presumably it is the notebook-provided global -- confirm before running
    this code outside that environment.

    Returns:
        The value of the ``access_token`` field in the token endpoint's JSON
        response.

    Raises:
        httpx.HTTPStatusError: If the token endpoint answers with a 4xx/5xx
            status.
        KeyError: If a successful response carries no ``access_token`` field.
    """
    auth = "https://api.uhg.com/oauth2/token"
    scope = "https://api.uhg.com/.default"
    grant_type = "client_credentials"

    body = {
        "grant_type": grant_type,
        "scope": scope,
        "client_id": dbutils.secrets.get(scope="AIML_Training", key="client_id"),
        "client_secret": dbutils.secrets.get(scope="AIML_Training", key="client_secret"),
    }
    headers = {"Content-Type": "application/x-www-form-urlencoded"}

    with httpx.Client() as client:
        resp = client.post(auth, headers=headers, data=body, timeout=60)
        # Surface authentication/server failures as an HTTP error instead of
        # an opaque KeyError (or JSON decode error) on the error payload.
        resp.raise_for_status()
        return resp.json()["access_token"]


# Read the UAIS connection settings from the local env file.
load_dotenv('./Data/UAIS_vars.env')

# Required settings: subscripting os.environ raises KeyError right here if
# any of them is missing.
AZURE_OPENAI_ENDPOINT = os.environ["AZURE_OPENAI_ENDPOINT"]
OPENAI_API_VERSION = os.environ["OPENAI_API_VERSION"]
EMBEDDINGS_DEPLOYMENT_NAME = os.environ["EMBEDDINGS_DEPLOYMENT_NAME"]
MODEL_DEPLOYMENT_NAME = os.environ["MODEL_DEPLOYMENT_NAME"]
PROJECT_ID = os.environ["PROJECT_ID"]

# Chat model shared by every tool and by the agent graph.
# NOTE(review): the access token is fetched once, at construction time;
# confirm its lifetime covers long batch runs.
model = AzureChatOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    azure_deployment=MODEL_DEPLOYMENT_NAME,
    api_version=OPENAI_API_VERSION,
    azure_ad_token=get_access_token(),
    default_headers={"projectId": PROJECT_ID},
    temperature=0,
)

## Preparing and loading necessary files

In [None]:
# Load the reference data and patient records used by the tools and the agent.
# The encoding is given explicitly because open() otherwise falls back to the
# platform default, which can mis-decode non-ASCII characters in the JSON.

# Lookup data interpolated into the summarisation prompts.
with open('./Data/reference_codes.json', 'r', encoding='utf-8') as f:
    reference_codes = json.load(f)

# Records the agent is run on for validation.
with open('./Data/validation_records.json', 'r', encoding='utf-8') as f:
    patient_records = json.load(f)

# Records the agent is run on for the final submission.
with open('./Data/test_records.json', 'r', encoding='utf-8') as f:
    test_patient_records = json.load(f)

# Policy data interpolated into the policy-summary prompt.
with open('./Data/insurance_policies.json', 'r', encoding='utf-8') as f:
    policies = json.load(f)

## Defining & Implementing Tools

### Tool 1 - **summarize_patient_record**

Tool that calculates the patient's age and summarises the patient record. It takes the patient record as a dictionary and returns an LLM-generated summary string.

In [None]:
@tool
def summarize_patient_record(record_str:dict) -> str:
    """Summarise the patient record using patient record and reference_codes"""
    # NOTE: the docstring above is deliberately left verbatim -- with @tool it
    # serves as the tool description shown to the LLM, so it is runtime text.
    #
    # Args:
    #   record_str: the patient record as a dict (despite the name). It must
    #     contain 'date_of_birth' and 'date_of_service' in YYYY-MM-DD form.
    # Returns:
    #   The text content of the model's reply to the summary prompt.
    # Raises:
    #   KeyError if either date field is missing; ValueError if a date does
    #   not match the YYYY-MM-DD format.
    dob = datetime.strptime(record_str['date_of_birth'], "%Y-%m-%d")
    date_of_service = datetime.strptime(record_str['date_of_service'], "%Y-%m-%d")

    # Age in whole years on the date of service: subtract one when the
    # birthday has not yet occurred in the service year.
    birthday_pending = (date_of_service.month, date_of_service.day) < (dob.month, dob.day)
    age = date_of_service.year - dob.year - birthday_pending

    # Work on a copy so the caller's record is not mutated by adding 'age'.
    data = {**record_str, 'age': age}
    prompt = f"""
     Create a clearly formatted summary of a patient using the data {data} and mapping the reference codes using {reference_codes}.
     Also calculate the age by subtracting date of birth from date of service if the age is not provided to you in the data
        Include the following seven labeled sections in the specified order:
        • Patient Demographics: Include: name, gender, and age
        • Insurance Policy ID
        • Diagnoses and Descriptions: Include ICD10 codes and their mapped descriptions.
        • Procedures and Descriptions: Include CPT codes and their mapped descriptions.
        • Preauthorization Status: Clearly mention if preauthorization was required and whether it was obtained.
        • Billed Amount (in USD)
        • Date of Service
    """
    result = model.invoke(prompt)
    return result.content


### Tool 2 - **summarize_policy_guidelines**

Tool that summarises the policy guidelines. It takes the policy ID (a string) as its parameter and returns an LLM-generated summary string.

In [None]:
@tool
def summarize_policy_guidelines(policy_id:str) -> str:
    """Summarise the policy guidelines using policy_id and reference_codes mentioned in the patient record """
    # NOTE: the docstring above is left verbatim -- with @tool it serves as
    # the tool description shown to the LLM, so it is runtime text.
    #
    # The whole `policies` and `reference_codes` objects are interpolated into
    # the prompt; the model is asked to summarise the policy whose ID is
    # `policy_id`. Returns the text content of the model's reply.
    summary_request = f"""
    Generate a well-structured and clearly formatted summary for the policy with ID: {policy_id}, using the provided data: {policies} and {reference_codes}.

    Your summary should include the following sections in order:

    1. **Policy Details**
    - Policy ID
    - Plan Name

    2. **Covered Procedures**
    For each procedure listed in the policy, present the following details as separate entries:
    - **Procedure Code and Description** (use CPT code mappings)
    - **Covered Diagnoses and Descriptions** (use ICD-10 code mappings)
    - **Gender Restriction**
    - **Age Range**
    - **Preauthorization Requirement**
    - **Notes on Coverage** (if applicable)

    Ensure each procedure is clearly separated and all subpoints are properly labeled for readability.
    """

    return model.invoke(summary_request).content

### Tool 3 - **get_coverage_status**

Tool that determines the coverage status of the patient's claim. It takes the patient summary and the policy summary as parameters and returns the coverage status.

In [None]:
@tool
def get_coverage_status(patient_summary:str, policy_summary:str) -> str:
    """Summarise the coverage status using patient summary and policy summary"""
    # NOTE: the docstring above is left verbatim -- with @tool it serves as
    # the tool description shown to the LLM, so it is runtime text.
    #
    # Both summaries are interpolated into a prompt that lists the approval
    # conditions and asks for "APPROVED" or "ROUTE FOR REVIEW". Returns the
    # text content of the model's reply.
    decision_request = f"""
    Using the provided patient summary: {patient_summary} and policy summary: {policy_summary}, evaluate whether the claim should be classified as "APPROVED" or "ROUTE FOR REVIEW" based on the policy guidelines.

    A procedure should be marked as "APPROVED" **only if all** of the following conditions are met:

    1. The patient's diagnosis code(s) match one or more of the policy-covered diagnoses for the claimed procedure.
    2. The procedure code is explicitly listed in the policy, and all associated conditions are satisfied.
    3. The patient's age falls within the policy-defined age range (inclusive of the lower bound, exclusive of the upper bound).
    4. The patient's gender matches the policy’s requirement for the procedure.
    5. If the policy requires preauthorization for the procedure, it must have been obtained.

    Only procedures and diagnoses explicitly listed in the patient record should be considered during evaluation.

    Return a clear decision: either "APPROVED" or "ROUTE FOR REVIEW".
    """

    return model.invoke(decision_request).content

**Listing out the tools and binding them with LLM model**

In [None]:
# Every @tool-decorated function the LLM may call, in the order the agent
# prompt lists them.
tools = [
    summarize_patient_record,
    summarize_policy_guidelines,
    get_coverage_status,
]

# Chat model bound to the tools above; used by the graph's reasoning node.
model_with_tools = model.bind_tools(tools=tools)

## Implementing ReAct Agent

**SYSTEM PROMPT**

In [None]:
# System prompt for the agent: names the three tools, the order in which to
# use them, the fallback decision (ROUTE FOR REVIEW) and the expected shape of
# the final answer. The text is sent to the model as-is, so edits to its
# wording change agent behaviour.
AGENT_PROMPT_TXT = """You are an agent from an insurance company who is an expert in reviewing patient record, reviewing policy guidelines and decide whether the claim needs to approved or need to be routed for human review.

Given a patient record, call the below relevant tools and provide the most appropriate response:
- summarize_patient_record
- summarize_policy_guidelines
- get_coverage_status

Follow the below steps to help you make more informed decisions:
  - Use the patient record and summarise it
  - Extract the 'insurance_policy_id' from the patient record and summarise the policy guidelines
  - Use the summarised patient record and summarised policy guidelines and determine whether the claim needs to be "APPROVED" or "ROUTE FOR REVIEW" based on the policy guidelines.

If you're unable to find relevant information related to patient or policy or make any certain decisions , just end the process for human to make the decison i.e.ROUTE FOR REVIEW
   

The final response should contain the following:
- Decision - APPROVED or ROUTE FOR REVIEW.
- Reason - Summarise why the above decision has been taken
"""
# Wrapped as a SystemMessage so it can be prepended to the conversation on
# every model call.
AGENT_SYS_PROMPT = SystemMessage(content=AGENT_PROMPT_TXT)

**Defining the AGENT GRAPH**

In [None]:
class State(TypedDict):
    """State schema for the agent graph: a single list of messages."""
    # `add_messages` is attached as Annotated metadata; presumably LangGraph
    # uses it as the reducer that merges node output into this list rather
    # than replacing it -- confirm against the LangGraph documentation.
    messages: Annotated[list, add_messages]

# Graph node that performs the reasoning/planning step with the LLM
def tool_calling_llm(state: State) -> State:
    """Call the tool-bound model on the conversation so far.

    The system prompt is prepended to the messages held in ``state`` and the
    model's single reply is returned under the ``messages`` key.
    """
    conversation = [AGENT_SYS_PROMPT] + state["messages"]
    reply = model_with_tools.invoke(conversation)
    return {"messages": [reply]}

# Node names used when wiring the graph. The conditional edge below lists
# "tools" as one of the targets of tools_condition, so the tool node carries
# exactly that name.
_LLM_NODE = "tool_calling_llm"
_TOOLS_NODE = "tools"

# Assemble the agent graph over the State schema
builder = StateGraph(State)

# Nodes: the LLM reasoning step and the tool executor
builder.add_node(_LLM_NODE, tool_calling_llm)
builder.add_node(_TOOLS_NODE, ToolNode(tools=tools))

# Entry point: every run starts at the reasoning node
builder.add_edge(START, _LLM_NODE)

# After the reasoning node, tools_condition decides where to go next:
#   - latest LLM message requests a tool call -> tools node
#   - latest LLM message is a plain answer    -> END
builder.add_conditional_edges(_LLM_NODE, tools_condition, [_TOOLS_NODE, END])

# Feedback loop: tool results are handed back to the reasoning node
builder.add_edge(_TOOLS_NODE, _LLM_NODE)

# Compile the runnable agent
healthbuddy_agent = builder.compile()

In [None]:
# Run the agent once per validation record and keep the final message of each
# run as the generated output.
responses = []
for patient_record in patient_records:
    print(patient_record)
    # Named `agent_input` rather than `input` so the builtin input() is not
    # rebound for the rest of the notebook session.
    agent_input = {"messages":[("user",f"What is the coverage status of {patient_record}")]}
    result = healthbuddy_agent.invoke(agent_input)
    responses.append(
        {
            "patient_id": patient_record["patient_id"],
            # The last message in the returned state is the agent's final answer.
            "generated_output": result['messages'][-1].content
        })


**Creating csv on validation data**

In [None]:
# Output location and column layout for the validation results
validation_csv_path = "./agent_validation_record_result.csv"  # <-- Change this to your desired path
columns = ["patient_id", "generated_output"]

# The header is written only when the file is being created by this run
write_header = not os.path.exists(validation_csv_path)

# NOTE(review): the file is opened in append mode, so re-running this cell
# adds another copy of the rows after the existing ones.
with open(validation_csv_path, mode='a', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=columns)
    if write_header:
        writer.writeheader()
    writer.writerows(responses)

In [None]:
def _parse_score(reply_text: str) -> int:
    """Return the first integer found in the judge's reply, capped at 10."""
    import re  # local import keeps this block self-contained

    match = re.search(r"\d+", reply_text)
    if match is None:
        raise ValueError(f"No integer score found in judge reply: {reply_text!r}")
    return min(int(match.group()), 10)


# Function for meaning based comparison
def llm_judge(agentReponse: str , actualResponse:str):
    """Ask the LLM to score the agent's response against the reference.

    Args:
        agentReponse: Response produced by the agent.
        actualResponse: Reference response to compare against.

    Returns:
        The integer score parsed from the model's reply, capped at 10.

    Raises:
        ValueError: If the model's reply contains no integer.
    """
    prompt = f"""
    You are an evaluator. Compare the agent's response:
    {agentReponse}

    to the actual response:
    {actualResponse}

    Score the agent's response from 0 to 10 based on:
    - Whether the claim decision matches.
    - Whether the reasoning for the decision is the same or sufficiently similar.

    Return only the integer score.
    """

    result = model.invoke(prompt)
    # Take the first integer only: joining every digit in the reply would turn
    # an answer such as "8/10" into 810.
    return _parse_score(result.content.strip())


## AGENT VALIDATION AND AVERAGE SCORE

### Meaning based comparison

In [None]:
def validate_agent():
    """Score the agent's validation outputs against the reference responses.

    Reads the generated outputs from ``./agent_validation_record_result.csv``
    and the references from ``./Data/validation_reference_results.csv``, asks
    ``llm_judge`` for a score per pair, and prints each pair, each score and
    the average score.

    Raises:
        IndexError: If either CSV holds fewer rows than there are validation
            records.
    """
    record_count = len(patient_records)
    if record_count == 0:
        # Nothing to score; also avoids dividing by zero for the average.
        print("No validation records to score.")
        return

    generated = pd.read_csv('./agent_validation_record_result.csv')
    reference = pd.read_csv('./Data/validation_reference_results.csv')

    if len(generated) < record_count or len(reference) < record_count:
        # IndexError matches what positional row access would have raised.
        raise IndexError(
            f"Expected at least {record_count} rows, found {len(generated)} "
            f"generated and {len(reference)} reference rows"
        )

    # The results CSV is written in append mode, so earlier runs may precede
    # the latest one; the most recent run is the last `record_count` rows.
    agent_outputs = generated['generated_output'].tail(record_count)
    reference_outputs = reference['reference_response'].head(record_count)

    totalscore = 0
    for agent_output, actual_output in zip(agent_outputs, reference_outputs):
        print(agent_output)
        print("\n")
        print(actual_output)
        print("\n")
        score = llm_judge(agent_output, actual_output)
        totalscore += score
        print(score)

    print("Average Score of the agent is - ",totalscore/record_count)

validate_agent()

## FINAL - TESTING THE AGENT AND SUBMISSION

In [None]:
# Run the agent once per test record and keep the final message of each run
# as the generated response.
test_responses = []
for patient_record in test_patient_records:
    # Named `agent_input` rather than `input` so the builtin input() is not
    # rebound for the rest of the notebook session.
    agent_input = {"messages":[("user",f"What is the coverage status of {patient_record}")]}
    result = healthbuddy_agent.invoke(agent_input)
    test_responses.append(
        {
            "patient_id": patient_record["patient_id"],
            # The last message in the returned state is the agent's final answer.
            "generated_response": result['messages'][-1].content
        })
    
# Output location and column layout for the submission file
submission_columns = ["patient_id", "generated_response"]
submission_csv_path = "./submission.csv"  # <-- Change this to your desired path

# The header is written only when the file is being created by this run
write_header = not os.path.exists(submission_csv_path)

# NOTE(review): the file is opened in append mode, so re-running this cell
# adds another copy of the rows after the existing ones.
with open(submission_csv_path, mode='a', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=submission_columns)
    if write_header:
        writer.writeheader()
    writer.writerows(test_responses)