In [10]:
import os
from dotenv import load_dotenv

from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

# Load variables from a local .env file into the process environment before any client is created.
load_dotenv()

True

In [2]:
from langchain.chat_models import init_chat_model

# Chat model used for answer generation: "gemini-2.5-flash" through the "google_genai" provider.
# NOTE(review): credentials are presumably read from the environment populated by load_dotenv() — confirm.
llm = init_chat_model("gemini-2.5-flash", model_provider="google_genai")

In [3]:
# Embedding model handed to the Chroma vector store as its embedding function.
# NOTE(review): presumably this must be the same model the persisted collection was indexed with;
# the indexing step is not part of this notebook section — confirm.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

  from .autonotebook import tqdm as notebook_tqdm


### Load Vector store

In [None]:
# On-disk location of the Chroma database, relative to the notebook's working directory.
persist_directory = "chroma_langchain_db"

# Open the "example_collection" collection stored under `persist_directory`.
# Queries are embedded with the `embeddings` model defined above.
# NOTE(review): presumably the collection was populated by a separate ingestion step — confirm.
vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory=persist_directory,
)


### Integrating structured LLM with the retriever

In [207]:
import json
from typing import TypedDict
from langchain.prompts import ChatPromptTemplate
from langgraph.graph import START, StateGraph
from pydantic import BaseModel, Field

In [208]:
#output schema

class QueryWithRemarks(BaseModel):
    # Output schema passed to `llm.with_structured_output(...)`.
    # Deliberately documented with comments rather than a class docstring so the
    # generated model schema stays exactly as it was.
    # NOTE(review): both fields default to None although their annotations do not
    # allow None; consider `str | None` / `list[str] | None` once it is confirmed
    # the model provider accepts nullable fields.

    # Free-text commentary produced by the model.
    remarks: str = Field(
        default=None,
        description="Additional context, observations, or analyst notes about the query.",
    )
    # Short list of key items extracted by the model.
    relevant_details: list[str] = Field(
        default=None,
        description="Key details, metadata, or attributes that should guide retrieval or filtering.",
    )

In [209]:
# Wrap the chat model so that its replies are returned in the shape of the QueryWithRemarks schema.
structured_llm = llm.with_structured_output(QueryWithRemarks)

In [210]:
# Two-message chat prompt used by `generate`.
# Template variables: title, description, steps, regulatory_tags, context.
# Adjacent string literals are concatenated by Python, so every fragment must carry
# its own trailing space/newline; otherwise sentences run together in the prompt.
custom_prompt = ChatPromptTemplate.from_messages([
    ("system",
     "You are a domain expert assistant for test execution and compliance verification. "
     "Your job is to analyze each individual test action and provide only the most relevant supporting information from the given context. "
     "Do not invent facts. Focus on compliance, security, and functional details that apply directly to the action.\n\n"
     "Use Metadata for Filters: If regulatory tags are provided (e.g., HIPAA, FDA_510K, IEC_62304), "
     "prioritize retrieving and applying context that matches those tags. This ensures that the supporting information aligns with the compliance requirements of the test case."),

    ("human",
     "Title: {title}\n\n"
     "Description: {description}\n\n"
     "Test Action: {steps}\n\n"
     "Regulatory Tags: {regulatory_tags}\n\n"
     "Retrieved Context:\n{context}\n\n"
     "Based on the context (and matching the regulatory tags if applicable), explain what requirements, "
     "rules, or technical details are relevant for this action.\n\nAnswer:")
])

In [None]:
class State(TypedDict):
    """Shared state passed between the nodes of the retrieval graph.

    The annotations describe what the nodes in this notebook actually read and
    write; they are not enforced at runtime.
    """

    # Test-case title, forwarded to the prompt.
    title: str
    # Test-case description, forwarded to the prompt.
    description: str
    # Test steps; `retrieve` reads the "action" entry of every step.
    steps: list[dict]
    # Retrieved passages joined into one string by `retrieve`.
    context: str
    # Regulatory tags (e.g. "HIPAA"), forwarded to the prompt.
    regulatory_tags: list[str]
    # Whatever the model call in `generate` returns: a QueryWithRemarks object
    # when the structured LLM is used, otherwise model-dependent.
    answer: object

In [212]:
def retrieve(state: State):
    """Collect supporting context from the vector store for every test step.

    Runs one similarity search per entry of ``state["steps"]``, using the
    step's ``"action"`` text as the query, and concatenates the page content
    of all returned documents.

    Args:
        state: Graph state; only ``state["steps"]`` is read. Every step must
            be a mapping that contains an ``"action"`` key.

    Returns:
        A partial state update ``{"context": text}`` where ``text`` holds the
        retrieved passages separated by blank lines, with surrounding
        whitespace stripped (an empty string when nothing was retrieved).

    Raises:
        KeyError: If a step mapping has no ``"action"`` key.
    """
    passages_per_step = []
    for step in state["steps"]:
        action = step["action"]
        hits = vector_store.similarity_search(action)
        # Interpolate a local instead of nesting double quotes inside the
        # f-string, which is a SyntaxError on Python versions before 3.12.
        print(f"Searching context for {action}")
        passages_per_step.append("\n\n".join(doc.page_content for doc in hits))

    # Joining once replaces repeated string concatenation and yields the same text.
    return {"context": "\n\n".join(passages_per_step).strip()}

In [217]:
def generate(state: State):
    """Render the prompt from the graph state and query the structured LLM.

    Args:
        state: Graph state; the keys ``title``, ``description``, ``steps``,
            ``regulatory_tags`` and ``context`` are read and passed to
            ``custom_prompt`` unchanged.

    Returns:
        A partial state update ``{"answer": response}`` where ``response`` is
        whatever ``structured_llm.invoke`` returns for the rendered prompt.
    """
    prompt_fields = ("title", "description", "steps", "regulatory_tags", "context")
    prompt_inputs = {field: state[field] for field in prompt_fields}
    structured_answer = structured_llm.invoke(custom_prompt.invoke(prompt_inputs))
    return {"answer": structured_answer}

In [218]:
# Linear pipeline: START -> retrieve -> generate.
graph_builder = StateGraph(State)
# Registers both functions as nodes (named after the functions) and chains them in order.
graph_builder.add_sequence([retrieve, generate])
# Entry point of the graph.
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [219]:
# NOTE(review): this re-declares the State schema that is already declared before
# `retrieve`. The graph is compiled in the cell above, so this later declaration
# does not influence it; consider removing this cell.
class State(TypedDict):
    """Shared state passed between the nodes of the retrieval graph.

    The annotations describe what the nodes in this notebook actually read and
    write; they are not enforced at runtime.
    """

    # Test-case title, forwarded to the prompt.
    title: str
    # Test-case description, forwarded to the prompt.
    description: str
    # Test steps; `retrieve` reads the "action" entry of every step.
    steps: list[dict]
    # Retrieved passages joined into one string by `retrieve`.
    context: str
    # Regulatory tags (e.g. "HIPAA"), forwarded to the prompt.
    regulatory_tags: list[str]
    # Whatever the model call in `generate` returns: a QueryWithRemarks object
    # when the structured LLM is used, otherwise model-dependent.
    answer: object

In [220]:
# Read the test case from disk. The encoding is given explicitly so that the
# file is decoded the same way on every platform instead of with the locale default.
with open('test-case.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Run the pipeline. Only the input fields are supplied here; `context` and
# `answer` are filled in by the `retrieve` and `generate` nodes.
result = graph.invoke(
    {"title": data["title"],
     "description": data["description"],
     "steps": data["steps"],
     "regulatory_tags": data["regulatory_tags"]}
    )

Searching context for Review HIPAA requirements
Searching context for Test patient_data_handling against requirements
Searching context for Document compliance evidence


In [None]:
## OUTPUT WITH STRUCTURED SCHEMA
# `result['answer']` is accessed by attribute here, so this cell expects the
# QueryWithRemarks object produced when `generate` calls `structured_llm`.

print(f"Remarks: {result['answer'].remarks}\n\n")
print(f"Relevant Details: {result['answer'].relevant_details}")

Remarks: The user is asking for relevant information to verify HIPAA compliance for patient data handling, based on the provided context. The context primarily discusses FDA regulations related to medical device software and electronic health records, including requirements for electronic records and signatures (21 CFR Part 11), and general software validation principles. While HIPAA is the regulatory tag, the context does not explicitly mention HIPAA requirements. I need to extract information from the context that is broadly relevant to secure and compliant patient data handling and documentation, even if it's under FDA regulations.


Relevant Details: ['HIPAA compliance', 'patient data handling', 'electronic health records', 'electronic signatures', 'audit trails', 'system logs', 'compliance documentation', 'software testing']


In [None]:
## OUTPUT WITHOUT STRUCTURED SCHEMA
# Prints the retrieved context followed by the answer exactly as stored in the state.
# NOTE(review): presumably intended for a run in which `generate` calls the plain
# `llm` instead of `structured_llm`, so that the answer is free text — confirm.

print(f"Context: {result['context']}\n\n")
print(f"Answer: {result['answer']}")

Answer: For the test action "Review HIPAA requirements," "Test patient_data_handling against requirements," and "Document compliance evidence," the following details are relevant:

*   **Patient Data Handling Context:** The software functions involved in "patient_data_handling" often include those that allow individuals to view, transfer, or download Electronic Health Record (EHR) data, facilitating general patient health information management and record-keeping activities. These functions may also provide patients with tools to organize, track, and access information related to their health conditions or treatments.
*   **Compliance Benchmarks:** For electronic patient record functions, meeting the full description of section 520(o)(1)(C) in the FD&C Act implies that these functions are not considered devices if they are reviewed by Healthcare Professionals (HCPs) and **certified under the ONC Health IT Certification Program**. While the context notes FDA's enforcement discretion reg