In [66]:
# Contrived example using json as our questionnaire repository, with a RAG based questionnaire discover system.

# AI-Powered Clinical Documentation Assistant

# Background

Healthcare professionals face a significant burden from medical documentation. This project focuses on leveraging generative AI to alleviate this burden by automatically extracting structured information from physician-patient audio conversations and using it to pre-fill administrative forms by generating data points that can be electronically stored in EMRs, and EHRS.

This tool outputs data in a FHIR compatible format which ensures seamless integration with existing healthcare systems through a standardized, interoperable format. This structured approach unlocks the data's potential for reusability in various clinical workflows, analytics, and future healthcare applications beyond just form filling.

In [67]:
!pip uninstall -qqy jupyterlab kfp  # Remove unused conflicting packages
!pip install -qU "google-genai==1.7.0" "chromadb==0.6.3" "langchain==0.3.23" "langgraph==0.3.29" "json-repair==0.41.1" "google-api-core==2.24.2" "langchain-google-genai==2.1.2"


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


**Set up your API key**

To run the following cell, your API key must be stored it in a [Kaggle secret](https://www.kaggle.com/discussions/product-feedback/114053) named `GOOGLE_API_KEY`.

If you don't already have an API key, you can grab one from [AI Studio](https://aistudio.google.com/app/apikey). You can find [detailed instructions in the docs](https://ai.google.dev/gemini-api/docs/api-key).

To make the key available through Kaggle secrets, choose `Secrets` from the `Add-ons` menu and follow the instructions to add your key or enable it for this notebook.

In [68]:
# from kaggle_secrets import UserSecretsClient

# GOOGLE_API_KEY = UserSecretsClient().get_secret("GOOGLE_API_KEY")

In [69]:
import os

GOOGLE_API_KEY="AIzaSyDAZjElfeaJqItRsB21v3p4ETShat1PzmI"

# print(dict(os.environ))

# os.environ["GOOGLE_API_KEY"]

**Prepare the data store and embeddings**

Discover the questionnaire metadata that we will use to create an embedding database

In [70]:
# Define some constants
# HAPI_FHIR_BASE_URL = "https://hapi.fhir.org/baseR4"
# HAPI_FHIR_BASE_URL = "http://localhost:8081/fhir"
# QUESTIONNAIRE_ENDPOINT = f"{HAPI_FHIR_BASE_URL}/Questionnaire"


In [71]:
import json

_quest_docs = None


def read_questionnaires_from_store():
    global _quest_docs
    if _quest_docs is None:
        with open("./quest.db.json", "r") as file:
            _quest_docs = json.loads(file.read())
    return _quest_docs


def get_quest_docs_meta():
    quest_docs = read_questionnaires_from_store()
    doc_with_metad = []
    doc_ids = []
    for doc in quest_docs:
        doc_id = doc.get("id")
        doc_meta = {
            k: v
            for k, v in {
                "id": doc_id,
                "title": doc.get("title"),
                "name": doc.get("name"),
            }.items()
            if v is not None
        }
        doc_desc = (
            doc.get("description") if doc.get("description") else "No description"
        )
        doc_with_metad.append((doc_desc, doc_meta))
        doc_ids.append(doc_id)
    return doc_with_metad, doc_ids


print(get_quest_docs_meta())

([('No description', {'id': '47004539', 'title': 'AAA New Form'}), ('No description', {'id': '47004537', 'title': 'ee New Form'}), ('No description', {'id': '47004533', 'title': 'New Form'}), ('No description', {'id': 'adc-extraction-test-1', 'title': 'Extraction test - Karnofsky/Lansky Score'}), ('No description', {'id': '47002099', 'title': 'Demographic Survey', 'name': 'Demographic Survey'}), ('No description', {'id': '46994390', 'title': 'Extraction test - Karnofsky/Lansky Score'}), ('A questionnaire to collect basic health history information.', {'id': 'health-history-questionnaire-2021-06', 'title': 'Health History Questionnaire (June 2021)'}), ('Questionnaire for routine collection of vital signs and physical observations.', {'id': 'routine-obs', 'title': 'Routine Observation Questionnaire', 'name': 'RoutineObservation'}), ('A standardized questionnaire to assess depression severity over the last 2 weeks.', {'id': 'phq9', 'title': 'Patient Health Questionnaire-9 (PHQ-9)', 'name'

## Creating the embedding database with ChromaDB

We create a [custom function](https://docs.trychroma.com/guides/embeddings#custom-embedding-functions) to generate embeddings with the Gemini API. 

The questionnaire metadata are the items that are in the database. They are inserted first, and later retrieved. Queries will be a description of the form to be filled derived from the prompt instruction.

In [72]:
from chromadb import Documents, EmbeddingFunction, Embeddings
from google.api_core import retry
from google.genai import types, Client

gda_client = Client(api_key=GOOGLE_API_KEY)
gda_client_model = "gemini-2.0-flash"
# Define a helper to retry when per-minute quota is reached.
is_retriable = lambda e: (isinstance(e, genai.errors.APIError) and e.code in {429, 503})


class GeminiEmbeddingFunction(EmbeddingFunction):
    # Specify whether to generate embeddings for documents, or queries
    document_mode = True

    @retry.Retry(predicate=is_retriable)
    def __call__(self, input: Documents) -> Embeddings:
        if self.document_mode:
            embedding_task = "retrieval_document"
        else:
            embedding_task = "retrieval_query"

        response = gda_client.models.embed_content(
            model="models/text-embedding-004",
            contents=input,
            config=types.EmbedContentConfig(
                task_type=embedding_task,
            ),
        )
        return [e.values for e in response.embeddings]

Now create a [Chroma database client](https://docs.trychroma.com/getting-started) that uses the `GeminiEmbeddingFunction` and populate the database with the questionnaire metadata from above

In [73]:
import chromadb

DB_NAME = "fhir-quest-semantic"

embed_fn = GeminiEmbeddingFunction()
chroma_client = chromadb.Client()
db = chroma_client.get_or_create_collection(name=DB_NAME, embedding_function=embed_fn)

def populate_vector_db():
    embed_fn.document_mode = True
    (desc_with_metad, doc_ids) = get_quest_docs_meta()
    descriptions, meta = zip(*desc_with_metad)
    print(meta)

    db.add(documents=list(descriptions), ids=doc_ids, metadatas=list(meta))

populate_vector_db()

({'id': '47004539', 'title': 'AAA New Form'}, {'id': '47004537', 'title': 'ee New Form'}, {'id': '47004533', 'title': 'New Form'}, {'id': 'adc-extraction-test-1', 'title': 'Extraction test - Karnofsky/Lansky Score'}, {'id': '47002099', 'title': 'Demographic Survey', 'name': 'Demographic Survey'}, {'id': '46994390', 'title': 'Extraction test - Karnofsky/Lansky Score'}, {'id': 'health-history-questionnaire-2021-06', 'title': 'Health History Questionnaire (June 2021)'}, {'id': 'routine-obs', 'title': 'Routine Observation Questionnaire', 'name': 'RoutineObservation'}, {'id': 'phq9', 'title': 'Patient Health Questionnaire-9 (PHQ-9)', 'name': 'PHQ9'}, {'id': '46934304', 'name': 'LifelinesQuestionnaire'})


Insert of existing embedding ID: 47004539
Insert of existing embedding ID: 47004537
Insert of existing embedding ID: 47004533
Insert of existing embedding ID: adc-extraction-test-1
Insert of existing embedding ID: 47002099
Insert of existing embedding ID: 46994390
Insert of existing embedding ID: health-history-questionnaire-2021-06
Insert of existing embedding ID: routine-obs
Insert of existing embedding ID: phq9
Insert of existing embedding ID: 46934304
Add of existing embedding ID: 47004539
Add of existing embedding ID: 47004537
Add of existing embedding ID: 47004533
Add of existing embedding ID: adc-extraction-test-1
Add of existing embedding ID: 47002099
Add of existing embedding ID: 46994390
Add of existing embedding ID: health-history-questionnaire-2021-06
Add of existing embedding ID: routine-obs
Add of existing embedding ID: phq9
Add of existing embedding ID: 46934304


Confirm that the data was inserted by looking at the database.

In [74]:
db.count()
# You can peek at the data too.
# db.peek(1)

10

In [75]:
from langgraph.prebuilt import create_react_agent
from langchain_core.tools import tool
from typing_extensions import TypedDict, Any, Dict

# Define the state of our graph
class AgentState(TypedDict):
    audio_file_path: Any
    instructions: str
    transcription: str
    quest: Dict[str, Any]
    medical_records: str
    quest_resp: str
    quest_found: bool
    quest_resp_valid: bool

In [76]:
def init_workflow(state: AgentState):
    """ 
    Start: preserves input prompt, which includes audio file and prompt instructions
    """
    return {"audio_file_path": state["audio_file_path"], "instructions": state["instructions"]}

## Retrieval: Finding relevant questionnaires

We can then use the prompt to get the questionnaire

In [None]:
import enum


class RelevantRating(enum.Enum):
    YES = "Yes"
    NO = "No"


def discover_questionnaire(query):
    try:
        embed_fn.document_mode = False
        result = db.query(query_texts=[query], n_results=1)
        queried_doc_ids = result.get("ids")
        try:
            interest_doc_id = queried_doc_ids[0][0]
        except IndexError:
            return None
        queried_doc_desc = result.get("documents")[0][0]
        queried_doc_meta = result.get("metadatas")[0][0]

        
        print(result)
        structured_output_config = types.GenerateContentConfig(
            response_mime_type="text/x.enum",
            response_schema=RelevantRating,
        )
        response = gda_client.models.generate_content(
            model=gda_client_model, contents=[prompt], config=structured_output_config
        )
        parsed_resp = response.parsed

        if parsed_resp is RelevantRating.YES:
            return interest_doc_id
        else:
            return
    except:
        return None


def fetch_questionnaire(state: AgentState):
    query = state.get("instructions")
    quest_id = discover_questionnaire(query)

    full_quest_docs = read_questionnaires_from_store()
    of_interest_quest = None
    for quest in full_quest_docs:
        if quest["id"] == quest_id:
            of_interest_quest = quest
            break
    if of_interest_quest is None:
        return {"quest_found": False}
    else:
        return {"quest_found": True, "quest": of_interest_quest}


Now that we have the questionnaire and the transcripted audio files, we can move on to generate the questionnaireResponse

In [78]:
import json_repair
google_model_id = "gemini-2.0-flash"

def generate_questresp(state: AgentState) -> dict:
    """
    Extract relevant information,
    and return a FHIR QuestionnaireResponse resource as a dict.
    """
    transcribed = state.get("transcription")
    questionnaire = state.get("quest")
    # 3. Prepare the LLM prompt
    prompt = create_prompt_for_questionnaire_response(
        transcribed, questionnaire
    )

    response = gda_client.models.generate_content(
        model=google_model_id, contents=[prompt, transcribed], config={
            'response_mime_type': 'application/json'
        }
    )

    qr_string = response.text.strip()
    qr = json_repair.loads(qr_string)
    # try:
    #     questionnaire_response = json.loads(llm_output)
    # except Exception as e:
    #     raise ValueError(f"Invalid JSON from LLM: {e}")

    # # 6. Validate the JSON against FHIR schema (optional but recommended)
    # #    This step ensures the object meets the QuestionnaireResponse structure
    # if not validate_fhir_questionnaire_response(questionnaire_response):
    #     raise ValueError("Generated QuestionnaireResponse is not valid FHIR.")

    # # 7. Return or store the final resource
    return {"quest_resp": qr}


def create_prompt_for_questionnaire_response(
    cleaned_text: str, questionnaire_template: dict
) -> str:
    # Construct a system/user prompt with instructions,
    # referencing relevant sections of the conversation
    prompt = f"""
    You are a medical documentation assistant.
    Below is a transcribed patient-physician conversation:
    ---
    {cleaned_text}
    ---

    You have a FHIR Questionnaire defined as follows:
    {json.dumps(questionnaire_template, indent=2)}

    Extract the relevant data from the conversation to populate a FHIR QuestionnaireResponse
    based on the provided Questionnaire. Return ONLY valid JSON representing this 
    QuestionnaireResponse with fields "resourceType": "QuestionnaireResponse", 
    "questionnaire": "<Questionnaire-identifier>",
    "status", "subject", "authored", "item", etc.

    If a field is unknown, leave it blank or null. 
    Do not add additional commentary.

    Use this JSON schema:

    QuestionnaireResponse = <generated questionnaireResponse>
    return: QuestionnaireResponse
    """
    return prompt


In [79]:
# def validate_qr(state: AgentState):
#     qr = state.get("quest_resp")
#     # use a publicly available fhir instance.
#     url = f"{QUESTIONNAIRE_ENDPOINT}/$validate"
#     headers = {"Content-Type": "application/fhir+json"}

#     response = requests.post(url, json=qr, headers=headers)
#     if response.ok:
#         return {"quest_resp_valid": True}
#     else:
#         return {"quest_resp_valid": False}
    

In [80]:
# def save_questionnaire_response(state: AgentState) -> str:
#     """
#     Saves the validated QuestionnaireResponse to the HAPI FHIR server.

#     Args:
#         questionnaire_response (Dict): The validated FHIR QuestionnaireResponse in JSON format.

#     Returns:
#         str: A success message or an error message if saving fails.
#     """
#     quest_resp = state.get("quest_resp")
#     print("\n\n\n\n",quest_resp, type(quest_resp))
#     questionnaire_response_endpoint = f"{HAPI_FHIR_BASE_URL}/QuestionnaireResponse"
#     headers = {"Accept": "application/fhir+json", "Content-Type": "application/fhir+json"}

#     response = requests.post(questionnaire_response_endpoint, headers=headers, json=quest_resp)
#     response.raise_for_status() # Raise exception for HTTP errors

#     if response.status_code in range(200, 300):
#         created_resource = response.json()
#         return {}


In [81]:
# --- Transcription Function ---
# TODO - why do we need to this.
def diarize_audio(state: AgentState):
    """
    Transcribes the given audio file using the Gemini model, aiming for a
    conversation-style output with speaker labels.

    Args:
        model: The initialized Gemini GenerativeModel instance.
        audio_file_path: Path to the audio file (e.g., .wav, .mp3, .flac).
    """
    # TODO - check that audio format is supported.
    uploaded_file = state["audio_file_path"]

    
    # 2. Construct the Prompt - Key Considerations for Conversation Style:
    #    - Explicitly ask for transcription.
    #    - Request speaker diarization (identifying and labeling speakers).
    #    - Suggest common labels (like 'Doctor:', 'Patient:', or 'Speaker 1:', 'Speaker 2:').
    #    - Ask for natural punctuation and formatting.
    #    - Specify how to handle non-speech sounds (ignore, note in brackets, etc.).

    prompt = """
    Diarize and transcribe this health-related interview, maintaining chronological order with timestamps if possible. Add labels for speaker (like 'Doctor:', 'Patient:', or 'Speaker 1:', 'Speaker 2:') at the beginning of each turn.
    Accurately capture medical terms, mark unclear words as “[INAUDIBLE],” avoid adding extra commentary or guesses, and keep overlapping speech on separate lines. 
    Return only the final transcript.
    """

    response = gda_client.models.generate_content(
        model=google_model_id,
        contents = [prompt, uploaded_file]
    )

    transcription = response.text.strip()
    return {"transcription": transcription}



In [82]:
def terminate_workflow(state: AgentState):
    # TODO - 
    print("Workflow end")
    return {}

In [83]:
from langchain_google_genai import ChatGoogleGenerativeAI # Correct import path
from langgraph.graph import StateGraph, END, START
# from langgraph.pregel import PregelProcess # TODO???

model_id = "gemini-2.0-flash"
model = ChatGoogleGenerativeAI(model=model_id, google_api_key=GOOGLE_API_KEY)

# Defined the graph
wk_graph = StateGraph(AgentState)

# Nodes
wk_graph.add_node("discover_and_fetch_questionnaire", fetch_questionnaire)
wk_graph.add_node("transcribe_audio", diarize_audio)
wk_graph.add_node("generate_questionnaire_response", generate_questresp)
# wk_graph.add_node("save_response", save_questionnaire_response) 
wk_graph.add_node("terminate_workflow", terminate_workflow)

# Edges
wk_graph.add_edge(START, "discover_and_fetch_questionnaire")
wk_graph.add_conditional_edges(
    "discover_and_fetch_questionnaire",
    lambda state: [
        "generate_questionnaire_response",
        "generate_soap_note"
    ] if state["quest_found"] else "terminate_workflow"
)
# wk_graph.add_conditional_edges("discover_and_fetch_questionnaire", lambda state: "transcribe_audio" if state["quest_found"] else "terminate_workflow")
wk_graph.add_edge("transcribe_audio", "generate_questionnaire_response")
wk_graph.add_edge("generate_questionnaire_response", "terminate_workflow")
wk_graph.add_edge("terminate_workflow", END)



graph = wk_graph.compile()


In [84]:
# from IPython.display import Image, display

# display(Image(graph.get_graph().draw_mermaid_png()))


In [85]:

local_input_file_url = "./Data/Audio Recordings/CAR0002.mp3"
uploaded_file_uri = gda_client.files.upload(file=local_input_file_url)

inputs = {"audio_file_path": uploaded_file_uri, "instructions": "Process audio and fill out a medical history report"}
result = graph.invoke(inputs)

{'ids': [['health-history-questionnaire-2021-06']], 'embeddings': None, 'documents': [['A questionnaire to collect basic health history information.']], 'uris': None, 'data': None, 'metadatas': [[None]], 'distances': [[0.9071840643882751]], 'included': [<IncludeEnum.distances: 'distances'>, <IncludeEnum.documents: 'documents'>, <IncludeEnum.metadatas: 'metadatas'>]}
Workflow end
