In [None]:
## Setup

First, install ChromaDB and the Gemini API Python SDK.

In [None]:
!pip uninstall -qqy jupyterlab kfp  # Remove unused conflicting packages
!pip install -qU "google-genai==1.7.0" "chromadb==0.6.3" "requests==2.32.3" 

In [None]:
import json
from typing import Dict, List

import requests
from google import genai
from google.genai import types
from IPython.display import Markdown

# Display the installed google-genai SDK version as a quick sanity check of the install cell.
genai.__version__

In [None]:
### Set up your API key

To run the following cell, your API key must be stored in a [Kaggle secret](https://www.kaggle.com/discussions/product-feedback/114053) named `GOOGLE_API_KEY`.

If you don't already have an API key, you can grab one from [AI Studio](https://aistudio.google.com/app/apikey). You can find [detailed instructions in the docs](https://ai.google.dev/gemini-api/docs/api-key).

To make the key available through Kaggle secrets, choose `Secrets` from the `Add-ons` menu and follow the instructions to add your key or enable it for this notebook.

In [None]:
# from kaggle_secrets import UserSecretsClient

# GOOGLE_API_KEY = UserSecretsClient().get_secret("GOOGLE_API_KEY")

In [None]:
import os

# Read the Gemini API key from the environment. `os.environ[...]` raises KeyError
# when the variable is missing, so a misconfigured session fails here rather than
# on the first API call. The value is assigned (not left as the cell's last
# expression) so the secret is never echoed into the notebook output.
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]

# Shared Gemini client; GeminiEmbeddingFunction looks up this module-level name.
client = genai.Client(api_key=GOOGLE_API_KEY)

In [1]:
# Endpoints of the HAPI FHIR R4 server that the rest of the notebook talks to.
HAPI_FHIR_BASE_URL = "https://hapi.fhir.org/baseR4"
QUESTIONNAIRE_ENDPOINT = HAPI_FHIR_BASE_URL + "/Questionnaire"


In [None]:
### Data

Discover the questionnaire metadata that we will use to create an embedding database

In [None]:
def fetch_questionnaires_from_hapi(count: int = 100, timeout: float = 30.0) -> List[Dict]:
    """
    Fetches the most recently updated Questionnaire resources from the HAPI FHIR server.

    Only metadata fields are requested (description, id, identifier, name, title),
    so the returned resources are summaries rather than full Questionnaires.

    Args:
        count (int): Maximum number of resources to request, sent as the FHIR
            ``_count`` search parameter. Only the first page of results is read.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        List[Dict]: A list of Questionnaire resources in JSON format.
                     Returns an empty list if the request fails or the
                     response is not a searchset Bundle.
    """
    search_params = {
        "_count": count,
        "_sort": "-_lastUpdated",
        # The FHIR parameter that limits returned fields is `_elements` (plural).
        "_elements": "description,id,identifier,name,title",
    }

    try:
        response = requests.get(
            QUESTIONNAIRE_ENDPOINT,
            params=search_params,
            headers={"Accept": "application/fhir+json"},
            timeout=timeout,  # without a timeout a stalled server blocks the notebook forever
        )
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
        bundle = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching Questionnaires from HAPI FHIR: {e}")
        return []  # Return empty list in case of error

    if bundle.get('resourceType') != 'Bundle' or bundle.get('type') != 'searchset':
        print(f"Unexpected response format from FHIR server: {bundle.get('resourceType')}")
        return []

    # Keep only entries that actually carry a Questionnaire resource.
    return [
        entry['resource']
        for entry in bundle.get('entry', [])
        if (entry.get('resource') or {}).get('resourceType') == 'Questionnaire'
    ]


In [None]:
## Creating the embedding database with ChromaDB

We create a [custom function](https://docs.trychroma.com/guides/embeddings#custom-embedding-functions) to generate embeddings with the Gemini API. 

The questionnaire metadata records are the items stored in the database. They are inserted first and retrieved later. Each query is a description of the form to be filled in, derived from the prompt instruction.

In [None]:
from chromadb import Documents, EmbeddingFunction, Embeddings
from google.api_core import retry

from google.genai import types


def is_retriable(e):
    """Retry predicate: True for Gemini API errors with code 429 or 503 (e.g. per-minute quota reached)."""
    return isinstance(e, genai.errors.APIError) and e.code in {429, 503}


class GeminiEmbeddingFunction(EmbeddingFunction):
    """Chroma embedding function that delegates to the Gemini embedding API.

    Relies on a module-level ``client`` (expected to be a ``genai.Client``)
    being defined before the first call.
    """

    # True: embed inputs as documents to be stored.
    # False: embed inputs as search queries.
    document_mode = True

    @retry.Retry(predicate=is_retriable)
    def __call__(self, input: Documents) -> Embeddings:
        """Embed ``input`` and return one vector per item, retrying on retriable API errors."""
        embedding_task = "retrieval_document" if self.document_mode else "retrieval_query"

        response = client.models.embed_content(
            model="models/text-embedding-004",
            contents=input,
            config=types.EmbedContentConfig(task_type=embedding_task),
        )
        return [embedding.values for embedding in response.embeddings]

In [None]:
Now create a [Chroma database client](https://docs.trychroma.com/getting-started) that uses the `GeminiEmbeddingFunction` and populate the database with the questionnaire metadata from above

In [None]:
import chromadb

DB_NAME = "fhir-questionnaire"

# Index-time embedding: inputs are embedded with the "retrieval_document" task type.
embed_fn = GeminiEmbeddingFunction()
embed_fn.document_mode = True

chroma_client = chromadb.Client()
db = chroma_client.get_or_create_collection(name=DB_NAME, embedding_function=embed_fn)
questionnare_documents = fetch_questionnaires_from_hapi()

# Chroma documents must be strings, so each Questionnaire dict is serialized to JSON text.
# Resources without an id are skipped because every Chroma record needs an id.
indexable_questionnaires = [quest for quest in questionnare_documents if quest.get("id")]

if indexable_questionnaires:
    db.add(
        documents=[json.dumps(quest) for quest in indexable_questionnaires],
        ids=[str(quest["id"]) for quest in indexable_questionnaires],
    )
else:
    # The fetch helper returns [] on errors; adding an empty batch would raise.
    print("No questionnaires were fetched; nothing was added to the collection.")

In [None]:
Confirm that the data was inserted by looking at the database.

In [None]:
# Number of records currently stored in the collection.
db.count()
# You can peek at the data too.
# db.peek(1)

In [None]:
## Retrieval: Finding relevant questionnaires

We can then use the prompt as a search query to retrieve the best-matching questionnaire from the database.

In [None]:
def discover_relevant_quest(query):
    """
    Returns the stored questionnaire that best matches a free-text query.

    Args:
        query: Text describing the form to look for.

    Returns:
        The top-ranked document in the collection. A document holding a JSON
        object is decoded into a dict; any other document is returned unchanged.

    Raises:
        IndexError: If the query returns no document.
    """
    # Switch the shared embedding function to query mode so the text is embedded
    # with the "retrieval_query" task type. The flag stays False after this call.
    embed_fn.document_mode = False
    result = db.query(query_texts=[query], n_results=1)

    # One query text was sent, so there is exactly one list of matching documents.
    [all_passages] = result["documents"]
    best_match = all_passages[0]

    try:
        decoded = json.loads(best_match)
    except (TypeError, ValueError):
        return best_match
    return decoded if isinstance(decoded, dict) else best_match

In [None]:
# Now that we have the questionnaire and the transcribed audio, we can move on to generating the QuestionnaireResponse

In [2]:
# Model id and Gemini client used to generate QuestionnaireResponse resources.
qr_client_model_id = "gemini-2.0-flash"
qr_client = genai.Client(api_key=GOOGLE_API_KEY)

def direct_text_to_qr(transcribed: str, questionnaire_template: dict) -> dict:
    """
    Extract relevant information from a transcript
    and return a FHIR QuestionnaireResponse resource as a dict.

    Args:
        transcribed: Transcribed conversation text.
        questionnaire_template: FHIR Questionnaire that the response should answer.

    Returns:
        dict: The QuestionnaireResponse parsed from the model's JSON output.
            It is not validated against the FHIR schema here.

    Raises:
        ValueError: If the model returns no text, or text that is not valid JSON.
    """
    # Build the instruction prompt from the transcript and the questionnaire.
    prompt = create_prompt_for_questionnaire_response(
        transcribed, questionnaire_template
    )

    # Request a JSON response so the output can be parsed directly.
    response = qr_client.models.generate_content(
        model=qr_client_model_id,
        contents=[prompt, transcribed],
        config={'response_mime_type': 'application/json'},
    )

    # `response.text` can be None when the model produces no text part.
    qr_string = (response.text or "").strip()
    if not qr_string:
        raise ValueError("Invalid JSON from LLM: the model returned no text")

    try:
        return json.loads(qr_string)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON from LLM: {e}") from e


def create_prompt_for_questionnaire_response(
    cleaned_text: str, questionnaire_template: dict
) -> str:
    """
    Builds the LLM prompt asking for a FHIR QuestionnaireResponse.

    Args:
        cleaned_text: Conversation transcript embedded verbatim in the prompt.
        questionnaire_template: Questionnaire embedded as indented JSON.

    Returns:
        str: The complete prompt text.
    """
    # Serialize first so the template below only interpolates plain names.
    questionnaire_json = json.dumps(questionnaire_template, indent=2)

    return f"""
    You are a medical documentation assistant.
    Below is a transcribed patient-physician conversation:
    ---
    {cleaned_text}
    ---

    You have a FHIR Questionnaire defined as follows:
    {questionnaire_json}

    Extract the relevant data from the conversation to populate a FHIR QuestionnaireResponse
    based on the provided Questionnaire. Return ONLY valid JSON representing this 
    QuestionnaireResponse with fields "resourceType": "QuestionnaireResponse", 
    "questionnaire": "<Questionnaire-identifier>",
    "status", "subject", "authored", "item", etc.

    If a field is unknown, leave it blank or null. 
    Do not add additional commentary.

    Use this JSON schema:

    QuestionnaireResponse = <generated questionnaireResponse>
    return: QuestionnaireResponse
    """


NameError: name 'genai' is not defined

In [None]:
def validate_qr(qr, timeout: float = 30.0) -> bool:
    """
    Validates a QuestionnaireResponse with the FHIR server's ``$validate`` operation.

    Args:
        qr: QuestionnaireResponse resource as a JSON-serializable dict.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        bool: True when the request succeeds and the returned OperationOutcome
            contains no issue of severity "error" or "fatal"; False otherwise.

    Raises:
        requests.exceptions.RequestException: If the request cannot be completed.
    """
    # use a publicly available fhir instance.
    # The operation is invoked on the QuestionnaireResponse type so the resource
    # is checked against the right definition.
    url = f"{HAPI_FHIR_BASE_URL}/QuestionnaireResponse/$validate"
    headers = {"Accept": "application/fhir+json", "Content-Type": "application/fhir+json"}

    response = requests.post(url, json=qr, headers=headers, timeout=timeout)
    if not response.ok:  # `ok` is a property, not a method
        return False

    try:
        outcome = response.json()
    except ValueError:
        # No parsable OperationOutcome: fall back to the HTTP status alone.
        return True

    # A server may answer 200 even for an invalid resource, so inspect the issues.
    issues = outcome.get("issue", []) if isinstance(outcome, dict) else []
    return not any(issue.get("severity") in {"error", "fatal"} for issue in issues)
    

In [None]:
def save_questionnaire_response(questionnaire_response: Dict, timeout: float = 30.0) -> str:
    """
    Saves the validated QuestionnaireResponse to the HAPI FHIR server.

    Args:
        questionnaire_response (Dict): The validated FHIR QuestionnaireResponse in JSON format.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        str: A success message or an error message if saving fails.
             Request errors are reported in the message rather than raised.
    """
    print("Saving QuestionnaireResponse to HAPI FHIR...")
    questionnaire_response_endpoint = f"{HAPI_FHIR_BASE_URL}/QuestionnaireResponse"
    headers = {"Accept": "application/fhir+json", "Content-Type": "application/fhir+json"}

    try:
        response = requests.post(
            questionnaire_response_endpoint,
            headers=headers,
            json=questionnaire_response,
            timeout=timeout,  # without a timeout a stalled server blocks the notebook forever
        )
        response.raise_for_status() # Raise exception for HTTP errors

        created_resource = response.json()
        # A successful create answers 201 Created and echoes the stored resource.
        if created_resource.get('resourceType') == 'QuestionnaireResponse' and response.status_code == 201:
            resource_id = created_resource.get('id')
            return f"QuestionnaireResponse saved successfully with ID: {resource_id}"
        return f"Error saving QuestionnaireResponse. FHIR server response: {created_resource.get('resourceType')}, Status Code: {response.status_code}"

    except requests.exceptions.RequestException as e:
        return f"Error saving QuestionnaireResponse to HAPI FHIR: {e}"
