# Imports

In [1]:
!pip install -q bert_score transformers pandas numpy torch pydantic tqdm bitsandbytes

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.7/60.7 MB[0m [31m35.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# from langchain_huggingface import HuggingFacePipeline
# from langchain_core.prompts import PromptTemplate
import torch
from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer, pipeline, BitsAndBytesConfig, BertModel, BertTokenizer
from bert_score import BERTScorer

import numpy as np
import pandas as pd
from typing import List, Any, Tuple
import json
import time
from tqdm import tqdm
import logging
import gc

# Functions

In [81]:
# evaluation functions
def calculate_bert(original: str, summary: str, scorer: BERTScorer) -> float:
  '''This function serves as a relevance calculation between the two texts'''
  _, __, f1 = scorer.score([original], [summary])
  return f1

def nli(m: AutoModelForSequenceClassification, t: AutoTokenizer, original: str, summary_sentences: List[str], hyperparameters: dict) -> bool:
  '''This function serves as a measure of information faithfulness'''
  claims = []
  for summary_sentence in summary_sentences:
    input_tokens = t(original, summary_sentence, return_tensors="pt", truncation=True).to(m.device)
    with torch.no_grad():
      o = m(**input_tokens)
      probs = torch.softmax(o.logits, dim=1)
      index = np.dot(probs.to("cpu"), np.array([0, 1, 2]))
      claims.append(index)
  return np.mean(claims)

def simplicity(m: AutoModelForCausalLM, t: AutoTokenizer, sp: str, summary: str, hyperparameters: dict) -> bool:
  '''This function serves as a measure of how simple the text is to understand'''
  query = f"""
  Evaluate the provided summary against the source text based on your assigned role.

  Return your response ONLY as a JSON object that adheres strictly to the following schema.

  ### JSON SCHEMA ###
  ```json{{
    "simplicity_score": "INTEGER (1 to 5)",
    "readability_critique": "STRING (A brief, two-sentence explanation of why the score was assigned, focusing on vocabulary, sentence length, and flow.)",
    "most_confusing_term": "STRING (The single word or phrase that would be most challenging for your persona, or 'N/A' if none.)"
  }}```
  ### END JSON SCHEMA ###

  Summary:
  ---
  {summary}
  ---
  """
  o, i, __ = generate(m, t, query, hyperparameters, sp=sp)
  return o, i

In [3]:
# functions for generation
def generate(model: AutoModelForCausalLM, tokenizer: AutoTokenizer, query: str, hyperparameters: dict, sp: str=None) -> Tuple[str, Any, str]:
  messages = []
  if sp != None:
      messages.append({
          "role": "system",
          "content": sp
          }
      )
  messages.append({
      "role": "user",
      "content": query
  })


  input_text = tokenizer.apply_chat_template(
      messages,
      tokenize=False,
      add_generation_prompt=True
  )
  input_tokens = tokenizer.apply_chat_template(
      messages,
      tokenize=True,
      add_generation_prompt=True,
      return_tensors="pt"
  ).to(model.device)
  with torch.no_grad():
    output = model.generate(input_ids=input_tokens.input_ids,
                            return_dict_in_generate=True,
                            output_scores=False,
                            **hyperparameters)
  return output, input_tokens, input_text


def process_output(o, input_len, model_final_tag, model_final_tag_end, model_output_start, model_output_end):
  output_no_input = tokenizer.decode(o[input_len:])
  output_no_thinking = output_no_input[output_no_input.index(model_final_tag)+len(model_final_tag):output_no_input.index(model_final_tag_end)]
  output_no_spaces = output_no_thinking.replace("\n", "")
  output_only_json = output_no_spaces[output_no_spaces.index(model_output_start)+len(model_output_start):output_no_spaces.rindex(model_output_end)]
  example_output = json.loads(output_only_json)
  return example_output

# Configs

In [4]:
# configs
system_prompt = """
You are a highly reliable and expert Clinical Data Abstraction Agent specializing in natural language processing of Electronic Health Records (EHRs). Your primary goal is to transform unstructured clinical text into reliable, structured, and actionable data or patient-centric summaries, acting with the expertise of a medical writer and public health educator.

RULES:
1. Output Format (CRITICAL): The final and ONLY output must be a valid JSON object enclosed in ```json ... ``` (triple backticks). Do not include any text, reasoning, or verification output outside these fences.
2. Model Role: Strictly adhere to the functional role of a Data Abstraction Agent and a Medical Writer/Educator.
3. Traceability & Grounding: All extracted facts and summaries must be directly traceable to the provided clinical text. Do not invent or assume information. This rule is essential for liability mitigation.
4. Jargon Control: Use the vocabulary and complexity level strictly defined in the <readability> field.
5. Liability Mitigation: Before generating the final JSON, you must first output an internal verification step to ensure all CRITICAL actions are captured and correctly translated.
6. Handling Ambiguity/Absence: If a required data point is completely absent or ambiguous, populate that field with "N/A" or "Not Documented". Do not use placeholder text from the schema (e.g., do not output "[Medication Name]").
"""
query_prompt = """
---TASK INSTRUCTION---
Using the text provided in the <clinical_note> section to summarize the diagnoses and medications listed under <medications> and <diagnoses> sections, generate the required structured JSON output.

**Target Directives (Safety First):**
1. **Source Grounding & Verification (CRITICAL):** Before generating the final JSON, you **MUST** internally verify all facts and extracted actions against the source note. List all extracted actions and warning signs here, confirming their priority (CRITICAL, URGENT, ROUTINE).
2. **Reading Level (CRITICAL):** Generate all fields using the <readbility> reading level.
3. **Diagnosis Breakdown (CRITICAL):** Provide the diagnoses as defined in the schema. The <readbility> reading level MUST be strictly applied.
4. **Medication Fidelity (CRITICAL):** All extracted medications must include the new `status` field. All medication fields (`med_name`, `regimen`, `status`, `purpose`) must be populated. The <readbility> reading level MUST be strictly applied to regimen and purpose medication fields.
5. **Jargon Guardrails (CRITICAL):** The basic reading level MUST be strictly applied to the `warning_signs` array.

---INTERNAL VERIFICATION---
[LLM must insert the verification list here, per Directive 1]
---END VERIFICATION---
FINAL COMMAND: The JSON object MUST contain every CRITICAL action item listed in the verification step above. Failure to reconcile is grounds for system termination.

---JSON SCHEMA DEFINITION---
{{
  "diagnosis_list": [
    {{
      "original": "from input <diagnoses>",
      "new": "rewritten based on <readability> level"
    }}
  ],

  "medication_list": [
    {{
      "med_name": "[Medication Name]",
      "regimen": "[Dose and Frequency]",
      "status": "[New | Continued | Discontinued]",
      "purpose": "[medication purpose]",
    }}
  ],
  "follow_up_instructions": [
    {{"action": "Schedule an appointment with [Primary Care Provider/Specialist]", "timeframe": "[e.g., within 7 days]"}},
    {{"action": "Take all new medications as prescribed", "timeframe": "Ongoing"}},
    {{"action": "Limit [Activity] and avoid [Food/Drug]", "timeframe": "[e.g., for 6 weeks]"}}
  ],
  "warning_signs": [
    "[List of 3-5 signs that require a return to the ED or immediate call to the doctor, written in simple language]"
  ]
}}
---END SCHEMA---

<readability>
{}
</readability>

<clinical_note>
{}
</clinical_note>

<diagnoses>
{}
</diagnoses>

<medications>
{}
</medications>
"""
amt_return = 3
model_final_tag = "<|end|><|start|>assistant<|channel|>final"
model_final_tag_end = "<|return|>"
model_output_start = "```json"
model_output_end = "```"
basic = """
Strictly 4th-5th-grade reading level. Rules:
- No medical jargon. Replace all Latin/Greek roots with plain English.
- Max sentence length: 12 words.
- Focus on what the patient FEELS or DOES ('This pill helps your body get rid of extra water through pee').
"""
intermediate = """
7th-grade clinical literacy level. Rules:
- May use common medical terms IF briefly defined in parentheses on first use.
- Include the drug class or general mechanism in one plain sentence.
- Example: 'This diuretic (water pill) reduces fluid buildup by making your kidneys release more urine.'
- Avoid Latin abbreviations (use 'twice a day' not 'BID').
"""
advanced = """
Specialist/clinician level. Rules:
- Use full pharmacological class and primary mechanism of action (MOA).
- Include relevant clinical rationale tied to the patient's diagnoses.
- Use standard clinical abbreviations (BID, TID, PRN, PO).
- Example: 'Loop diuretic; inhibits NKCC2 in the thick ascending limb, reducing preload and managing refractory ascites secondary to hepatic sinusoidal hypertension in cirrhosis.'
"""

output_eval_rules = "RULES\n1. Output Format (CRITICAL): The final and ONLY output must be a valid JSON object enclosed in ```json ... ``` (triple backticks). Do not include any text, reasoning, or verification output outside these fences."
generation_model_name = "openai/gpt-oss-20b"
evaluation_model_name = "facebook/bart-large-mnli"
generation_configs = dict(num_return_sequences=amt_return, max_new_tokens=15000, temperature=0.5, do_sample=True, top_k=10)
evaluation_configs = dict(do_sample=False, max_new_tokens=5000)
system_prompt_evaluation_basic = "You are a 4.5 grade student who can understand simple terms. Please evaluate the medical summary (listed under Summary:) to see if you would understand this. Please return in the requested format."
system_prompt_evaluation_intermediate = "You are a 7th-grade student who can understand the following level of clincal language: A buildup of fluid in the space inside your abdomen or peritoneal cavity.'. Please evaluate the medical summary (listed under Summary:) to see if you would understand this."
system_prompt_evaluation_advanced = "You are a specialist (physician or researcher) with advanced clinical languange. Please evaluate the medical summary (listed under Summary:) to see if you would understand this.  Please return in the requested format."

# Load Data

In [6]:
with open("MIMIC-IV Notes Datathon v4 20251107(Sheet1).csv", 'r', errors='ignore') as f:
    full_data = pd.read_csv(f, header=1)
one_data = full_data.iloc[0,:]
data = one_data.to_frame().T
data = full_data

In [85]:
# num = 40
# sampled_data = full_data.sample(29, random_state=0)
# sampled_data.loc[0,:] = full_data.iloc[0,:]
# data = sampled_data.iloc[-num:,:]
# data.reset_index(inplace=True)
# data.to_csv("sampled_data.csv")

# Generate

In [7]:
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
file_handler = logging.FileHandler('app.log')
file_handler.setLevel(logging.DEBUG) # Log all messages to the file
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# 5. Add the formatter to the handlers
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

In [8]:
model = AutoModelForCausalLM.from_pretrained(generation_model_name, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(generation_model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

MXFP4 quantization requires Triton and kernels installed: CUDA requires Triton >= 3.4.0, XPU requires Triton >= 3.5.0, we will default to dequantizing the model to bf16


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/411 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/177 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/27.9M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/98.0 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

In [9]:
index = [1]

In [10]:
level_names = ["basic", "intermediate", "advanced"]
processed_outputs = {"basic": [], "intermediate": [], "advanced": []}

for level_name, level in zip(level_names, [basic, intermediate, advanced]):
    for ind in tqdm(index, total=len(index)):
        example_query = query_prompt.format(level, data.loc[ind, "Brief Hospital Course"], data.loc[ind, "Diagnosis List"], data.loc[ind, "Medication List"])
        start = time.perf_counter()
        oes, i, _ = generate(model, tokenizer, example_query, generation_configs, sp=system_prompt)
        end = time.perf_counter()
        input_len = i.input_ids.shape[1]  # fix shape bug
        for o in oes.sequences:
            try:
                example_output = process_output(o, input_len, model_final_tag, model_final_tag_end, model_output_start, model_output_end)
                processed_outputs[level_name].append(example_output)
            except:
                processed_outputs[level_name].append(None)
        json.dump(processed_outputs, open("original_output.json", "w"), indent=4)

  0%|          | 0/1 [00:00<?, ?it/s]The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
100%|██████████| 1/1 [02:54<00:00, 174.22s/it]
100%|██████████| 1/1 [01:56<00:00, 116.47s/it]
100%|██████████| 1/1 [04:56<00:00, 296.01s/it]


# Evaluate

In [None]:
with open("example_output.json") as outfile:
  processed_outputs = json.load(outfile)

# data = pd.read_csv("sampled_data.csv")

In [None]:
model = AutoModelForCausalLM.from_pretrained(generation_model_name, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(generation_model_name)

In [None]:
nli_model = AutoModelForSequenceClassification.from_pretrained(evaluation_model_name, device_map="auto")
nli_tokenizer = AutoTokenizer.from_pretrained(evaluation_model_name)
label_dictionary = nli_model.config.id2label

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [None]:
scorer = BERTScorer(model_type="bert-base-uncased")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [None]:
all_outputs = []
for example_output in processed_outputs:
  try:
    unrolled = [json.dumps(value, indent=4) for value in example_output[0].values()]
    all_outputs.append(unrolled)
  except:
    all_outputs.append(None)

In [None]:
relevance_scores = []

In [None]:
for ind, example_output in enumerate(processed_outputs):
  try:
    relevance = float(calculate_bert(data.loc[ind % num, "Brief Hospital Course"], json.dumps(example_output[0], indent=4), scorer))
    relevance_scores.append(relevance)
  except Exception as e:
    print(e)
    relevance_scores.append(None)

In [None]:
correctness_scores = []

In [None]:
for index1, example_output in enumerate(all_outputs):
  try:
    correctness = nli(nli_model, nli_tokenizer, data.loc[index1 % num, "Brief Hospital Course"], example_output, evaluation_configs)
    correctness_scores.append(float(correctness))
  except:
    correctness_scores.append(None)

  index = np.dot(probs.to("cpu"), np.array([0, 1, 2]))


In [None]:
simplicity_scores = []

In [None]:
for index2, example_output in tqdm(enumerate(processed_outputs), total=len(processed_outputs)):
  eval_template = ""
  if index2 < num:
    eval_template = system_prompt_evaluation_basic
  elif index2 >= num and index2 < num*2:
    eval_template = system_prompt_evaluation_basic
  else:
    eval_template = system_prompt_evaluation_basic
  try:
    if example_output == None:
      simplicity_scores.append(None)
      continue
    simple, inp = simplicity(model, tokenizer, eval_template+"\n"+output_eval_rules, json.dumps(example_output), evaluation_configs)
    v = process_output(simple.sequences[0], inp, model_final_tag, model_final_tag_end, model_output_start, model_output_end)
    simplicity_scores.append(v["simplicity_score"])
    print(v)
  except Exception as e:
    print(e)
    simplicity_scores.append(None)

 33%|███▎      | 1/3 [00:15<00:30, 15.01s/it]

{'simplicity_score': 2, 'readability_critique': 'The summary uses many medical words that a 4.5‑grade student would not know, and some sentences are long with many details. The overall flow is clear, but the vocabulary is too advanced for the target age.', 'most_confusing_term': 'Ascites'}


 67%|██████▋   | 2/3 [00:34<00:17, 17.91s/it]

{'simplicity_score': 3, 'readability_critique': 'The summary uses some medical terms but explains most in plain language; sentences are short but still contain several drug names that may be unfamiliar. The flow is clear, but the presence of technical names makes it a bit harder for a 4.5‑grade student.', 'most_confusing_term': 'Acetaminophen'}


100%|██████████| 3/3 [00:53<00:00, 17.67s/it]

{'simplicity_score': 2, 'readability_critique': 'The summary uses short sentences and simple explanations, but it still includes many medical words that a 4.5‑grade student may not know. The flow is clear, yet the vocabulary is too advanced for that age level.', 'most_confusing_term': 'Ascites'}





In [None]:
relevance_scores

[0.5111734867095947, 0.516891598701477, 0.5118528008460999]

In [None]:
correctness_scores

[1.257956936955452, 1.184665946289897, 1.1677750460803509]

In [None]:
simplicity_scores

[2, 3, 2, 2, 3, 2]

In [None]:
processed_outputs

[[{'diagnosis_list': [{'original': 'Ascites from Portal HTN',
     'new': 'Abdominal swelling from high blood pressure in liver veins'}],
   'medication_list': [{'med_name': 'Albuterol Inhaler',
     'regimen': '2 puffs inhaled every 4 hours as needed',
     'status': 'Continued',
     'purpose': 'help with wheezing and breathing'},
    {'med_name': 'Emtricitabine-Tenofovir (Truvada)',
     'regimen': '1 tablet by mouth daily',
     'status': 'Continued',
     'purpose': 'treat HIV infection'},
    {'med_name': 'Furosemide',
     'regimen': '40 mg by mouth daily',
     'status': 'Continued',
     'purpose': 'remove extra fluid from body'},
    {'med_name': 'Ipratropium Bromide Nebulizer',
     'regimen': '1 nebulizer treatment every 6 hours as needed',
     'status': 'Continued',
     'purpose': 'help with breathing and wheezing'},
    {'med_name': 'Nicotine Patch',
     'regimen': '14 mg patch daily',
     'status': 'Continued',
     'purpose': 'help stop smoking'},
    {'med_name': '

# Re-process for UI

In [11]:
basic = processed_outputs["basic"][0]
intermediate = processed_outputs["intermediate"][0]
advanced = processed_outputs["advanced"][0]

In [12]:
output = {}
keep = ["regimen", "purpose"]
for b_dictionary, i_dictionary, a_dictionary in zip(basic["medication_list"], intermediate["medication_list"], advanced["medication_list"]):
  output[b_dictionary["med_name"]] = {"basic": "\n".join([value for key, value in b_dictionary.items() if key in keep]), "intermediate": "\n".join([value for key, value in i_dictionary.items() if key in keep]), "advanced": "\n".join([value for key, value in a_dictionary.items() if key in keep])}

In [13]:
for b_dictionary, i_dictionary, a_dictionary in zip(basic["diagnosis_list"], intermediate["diagnosis_list"], advanced["diagnosis_list"]):
  output[b_dictionary["original"]] = {"basic": b_dictionary["new"], "intermediate": i_dictionary["new"], "advanced": a_dictionary["new"]}

In [14]:
json.dump(output, open("finalized_output.json", "w"), indent=4)

In [15]:
list_of_actions = []
for item in basic["follow_up_instructions"]:
  list_of_actions.append(f"{item['action']} - {item['timeframe']}")

In [16]:
json.dump(list_of_actions, open("final_actions.json", "w"))

# UI Management

In [17]:
import pandas as pd

# Read into a DataFrame
with open("MIMIC-IV Notes Datathon v4 20251107(Sheet1).csv", 'r', errors='ignore') as f:
    df = pd.read_csv(f, nrows=10000, header=1)

# Select patient to display
pt_num = 1

# Get all medications for patient
meds = df.iloc[pt_num, 6:50]
meds = meds.tolist()
meds = pd.Series(meds).dropna().tolist()

# Get diagnoses for patient
diags = [df.iloc[pt_num, 51]]

# Get discharge instructions for patient
disch_inst = df.iloc[pt_num, 74]

In [29]:
print(meds)

['Acetaminophen 500 mg PO Q6H:PRN pain,fever', 'Albuterol Inhaler 2 PUFF IH Q6H:PRN wheezing, SOB', 'Calcium Carbonate 1250 mg PO BID', '___ (Truvada) 1 TAB PO DAILY', 'Furosemide 40 mg PO DAILY', 'Lactulose 15 mL PO TID', 'Raltegravir 400 mg PO BID', 'Rifaximin 550 mg PO BID', 'Sulfameth/Trimethoprim DS 1 TAB PO DAILY', 'Tiotropium Bromide 1 CAP IH DAILY ']


In [18]:
import json

# Structure data
data = {
    "Diagnoses": diags,
    "Medications": meds,
    "Discharge Instructions": (
        disch_inst
    ),
    "ActionableItems":  json.load(open("final_actions.json"))
}
data_json = json.dumps(data)

# Keywords and their explanations
keywords_json = json.load(open("finalized_output.json"))

In [19]:
from IPython.display import display, HTML


In [20]:
html_content = f"""
<div class="container">
    <h1>🏥 CareBridge: My Discharge Summary</h1>

    <!-- Font Size Control -->
    <div class="font-control">
        <label for="fontSize">Adjust font size:</label>
        <select id="fontSize">
            <option value="14px">Small</option>
            <option value="18px" selected>Medium</option>
            <option value="22px">Large</option>
            <option value="26px">Extra Large</option>
        </select>
    </div>

    <div class="card">
        <h2>🩺 Diagnoses</h2>
        <ul id="diagnoses-list"></ul>
    </div>

    <div class="card">
        <h2>💊 Medications</h2>
        <ul id="medications-list"></ul>
    </div>

    <div class="card">
        <h2>📋 Discharge Instructions</h2>
        <p id="instructions-text"></p>
    </div>

    <div class="card">
        <h2>✅ Actionable Items</h2>
        <ul id="actitems-list"></ul>
    </div>
</div>

<style>
.container {{
    font-family: 'Helvetica Neue', Arial, sans-serif;
    color: #1a1a1a;
    background: #f9fbfd;
    padding: 30px;
    border-radius: 16px;
    max-width: 900px;
    margin: 40px auto;
    box-shadow: 0 0 20px rgba(0,0,0,0.05);
    transition: font-size 0.2s ease;
}}
h1 {{
    color: #042a70;
    text-align: center;
    margin-bottom: 20px;
}}
.font-control {{
    text-align: center;
    margin-bottom: 20px;
}}
select {{
    margin-left: 10px;
    padding: 5px;
    border-radius: 6px;
    border: 1px solid #ccc;
}}
h2 {{
    color: #075099;
    border-bottom: 2px solid #004d99;
    padding-bottom: 5px;
    margin-bottom: 15px;
}}
ul {{
    list-style-type: none;
    padding-left: 0;
}}
li {{
    padding: 6px 0;
    border-bottom: 1px solid #eee;
}}
.card {{
    background: white;
    border-radius: 12px;
    padding: 20px 25px;
    margin-bottom: 25px;
    box-shadow: 0 2px 10px rgba(0,0,0,0.05);
}}
.keyword {{
    font-weight: 600;
    color: #006e78;
    cursor: pointer;
    position: relative;
    transition: color 0.2s;
}}
.keyword:hover {{
    color: #006e78;
}}
.tooltip {{
    display: none;
    position: absolute;
    top: 22px;
    left: 0;
    background: #ffffff;
    border: 1px solid #d4ddee;
    border-radius: 8px;
    padding: 8px;
    z-index: 100;
    width: 280px;
    box-shadow: 0 2px 12px rgba(0,0,0,0.15);
    animation: fadeIn 0.2s ease-in-out;
}}
@keyframes fadeIn {{
    from {{opacity: 0; transform: translateY(-4px);}}
    to {{opacity: 1; transform: translateY(0);}}
}}
.tooltip button {{
    background: #e6f0ff;
    border: none;
    border-radius: 6px;
    padding: 4px 8px;
    margin: 2px;
    cursor: pointer;
    font-size: inherit;
    transition: background 0.2s;
}}
.tooltip button:hover {{
    background: #cce0ff;
}}
.tooltip p {{
    margin-top: 6px;
    font-size: inherit;
    color: #333;
}}
input[type="checkbox"] {{
    transform: scale(1.2);
    margin-right: 10px;
    cursor: pointer;
}}
</style>
<script>
const data = {data_json};
const keywords = {keywords_json};

function highlightKeywords(text) {{
    for (const key in keywords) {{
        const escapedKey = key.replace(/[.*+?^${{}}()|[\\]\\\\]/g, '\\\\$&');
        const regex = new RegExp(escapedKey, "gi");
        text = text.replace(regex, (match) => {{
            return `<span class="keyword">${{match}}
                        <div class="tooltip">
                            <div style='font-weight:600; margin-bottom:4px;'>Explain:</div>
                            <button onclick="this.closest('.tooltip').querySelector('p').innerText=keywords['${{key}}'].basic">Basic</button>
                            <button onclick="this.closest('.tooltip').querySelector('p').innerText=keywords['${{key}}'].intermediate">Intermediate</button>
                            <button onclick="this.closest('.tooltip').querySelector('p').innerText=keywords['${{key}}'].advanced">Advanced</button>
                            <p></p>
                        </div>
                    </span>`;
        }});
    }}
    return text;
}}

// Add hover delay to tooltips
function addTooltipDelay() {{
    const tooltips = document.querySelectorAll('.tooltip');
    tooltips.forEach(t => {{
        let timeout;
        const parent = t.parentElement;
        parent.addEventListener('mouseenter', () => {{
            clearTimeout(timeout);
            t.style.display = 'block';
        }});
        parent.addEventListener('mouseleave', () => {{
            timeout = setTimeout(() => {{ t.style.display = 'none'; }}, 300);
        }});
        t.addEventListener('mouseenter', () => {{
            clearTimeout(timeout);
        }});
        t.addEventListener('mouseleave', () => {{
            timeout = setTimeout(() => {{ t.style.display = 'none'; }}, 300);
        }});
    }});
}}

// Populate Diagnoses
const diagList = document.getElementById("diagnoses-list");
data.Diagnoses.forEach(d => {{
    const diag = document.createElement("li");
    diag.innerHTML = highlightKeywords(d);
    diagList.appendChild(diag);
}});

// Populate Medications
const medList = document.getElementById("medications-list");
data.Medications.forEach(m => {{
    const med = document.createElement("li");
    med.innerHTML = highlightKeywords(m);
    medList.appendChild(med);
}});

// Populate Discharge Instructions
document.getElementById("instructions-text").innerHTML = highlightKeywords(data["Discharge Instructions"]);

// Populate Actionable Items
const actItemsList = document.getElementById("actitems-list");
data.ActionableItems.forEach(a => {{
    const actItem = document.createElement("li");
    actItem.innerHTML = `<label><input type="checkbox"> ${{highlightKeywords(a)}}</label>`;
    actItemsList.appendChild(actItem);
}});

// Apply tooltip hover delay
addTooltipDelay();

// --- Font Size Control ---
document.getElementById("fontSize").addEventListener("change", function() {{
    document.querySelector('.container').style.fontSize = this.value;
}});
</script>
"""

display(HTML(html_content))
