In [1]:
import os
import json
import sys
import polars as pl
from pathlib import Path
from rich.console import Console
from dotenv import load_dotenv

load_dotenv()

cons = Console()

In [2]:
import torch

cuda_available = torch.cuda.is_available()
cons.print(f"CUDA available: {cuda_available}")
if cuda_available:
    cons.print(f"CUDA device: {torch.cuda.get_device_name(0)}")

## Load Dataset

In [None]:
from huggingface_hub import login

# Get token from environment variable instead of hardcoding
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    print("Warning: HF_TOKEN not found in environment variables")

In [4]:
from datasets import load_dataset

ds = load_dataset("schorndorfer/mdace-inpatient")
cons.print(f"Dataset loaded")
test_df = pl.from_dataframe(ds['test'].to_pandas())
cons.print(f"Test DataFrame shape: {test_df.shape}")

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


README.md:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

inpatient_icd10cm_train.parquet:   0%|          | 0.00/1.49M [00:00<?, ?B/s]

inpatient_icd10cm_val.parquet:   0%|          | 0.00/1.20M [00:00<?, ?B/s]

inpatient_icd10cm_test.parquet:   0%|          | 0.00/1.18M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2197 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/732 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/733 [00:00<?, ? examples/s]

## Load MedGemma

In [5]:
pip install --upgrade --quiet accelerate bitsandbytes transformers

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h

In [37]:
from transformers import BitsAndBytesConfig
import torch

google_colab = "google.colab" in sys.modules and not os.environ.get("VERTEX_PRODUCT")

model_variant = "4b-it"  # @param ["4b-it", "27b-it", "27b-text-it"]
model_variant = "27b-text-it"  # @param ["4b-it", "27b-it", "27b-text-it"]
model_id = f"google/medgemma-{model_variant}"

use_quantization = True  # @param {type: "boolean"}

# @markdown Set `is_thinking` to `True` to turn on thinking mode. **Note:** Thinking is supported for the 27B variants only.
is_thinking = True  # @param {type: "boolean"}

# If running a 27B variant in Google Colab, check if the runtime satisfies
# memory requirements
if "27b" in model_variant and google_colab:
    if not ("A100" in torch.cuda.get_device_name(0) and use_quantization):
        raise ValueError(
            "Runtime has insufficient memory to run a 27B variant. "
            "Please select an A100 GPU and use 4-bit quantization."
        )

model_kwargs = dict(
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

if use_quantization:
    model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)

In [38]:
from transformers import pipeline

if "text" in model_variant:
    pipe = pipeline("text-generation", model=model_id, model_kwargs=model_kwargs)
else:
    pipe = pipeline("image-text-to-text", model=model_id, model_kwargs=model_kwargs)

pipe.model.generation_config.do_sample = False

config.json:   0%|          | 0.00/931 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/67.9k [00:00<?, ?B/s]

Fetching 11 files:   0%|          | 0/11 [00:00<?, ?it/s]

model-00007-of-00011.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00011.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00008-of-00011.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00004-of-00011.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00005-of-00011.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00001-of-00011.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00006-of-00011.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00011.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00009-of-00011.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00010-of-00011.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00011-of-00011.safetensors:   0%|          | 0.00/4.59G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/11 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

chat_template.jinja:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

Device set to use cuda:0


## Run Inference

In [None]:
system_prompt = f"""
You are an expert clinical coder. From the following phrase extracted from a medical note, along with the entire note as context,
identify the most relevant ICD10-CM diagnosis codes

Instructions:
- Include all potential relevant codes
- Include a code only once

Output format:
- **Code**: <code>, **Description**: <description>

Just output a list of ICD-10 codes and descriptions, in the format described above.

The input format will be:

Input phrase:
<phrase to code>

Full medical note context:

<full medical note context>

###end###

Output should be in the following format:
<code 1>, <description 1>
<code 2>, <description 2>
<code 3>, <description 3>
"""

In [46]:
row_dict = test_df.row(2, named=True)
input_phrase = row_dict['covered_text']
clinical_note = row_dict['text']
prompt = f"""
Input phrase:
{input_phrase}
\n\n
Full medical note context:
{clinical_note}
\n\n
###end###
"""
row_dict

{'hadm_id': 122180,
 'comment': 'OP notes missing-unable to validate PCS codes',
 'note_id': 457739,
 'category': 'Physician',
 'description': 'Intensivist Note',
 'text': "SICU\n   HPI:\n   81F c/o headache, neck pain found to have SAH, cardiac arrest at OSH.\n   Chief complaint:\n   PMHx:\n   PMH: CAD s/p CABG, HTN, high cholesterol\n   PSH: CABG, hysterectomy, cholecystectomy\n   Current medications:\n   1. 2. Acetaminophen 3. Albuterol Inhaler 4. Bisacodyl 5. Calcium\n   Gluconate 6. CefazoLIN 7. Chlorhexidine Gluconate 0.12% Oral Rinse 8.\n   Famotidine 9. Fentanyl Citrate 10. Furosemide 11. Furosemide 12.\n   Heparin 13. HydrALAzine 14. Insulin 15. Labetalol 16. Lisinopril 17.\n   Magnesium Sulfate 18. Metoprolol Tartrate 19. Nimodipine 20.\n   NiCARdipine 21. Phenytoin 22. Pneumococcal Vac Polyvalent 23. Potassium\n   Chloride 24. Potassium Phosphate 25. Propofol\n   26. Senna 27. Simvastatin\n   24 Hour Events:\n   - continuing to hold Dilantin\n   - increased lisinopril\n   - 

In [47]:
from IPython.display import Markdown

role_instruction = "You are an expert clinical coder. From the following medical note, identify the most relevant ICD-10 codes"

if "27b" in model_variant and is_thinking:
    system_instruction = f"SYSTEM INSTRUCTION: think silently if needed. {role_instruction}"
    max_new_tokens = 1500
else:
    system_instruction = role_instruction
    max_new_tokens = 500

messages = [
    {
        "role": "system",
        "content": [{"type": "text", "text": system_prompt}]
    },
    {
        "role": "user",
        "content": [{"type": "text", "text": prompt}]
    }
]

In [48]:
output = pipe(messages, max_new_tokens=max_new_tokens)
response = output[0]["generated_text"][-1]["content"]

display(Markdown(f"---\n\n**[ User ]**\n\n{prompt}\n\n---"))
display(Markdown(f"**[ MedGemma ]**\n\n{response}\n\n---"))

---

**[ User ]**

SICU
   HPI:
   81F c/o headache, neck pain found to have SAH, cardiac arrest at OSH.
   Chief complaint:
   PMHx:
   PMH: CAD s/p CABG, HTN, high cholesterol
   PSH: CABG, hysterectomy, cholecystectomy
   Current medications:
   1. 2. Acetaminophen 3. Albuterol Inhaler 4. Bisacodyl 5. Calcium
   Gluconate 6. CefazoLIN 7. Chlorhexidine Gluconate 0.12% Oral Rinse 8.
   Famotidine 9. Fentanyl Citrate 10. Furosemide 11. Furosemide 12.
   Heparin 13. HydrALAzine 14. Insulin 15. Labetalol 16. Lisinopril 17.
   Magnesium Sulfate 18. Metoprolol Tartrate 19. Nimodipine 20.
   NiCARdipine 21. Phenytoin 22. Pneumococcal Vac Polyvalent 23. Potassium
   Chloride 24. Potassium Phosphate 25. Propofol
   26. Senna 27. Simvastatin
   24 Hour Events:
   - continuing to hold Dilantin
   - increased lisinopril
   - fentanyl gtt started
   - diuresis with lasix 10 mg x1, 20 mg x1 (weight increasing, increasing
   vent requirements)
   - made DNR
   Allergies:
   No Known Drug Allergies
   Last dose of Antibiotics:
   Cefazolin - [**2107-6-2**] 02:26 PM
   Infusions:
   Fentanyl - 50 mcg/hour
   Nicardipine - 0.8 mcg/Kg/min
   Other ICU medications:
   Labetalol - [**2107-6-2**] 02:00 PM
   Furosemide (Lasix) - [**2107-6-2**] 08:52 PM
   Heparin Sodium (Prophylaxis) - [**2107-6-3**] 12:00 AM
   Hydralazine - [**2107-6-3**] 04:00 AM
   Other medications:
   Flowsheet Data as of  [**2107-6-3**] 05:33 AM
   Vital signs
   Hemodynamic monitoring
   Fluid balance
                                                                  24 hours
                                                             Since [**10**] a.m.
   Tmax: 37.9
C (100.3
   T current: 37.9
C (100.3
   HR: 85 (73 - 87) bpm
   BP: 138/53(77) {109/38(60) - 151/61(90)} mmHg
   RR: 30 (21 - 31) insp/min
   SPO2: 95%
   Heart rhythm: SR (Sinus Rhythm)
   Wgt (current): 99.7 kg (admission): 91 kg
   Height: 65 Inch
   CVP: 20 (14 - 22) mmHg
   ICP: 6 (4 - 9) mmHg
   Total In:
                                                                  2,617 mL
                                                                    364 mL
   PO:
   Tube feeding:
                                                                  1,256 mL
                                                                    279 mL
   IV Fluid:
                                                                  1,181 mL
                                                                     55 mL
   Blood products:
   Total out:
                                                                  1,433 mL
                                                                    286 mL
   Urine:
                                                                  1,262 mL
                                                                    245 mL
   NG:
   Stool:
   Drains:
                                                                    171 mL
                                                                     41 mL
   Balance:
                                                                  1,184 mL
                                                                     78 mL
   Respiratory support
   O2 Delivery Device: Endotracheal tube
   Ventilator mode: CPAP/PSV
   Vt (Spontaneous): 435 (435 - 435) mL
   PS : 10 cmH2O
   RR (Spontaneous): 29
   PEEP: 8 cmH2O
   FiO2: 50%
   PIP: 15 cmH2O
   SPO2: 95%
   ABG: 7.49/23/77/18/-2
   Ve: 13 L/min
   PaO2 / FiO2: 154
   Physical Examination
   General Appearance: No acute distress, intubated, sedated
   HEENT: PERRL
   Cardiovascular: (Rhythm: Regular)
   Respiratory / Chest: (Expansion: Symmetric), (Breath Sounds: CTA
   bilateral : )
   Abdominal: Soft, Non-distended, Non-tender, Bowel sounds present
   Left Extremities: (Edema: 1+), (Pulse - Dorsalis pedis: Present)
   Right Extremities: (Edema: 1+), (Pulse - Dorsalis pedis: Present)
   Neurologic: (Responds to: Noxious stimuli), No(t) Moves all
   extremities, (RUE: No movement), (LUE: Weakness), (RLE: No movement),
   (LLE: No movement), Sedated
   Labs / Radiology
   260 K/uL
   9.5 g/dL
   177 mg/dL
   1.0 mg/dL
   18 mEq/L
   4.1 mEq/L
   42 mg/dL
   116 mEq/L
   143 mEq/L
   28.6 %
   12.5 K/uL
        [image002.jpg]
                             [**2107-5-31**]  03:28 AM
                             [**2107-6-1**]  02:47 AM
                             [**2107-6-1**]  03:04 AM
                             [**2107-6-1**]  06:20 AM
                             [**2107-6-1**]  02:14 PM
                             [**2107-6-2**]  03:36 AM
                             [**2107-6-2**]  06:20 PM
                             [**2107-6-2**]  09:28 PM
                             [**2107-6-3**]  04:14 AM
                             [**2107-6-3**]  04:26 AM
   WBC
   14.9
   14.6
   12.5
   Hct
   31.1
   30.1
   28.6
   Plt
   249
   244
   260
   Creatinine
   0.8
   0.8
   1.0
   TCO2
   20
   17
   20
   20
   20
   21
   18
   Glucose
   138
   149
   131
   146
   144
   177
   Other labs: PT / PTT / INR:12.7/24.3/1.1, CK / CK-MB / Troponin
   T:116/16/0.55, ALT / AST:20/28, Alk-Phos / T bili:67/0.5, Lactic
   Acid:2.2 mmol/L, Albumin:3.0 g/dL, LDH:226 IU/L, Ca:8.2 mg/dL, Mg:2.5
   mg/dL, PO4:1.9 mg/dL
   Assessment and Plan
   RESPIRATORY FAILURE, ACUTE (NOT ARDS/[**Doctor Last Name 11**]), INTRACEREBRAL HEMORRHAGE
   (ICH)
   Assessment and Plan: 81F c/o headache, neck pain found to have SAH,
   cardiac arrest at OSH.
   Neurologic: Neuro checks Q: 1 hr, ICP monitor, Pain controlled, EVD at
   10cm above tragus, continue nimodipine, recheck dilantin level today
   -?clip aneurysm on Monday vs. CMO
 neurosurgery discussing with
   family, fentanyl gtt
   Cardiovascular: Beta-blocker, Statins, Keep SBP 100-140, on metoprolol
   37.5'''/lisinopril 10'/hydral/ nicardipine drip (wean as tolerated), no
   ASA.  Will start lasix
   Pulmonary: Cont ETT, (Ventilator mode: CPAP + PS), needing increased
   PEEP, will diurese today lasix
   Gastrointestinal / Abdomen: NPO, TF.bowel regimen
   Nutrition: Tube feeding, NPO
   Renal: Foley, Adequate UO, Likely needs diuresis - weight increasing,
   increasing vent requirements. Goal 1-2 liters negative today with lasix
   gtt for gentle diuresis.
   Hematology: daily HCT
   Endocrine: SSI
   Infectious Disease: Cefazolin while drains in.  WBC trending down.
   Lines / Tubes / Drains: Foley, OGT, ETT, A-line, RIJ central line, EVD
   x2
   Wounds: c/d/i
   Imaging: CXR today
   Fluids: KVO
   Consults: Neuro surgery
   Billing Diagnosis: Cardiac arrest, Acute MI / Ischemia, (Hemorrhage,
   NOS: Sub-arachnoid), (Respiratory distress: Failure)
   ICU Care
   Nutrition:
   Replete with Fiber (Full) - [**2107-6-3**] 04:58 AM 55 mL/hour
   Glycemic Control:  Regular insulin sliding scale
   Lines:
   Arterial Line - [**2107-5-29**] 04:47 PM
   Multi Lumen - [**2107-5-29**] 04:48 PM
   ICP Catheter - [**2107-5-29**] 05:00 PM
   Prophylaxis:
   DVT: Boots, SQ UF Heparin
   Stress ulcer: H2 blocker
   VAP bundle:
   Comments:
   Communication: Patient discussed on interdisciplinary rounds , ICU
   Code status: Full code
   Disposition: ICU
   Total time spent: 32 minutes
   Patient is critically ill


---

**[ MedGemma ]**

**Code**: J96.00, **Description**: Acute respiratory failure
**Code**: J96.9, **Description**: Acute respiratory failure, unspecified
**Code**: R09.2, **Description**: Respiratory failure
**Code**: J81.0, **Description**: Acute respiratory distress syndrome
**Code**: I63.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.0, **Description**: Cerebral infarction, unspecified
**Code**: I60.1, **Description**: Cerebral infarction, unspecified
**Code**: I60.2, **Description**: Cerebral infarction, unspecified
**Code**: I60.3, **Description**: Cerebral infarction, unspecified
**Code**: I60.4, **Description**: Cerebral infarction, unspecified
**Code**: I60.5, **Description**: Cerebral infarction, unspecified
**Code**: I60.6, **Description**: Cerebral infarction, unspecified
**Code**: I60.7, **Description**: Cerebral infarction, unspecified
**Code**: I60.8, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I60.9, **Description**: Cerebral infarction, unspecified
**Code**: I6

---