# Medical-Aware Tokenizer Demo (OpenMed)

Compare the default WordPiece tokenization with the medical-aware pre-tokenizer on a biomedical text sample, and show the equivalent CLI usage.

In [None]:
import os
from openmed import analyze_text
from openmed.core import OpenMedConfig

# Sample clinical note used as input for both runs. The fragments below are
# joined by implicit string-literal concatenation into one string.
text = (
    "62-year-old male with B-cell ALL day +5 post CAR-T (tisagenlecleucel) "
    "developed IL-6-mediated cytokine storm with Tmax 39.8C, tachycardia 128, "
    "hypotension 88/54 requiring norepinephrine 0.08 mcg/kg/min. "
    "Started on tocilizumab 8 mg/kg IV x1 and stress-dose hydrocortisone; "
    "blood cultures pending, broad-spectrum piperacillin-tazobactam running. "
    "COVID-19 positive by PCR; "
    "oxygen saturation 88% on 40% FiO2 via HFNC, escalating to 60% with "
    "flow 50 L/min. "
    "Ferritin 5200, CRP 184, D-dimer 2.1; "
    "CXR with bibasilar opacities suggesting viral pneumonia."
)

# Two configurations that differ only in the use_medical_tokenizer flag
# (True first, then False).
cfg_on, cfg_off = (
    OpenMedConfig(use_medical_tokenizer=enabled) for enabled in (True, False)
)

# Analyze the same text with the same model under each configuration so the
# results can be compared side by side.
res_on, res_off = (
    analyze_text(
        text, model_name="oncology_detection_superclinical", config=cfg
    )
    for cfg in (cfg_on, cfg_off)
)

def to_rows(result):
    """Flatten the entities of an analysis result into plain tuples.

    Each item in ``result.entities`` becomes a ``(label, text, confidence)``
    tuple, with the confidence passed through ``float``. Returns a list in
    the same order as ``result.entities``.
    """
    rows = []
    for entity in result.entities:
        rows.append((entity.label, entity.text, float(entity.confidence)))
    return rows


# Convert both results into (label, text, confidence) rows.
rows_off = None  # placeholder so both names are introduced together
rows_on = to_rows(res_on)
rows_off = to_rows(res_off)

# Print each section as a heading followed by one row per line; sep="\n"
# yields the same output as printing the heading and then each row in turn.
print("With medical tokenizer:", *rows_on, sep="\n")

print("Without medical tokenizer:", *rows_off, sep="\n")

In [None]:
# Show CLI example (string, not executed here).
# The --text value is wrapped in double quotes: without them a shell would
# split the sentence on spaces and pass only "COVID-19" as the text.
cli_cmd = (
    "openmed analyze --text "
    '"COVID-19 patient on IL-6 inhibitor"'
    " --no-medical-tokenizer"
)
print("CLI example:", cli_cmd)