# Medical-Aware Tokenizer Demo (OpenMed)

Compare default WordPiece vs medical-aware pre-tokenizer on biomedical text, and show CLI usage.

In [None]:
import os
from openmed import analyze_text
from openmed.core import OpenMedConfig

# Sample biomedical sentence containing hyphenated terms, dose units,
# percentages and abbreviations.
text = (
    "IL-6-mediated cytokine storm post-CAR-T; given tocilizumab 8mg/kg; "
    "COVID-19+ patient with O2 sat 88% on 40% FiO2 via HFNC."
)

# Two configurations that differ only in the medical-tokenizer switch
# (enabled first, then disabled).
cfg_on, cfg_off = (
    OpenMedConfig(use_medical_tokenizer=enabled) for enabled in (True, False)
)

# Analyze the same text with the same model, once per configuration.
res_on, res_off = (
    analyze_text(text, model_name='oncology_detection_superclinical', config=cfg)
    for cfg in (cfg_on, cfg_off)
)

def to_rows(result):
    """Flatten ``result.entities`` into a list of plain tuples.

    Each entity contributes one ``(label, text, confidence)`` tuple, in the
    order the entities appear; the confidence is converted with ``float()``.
    """
    def as_row(entity):
        # Attribute access order matches the tuple layout: label, text, score.
        return (entity.label, entity.text, float(entity.confidence))

    return [as_row(entity) for entity in result.entities]

# Flatten both results into tuples so the two runs can be printed and
# compared one entity per line.
rows_on = to_rows(res_on)
rows_off = to_rows(res_off)

print('With medical tokenizer:')
for row in rows_on:
    print(row)

# The leading "\n" puts a blank line between the two listings. It must be
# written as an escape sequence: a single-quoted literal cannot span a raw
# line break.
print('\nWithout medical tokenizer:')
for row in rows_off:
    print(row)

# Show CLI example (string, not executed here).
# The outer literals are single-quoted so the double quotes around the --text
# value are kept in the command; writing them as adjacent "" pairs would just
# concatenate string literals and silently drop the quotes.
cli_cmd = (
    'openmed analyze --text "COVID-19 patient on IL-6 inhibitor" '
    '--no-medical-tokenizer'
)
# "\n" (an escape, not a raw line break) separates this from earlier output.
print('\nCLI example:', cli_cmd)
