# Medical Insight Extraction from Clinical Reports

In [2]:
# !pip install -q datasets
# !pip install -q evaluate
# !pip install -q rouge_score

In [4]:
from datasets import load_dataset
import textwrap
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import LoraConfig, TaskType, get_peft_model, PeftModel
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments, DataCollatorForSeq2Seq
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
import evaluate

In [12]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
WANDB_API_KEY = user_secrets.get_secret("WANDB_API_KEY")
HF_API_KEY = user_secrets.get_secret("HF_API_KEY")

In [8]:
import wandb

# https://wandb.ai/authorize
wandb.login(key=WANDB_API_KEY)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmk-mostafaessam[0m ([33mmk-mostafaessam-helwan-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [15]:
# HF_API_KEY

In [16]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Dataset

In [17]:
ds = load_dataset("hejazizo/mimic-iii")

README.md:   0%|          | 0.00/538 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/26.7M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/3.36M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/6.50M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/59320 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/7413 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/13057 [00:00<?, ? examples/s]

In [18]:
ds

DatasetDict({
    train: Dataset({
        features: ['prompt', 'impressions'],
        num_rows: 59320
    })
    validation: Dataset({
        features: ['prompt', 'impressions'],
        num_rows: 7413
    })
    test: Dataset({
        features: ['prompt', 'impressions'],
        num_rows: 13057
    })
})

In [19]:
def print_samples(ds, n_samples=1, split='test', shuffle=True, seed=None):
    ds = ds[split]
    
    if shuffle:
        ds = ds.shuffle(seed=seed) if seed else ds.shuffle()
    
    samples = ds.select(range(n_samples))

    for i, sample in enumerate(samples):
        print(">> Report:\n", textwrap.fill(sample['prompt'], width=100))
        print("\n>> Impression:\n", textwrap.fill(sample['impressions'], width=100))
        if n_samples > 1 and i+1 != n_samples:
            print("\n", "-" * 100, "\n")

In [22]:
print_samples(ds, 2, shuffle=False)

>> Report:
 the liver pancreas spleen adrenals and kidneys are normal the aorta is of normal caliber no enlarged
lymph node identified in the retroperitoneum there is bilateral hydronephrosis right side greater
than left

>> Impression:
 1 bilateral adenxal tumors with resultant bilateral hydronephrosis right side greater than left most
likely metastatic ___

 ---------------------------------------------------------------------------------------------------- 

>> Report:
 status post radical hysterectomy there is susceptibility artifact on the left lying between the
bladder and the rectum -- are there surgical clips in this location adjacent to the artifact there
is an ill-defined 16 x 29 mm area of abnormal soft tissue intensity hypointense on t1 with
intermediate intensity on t2 no discrete mass is seen no enlarge pelvic lymph nodes are detected the
bladder wall is not thickened a small amount of free fluid is present within the pelvis there is is
left hydroureter with dilatation of

In [23]:
def add_prefix_fn(example):
    return {
        "input": "extract: " + example["prompt"],
        "output": example["impressions"]
    }

In [24]:
ds_prefix = ds.map(
    add_prefix_fn,
    batched=False,
    remove_columns=ds['train'].column_names
)

Map:   0%|          | 0/59320 [00:00<?, ? examples/s]

Map:   0%|          | 0/7413 [00:00<?, ? examples/s]

Map:   0%|          | 0/13057 [00:00<?, ? examples/s]

In [25]:
ds_prefix

DatasetDict({
    train: Dataset({
        features: ['input', 'output'],
        num_rows: 59320
    })
    validation: Dataset({
        features: ['input', 'output'],
        num_rows: 7413
    })
    test: Dataset({
        features: ['input', 'output'],
        num_rows: 13057
    })
})

In [26]:
model_checkpoint = "t5-small"

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [27]:
def tokenize_fn(examples):
    model_inputs = tokenizer(
        examples["input"],
        # padding="max_length",
        truncation=True,
        max_length=512,
    )
    
    labels = tokenizer(
        examples["output"],
        # padding="max_length",
        truncation=True,
        max_length=256,
    )

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [28]:
ds_tokenized = ds_prefix.map(
    tokenize_fn,
    batched=True,
    remove_columns=ds_prefix['train'].column_names
)

Map:   0%|          | 0/59320 [00:00<?, ? examples/s]

Map:   0%|          | 0/7413 [00:00<?, ? examples/s]

Map:   0%|          | 0/13057 [00:00<?, ? examples/s]

In [31]:
ds_tokenized

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 59320
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 7413
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 13057
    })
})

# Modeling

In [39]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=64,
    lora_dropout=0.1,
    bias="none",
    target_modules="all-linear",
    task_type=TaskType.SEQ_2_SEQ_LM
)

In [40]:
model = get_peft_model(
    model=model,
    peft_config=lora_config
)

In [41]:
training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    logging_strategy="steps",
    logging_steps=500,
    save_strategy="epoch",
    save_total_limit=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # learning_rate=2e-5,
    learning_rate = 5e-5,
    warmup_steps = 500,  # Optional, helps stabilize early training
    weight_decay=0.01,
    num_train_epochs=5,
    predict_with_generate=True,
    fp16=True,
    report_to="wandb",
    run_name="lora-mimic-finetune-v2",
)

In [42]:
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding=True,
    return_tensors='pt'
)

In [44]:
trainer = Seq2SeqTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    data_collator=data_collator,
    train_dataset=ds_tokenized["train"],
    eval_dataset=ds_tokenized["validation"],
)

  trainer = Seq2SeqTrainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [45]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch,Training Loss,Validation Loss
1,3.0135,2.585762
2,2.814,2.451436
3,2.7164,2.376428
4,2.6834,2.349041
5,2.6582,2.338226




TrainOutput(global_step=9270, training_loss=2.859317408359038, metrics={'train_runtime': 11506.1271, 'train_samples_per_second': 25.778, 'train_steps_per_second': 0.806, 'total_flos': 3.617155392838042e+16, 'train_loss': 2.859317408359038, 'epoch': 5.0})

### Charts and Plots: https://wandb.ai/mk-mostafaessam-helwan-university/huggingface?nw=nwusermkmostafaessam

# Testing

In [46]:
ex = ds_prefix['test'][0]

print(">> Report:", ex['input'], '\n\n')
print(">> Impression:", ex['output'])

>> Report: extract: the liver pancreas spleen adrenals and kidneys are normal the aorta is of normal caliber no enlarged lymph node identified in the retroperitoneum there is bilateral hydronephrosis right side greater than left 


>> Impression: 1 bilateral adenxal tumors with resultant bilateral hydronephrosis right side greater than left most likely metastatic ___


In [47]:
inputs = tokenizer(ex['input'], return_tensors='pt')#.to(model.device)
inputs

{'input_ids': tensor([[ 5819,    10,     8, 11501,  2131,  5045,     9,     7,     3,     7,
          4788,    35, 23563,     7,    11, 11546,     7,    33,  1389,     8,
             3,     9,   127,    17,     9,    19,    13,  1389,   212, 10661,
           150,     3, 30670, 25049,   150,   221,  4313,    16,     8,  9337,
          4267,  6948,   440,   132,    19, 24097,  7668,    29,    15, 31156,
             7,   159,   269,   596,  2123,   145,   646,     1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [48]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [49]:
inputs = inputs.to(device)
inputs

{'input_ids': tensor([[ 5819,    10,     8, 11501,  2131,  5045,     9,     7,     3,     7,
          4788,    35, 23563,     7,    11, 11546,     7,    33,  1389,     8,
             3,     9,   127,    17,     9,    19,    13,  1389,   212, 10661,
           150,     3, 30670, 25049,   150,   221,  4313,    16,     8,  9337,
          4267,  6948,   440,   132,    19, 24097,  7668,    29,    15, 31156,
             7,   159,   269,   596,  2123,   145,   646,     1]],
       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}

In [50]:
outputs = model.generate(**inputs, max_new_tokens=200)
outputs

tensor([[    0,   209, 24097,  7668,    29,    15, 31156,     7,   159,   269,
           596,  2123,   145,   646,     1]], device='cuda:0')

In [51]:
tokenizer.decode(outputs[0], skip_special_tokens=True)

'1 bilateral hydronephrosis right side greater than left'

# Evaluation

In [77]:
from torch.utils.data import DataLoader

def get_yt_yp(model):
    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
    
    dataloader = DataLoader(
        ds_tokenized["test"].shuffle().select(range(1000)),
        batch_size=8,
        collate_fn=data_collator
    )
    
    model.eval()
    predictions = []
    references = []
    
    for batch in tqdm(dataloader):
        input_ids = batch["input_ids"].to(model.device)
        attention_mask = batch["attention_mask"].to(model.device)
        labels = batch["labels"]
    
        with torch.no_grad():
            outputs = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=128
            )
    
        decoded_preds = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        
        labels = labels.clone()
        labels[labels == -100] = tokenizer.pad_token_id
        decoded_refs = tokenizer.batch_decode(labels, skip_special_tokens=True)
    
        predictions.extend(decoded_preds)
        references.extend(decoded_refs)
        
        return references, predictions

In [None]:
references, predictions = get_yt_yp(model)

In [48]:
# !pip install rouge_score

In [53]:
import evaluate

rouge = evaluate.load("rouge")  # Recall-Oriented Understudy for Gisting Evaluation

results = rouge.compute(
    predictions=predictions,
    references=references
)

results

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

{'rouge1': 0.3232767395492883,
 'rouge2': 0.14866990456747228,
 'rougeL': 0.25468425796453587,
 'rougeLsum': 0.25502170521003165}

### 📝 ROUGE Evaluation Report

---

#### 🔍 What These Scores Mean:

- **ROUGE-1 (32.3%)**: Measures **unigram (word-level)** overlap. Decent for basic content similarity.
- **ROUGE-2 (14.8%)**: Measures **bigram** overlap. Lower, indicating less fluency or coherence.
- **ROUGE-L / ROUGE-Lsum (~25.5%)**: Measures the **longest common subsequence** — good for capturing sentence-level structure.

---

#### 🎯 Interpretation:

**If your task is something like:**

- **Feature Extraction**:  
  These scores are **in the typical range** for early or base models  
  _(e.g., many feature extraction baselines score ~0.2–0.4 ROUGE-1 on CNN/DailyMail)_.

- **Radiology Report Generation / Medical Domain**:  
  These scores are actually **quite reasonable**, because:
  - Medical text uses **domain-specific vocabulary**.
  - There are often **multiple valid ways to phrase** the same content.
  - **ROUGE** often **underrepresents semantic quality** in such contexts.

---


In [55]:
import evaluate

rouge = evaluate.load("bleu")  # Bilingual Evaluation Understudy

results = rouge.compute(
    predictions=predictions,
    references=references
)

results

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

{'bleu': 0.08188674223759468,
 'precisions': [0.3962468749058707,
  0.17068853069971118,
  0.09378505721337223,
  0.05470198675496689],
 'brevity_penalty': 0.5999801181053808,
 'length_ratio': 0.6618752367471441,
 'translation_length': 33199,
 'reference_length': 50159}

### 📝 BLEU Evaluation Report

---

#### 📊 BLEU Score Summary:

- **BLEU Score**: **8.2%**
- **Precisions**:
  - 1-gram: **39.6%**
  - 2-gram: **17.1%**
  - 3-gram: **9.4%**
  - 4-gram: **5.5%**
- **Brevity Penalty**: **0.59**
- **Length Ratio**: **0.66**
  - Translation Length: **33,199**
  - Reference Length: **50,159**

---

#### 🔍 What These Scores Mean:

- **Low BLEU (~8%)**: Indicates **limited n-gram overlap** with references.
- **High 1-gram precision**: Shows the model captures **basic words** well.
- **Steep drop in higher n-grams**: Suggests difficulty in **generating fluent multi-word phrases** or full sentence structures.
- **High brevity penalty**: Model is **generating shorter outputs** than expected.

---

#### 🎯 Interpretation:

- For **radiology report generation**, low BLEU is not uncommon:
  - Multiple valid phrasings exist for the same findings.
  - BLEU is known to **undervalue semantic correctness** and **clinical acceptability**.
  - Consider complementing BLEU with **ROUGE**, **BERTScore**, or even **human evaluation**.

---


In [90]:
base_model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

In [85]:
references, predictions = get_yt_yp(model=base_model)

  0%|          | 0/125 [00:05<?, ?it/s]


In [86]:
import evaluate

rouge = evaluate.load("rouge")  # Recall-Oriented Understudy for Gisting Evaluation

results = rouge.compute(
    predictions=predictions,
    references=references
)

results

{'rouge1': 0.249474221222722,
 'rouge2': 0.11572933593748251,
 'rougeL': 0.18151281294084895,
 'rougeLsum': 0.18215509159755788}

In [87]:
import evaluate

rouge = evaluate.load("bleu")  # Bilingual Evaluation Understudy

results = rouge.compute(
    predictions=predictions,
    references=references
)

results

{'bleu': 0.10307073594914497,
 'precisions': [0.24110671936758893,
  0.12449799196787148,
  0.07551020408163266,
  0.04979253112033195],
 'brevity_penalty': 1.0,
 'length_ratio': 1.3863013698630138,
 'translation_length': 506,
 'reference_length': 365}

# Saving the model

In [56]:
model.save_pretrained(save_directory='lora_adapter_only')

In [57]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
base_model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
peft_model = PeftModel.from_pretrained(model=base_model, model_id="/kaggle/working/lora_adapter_only")

In [58]:
peft_model.push_to_hub("MK-Mostafa/t5-small-mimic-lora-finetune-v1")

adapter_model.safetensors:   0%|          | 0.00/8.68M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/MK-Mostafa/t5-small-mimic-lora-finetune-v1/commit/781c63fbcfc92e6d3da04688c41f9cee023d7838', commit_message='Upload model', commit_description='', oid='781c63fbcfc92e6d3da04688c41f9cee023d7838', pr_url=None, repo_url=RepoUrl('https://huggingface.co/MK-Mostafa/t5-small-mimic-lora-finetune-v1', endpoint='https://huggingface.co', repo_type='model', repo_id='MK-Mostafa/t5-small-mimic-lora-finetune-v1'), pr_revision=None, pr_num=None)

In [59]:
tokenizer.push_to_hub("MK-Mostafa/t5-small-mimic-lora-finetune-v1")

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/MK-Mostafa/t5-small-mimic-lora-finetune-v1/commit/89a112db242b4e3ec045ee1de9317e73ac9de436', commit_message='Upload tokenizer', commit_description='', oid='89a112db242b4e3ec045ee1de9317e73ac9de436', pr_url=None, repo_url=RepoUrl('https://huggingface.co/MK-Mostafa/t5-small-mimic-lora-finetune-v1', endpoint='https://huggingface.co', repo_type='model', repo_id='MK-Mostafa/t5-small-mimic-lora-finetune-v1'), pr_revision=None, pr_num=None)

In [60]:
base_model.push_to_hub("MK-Mostafa/t5-small-mimic-lora-finetune-v1")

model.safetensors:   0%|          | 0.00/251M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/MK-Mostafa/t5-small-mimic-lora-finetune-v1/commit/d79cf8ad3be061566395605e76ca96ecf6573ffa', commit_message='Upload T5ForConditionalGeneration', commit_description='', oid='d79cf8ad3be061566395605e76ca96ecf6573ffa', pr_url=None, repo_url=RepoUrl('https://huggingface.co/MK-Mostafa/t5-small-mimic-lora-finetune-v1', endpoint='https://huggingface.co', repo_type='model', repo_id='MK-Mostafa/t5-small-mimic-lora-finetune-v1'), pr_revision=None, pr_num=None)

# Inference

In [61]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel
import torch

In [62]:
tokenizer = AutoTokenizer.from_pretrained("MK-Mostafa/t5-small-mimic-lora-finetune-v1")
base_model = AutoModelForSeq2SeqLM.from_pretrained("MK-Mostafa/t5-small-mimic-lora-finetune-v1")
model = PeftModel.from_pretrained(base_model, "MK-Mostafa/t5-small-mimic-lora-finetune-v1")

tokenizer_config.json:   0%|          | 0.00/20.8k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/8.68M [00:00<?, ?B/s]

In [63]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [96]:
base_model = base_model.to(device)
base_model.eval()

model = model.to(device)
model.eval()

print( model.device )

cuda:0


In [70]:
inputs = tokenizer(ds_prefix['test'][1]['input'], return_tensors="pt")
inputs = inputs.to(device)

outputs = model.generate(**inputs, max_new_tokens=200)

tokenizer.decode(outputs[0], skip_special_tokens=True)

'1 no evidence of a stranding or hydronephrosis 2 ill-defined stranding on the left lying between the bladder and the rectum'

In [91]:
def generate_output(report, model, max_new_tokens=200):
    report = "extract: " + report
    inputs = tokenizer(
        report,
        padding=False,
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )

    inputs = inputs.to(device)
    
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    return generated_text

In [92]:
ds['test'][1]['prompt']

'status post radical hysterectomy there is susceptibility artifact on the left lying between the bladder and the rectum -- are there surgical clips in this location adjacent to the artifact there is an ill-defined 16 x 29 mm area of abnormal soft tissue intensity hypointense on t1 with intermediate intensity on t2 no discrete mass is seen no enlarge pelvic lymph nodes are detected the bladder wall is not thickened a small amount of free fluid is present within the pelvis there is is left hydroureter with dilatation of the renal pelvis and prominence of the calyces the hydroureter extends down to the area of the susceptibility artifact and the adjoining area of abnormal soft tissue intensity the right renal collecting system is within normal limits comparison was made to ct dated ___ the area of soft tissue intensity corresponds to some ill-defined stranding seen at that time however no hydronephrosis was seen on the ___ ct scan'

In [98]:
generate_output("""
    status post radical hysterectomy there is susceptibility artifact on the left lying between
    the bladder and the rectum -- are there surgical clips in this location adjacent to the
    artifact there is an ill-defined 16 x 29 mm area of abnormal soft tissue intensity
    hypointense on t1 with intermediate intensity on t2 no discrete mass is seen no enlarge
    pelvic lymph nodes are detected the bladder wall is not thickened a small amount of free
    fluid is present within the pelvis there is is left hydroureter with dilatation of the renal
    pelvis and prominence of the calyces the hydroureter extends down to the area of the
    susceptibility artifact and the adjoining area of abnormal soft tissue intensity the right
    renal collecting system is within normal limits comparison was made to ct dated ___ the area
    of soft tissue intensity corresponds to some ill-defined stranding seen at that time however
    no hydronephrosis was seen on the ___ ct scan
""",
    model=base_model
)

': status post radical hysterectomy there is susceptibility artifact on the left lying between the bladder and the rectum -- are there surgical clips adjacent to the artifact there is an ill-defined 16 x 29 mm area of abnormal soft tissue intensity hypointense on t1 with intermediate intensity on t2 no discrete mass is seen no enlarge pelvic lymph nodes are detected the bladder wall is not thickened a small amount of free fluid is present within the pelvis there'

In [93]:
generate_output("""
    status post radical hysterectomy there is susceptibility artifact on the left lying between
    the bladder and the rectum -- are there surgical clips in this location adjacent to the
    artifact there is an ill-defined 16 x 29 mm area of abnormal soft tissue intensity
    hypointense on t1 with intermediate intensity on t2 no discrete mass is seen no enlarge
    pelvic lymph nodes are detected the bladder wall is not thickened a small amount of free
    fluid is present within the pelvis there is is left hydroureter with dilatation of the renal
    pelvis and prominence of the calyces the hydroureter extends down to the area of the
    susceptibility artifact and the adjoining area of abnormal soft tissue intensity the right
    renal collecting system is within normal limits comparison was made to ct dated ___ the area
    of soft tissue intensity corresponds to some ill-defined stranding seen at that time however
    no hydronephrosis was seen on the ___ ct scan
""",
    model=model
)

'1 no evidence of a stranding or hydronephrosis 2 ill-defined stranding on the left lying between the bladder and the rectum'