# Patient Summary Smoke Test (Colab Friendly)

This notebook sets up an end-to-end sanity check for the summarization pipeline described in the paper. Instead of cloning the repository from GitHub inside Colab, we copy the local repository (and datasets) into the Colab VM, install the lightweight dependencies, and then run a single-example generation pass with each model family (LED, Llama 2, GPT-4). Use it whenever you need to confirm that the tooling still works after making local changes.

## Workflow

1. Upload/sync this repository (including the preprocessed MIMIC data) to Google Drive ahead of time. I keep it under `MyDrive/patient_summaries_with_llms`, but you can change the path via `REPO_IN_DRIVE` in the sync cell below; check that its default matches where your copy actually lives.
2. Open this notebook in Google Colab, switch to a GPU runtime, and run the cells sequentially.
3. Each model section only touches a single JSONL example so we can focus on plumbing rather than performance. You'll still need the appropriate credentials/tokens for private Hugging Face models and GPT-4.

In [1]:
# @title (Optional) Verify GPU availability
# Shell out to nvidia-smi; if the command fails (non-zero exit), the `||`
# branch prints a hint to switch the Colab runtime instead of a bare shell error.
!nvidia-smi || echo "GPU not visible; switch Colab runtime to GPU for Llama/LED runs."

Thu Dec  4 04:07:13 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   28C    P0             43W /  400W |       0MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [5]:
# Mount Google Drive when running on Colab.
# The import is guarded so the notebook can still be executed top-to-bottom on a
# local machine, matching the "Running outside Colab" fallback used elsewhere.
try:
    from google.colab import drive  # type: ignore
except ImportError:
    drive = None

if drive is not None:
    drive.mount('/content/drive', force_remount=True)
else:
    print("Running outside Colab; skipping the Google Drive mount.")

ValueError: mount failed

In [3]:
# @title Sync the local repository from Google Drive (no git clone)
from pathlib import Path
import os
import subprocess
import importlib.util

# find_spec() on a dotted name imports the parent package first and raises
# ModuleNotFoundError when `google` itself is not installed, so the probe must
# be wrapped or the non-Colab fallback below can never be reached.
try:
    COLAB = importlib.util.find_spec("google.colab") is not None
except ModuleNotFoundError:
    COLAB = False

if COLAB:
    from google.colab import drive  # type: ignore
    drive.mount('/content/drive', force_remount=True)
    REPO_IN_DRIVE = Path('/content/drive/Othercomputers/My Mac/cs598/patient_summaries_with_llms')  # @param {type:"string"}
    if not REPO_IN_DRIVE.exists():
        raise FileNotFoundError(f"Upload/sync the local repo to {REPO_IN_DRIVE} first.")
    TARGET_DIR = Path('/content/patient_summaries_with_llms')
    TARGET_DIR.mkdir(parents=True, exist_ok=True)
    # Argument-list form avoids shell quoting problems (the Drive path contains
    # a space) and check=True surfaces a failed copy instead of ignoring it.
    # The trailing slashes make rsync mirror directory *contents* into TARGET_DIR.
    subprocess.run(
        ['rsync', '-a', '--delete', f"{REPO_IN_DRIVE}/", f"{TARGET_DIR}/"],
        check=True,
    )
    # Later cells use repo-relative paths, so make the copy the working directory.
    os.chdir(TARGET_DIR)
    print(f"Working directory: {Path.cwd()}")
else:
    print("Running outside Colab; using the current working directory.")
    TARGET_DIR = Path.cwd()

KeyboardInterrupt: 

In [None]:
# @title Install Python dependencies (once per Colab VM)%pip install -q torch==2.1.2+cu118 torchvision==0.16.2+cu118 torchaudio==2.1.2+cu118 --extra-index-url https://download.pytorch.org/whl/cu118%pip install -q transformers==4.39.3 datasets==2.18.0 accelerate==0.28.0 peft==0.10.0 trl==0.7.10 sentencepiece evaluate rouge-score sacrebleu wandb guidance==0.1.10 pyyaml

In [None]:
# @title Create single-example JSONL slices for all smoke tests
import json
import itertools
import shutil
from pathlib import Path


def _copy_head_lines(src_path, dst_path, limit):
    """Copy the first `limit` lines of `src_path` into `dst_path`.

    Args:
        src_path: Path of the line-oriented source file.
        dst_path: Path of the file to (over)write.
        limit: Maximum number of lines to copy.

    Returns:
        The number of lines actually written (may be fewer than `limit`
        when the source is shorter).
    """
    written = 0
    with src_path.open(encoding='utf-8') as src, dst_path.open('w', encoding='utf-8') as dst:
        for line in itertools.islice(src, limit):
            dst.write(line)
            written += 1
    return written


REPO_ROOT = Path('.')
DATA_ROOT = REPO_ROOT / 'data' / 'ann-pt-summ' / '1.0.1' / 'mimic-iv-note-ext-di-bhc' / 'dataset'
assert DATA_ROOT.exists(), f"Missing data directory: {DATA_ROOT}"

SPLIT_FILENAME = 'valid.json'  # @param {type:"string"}
NUM_EXAMPLES = 1  # @param {type:"integer"}
PROMPT_EXAMPLES = 3  # @param {type:"integer"}

split_path = DATA_ROOT / SPLIT_FILENAME
assert split_path.exists(), f"Missing split file: {split_path}"

# On Colab keep the slices outside the synced repo; locally fall back to the repo.
SUBSET_DIR = Path('/content/data_subsets') if Path('/content').exists() else REPO_ROOT / 'data_subsets'
SUBSET_DIR.mkdir(parents=True, exist_ok=True)
SMOKE_JSONL = SUBSET_DIR / 'smoke_valid.jsonl'

num_written = _copy_head_lines(split_path, SMOKE_JSONL, NUM_EXAMPLES)
# Every downstream cell reads the first line of SMOKE_JSONL, so fail here
# rather than with an opaque StopIteration later.
assert num_written > 0, f"No examples could be read from {split_path}"
print(f"Wrote {num_written} example(s) to {SMOKE_JSONL}")

# The GPT-4 cell is invoked with `--what_for smoke --task_id 0`; these files
# use the matching `smoke_0_*` names.
GPT4_DATA_DIR = REPO_ROOT / 'gpt-4' / 'summarization_data'
GPT4_DATA_DIR.mkdir(parents=True, exist_ok=True)
IN_CONTEXT_FILE = GPT4_DATA_DIR / 'smoke_0_in-context.json'
TEST_FILE = GPT4_DATA_DIR / 'smoke_0_test.json'

train_path = DATA_ROOT / 'train.json'
assert train_path.exists(), f"Missing split file: {train_path}"
_copy_head_lines(train_path, IN_CONTEXT_FILE, PROMPT_EXAMPLES)

shutil.copyfile(SMOKE_JSONL, TEST_FILE)
print(f"Prepared GPT-4 smoke files under {GPT4_DATA_DIR}")

In [None]:
# @title Peek at the sample example
import json

# Only the first line of the smoke slice is read; each line is parsed as one
# JSON record.
with open(SMOKE_JSONL) as smoke_file:
    first_record = next(smoke_file)
example = json.loads(first_record)
example

In [None]:
# @title LED-base: generate one summary via run_summarization.py
import os
from pathlib import Path

LED_OUTPUT = Path('/content/led_smoke_outputs') if Path('/content').exists() else Path('led_smoke_outputs')
LED_OUTPUT.mkdir(parents=True, exist_ok=True)
os.environ['WANDB_MODE'] = 'offline'
os.environ['WANDB_PROJECT'] = 'patient-summary-smoke'

!python summarization/run_summarization.py     --model_name_or_path allenai/led-base-16384     --do_predict     --test_file {SMOKE_JSONL}     --text_column text     --summary_column summary     --output_dir {LED_OUTPUT}     --per_device_eval_batch_size 1     --predict_with_generate     --max_source_length 2048     --max_target_length 256     --max_test_samples 1     --report_to none

In [None]:
# @title Inspect LED prediction
from pathlib import Path

# Show the predictions file from the LED output directory if one was written.
pred_file = LED_OUTPUT / 'generated_predictions.txt'
if pred_file.exists():
    print(pred_file.read_text())
else:
    print('No prediction file produced.')

In [None]:
# @title Llama 2 7B (4-bit inference) on the same example
import os
import json
import torch
# BitsAndBytesConfig must be imported explicitly; it is used below to request
# 4-bit NF4 quantization.
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

LLAMA_7B_MODEL = 'meta-llama/Llama-2-7b-chat-hf'  # @param {type:"string"}
# Read the Hugging Face token from the environment; never hardcode it here.
HF_TOKEN = os.environ.get('HF_TOKEN', None)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(LLAMA_7B_MODEL, use_fast=False, token=HF_TOKEN)
if tokenizer.pad_token is None:
    # Fall back to EOS so padding-aware code paths have a pad token to use.
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    LLAMA_7B_MODEL,
    device_map='auto',
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    token=HF_TOKEN,
)
model.eval()

# Same single example the other model sections use.
with open(SMOKE_JSONL) as f:
    sample = json.loads(next(f))

# Blank lines separate the instruction, the note, and the answer cue so the
# segments do not run together into one sentence.
prompt = (
    "You are a helpful clinician writing plain-language discharge instructions.\n\n"
    f"Brief Hospital Course:\n{sample['text']}\n\n"
    "Patient summary:"
)

inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
with torch.inference_mode():
    generation = model.generate(**inputs, max_new_tokens=256, do_sample=False)

# Full decoded text (prompt + continuation), kept for inspection.
result = tokenizer.decode(generation[0], skip_special_tokens=True)
# generate() returns the prompt tokens followed by the new ones; decode only
# the continuation instead of string-splitting on the answer cue, which breaks
# if the cue text also appears in the note or in the model output.
prompt_length = inputs['input_ids'].shape[1]
summary = tokenizer.decode(generation[0][prompt_length:], skip_special_tokens=True).strip()
print(summary)

In [None]:
# @title (Optional) Llama 2 70B hook — requires >=80GB GPU
RUN_LLAMA_70B = False  # @param {type:"boolean"}
LLAMA_70B_MODEL = 'meta-llama/Llama-2-70b-chat-hf'  # @param {type:"string"}

if RUN_LLAMA_70B:
    import os
    import json
    import torch
    # BitsAndBytesConfig must be imported explicitly; it is used below to
    # request 4-bit NF4 quantization.
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    # Read the Hugging Face token from the environment; never hardcode it here.
    HF_TOKEN = os.environ.get('HF_TOKEN', None)

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_use_double_quant=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(LLAMA_70B_MODEL, use_fast=False, token=HF_TOKEN)
    if tokenizer.pad_token is None:
        # Fall back to EOS so padding-aware code paths have a pad token to use.
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        LLAMA_70B_MODEL,
        device_map='auto',
        quantization_config=bnb_config,
        torch_dtype=torch.float16,
        token=HF_TOKEN,
    )
    model.eval()

    # Same single example the other model sections use.
    with open(SMOKE_JSONL) as f:
        sample = json.loads(next(f))

    # Blank lines separate the instruction, the note, and the answer cue so the
    # segments do not run together into one sentence.
    prompt = (
        "You are a helpful clinician writing plain-language discharge instructions.\n\n"
        f"Brief Hospital Course:\n{sample['text']}\n\n"
        "Patient summary:"
    )

    inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
    with torch.inference_mode():
        generation = model.generate(**inputs, max_new_tokens=256, do_sample=False)

    # Full decoded text (prompt + continuation), kept for inspection.
    result = tokenizer.decode(generation[0], skip_special_tokens=True)
    # Decode only the tokens produced after the prompt rather than
    # string-splitting on the answer cue.
    prompt_length = inputs['input_ids'].shape[1]
    summary = tokenizer.decode(generation[0][prompt_length:], skip_special_tokens=True).strip()
    print(summary)
else:
    print('Skipping 70B run. Enable RUN_LLAMA_70B once you have enough GPU memory.')

In [None]:
# @title GPT-4 single-example run (requires gpt-4/config.yaml with API credentials)
import os
from pathlib import Path

GPT4_OUTPUT_DIR = Path('/content/gpt4_smoke_outputs') if Path('/content').exists() else Path('gpt4_smoke_outputs')
GPT4_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

%cd gpt-4
!python run_summarization.py --task_id 0 --prompt_id 3 --model_name gpt-4 --n_shot 0 --what_for smoke --save_path {GPT4_OUTPUT_DIR / 'gpt4_smoke.jsonl'} --verbose
%cd -

In [None]:
# @title Inspect GPT-4 output
from pathlib import Path

# Print the raw results file when present; otherwise remind the user what is missing.
gpt4_results = GPT4_OUTPUT_DIR / 'gpt4_smoke.jsonl'
if not gpt4_results.exists():
    print('Run the GPT-4 cell after adding config.yaml with valid API keys.')
else:
    print(gpt4_results.read_text())