In [1]:
!pip install --upgrade streamlit transformers spacy pandas
!python -m spacy download en_core_web_sm


Collecting streamlit
  Downloading streamlit-1.47.1-py3-none-any.whl.metadata (9.0 kB)
Collecting transformers
  Downloading transformers-4.54.1-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting pandas
  Downloading pandas-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.47.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [1]:
!pip install bitsandbytes accelerate

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-c

In [2]:
%%writefile streamlit_app.py
import os
import streamlit as st
import pandas as pd

from transformers import (
    AutoTokenizer,
    AutoModelForTokenClassification,
    pipeline,
    BitsAndBytesConfig
)
import bitsandbytes as bnb

def _build_token_classifier(model_source, quant_config):
    """Build one token-classification pipeline from a Hub id or a local directory.

    Args:
        model_source: Model identifier or checkpoint directory passed to
            ``from_pretrained``.
        quant_config: ``BitsAndBytesConfig`` applied when loading the model.

    Returns:
        A ``transformers`` token-classification pipeline using the "simple"
        aggregation strategy.
    """
    # NOTE(review): trust_remote_code=True lets the checkpoint's repository run
    # its own Python code at load time; only point this at sources you trust.
    tokenizer = AutoTokenizer.from_pretrained(model_source, trust_remote_code=True)
    model = AutoModelForTokenClassification.from_pretrained(
        model_source,
        trust_remote_code=True,
        quantization_config=quant_config,
        device_map="auto",  # let accelerate place the weights on GPU/CPU
    )
    # No `device=` argument here: the model was already placed through
    # `device_map="auto"`, and recent transformers releases reject an explicit
    # device for accelerate-dispatched models.
    return pipeline(
        "token-classification",
        model=model,
        tokenizer=tokenizer,
        aggregation_strategy="simple",
    )


# Cache model loading so we only do it once.
# st.cache_resource replaces the deprecated st.cache(allow_output_mutation=True)
# for unserialisable, shared objects such as models.
@st.cache_resource
def load_pipelines():
    """Load the token-classification pipelines used by the app.

    Two pipelines are prepared, both quantized to 8-bit:
      1) the "prompt-based" pipeline, loaded from the Hub model id below;
      2) an optional fine-tuned pipeline, loaded from a local checkpoint
         directory if that directory exists.

    Returns:
        A ``(prompt_pipe, ft_pipe)`` tuple. ``ft_pipe`` is ``None`` when the
        fine-tuned checkpoint directory is not present.
    """
    # --- 1) Prompt-based pipeline ---
    # NOTE(review): confirm this repo id exists on the Hugging Face Hub.
    # NOTE(review): loading a base checkpoint through
    # AutoModelForTokenClassification presumably yields an untrained
    # classification head, so its labels may not match the field names the UI
    # filters on -- verify.
    MODEL = "Qwen/Qwen3-5.3B"
    # Configure 8-bit quantization
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        llm_int8_threshold=6.0
    )
    prompt_pipe = _build_token_classifier(MODEL, bnb_config)

    # --- 2) Fine-tuned pipeline (optional) ---
    # Only loaded if a checkpoint exists at ./qwen_finetuned_qwen3-5.3B
    ft_dir = "./qwen_finetuned_qwen3-5.3B"
    ft_pipe = _build_token_classifier(ft_dir, bnb_config) if os.path.isdir(ft_dir) else None

    return prompt_pipe, ft_pipe

# --- Streamlit UI ---
# Configure the page before anything else touches Streamlit: the cached loader
# below can render a spinner element, and older Streamlit releases require
# set_page_config to be the first Streamlit command of the script.
st.set_page_config(page_title="Radiology Report Extractor", layout="wide")

# Load once at startup (cached across reruns by load_pipelines' decorator)
prompt_pipe, ft_pipe = load_pipelines()

st.title("🏥 Radiology Report Field Extractor")

# Sidebar for model choice; the fine-tuned option is only offered when its
# pipeline was actually loaded.
st.sidebar.header("Model Selection")
choices = ["Prompt-based QWen3-5.3B"]
if ft_pipe:
    choices.append("Fine-tuned QWen3-5.3B")
model_choice = st.sidebar.radio("Choose extraction model:", choices)

st.sidebar.markdown("---")
st.sidebar.write("Paste your radiology report below, then click **Extract**.")

# Text input
report_text = st.text_area(
    "Radiology Report",
    height=250,
    placeholder="E.g. Lumbar Spine (A-P) … Marked osteopenia …"
)

# Run extraction
if st.button("🔍 Extract Fields"):
    if not report_text.strip():
        st.error("Please enter a report to extract from.")
    else:
        with st.spinner("Running extraction on GPU…"):
            # The radio labels start with "Prompt" or "Fine-tuned"; the latter
            # is only selectable when ft_pipe is not None.
            pipe = prompt_pipe if model_choice.startswith("Prompt") else ft_pipe
            entities = pipe(report_text)

        # Collect the text of every entity whose group matches a known field.
        # Entities with any other group label are ignored here but still appear
        # in the raw table below.
        fields = {f: [] for f in ["Examination", "Clinical", "Findings", "Impression"]}
        for ent in entities:
            grp = ent["entity_group"]
            if grp in fields:
                fields[grp].append(ent["word"].strip())

        # Display structured output; a dash marks fields with no matches.
        st.subheader("Extraction Results")
        for f, tokens in fields.items():
            text = " ".join(tokens) if tokens else "—"
            st.markdown(f"**{f}:** {text}")

        # Debug: raw entity table
        with st.expander("Show raw entities"):
            st.write(pd.DataFrame(entities))


Writing streamlit_app.py


In [3]:
# Launch the app written by the previous cell on port 8501 in headless mode
# (no browser is opened). The command runs in the foreground, so this cell
# stays busy until the server is stopped.
!streamlit run streamlit_app.py --server.port 8501 --server.headless true



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.240.161.49:8501[0m
[0m
[34m  Stopping...[0m
[34m  Stopping...[0m
