**Importing Libraries and Dependencies**

In [None]:

# Install the libraries this notebook imports later (quiet mode, versions unpinned).
!pip install -q transformers accelerate gradio sentencepiece safetensors
# Remove the preinstalled PyTorch stack so the next cell can install a different CUDA build.
!pip uninstall torch torchvision torchaudio -y
# Drop pip's cached wheels so the reinstall downloads fresh files.
!pip cache purge


Found existing installation: torch 2.9.0+cu126
Uninstalling torch-2.9.0+cu126:
  Successfully uninstalled torch-2.9.0+cu126
Found existing installation: torchvision 0.24.0+cu126
Uninstalling torchvision-0.24.0+cu126:
  Successfully uninstalled torchvision-0.24.0+cu126
Found existing installation: torchaudio 2.9.0+cu126
Uninstalling torchaudio-2.9.0+cu126:
  Successfully uninstalled torchaudio-2.9.0+cu126
[0mFiles removed: 0


In [None]:
# Reinstall PyTorch from the CUDA 11.8 wheel index.
# NOTE(review): an already-running kernel keeps whatever torch it imported earlier;
# restart the runtime after this cell before relying on the new build.
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118


Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting torch
  Downloading https://download.pytorch.org/whl/cu118/torch-2.7.1%2Bcu118-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cu118/torchvision-0.22.1%2Bcu118-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio
  Downloading https://download.pytorch.org/whl/cu118/torchaudio-2.7.1%2Bcu118-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (6.6 kB)
Collecting nvidia-cuda-nvrtc-cu11==11.8.89 (from torch)
  Downloading https://download.pytorch.org/whl/cu118/nvidia_cuda_nvrtc_cu11-11.8.89-py3-none-manylinux1_x86_64.whl (23.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.2/23.2 MB[0m [31m38.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu11==11.8.89 (from torch)
  Downloading https://download.pytorch.org/whl/cu118/nvidia_cuda_runtime_cu11-11.8.89-py3-none-manylinux1_x86_64.wh

In [None]:
import torch
# Sanity check: report which torch build the kernel actually loaded.
# NOTE(review): the recorded output shows 2.9.0+cu126, not the cu118 build
# installed above -- confirm the runtime was restarted after the reinstall.
print(torch.__version__)
# Reports whether this torch build includes the distributed package.
print(torch.distributed.is_available())

2.9.0+cu126
True


**Our fine-tuned model — full working GUI**

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the fine-tuned BLIP checkpoint is read
# from a path under this mount point in the model-loading cell.
drive.mount('/content/drive')


In [2]:



import torch
from PIL import Image
from transformers import (
    BlipProcessor, BlipForQuestionAnswering,
    AutoTokenizer, AutoModelForCausalLM,
    pipeline
)
import gradio as gr

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# ============================================================
# LOAD THE FINE-TUNED BLIP VQA MODEL FROM GOOGLE DRIVE
# ============================================================

# Checkpoint directory under the Drive mount point (/content/drive), so the
# drive.mount cell must have run before this one.
FINETUNED_BLIP_PATH = "/content/drive/MyDrive/medvqa_finetuned_blip/"

print("Loading finetuned BLIP-VQA model...")
# The processor and the weights are both read from the same checkpoint directory;
# the model is then moved to the selected device.
vqa_processor = BlipProcessor.from_pretrained(FINETUNED_BLIP_PATH)
vqa_model = BlipForQuestionAnswering.from_pretrained(FINETUNED_BLIP_PATH).to(device)
print("Finetuned BLIP model loaded.\n")


# ============================================================
# LOAD TINYLLAMA FOR MULTI-TURN CONVERSATION + DIAGNOSTICS
# ============================================================

# Hub identifier of the chat model used for dialogue replies and diagnostics.
llama_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

print("Loading LLaMA model...")
llama_tokenizer = AutoTokenizer.from_pretrained(llama_name)
llama_model = AutoModelForCausalLM.from_pretrained(
    llama_name,
    # Half precision only makes sense on the GPU. When `device` fell back to
    # "cpu", load in float32 instead of forcing float16 onto a CPU backend.
    # NOTE(review): the recorded cell output warns that `torch_dtype` is
    # deprecated in favour of `dtype`; switch once the transformers version is pinned.
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    # Let accelerate decide where the weights are placed.
    device_map="auto"
)
print("LLaMA loaded.\n")


# ============================================================
# LOAD THE SUMMARISATION MODEL (FLAN-T5-SMALL)
# ============================================================

print("Loading summariser (FLAN-T5-small)...")
# Text-to-text pipeline used by summarize_history.
summarizer = pipeline(
    "text2text-generation",
    model="google/flan-t5-small",
    # Pipeline device index: 0 selects the first GPU, -1 selects the CPU.
    device=0 if device == "cuda" else -1
)
print("FLAN-T5 summariser loaded.\n")



Using device: cuda
Loading finetuned BLIP-VQA model...
Finetuned BLIP model loaded.

Loading LLaMA model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

LLaMA loaded.

Loading summariser (FLAN-T5-small)...


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


FLAN-T5 summariser loaded.



In [3]:

# ============================================================
#  I NOW DEFINE MODEL FUNCTIONS
# ============================================================

# ------------------ VQA -------------------------------------
def med_vqa(image: Image.Image, question: str):
    """Answer a question about an image with the fine-tuned BLIP VQA model.

    The question is prefixed with "This is a medical image." before being
    passed, together with the image, to ``vqa_processor``. Up to 60 new tokens
    are generated and the first sequence is decoded.

    Args:
        image: PIL image to ask about.
        question: The user's question text.

    Returns:
        The decoded answer with surrounding whitespace removed.
    """
    framed_question = f"This is a medical image. {question}"
    encoded = vqa_processor(image, framed_question, return_tensors="pt").to(device)
    generated = vqa_model.generate(**encoded, max_new_tokens=60)
    return vqa_processor.decode(generated[0], skip_special_tokens=True).strip()


# ------------------ Multi-turn LLaMA Dialogue ---------------
def build_dialogue_prompt(history, user_input, vqa_answer=None):
    """Assemble the plain-text prompt for the next assistant turn.

    Layout: a system line, an optional "Findings from image" line (only when
    ``vqa_answer`` is truthy), one Patient/Doctor pair per past turn, then the
    new patient message followed by an open "Doctor:" cue.

    Args:
        history: Iterable of ``(user_text, assistant_text)`` pairs.
        user_input: The new patient message.
        vqa_answer: Optional image finding to include in the prompt.

    Returns:
        The prompt string, ending with "Doctor:".
    """
    segments = ["You are a medical assistant.\n"]

    if vqa_answer:
        segments.append(f"Findings from image: {vqa_answer}\n")

    segments.extend(
        f"Patient: {asked}\nDoctor: {answered}\n" for asked, answered in history
    )
    segments.append(f"Patient: {user_input}\nDoctor:")

    return "".join(segments)


def llama_reply(history, user_input, vqa_answer=None):
    """Generate the assistant's next turn with the LLaMA model and record it.

    The prompt is built by ``build_dialogue_prompt``. Only the newly generated
    tokens are decoded: a causal LM returns the prompt followed by the
    continuation, so decoding the whole sequence would store the entire prompt
    as the "reply" and re-feed it on every later turn.

    Args:
        history: List of ``(user_text, assistant_text)`` pairs. It is mutated in
            place: the new turn is appended to it.
        user_input: The new patient message.
        vqa_answer: Optional image finding forwarded to the prompt builder.

    Returns:
        ``(reply, history)`` where ``history`` is the same list object that was
        passed in, now including the new turn.
    """
    prompt = build_dialogue_prompt(history, user_input, vqa_answer)

    inputs = llama_tokenizer(prompt, return_tensors="pt").to(device)
    prompt_len = inputs["input_ids"].shape[-1]

    outputs = llama_model.generate(
        **inputs,
        max_new_tokens=180,
        # temperature / top_p only take effect when sampling is enabled.
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )

    # Skip the echoed prompt tokens and keep only the continuation.
    new_tokens = outputs[0][prompt_len:]
    reply = llama_tokenizer.decode(new_tokens, skip_special_tokens=True)

    # The model may keep writing the dialogue; stop at the next patient turn it invents.
    reply = reply.split("\nPatient:", 1)[0].strip()

    history.append((user_input, reply))
    return reply, history


# ------------------ Summarisation ---------------------------
def summarize_history(history):
    """Summarise the conversation with the ``summarizer`` pipeline.

    Each turn is flattened to "Patient: ... Doctor: ...." and the turns are
    joined with spaces behind a fixed instruction line.

    Args:
        history: Sequence of ``(user_text, assistant_text)`` pairs.

    Returns:
        The stripped generated summary, or a fixed placeholder message when
        ``history`` is empty or ``None``.
    """
    if history:
        flattened = " ".join(
            f"Patient: {asked} Doctor: {answered}." for asked, answered in history
        )
        request = (
            "Summarise the following clinical dialogue (Findings, Impression, Next steps):\n"
            + flattened
        )
        generations = summarizer(request, max_new_tokens=150)
        return generations[0]["generated_text"].strip()

    return "No conversation to summarise."


# ------------------ Diagnostics via LLaMA --------------------
def diagnose_from_summary(summary):
    """Ask the LLaMA model for candidate diagnoses based on a summary.

    Only the generated continuation is returned. Decoding the full output
    sequence of a causal LM would repeat the whole instruction prompt in the
    diagnostics box.

    Decoding is greedy (``do_sample=False``). The previous ``temperature=0.3``
    had no effect because sampling was never enabled, so it is dropped rather
    than left as a misleading setting.

    Args:
        summary: Text summary of the conversation.

    Returns:
        The model's answer as a stripped string, without the prompt.
    """
    prompt = (
        "You are a clinical reasoning assistant. Based on the summary, list:\n"
        "- 3 possible diagnoses with brief rationale\n"
        "- 2 recommended diagnostic steps\n\n"
        f"Summary: {summary}\nDoctor:"
    )

    inputs = llama_tokenizer(prompt, return_tensors="pt").to(device)
    prompt_len = inputs["input_ids"].shape[-1]

    outputs = llama_model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=False
    )

    # Skip the echoed prompt tokens and keep only the continuation.
    answer_tokens = outputs[0][prompt_len:]
    return llama_tokenizer.decode(answer_tokens, skip_special_tokens=True).strip()


# ============================================================


def reset_all():
    """Return cleared values for the four UI outputs wired to the Reset button.

    Returns:
        A 4-tuple: empty chat rows, empty history, empty summary text and
        empty diagnostics text.
    """
    empty_chat, empty_history = [], []
    cleared_summary = cleared_diagnostics = ""
    return empty_chat, empty_history, cleared_summary, cleared_diagnostics


def submit_fn(image, question, chat, history):
    """Handle the Submit button: optional VQA on the image, then an LLM reply.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
        question: Text from the question box.
        chat: Current value of the conversation table. Unused; the table is
            rebuilt from ``history``.
        history: List of ``(user_text, assistant_text)`` pairs, or ``None``.

    Returns:
        ``(chat_rows, history)`` where ``chat_rows`` is a list of
        ``[user, assistant]`` rows for the table.
    """
    if history is None:
        history = []

    # A blank submission would run both models on empty input and append an
    # empty turn; leave the conversation as it is instead.
    if question is None or not question.strip():
        return [[u, a] for (u, a) in history], history

    # ---- VQA ----
    # Explicit None check: "no upload" is the only case that should skip VQA.
    vqa_answer = med_vqa(image, question) if image is not None else None

    # ---- LLaMA reply ----
    # The reply text is not used here: llama_reply already stores it in the history.
    _, updated_hist = llama_reply(history, question, vqa_answer)

    chat_rows = [[u, a] for (u, a) in updated_hist]

    return chat_rows, updated_hist


def update_chat(chat_df):
    """Rebuild the history list from the conversation table's current value.

    Accepts either a list of rows or a pandas ``DataFrame``. ``gr.Dataframe``
    hands handlers a DataFrame unless ``type="array"`` is set; truth-testing a
    DataFrame raises ``ValueError`` and iterating it yields column names, so
    both input shapes are converted to a plain list of rows first.

    Rows that do not have exactly two cells are ignored, as are rows whose
    cells are both empty (a blank table row is not a conversation turn).

    Args:
        chat_df: List of ``[user, assistant]`` rows, a DataFrame with two
            columns, or ``None``.

    Returns:
        A new list of ``(user_text, assistant_text)`` string tuples.
    """
    def _cell_to_text(cell):
        # None and NaN (the only value unequal to itself) both mean "empty cell".
        if cell is None or cell != cell:
            return ""
        return str(cell)

    if chat_df is None:
        return []

    # DataFrame-like objects expose both .values and .columns; nested lists do not.
    if hasattr(chat_df, "values") and hasattr(chat_df, "columns"):
        rows = chat_df.values.tolist()
    else:
        rows = list(chat_df)

    new_hist = []
    for row in rows:
        if row is None or len(row) != 2:
            continue
        user_text = _cell_to_text(row[0])
        assistant_text = _cell_to_text(row[1])
        if not user_text and not assistant_text:
            continue
        new_hist.append((user_text, assistant_text))
    return new_hist


def summarize_fn(history):
    """UI callback for the summary button: delegate to ``summarize_history``.

    Args:
        history: The conversation history held in the UI state.

    Returns:
        Whatever ``summarize_history`` returns for ``history``.
    """
    summary_text = summarize_history(history)
    return summary_text


def diagnose_fn(history):
    """UI callback for the diagnostics button: summarise, then diagnose.

    With no conversation there is nothing to reason about. Previously the
    placeholder text from ``summarize_history`` was sent to the language model
    as if it were a clinical summary, which invites invented diagnoses. A fixed
    message is returned instead and no model is called.

    Args:
        history: The conversation history held in the UI state (may be empty
            or ``None``).

    Returns:
        The diagnostics text, or a fixed message when there is no conversation.
    """
    if not history:
        return "No conversation to analyse."

    summary = summarize_history(history)
    return diagnose_from_summary(summary)


In [4]:


# ============================================================
#  GRADIO GUI FOR MY OUTPUT
# ============================================================

# Build the UI: image upload + editable conversation table, a question box,
# four action buttons, and two read-only text areas for summary/diagnostics.
with gr.Blocks() as demo:

    gr.Markdown("## 🩺 Medical VQA + Dialogue + Summary + Diagnostics System")

    with gr.Row():
        # type="pil" so handlers receive a PIL image (what med_vqa expects).
        image_input = gr.Image(type="pil", label="Upload Radiology Image")
        chat_df = gr.Dataframe(
            headers=["User", "Assistant"],
            datatype=["str", "str"],
            row_count="dynamic",
            # Hand handlers a plain list of rows. The default ("pandas") passes
            # a DataFrame, which the row-wise history code cannot truth-test
            # or iterate as rows.
            type="array",
            label="Conversation"
        )

    question_box = gr.Textbox(label="Ask a question", lines=2)

    with gr.Row():
        btn_submit = gr.Button("Submit", variant="primary")
        btn_reset = gr.Button("Reset")
        btn_sum = gr.Button("Generate Summary")
        btn_diag = gr.Button("Diagnostic Reasoning")

    summary_box = gr.Textbox(label="Summary", lines=6)
    diag_box = gr.Textbox(label="Diagnostics", lines=6)

    # Per-session conversation history: list of (user, assistant) pairs.
    history = gr.State([])
    # Not wired to any event below; kept so the names remain defined.
    summary_state = gr.State("")
    diag_state = gr.State("")

    # Submit: run VQA (if an image is present) + the LLM, then refresh table and history.
    btn_submit.click(
        fn=submit_fn,
        inputs=[image_input, question_box, chat_df, history],
        outputs=[chat_df, history]
    )

    # Keep the history state in sync whenever the table's value changes.
    chat_df.change(
        fn=update_chat,
        inputs=[chat_df],
        outputs=[history]
    )

    btn_sum.click(fn=summarize_fn, inputs=[history], outputs=[summary_box])
    btn_diag.click(fn=diagnose_fn, inputs=[history], outputs=[diag_box])

    # Reset clears the table, the history and both text areas.
    btn_reset.click(fn=reset_all,
                    inputs=[],
                    outputs=[chat_df, history, summary_box, diag_box])

demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://db840885eff1ea1c74.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




**Answers as paragraphs — a separate experiment**

In [None]:
# ---------- Imports ----------
import torch
from transformers import (
    BlipProcessor, BlipForConditionalGeneration,
    AutoTokenizer, AutoModelForSeq2SeqLM,
    pipeline
)
from PIL import Image
import gradio as gr
import textwrap

# ---------- Device ----------
# NOTE: this rebinds `device`, which the earlier model-loading cell also defines.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

# ---------- Model names ----------
# Hub identifiers for the three models used in this experiment.
caption_model_name = "WafaaFraih/blip-roco-radiology-captioning"
dialogue_model_name = "google/flan-t5-base"   # Small & fast LLM
summarizer_model_name = "google/flan-t5-small"

# ---------- Load BLIP-RoCo Radiology Captioning ----------
print("Loading BLIP-RoCo Radiology Captioning model...")
caption_processor = BlipProcessor.from_pretrained(caption_model_name)
caption_model = BlipForConditionalGeneration.from_pretrained(caption_model_name).to(device)

# ---------- Load dialogue/diagnostic model (Flan-T5-base) ----------
# The same seq2seq model serves both t5_reply and diagnose_from_summary.
print("Loading dialogue/diagnostic model (Flan-T5-base)...")
dialogue_model = AutoModelForSeq2SeqLM.from_pretrained(dialogue_model_name).to(device)
dialogue_tokenizer = AutoTokenizer.from_pretrained(dialogue_model_name)

# ---------- Load summarizer (Flan-T5-small) ----------
# NOTE: this rebinds `summarizer`, replacing the pipeline created in the earlier cell.
# Pipeline device index: 0 selects the first GPU, -1 selects the CPU.
print("Loading summarizer (Flan-T5-small)...")
summarizer = pipeline("text2text-generation", model=summarizer_model_name, device=0 if device=="cuda" else -1)

print("All models are loaded.")


In [None]:

# ============================================================
#        Superseded draft of the helper functions (removed)
# ============================================================
# This cell used to hold an earlier draft of med_vqa, build_dialogue_prompt,
# t5_reply, summarize_history and diagnose_from_summary, disabled by wrapping
# the whole thing in a triple-quoted string. The draft returned the raw model
# text without passing it through expand_to_medical_paragraph.
#
# It was removed because a bare string literal is not a comment: as the only
# expression in the cell it is echoed back as the cell's output, and the text
# duplicated the active definitions in the next cell.

In [None]:
import textwrap

# ============================================================
#      Helper: Convert any short answer into 3-line paragraph
# ============================================================
def expand_to_medical_paragraph(answer: str, width: int = 90):
    """
    Embed a raw model output in fixed cautionary boilerplate and line-wrap it.

    Three cases:
      * empty / whitespace-only / ``None`` input: a fixed "no finding" paragraph
        (previously this produced "described as ''" or raised on ``None``);
      * fewer than five words: the answer is quoted inside a template;
      * otherwise: the answer is introduced with "The initial assessment
        indicates:" and followed by generic follow-up advice. A full stop is
        added only when the answer does not already end with ``.``, ``!`` or
        ``?``, which avoids doubled punctuation such as "clear..".

    The added sentences are fixed text; they are not produced by any model.

    Args:
        answer: Raw text produced by a model.
        width: Maximum line width passed to ``textwrap.fill`` (default 90).

    Returns:
        The paragraph wrapped to ``width`` columns. The number of lines depends
        on the length of ``answer`` and on ``width``; it is not fixed at three.
    """
    base = (answer or "").strip()

    if not base:
        base = (
            "No finding was returned by the model. In a medical context, "
            "the absence of a result should be interpreted cautiously. Further "
            "clinical correlation and additional diagnostic review are recommended."
        )
    elif len(base.split()) < 5:
        # Convert extremely short answers into meaningful text
        base = (
            f"The finding appears to be described as '{base}', which on its "
            f"own is a limited observation. In a medical context, this type "
            f"of result should be interpreted cautiously. Further clinical "
            f"correlation and additional diagnostic review are recommended."
        )
    else:
        # For longer answers: embed them in a structured interpretation.
        closing = "" if base.endswith((".", "!", "?")) else "."
        base = (
            f"The initial assessment indicates: {base}{closing} "
            f"This observation should be interpreted within the broader "
            f"clinical context, including patient symptoms and history. "
            f"Additional evaluation or follow-up imaging may be helpful "
            f"to ensure comprehensive clinical understanding."
        )

    # textwrap.fill also collapses any newlines inside the text into spaces.
    return textwrap.fill(base, width=width)


# ============================================================
#               VQA – Medical Image Question Answering
# ============================================================
def med_vqa(image: Image.Image, question: str):
    """Run the captioning model on an image with the question as text prompt.

    The question is prefixed with "This is a medical image." and passed with
    the image to ``caption_processor``. Up to 64 new tokens are generated and
    the decoded text is passed through ``expand_to_medical_paragraph``.

    Args:
        image: PIL image, or ``None``.
        question: The user's question text.

    Returns:
        The expanded paragraph, or the bare message "No image provided." when
        ``image`` is ``None`` (that message is returned as-is, not expanded).
    """
    if image is not None:
        framed_question = f"This is a medical image. {question}"
        encoded = caption_processor(image, framed_question, return_tensors="pt").to(device)
        generated = caption_model.generate(**encoded, max_new_tokens=64)
        raw_answer = caption_processor.decode(generated[0], skip_special_tokens=True)

        # Always return the paragraph form of the decoded text.
        return expand_to_medical_paragraph(raw_answer)

    return "No image provided."


# ============================================================
#          Build prompt for medical conversation (T5)
# ============================================================
def build_dialogue_prompt(history, user_input, vqa_findings=None):
    """Compose the text prompt for the next reply of the dialogue model.

    Layout: a system line, an optional "Image findings" line (only when
    ``vqa_findings`` is truthy), one Patient/Doctor pair per past turn, then
    the new patient message followed by an open "Doctor:" cue.

    Args:
        history: Iterable of ``(user_text, assistant_text)`` pairs.
        user_input: The new patient message.
        vqa_findings: Optional image finding to include in the prompt.

    Returns:
        The prompt string, ending with "Doctor:".
    """
    header = "You are a helpful medical assistant.\n"
    findings_line = f"Image findings: {vqa_findings}\n" if vqa_findings else ""
    past_turns = "".join(
        f"Patient: {asked}\nDoctor: {answered}\n" for asked, answered in history
    )
    return f"{header}{findings_line}{past_turns}Patient: {user_input}\nDoctor:"


# ============================================================
#          Generate reply using FLAN-T5 Base
# ============================================================
def t5_reply(history, user_input, vqa_findings=None,
             max_new_tokens=180, max_input_tokens=512):
    """Generate the assistant's next turn with the dialogue model and record it.

    The tokenizer truncates from the end of the text, and the newest question
    plus the "Doctor:" cue sit at the end of the prompt. Once the conversation
    outgrew the token limit, exactly the part the model must answer was cut
    off. To avoid that, the oldest turns are left out of the prompt until it
    fits. The stored history itself is never shortened.

    Args:
        history: List of ``(user_text, assistant_text)`` pairs. It is mutated in
            place: the new turn is appended to it.
        user_input: The new patient message.
        vqa_findings: Optional image finding forwarded to the prompt builder.
        max_new_tokens: Generation length limit.
        max_input_tokens: Prompt length limit in tokens (default 512, the value
            previously hard-coded as ``max_length``).

    Returns:
        ``(reply, history)`` where ``reply`` has been passed through
        ``expand_to_medical_paragraph`` and ``history`` is the same list object
        that was passed in, now including the new turn.
    """
    # Drop the oldest turns from the prompt (not from `history`) until it fits.
    visible_turns = list(history)
    prompt = build_dialogue_prompt(visible_turns, user_input, vqa_findings)
    while visible_turns and len(dialogue_tokenizer(prompt)["input_ids"]) > max_input_tokens:
        visible_turns = visible_turns[1:]
        prompt = build_dialogue_prompt(visible_turns, user_input, vqa_findings)

    # Truncation stays on as a last resort for a single over-long message.
    inputs = dialogue_tokenizer(prompt, return_tensors="pt",
                                truncation=True, max_length=max_input_tokens).to(device)

    with torch.no_grad():
        gen = dialogue_model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )

    reply = dialogue_tokenizer.decode(gen[0], skip_special_tokens=True)

    # Convert the reply into the paragraph form.
    reply = expand_to_medical_paragraph(reply)

    history.append((user_input, reply))
    return reply, history


# ============================================================
#                    Summaries (T5-small)
# ============================================================
def summarize_history(history, max_new_tokens=120):
    """Summarise the conversation with the ``summarizer`` pipeline.

    Each turn is flattened to "Patient: ... Doctor: ...." and the turns are
    joined with spaces behind a fixed instruction. The stripped generated text
    is then passed through ``expand_to_medical_paragraph``.

    Args:
        history: Sequence of ``(user_text, assistant_text)`` pairs.
        max_new_tokens: Generation length limit passed to the pipeline.

    Returns:
        The expanded summary paragraph, or a fixed placeholder message when
        ``history`` is empty or ``None`` (the placeholder is not expanded).
    """
    if history:
        flattened = " ".join(
            f"Patient: {asked} Doctor: {answered}." for asked, answered in history
        )
        request = (
            "Summarize the following clinical dialogue briefly "
            "(Findings, Impression, Recommended next steps):\n\n"
        ) + flattened

        generations = summarizer(request, max_new_tokens=max_new_tokens)
        raw_summary = generations[0]["generated_text"].strip()

        # Convert the summary into the paragraph form.
        return expand_to_medical_paragraph(raw_summary)

    return "No conversation to summarize."


# ============================================================
#          Diagnose from Summary (T5 Base Clinical Reasoning)
# ============================================================
def diagnose_from_summary(summary, max_new_tokens=200):
    """Ask the dialogue model for candidate diagnoses based on a summary.

    Decoding is greedy (``do_sample=False``). The ``temperature=0.3`` that used
    to be passed alongside it had no effect in greedy mode and only made
    transformers warn about an unused generation flag, so it is removed.

    Args:
        summary: Text summary of the conversation.
        max_new_tokens: Generation length limit.

    Returns:
        The decoded model output passed through
        ``expand_to_medical_paragraph``.
    """
    prompt = (
        "You are a clinical reasoning assistant. Based on the clinical summary, "
        "list 3 possible diagnoses with short rationale and provide 2–3 suggested "
        "next diagnostic steps.\n\n"
        f"Summary: {summary}\n\nAnswer:"
    )

    # No explicit max_length here, so the tokenizer applies its own limit.
    inputs = dialogue_tokenizer(prompt, return_tensors="pt",
                                truncation=True).to(device)

    with torch.no_grad():
        gen = dialogue_model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False
        )

    result = dialogue_tokenizer.decode(gen[0], skip_special_tokens=True)

    # Convert the diagnostics into the paragraph form.
    return expand_to_medical_paragraph(result)
