# Radiology VQA Chatbot 



In [1]:
!pip install torch torchvision transformers faiss-cpu openai gradio pillow tqdm



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [24]:
!pip install openai==0.28

Collecting openai==0.28
  Using cached openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Using cached openai-0.28.0-py3-none-any.whl (76 kB)
Installing collected packages: openai
Successfully installed openai-0.28.0


In [1]:
import os
import json
import numpy as np
import faiss
import torch
from PIL import Image
from tqdm import tqdm
import gradio as gr

# Prompt for your key securely (won't show it on screen)
from getpass import getpass
import openai

# Use a key already exported as OPENAI_API_KEY when there is one, so the
# notebook can run unattended (Restart & Run All). Otherwise prompt for it;
# getpass does not echo what is typed.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("🔑 Enter your OpenAI API key: ")

# Paths (adjust if needed)
DATA_JSON = '/teamspace/studios/this_studio/VQA_RAD Dataset Public.json'  # annotation file
IMG_DIR   = '/teamspace/studios/this_studio/data'                         # image folder
OUTPUT    = '/teamspace/studios/this_studio/records.json'                 # filtered records

In [2]:
# Load the raw annotation list; each entry is read below through its
# 'image_name', 'question' and 'answer' keys.
with open(DATA_JSON, 'r') as f:
    raw = json.load(f)

records = []
for ex in raw:
    img_name = ex['image_name']
    question = ex['question']
    answer   = ex['answer']
    img_path = os.path.join(IMG_DIR, img_name)

    # Keep only entries whose image file can be opened. The context manager
    # closes the handle immediately; a bare Image.open() would leave one
    # open file per record behind.
    try:
        with Image.open(img_path):
            pass
    except FileNotFoundError:
        # Image is not on disk: skip this entry.
        continue
    records.append((img_path, question, answer))


In [4]:
# Persist the (image path, question, answer) records as indented JSON.
with open(OUTPUT, 'w') as f:
    f.write(json.dumps(records, indent=2))

print(f"Wrote {len(records)} records → {OUTPUT}")

Wrote 2248 records → /teamspace/studios/this_studio/records.json


In [3]:
from transformers import CLIPProcessor, CLIPModel

In [4]:
# Location the FAISS index is written to and later read back from.
INDEX_FILE = '/teamspace/studios/this_studio/images.index'
# Location the joint embedding matrix is saved to / loaded from (NumPy .npy).
EMB_FILE = '/teamspace/studios/this_studio/image_embs.npy'

In [8]:


# Reload the (image path, question, answer) records saved earlier.
with open(OUTPUT, 'r') as f:
    records = json.load(f)

# Use the GPU when one is available; otherwise fall back to the CPU so the
# cell still runs (more slowly) on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# init CLIP
processor  = CLIPProcessor.from_pretrained('openai/clip-vit-base-patch32')
clip_model = CLIPModel.from_pretrained('openai/clip-vit-base-patch32').to(device)
clip_model.eval()  # inference only

joint_embs = []
for img_path, question, _ in tqdm(records):
    # --- Image embedding ---
    # convert() returns a new in-memory image, so the file can be closed
    # as soon as the conversion is done.
    with Image.open(img_path) as src:
        img = src.convert("RGB")
    pix = processor(images=img, return_tensors="pt")["pixel_values"].to(device)
    with torch.no_grad():
        img_feat = clip_model.get_image_features(pixel_values=pix).squeeze()
    img_feat = img_feat / img_feat.norm()

    # --- Text embedding ---
    # truncation=True keeps an over-long question from exceeding the text
    # encoder's maximum sequence length.
    txt = processor(text=question, return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        txt_feat = clip_model.get_text_features(**txt).squeeze()
    txt_feat = txt_feat / txt_feat.norm()

    # --- Joint embedding (concatenate the two unit-norm halves) ---
    joint = torch.cat([img_feat, txt_feat], dim=-1)
    joint_embs.append(joint.cpu().numpy().astype("float32"))

if not joint_embs:
    # np.vstack would raise a less helpful ValueError on an empty list.
    raise ValueError(f"No records loaded from {OUTPUT}; nothing to index")
joint_embs = np.vstack(joint_embs)

# Build a FAISS inner-product index over the joint vectors.
dim = joint_embs.shape[1]
index = faiss.IndexFlatIP(dim)
# Rescale every row to unit length in place, so inner product == cosine.
faiss.normalize_L2(joint_embs)
index.add(joint_embs)
faiss.write_index(index, INDEX_FILE)
np.save(EMB_FILE, joint_embs)
print(f"Built FAISS index with joint image+text dim={dim}")


100%|██████████| 2248/2248 [00:54<00:00, 41.15it/s]


Built FAISS index with joint image+text dim=1024


In [5]:
# Use the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
processor  = CLIPProcessor.from_pretrained('openai/clip-vit-base-patch32')
clip_model = CLIPModel.from_pretrained('openai/clip-vit-base-patch32').to(device)
clip_model.eval()

# Reload the artefacts produced by the index-building cell.
index = faiss.read_index(INDEX_FILE)
with open(OUTPUT, 'r') as f:  # context manager so the file handle is closed
    records = json.load(f)
emb_matrix = np.load(EMB_FILE)
def encode_query(image: np.ndarray, question: str) -> np.ndarray:
    """Embed an (image, question) pair the same way the index was built.

    Args:
        image: Image as a NumPy array accepted by ``Image.fromarray``.
        question: Question text. Tokenisation truncates text that exceeds
            the text encoder's maximum length instead of failing on it.

    Returns:
        1-D float32 array: the unit-norm CLIP image features followed by the
        unit-norm CLIP text features.
    """
    # image -> unit-norm image features
    img = Image.fromarray(image).convert("RGB")
    pix = processor(images=img, return_tensors="pt")["pixel_values"].to(device)
    with torch.no_grad():
        img_feat = clip_model.get_image_features(pixel_values=pix).squeeze()
    img_feat = img_feat / img_feat.norm()

    # question -> unit-norm text features
    txt = processor(text=question, return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        txt_feat = clip_model.get_text_features(**txt).squeeze()
    txt_feat = txt_feat / txt_feat.norm()

    # Concatenate in the same (image, text) order used for the index rows.
    joint = torch.cat([img_feat, txt_feat], dim=-1)
    return joint.cpu().numpy().astype("float32")

def retrieve_examples(feat, top_k=5):
    """Return the stored records closest to a query embedding.

    Args:
        feat: Query embedding; reshaped to a single row before searching.
        top_k: Maximum number of neighbours to return.

    Returns:
        List of records, best match first. May hold fewer than ``top_k``
        items when the index has fewer vectors than requested.
    """
    _, ids = index.search(feat.reshape(1, -1), top_k)
    # FAISS fills missing neighbours with -1. Without this filter,
    # records[-1] would silently return the last record as a "match".
    return [records[i] for i in ids[0] if i >= 0]



def generate_answer(question, examples):
    """Ask the chat model to answer ``question`` given retrieved Q/A pairs.

    Args:
        question: The user's question.
        examples: Iterable of 3-item records; the second and third items are
            used as the example question and answer (the first is ignored).

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    # Render each retrieved pair as a "Q: ... / A: ..." snippet.
    qa_snippets = []
    for _img_path, ex_question, ex_answer in examples:
        qa_snippets.append(f"Q: {ex_question}\nA: {ex_answer}")
    context = '\n'.join(qa_snippets)

    prompt = (
        f"Act as a radiology assistant assessing XRay images. Examples:\n{context}\n"
        f"Now answer in 1 or 2 short sentences: {question}\nA:"
    )

    # Single-turn request; temperature 0.0 as in the original call.
    user_message = {"role": "user", "content": prompt}
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[user_message],
        temperature=0.0,
    )
    reply = response.choices[0].message.content
    return reply.strip()




Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [6]:

test_img      = '/teamspace/studios/this_studio/data/synpic16221.jpg'
test_question = "Is there pneumonia?"

# Load the image (closing the file once it is converted) and encode the
# joint query (image + question).
with Image.open(test_img) as pil_img:
    img_arr = np.array(pil_img.convert('RGB'))
query_emb = encode_query(img_arr, test_question)

# Retrieve the nearest Q/A examples through the shared helper rather than
# repeating the index lookup inline.
examples = retrieve_examples(query_emb, top_k=5)

print("=== Retrieved Examples ===")
for i, (img_path, q, a) in enumerate(examples, 1):
    print(f"{i}. Q: {q}\n   A: {a}\n")

# Generate and print the final answer.
print("=== Model’s Answer ===")
answer = generate_answer(test_question, examples)
print(answer)


=== Retrieved Examples ===
1. Q: Is there pneumothorax?
   A: No

2. Q: what is the pathology
   A: left lobe mass 1.5 x 1.8 cm

3. Q: Is there fluid in the lungs?
   A: No

4. Q: Is there fluid in the lung?
   A: yes

5. Q: Is there a lung mass?
   A: yes

=== Model’s Answer ===
No, there is no evidence of pneumonia in the XRay images.


In [7]:
def chatbot_interface(image, question):
    """Gradio callback: answer ``question`` about the uploaded ``image``.

    Args:
        image: Uploaded image as a NumPy array, or None when nothing was
            uploaded.
        question: Question text from the textbox.

    Returns:
        The generated answer, or a short prompt asking for the missing
        input when the image or the question is absent.
    """
    # Submitting without an upload passes None; fail politely instead of
    # crashing inside the image encoder.
    if image is None:
        return "Please upload an X-ray image."
    if not question or not question.strip():
        return "Please enter a question."

    # Joint embedding for the query, then nearest stored Q/A records.
    q_emb = encode_query(image, question)
    examples = retrieve_examples(q_emb, top_k=5)
    return generate_answer(question, examples)


# Build the Gradio UI: an image upload and a question box feed
# chatbot_interface, and its return value is shown as plain text.
image_input = gr.Image(type='numpy', label='Upload X-ray')
question_input = gr.Textbox(lines=2, placeholder='Ask a question', label='Question')

demo = gr.Interface(
    fn=chatbot_interface,
    inputs=[image_input, question_input],
    outputs="text",
    title='Radiology VQA Chatbot',
)

# share=True requests a public share link in addition to the local URL.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://6f3545c11bca668b82.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [18]:
# Install the rouge_score package (only needs to be run once)
!pip install -q rouge_score

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[33m  DEPRECATION: Building 'rouge_score' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'rouge_score'. Discussion can be found at https://github.com/pypa/pip/issues/6334[0m[33m
[0m

In [None]:
from rouge_score import rouge_scorer
import numpy as np

def compute_rouge(predictions, references):
    """Average ROUGE-1, ROUGE-2 and ROUGE-L F1 over prediction/reference pairs.

    Args:
        predictions: Generated answer strings.
        references: Ground-truth strings, aligned one-to-one with
            ``predictions``.

    Returns:
        Dict mapping 'rouge1', 'rouge2' and 'rougeL' to the mean F1 score
        as a float. All values are NaN when the inputs are empty.

    Raises:
        ValueError: If the two inputs differ in length. zip() would
            otherwise drop the unmatched tail without any warning.
    """
    predictions = list(predictions)
    references = list(references)
    if len(predictions) != len(references):
        raise ValueError(
            f"predictions ({len(predictions)}) and references "
            f"({len(references)}) must have the same length"
        )

    metrics = ['rouge1', 'rouge2', 'rougeL']
    if not predictions:
        # Mean of nothing is undefined; return NaN explicitly.
        return {metric: float("nan") for metric in metrics}

    # Stemming makes inflected forms (e.g. "detected"/"detect") match.
    scorer = rouge_scorer.RougeScorer(metrics, use_stemmer=True)

    # scorer.score takes the reference (target) first, then the prediction.
    scores = [scorer.score(ref, pred) for ref, pred in zip(references, predictions)]

    # Aggregate the per-pair F1 values for each metric.
    return {
        metric: float(np.mean([s[metric].fmeasure for s in scores]))
        for metric in metrics
    }

# Example usage: two toy (prediction, reference) pairs.
example_pairs = [
    ("No evidence of pneumonia is seen.", "No pneumonia detected."),
    ("There is a small pleural effusion.", "Small pleural effusion is present."),
]
preds = [pair[0] for pair in example_pairs]
refs = [pair[1] for pair in example_pairs]

results = compute_rouge(preds, refs)
print("Average ROUGE scores:")
for metric in results:
    score = results[metric]
    print(f"{metric}: {score:.4f}")


Average ROUGE scores:
rouge1: 0.5859
rouge2: 0.2222
rougeL: 0.4949


In [None]:
import time

# Print a timestamp every ten minutes until the kernel is interrupted.
# NOTE(review): this loop never returns on its own, so any cell placed after
# it will not run until it is interrupted. Presumably it is here to keep the
# hosted session from idling out; confirm it is still needed.
try:
    while True:
        print(f"[Keep‐alive] {time.strftime('%Y-%m-%d %H:%M:%S')}")
        time.sleep(10 * 60)  # 10 minutes
except KeyboardInterrupt:
    # Interrupting the kernel raises KeyboardInterrupt; exit with a message.
    print("Stopped keep-alive loop.")


[Keep‐alive] 2025-04-28 17:30:44
[Keep‐alive] 2025-04-28 17:40:44
[Keep‐alive] 2025-04-28 17:50:44
[Keep‐alive] 2025-04-28 18:00:44
[Keep‐alive] 2025-04-28 18:10:44
[Keep‐alive] 2025-04-28 18:20:44
[Keep‐alive] 2025-04-28 18:30:44
[Keep‐alive] 2025-04-28 18:40:44
