In [1]:
!pip install torch 





In [2]:
!pip install torchvision





In [3]:
!pip install pandas 





In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, transforms
import pandas as pd
import numpy as np
from torch.nn.utils.rnn import pad_sequence
from PIL import Image
from torch.utils.data import Dataset, DataLoader

In [5]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Preprocessing applied to every image: resize to 256x256, centre-crop to
# 224x224, convert to a tensor, then normalise each channel with the standard
# ImageNet mean / std.
transform = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.CenterCrop((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [6]:
class SingleImageEncoder(nn.Module):
    """ResNet-18 whose classification head is replaced by an embedding layer.

    All backbone parameters are frozen first; the replacement ``fc`` layer is
    created afterwards, so it is the only trainable part of the module.
    """

    def __init__(self, embed_dim=256, pretrained=True):
        super().__init__()
        weights = models.ResNet18_Weights.DEFAULT if pretrained else None
        backbone = models.resnet18(weights=weights)
        # Freeze the backbone before the new head is attached.
        for frozen in backbone.parameters():
            frozen.requires_grad = False
        # Fresh head: ResNet feature width -> embed_dim.
        backbone.fc = nn.Linear(backbone.fc.in_features, embed_dim)
        self.model = backbone

    def forward(self, x):
        """Encode a batch of images (B, C, H, W) into embeddings (B, embed_dim)."""
        return self.model(x)


In [7]:
class SimpleGCN(nn.Module):
    """Two-layer graph convolution: ``fc2(A @ dropout(relu(fc1(A @ X))))``."""

    def __init__(self, in_feats, hidden, out_feats, dropout=0.1):
        super().__init__()
        self.fc1 = nn.Linear(in_feats, hidden)
        self.fc2 = nn.Linear(hidden, out_feats)
        self.dropout = dropout

    def forward(self, A_norm, X):
        """Propagate node features through the graph.

        Args:
            A_norm: (N, N) normalized adjacency matrix.
            X: (N, in_feats) node feature matrix.

        Returns:
            (N, out_feats) node embeddings.
        """
        # Layer 1: aggregate neighbours, project, apply the non-linearity.
        hidden = F.relu(self.fc1(A_norm @ X))
        # Dropout is active only while the module is in training mode.
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        # Layer 2: aggregate again and project to the output width.
        return self.fc2(A_norm @ hidden)

In [8]:
class CombinedEncoder(nn.Module):
    """Fuse two image-view embeddings with a pooled knowledge-graph embedding."""

    def __init__(self, embed_dim=256, gcn_hidden=128, gcn_out=256, node_feat_dim=None):
        super().__init__()
        # A single image encoder is shared by both views.
        self.image_encoder = SingleImageEncoder(embed_dim=embed_dim)
        # Projection of [view 1 | view 2 | graph] back down to embed_dim.
        self.proj = nn.Linear(embed_dim * 2 + gcn_out, embed_dim)
        # When no node feature width is given, it is taken to equal gcn_out.
        gcn_in = gcn_out if node_feat_dim is None else node_feat_dim
        self.gcn = SimpleGCN(in_feats=gcn_in, hidden=gcn_hidden, out_feats=gcn_out)

    def forward(self, img1, img2, A_norm, node_feats):
        """Return one fused feature vector per sample.

        Args:
            img1, img2: image batches of shape (B, C, H, W).
            A_norm: (N, N) normalized adjacency matrix of the knowledge graph.
            node_feats: (N, node_feat_dim) node features.

        Returns:
            Tensor of shape (B, embed_dim) with values in (-1, 1) (tanh output).
        """
        view_a = self.image_encoder(img1)  # (B, embed_dim)
        view_b = self.image_encoder(img2)  # (B, embed_dim)

        # The graph is the same for every sample: embed the nodes, mean-pool
        # them into a single vector, then repeat that vector across the batch.
        graph_vec = self.gcn(A_norm, node_feats).mean(dim=0, keepdim=True)  # (1, gcn_out)
        graph_vec = graph_vec.expand(view_a.shape[0], -1)                   # (B, gcn_out)

        fused = torch.cat([view_a, view_b, graph_vec], dim=1)  # (B, 2*embed_dim + gcn_out)
        return torch.tanh(self.proj(fused))

In [9]:
class ReportDecoderRNN(nn.Module):
    """Single-layer LSTM decoder conditioned on a fused image/graph feature.

    The feature vector initialises the LSTM hidden state; token embeddings are
    fed as the input sequence and every step is projected to vocabulary logits.
    """

    def __init__(self, embed_dim, vocab_size, hidden_dim=512):
        super().__init__()
        # Index 0 is treated as padding: its embedding stays zero and gets no gradient.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        # nn.LSTM applies its `dropout` only *between* stacked layers, so on this
        # single-layer LSTM a non-zero value did nothing except raise a
        # UserWarning. Omitting it leaves outputs and state_dict keys unchanged.
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)
        self.init_fc = nn.Linear(embed_dim, hidden_dim)  # takes the combined feature vector

    def forward(self, features, captions):
        """Compute next-token logits for every position of ``captions``.

        Args:
            features: (B, embed_dim) conditioning vectors.
            captions: (B, T) integer token ids.

        Returns:
            (B, T, vocab_size) unnormalised logits.
        """
        embeddings = self.embedding(captions)  # (B, T, embed_dim)
        # Hidden state comes from the features; the cell state starts at zero.
        h0 = torch.tanh(self.init_fc(features)).unsqueeze(0)  # (1, B, hidden_dim)
        c0 = torch.zeros_like(h0)
        outputs, _ = self.lstm(embeddings, (h0, c0))
        return self.fc(outputs)  # (B, T, vocab_size)

In [10]:
def load_adj_and_node_features(csv_path, device, node_feat_mode='onehot', max_onehot_nodes=1000):
    """Read a header-less adjacency CSV and build GCN inputs.

    Args:
        csv_path: CSV file holding a square adjacency matrix (no header row).
        device: torch device the returned tensors are moved to.
        node_feat_mode: 'onehot' (identity features, used only while
            N <= max_onehot_nodes), 'degree' (one column holding the degree),
            anything else -> two columns: normalized degree and a constant 1.
        max_onehot_nodes: largest graph for which one-hot features are built.

    Returns:
        (A_norm, node_feats): D^-1/2 (A + I) D^-1/2 as an (N, N) tensor, and the
        node feature tensor with N rows.
    """
    adjacency = pd.read_csv(csv_path, header=None).values.astype(np.float32)
    N = adjacency.shape[0]
    assert adjacency.shape[0] == adjacency.shape[1], "Adjacency matrix must be square."

    # Self-loops, then symmetric degree normalization.
    with_loops = adjacency + np.eye(N, dtype=np.float32)
    deg = with_loops.sum(axis=1)
    scale = np.diag(1.0 / np.sqrt(deg + 1e-12))
    A_norm = torch.from_numpy(scale @ with_loops @ scale).to(device)

    use_onehot = node_feat_mode == 'onehot' and N <= max_onehot_nodes
    if use_onehot:
        feats = np.eye(N, dtype=np.float32)                    # (N, N)
    elif node_feat_mode == 'degree':
        feats = deg.reshape(-1, 1).astype(np.float32)          # (N, 1)
    else:
        # Fallback: [degree / max degree, 1] per node.
        rel_degree = deg / (deg.max() + 1e-12)
        feats = np.stack([rel_degree, np.ones_like(deg)], axis=1).astype(np.float32)  # (N, 2)

    return A_norm, torch.from_numpy(feats).to(device)

In [11]:
from transformers import AutoTokenizer

# Start from the uncased BERT word-piece vocabulary.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Register dedicated begin/end-of-sequence markers for the report decoder.
special_tokens = dict(bos_token="<BOS>", eos_token="<EOS>")
tokenizer.add_special_tokens(special_tokens)

# len() includes the tokens added above, so layers sized with vocab_size
# have room for <BOS> and <EOS>.
vocab_size = len(tokenizer)

In [12]:
def collate_fn(batch, tokenizer):
    """Collate dataset items into one padded batch.

    Args:
        batch: list of dicts with the keys "img1", "img2" (equally shaped image
            tensors), "findings" (str), "report" (str), "img1_path" and
            "img2_path".
        tokenizer: object exposing ``bos_token_id``, ``eos_token_id``,
            ``pad_token_id`` and ``encode(text, add_special_tokens=False)``.

    Returns:
        dict with "img1"/"img2" stacked along a new batch dimension, "tokens"
        (B, T_max) long tensor holding ``[BOS] + findings ids + [EOS]`` padded
        with the pad id, and "report"/"img1_path"/"img2_path" as plain lists.

    Raises:
        ValueError: if the tokenizer has no BOS, EOS or PAD id configured.
    """
    # Ids always come from the tokenizer; nothing here assumes fixed values.
    BOS = tokenizer.bos_token_id
    EOS = tokenizer.eos_token_id
    PAD = tokenizer.pad_token_id

    # Fail early with a readable message instead of a cryptic tensor error.
    missing = [label for label, tok_id in (("bos", BOS), ("eos", EOS), ("pad", PAD)) if tok_id is None]
    if missing:
        raise ValueError(
            "tokenizer is missing special token id(s): " + ", ".join(missing)
        )

    img1_list, img2_list = [], []
    rep_list = []
    ground_truth_list = []
    img1_paths, img2_paths = [], []

    for item in batch:
        img1_list.append(item["img1"])
        img2_list.append(item["img2"])
        ground_truth_list.append(item["report"])
        img1_paths.append(item["img1_path"])
        img2_paths.append(item["img2_path"])

        # Encode the findings without the tokenizer's own special tokens, then
        # wrap them in the explicit BOS/EOS markers the decoder is trained on.
        ids = tokenizer.encode(item["findings"].strip(), add_special_tokens=False)
        rep_list.append(torch.tensor([BOS] + ids + [EOS], dtype=torch.long))

    return {
        "img1": torch.stack(img1_list),
        "img2": torch.stack(img2_list),
        # Right-pad every sequence to the longest one in the batch.
        "tokens": pad_sequence(rep_list, batch_first=True, padding_value=PAD),
        "report": ground_truth_list,
        "img1_path": img1_paths,
        "img2_path": img2_paths,
    }


In [13]:
class MIMICCXRDataset(Dataset):
    """Study-level dataset yielding two image views plus report text fields.

    Each row of ``df`` must provide ``image_path`` (a sequence of image file
    paths) and may provide ``history``, ``findings``, ``impression`` and
    ``report_text``.
    """

    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    @staticmethod
    def _text_field(row, column):
        """Return ``row[column]`` as stripped text; missing/None/NaN become ""."""
        value = row.get(column, "")
        if isinstance(value, str):
            return value.strip()
        # Without this guard str() would turn a missing section into the
        # literal words "None" / "nan", which then get tokenized as report text.
        if value is None or value is pd.NA or (isinstance(value, float) and value != value):
            return ""
        return str(value).strip()

    @staticmethod
    def _load_rgb(path):
        """Open an image, convert it to RGB and release the file handle."""
        with Image.open(path) as handle:
            # convert() reads the pixel data and returns an independent image,
            # so closing the file afterwards is safe.
            return handle.convert("RGB")

    def __getitem__(self, idx):
        """Build one sample.

        Returns:
            dict with "img1"/"img2" (transformed when a transform was given),
            "history", "findings", "impression", "report" (taken from the
            ``report_text`` column), "img1_path" and "img2_path".

        Raises:
            ValueError: if the row lists no image paths.
        """
        row = self.df.iloc[idx]

        # =========================
        # LOAD IMAGES
        # =========================
        img_paths = row["image_path"]
        # A bare string would otherwise be indexed character by character.
        if isinstance(img_paths, str):
            img_paths = [img_paths]

        if len(img_paths) == 0:
            raise ValueError(f"No images found for index {idx}")

        if len(img_paths) == 1:
            # Only one view available: use it for both inputs.
            img1_path = img2_path = img_paths[0]
        else:
            img1_path, img2_path = img_paths[:2]

        img1 = self._load_rgb(img1_path)
        img2 = self._load_rgb(img2_path)

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        # =========================
        # TEXT FIELDS
        # =========================
        return {
            "img1": img1,
            "img2": img2,
            "history": self._text_field(row, "history"),
            "findings": self._text_field(row, "findings"),
            "impression": self._text_field(row, "impression"),
            # The full report text is the reference ("ground truth") report.
            "report": self._text_field(row, "report_text"),
            "img1_path": img1_path,
            "img2_path": img2_path
        }

In [14]:
path = r"D:\fyp_manish_shyam\archive\mimic-cxr-dataset"
print("Path to dataset files:", path)

Path to dataset files: D:\fyp_manish_shyam\archive\mimic-cxr-dataset


In [15]:
import os

print("Files/folders inside base path:")
print(os.listdir(path))


Files/folders inside base path:
['metadata.csv', 'mimic-cxr-reports', 'official_data_iccv_final']


In [16]:
import os

# Every path component is passed separately so os.path.join inserts the
# platform separator throughout; a literal like "a/b" yields mixed "\" and "/"
# on Windows.
BASE_PATH = os.path.join(path, "mimic-cxr-reports", "files")
IMG_PATH = os.path.join(path, "official_data_iccv_final", "files")   # or check actual folder name
META_PATH = os.path.join(path, "metadata.csv")

print("Reports:", BASE_PATH)
print("Images:", IMG_PATH)
print("Metadata:", META_PATH)


Reports: D:\fyp_manish_shyam\archive\mimic-cxr-dataset\mimic-cxr-reports/files
Images: D:\fyp_manish_shyam\archive\mimic-cxr-dataset\official_data_iccv_final/files
Metadata: D:\fyp_manish_shyam\archive\mimic-cxr-dataset\metadata.csv


In [17]:
os.listdir(BASE_PATH)

['p10', 'p11', 'p12', 'p13', 'p14', 'p15', 'p16', 'p17', 'p18', 'p19']

In [18]:
os.listdir(r"D:\fyp_manish_shyam\archive\mimic-cxr-dataset\mimic-cxr-reports\files\p17\p17702631")

['s52487079.txt', 's54824507.txt', 's57315885.txt']

In [19]:
!pip install transformers





In [20]:
import os, math, random, json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel

# ==== LOCAL PATHS ====
# Absolute, machine-specific locations of the report files, the image files
# and the metadata CSV.
# NOTE(review): these repeat the locations already held in BASE_PATH /
# IMG_PATH / META_PATH and are not referenced again in the visible cells —
# confirm which set is meant to be authoritative.
REPORTS_DIR = r"D:\fyp_manish_shyam\archive\mimic-cxr-dataset\mimic-cxr-reports\files"
IMAGES_DIR = r"D:\fyp_manish_shyam\archive\mimic-cxr-dataset\official_data_iccv_final\files"
METADATA_PATH = r"D:\fyp_manish_shyam\archive\mimic-cxr-dataset\metadata.csv"

# Directory to save trained models (relative to the working directory);
# created on the spot if it does not exist yet.
SAVED_MODELS_DIR = "./saved_models"
os.makedirs(SAVED_MODELS_DIR, exist_ok=True)

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

In [21]:
import pandas as pd

df_kg = pd.read_csv(r"D:\fyp_manish_shyam\adjacency_matrix.csv")
df_kg.head()


Unnamed: 0,cardiomegaly,pleural effusion,consolidation,atelectasis,pneumothorax,normal heart,normal lungs,opacity,emphysema,enlarged heart,...,evidence,process,cardiopulmonary,small,tube,hilar,stable,prior,upper,lobe
0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,1,1,1,0,0,1,0,0,...,1,1,1,1,1,1,1,1,1,1
2,0,1,0,0,1,0,0,0,0,0,...,0,1,1,0,0,1,0,0,0,0
3,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,1,1,1,0,0,0,1,0,0,...,1,1,1,1,1,1,1,1,1,1


In [22]:
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel


# -----------------------------------------------------
# 1. Load adjacency matrix (df_kg)
# -----------------------------------------------------
# All values of df_kg are taken as the adjacency matrix; it must be square
# for the transpose-based symmetrization below to work.
A = df_kg.values.astype(float)
N = A.shape[0]
print("Adjacency matrix shape:", A.shape)

# -----------------------------------------------------
# 2. Make symmetric + normalize (GCN normalization)
# -----------------------------------------------------
# Average A with its transpose so the matrix is exactly symmetric.
A = (A + A.T) / 2.0
# Tiny diagonal term: keeps every row sum strictly positive. It is far smaller
# than the unit self-loop of the usual "A + I" GCN formulation.
A += np.eye(N) * 1e-6  # prevent zero degree

# Symmetric normalization: A_hat = D^-1/2 * A * D^-1/2, D = diagonal of row sums.
D = np.sum(A, axis=1)
D_inv_sqrt = np.diag(1.0 / np.sqrt(D + 1e-8))
A_hat = D_inv_sqrt @ A @ D_inv_sqrt

A_hat = torch.tensor(A_hat, dtype=torch.float32)
print("A_hat ready for GCN:", A_hat.shape)

# -----------------------------------------------------
# 3. Dummy node names if you don't have real terms
# -----------------------------------------------------
# NOTE(review): the df_kg preview lists column labels that look like real
# terms (e.g. "cardiomegaly"), while the placeholder strings built here carry
# no term meaning. Embedding df_kg's column labels instead would change
# X_nodes, so any model trained on the current features would need
# retraining — confirm before switching.
terms = [f"node_{i}" for i in range(N)]
print("Sample terms:", terms[:5], "...")


# -----------------------------------------------------
# 4. TEXT EMBEDDING — FORCED CPU (fix for GTX 1080 Ti)
# -----------------------------------------------------
tokenizer_term = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

# IMPORTANT: Load the transformer on CPU ONLY
text_model = AutoModel.from_pretrained(
    "sentence-transformers/all-MiniLM-L6-v2"
).to("cpu")
# Inference mode: disables dropout inside the transformer.
text_model.eval()


def embed_terms(terms_list):
    """Embed each term with MiniLM on the CPU using masked mean pooling.

    Args:
        terms_list: list of strings to embed.

    Returns:
        CPU tensor with one row per term and one column per hidden unit of
        ``text_model``.
    """
    cpu = torch.device("cpu")

    encoded = tokenizer_term(
        terms_list, padding=True, truncation=True, return_tensors="pt"
    )
    token_ids = encoded["input_ids"].to(cpu)
    attention = encoded["attention_mask"].to(cpu)

    with torch.no_grad():
        hidden = text_model(input_ids=token_ids, attention_mask=attention).last_hidden_state

        # Zero out padded positions, then average over the real tokens only.
        weights = attention.unsqueeze(-1)
        totals = (hidden * weights).sum(1)
        # clamp guards against division by zero for an all-padding row.
        lengths = weights.sum(1).clamp(min=1)

        return totals / lengths


# -----------------------------------------------------
# 5. Compute node embeddings
# -----------------------------------------------------
print("Embedding KG terms on CPU (MiniLM)...")
X_nodes = embed_terms(terms)

print("Node features shape:", X_nodes.shape)
print("Done.")


Adjacency matrix shape: (45, 45)
A_hat ready for GCN: torch.Size([45, 45])
Sample terms: ['node_0', 'node_1', 'node_2', 'node_3', 'node_4'] ...
Embedding KG terms on CPU (MiniLM)...
Node features shape: torch.Size([45, 384])
Done.


In [23]:
# Cell 2: Re-derive the normalized adjacency A_hat from the current value of A.
# NOTE: A is no longer the raw binary matrix at this point — the preceding cell
# already symmetrized it and added 1e-6 to its diagonal — so the eps below is
# added on top of that. The A_hat assigned here replaces the earlier one.

import numpy as np
import torch

print("Raw A shape:", A.shape)

# --- 1) Ensure A is numpy float array ---
A = A.astype(float)

# --- 2) Add small self-loops ---
# eps only keeps the row sums strictly positive; it is much smaller than the
# unit self-loop weight of the usual "A + I" GCN formulation.
eps = 1e-6
A_with_loops = A + np.eye(A.shape[0]) * eps

# --- 3) Compute degree matrix ---
deg = A_with_loops.sum(axis=1)
D_inv_sqrt = np.diag(1.0 / np.sqrt(deg + 1e-8))

# --- 4) Symmetric normalization: A_hat = D^-1/2 * A_with_loops * D^-1/2 ---
A_hat_np = D_inv_sqrt @ A_with_loops @ D_inv_sqrt

# --- 5) Convert to torch tensor ---
A_hat = torch.tensor(A_hat_np, dtype=torch.float32)

print("A_hat shape:", A_hat.shape)
# Preview of the top-left 5x5 corner.
print(A_hat[:5, :5].tolist())


Raw A shape: (45, 45)
A_hat shape: torch.Size([45, 45])
[[6.666662102361443e-07, 0.09491576254367828, 0.0, 0.0, 0.0], [0.09491576254367828, 5.4054051901175626e-08, 0.03355779871344566, 0.039872609078884125, 0.027399830520153046], [0.0, 0.03355779871344566, 8.333332601750953e-08, 0.0, 0.03402068838477135], [0.0, 0.039872609078884125, 0.0, 1.1764704765937495e-07, 0.040422599762678146], [0.0, 0.027399830520153046, 0.03402068838477135, 0.040422599762678146, 5.5555553046815476e-08]]


In [24]:
# REPORT_PATH = os.path.join(path, "mimic-cxr-reports", "files")
# IMAGE_PATH = os.path.join(path, "official_data_iccv_final", "files")
# META_PATH = os.path.join(path, "mimic-cxr-metadata.csv")
# print(REPORT_PATH)
# print(IMAGE_PATH)
# print(META_PATH)

In [25]:
# import os
# import pandas as pd

# all_reports = []
# all_report_paths = []
# all_image_paths = []
# all_patient_ids = []
# all_study_ids = []

# for pid in os.listdir(REPORT_PATH):  # REPORT_PATH = mimic-cxr-reports/files
#     pid_path = os.path.join(REPORT_PATH, pid)
#     if not os.path.isdir(pid_path):
#         continue

#     for patient in os.listdir(pid_path):  # e.g., p11013572
#         patient_path = os.path.join(pid_path, patient)
#         if not os.path.isdir(patient_path):
#             continue
#         # print(patient_path)

#         for study_file in os.listdir(patient_path):  # e.g., s50771383.txt
#             if study_file.endswith(".txt"):
#                 report_path = os.path.join(patient_path, study_file)
#                 study_id = study_file[:-4]  # remove '.txt'
#                 # print(study_id)

#                 # Build corresponding image folder path
#                 image_folder_path = os.path.join(
#                     IMAGE_PATH,  # /mimic-cxr-dataset/official_data_iccv_final/files
#                     pid,
#                     patient,
#                     study_id
#                 )
#                 # print(image_folder_path)

#                 # Only proceed if image folder exists
#                 if os.path.exists(image_folder_path):
#                     # print('im here')
#                     image_files = [f for f in os.listdir(image_folder_path) if f.lower().endswith('.jpg')]
#                     for image_file in image_files:
#                         image_path = os.path.join(image_folder_path, image_file)
#                         # print(image_path)

#                         with open(report_path, "r") as f:
#                             report_text = f.read()

#                         all_reports.append(report_text)
#                         all_report_paths.append(report_path)
#                         all_image_paths.append(image_path)
#                         all_patient_ids.append(patient)
#                         all_study_ids.append(study_id)

# print(f"\nTotal valid (image + report) pairs: {len(all_reports)}")
# print(f"\nSample report text:\n{all_reports[0][:500]}")
# print(f"\nSample image path:\n{all_image_paths[0]}")
 

In [26]:
# import re

# SECTION_NAMES = ["HISTORY", "INDICATION", "FINDINGS", "IMPRESSION", "TECHNIQUE", "EXAMINATION", "COMPARISON", "REFERENCE EXAM", "COMPARISONS"]

# def extract_field(report: str, field: str):

#     # INDICATION should match both INDICATION and HISTORY
#     if field.upper() == "INDICATION":
#         field_pattern = r"(?:INDICATION|HISTORY)"
#     else:
#         field_pattern = re.escape(field)

#     # Build next-section pattern (case-insensitive)
#     next_fields = "|".join([f"{name}:" for name in SECTION_NAMES])

#     # Entire regex is case-insensitive with (?i) AT THE BEGINNING
#     pattern = re.compile(
#         rf"(?i){field_pattern}:\s*(.*?)(?=\n\s*(?:{next_fields})|\Z)",
#         flags=re.DOTALL
#     )

#     match = pattern.search(report)
#     if match:
#         return match.group(1).strip()

#     return None


In [27]:
# import pandas as pd

# # Step 1: Create initial dataframe (one row per image)
# df = pd.DataFrame({
#     "patient_id": all_patient_ids,
#     "study_id": all_study_ids,
#     "report_text": all_reports,
#     "report_path": all_report_paths,
#     "image_path": all_image_paths
# })

# # Step 2: Group by patient_id + study_id
# df_new = df.groupby(["patient_id", "study_id"]).agg({
#     "image_path": list,          # collect all image paths into list
#     "report_text": "first",      # all same → pick first
#     "report_path": "first"       # all same → pick first
# }).reset_index()


In [28]:
# df_new["findings"] = df_new["report_text"].apply(lambda re: extract_field(re, "FINDINGS"))
# df_new["impression"] = df_new["report_text"].apply(lambda re: extract_field(re, "IMPRESSION"))
# df_new["history"] = df_new["report_text"].apply(lambda re: extract_field(re, "INDICATION"))
# df_new.head()

In [29]:
# df_new.to_pickle(r"D:\fyp_manish_shyam\df_new.pkl")

In [31]:
import pandas as pd

# Reload the study-level table from the pickle that the (now commented-out)
# to_pickle calls wrote.
# NOTE: unpickling can execute arbitrary code — only load files you created.
df_new = pd.read_pickle(r"D:\fyp_manish_shyam\df_new.pkl")

# Quick look at the first rows.
print(df_new.head())

  patient_id   study_id                                         image_path  \
0  p10000032  s50414267  [D:\fyp_manish_shyam\archive\mimic-cxr-dataset...   
1  p10000032  s53189527  [D:\fyp_manish_shyam\archive\mimic-cxr-dataset...   
2  p10000032  s53911762  [D:\fyp_manish_shyam\archive\mimic-cxr-dataset...   
3  p10000032  s56699142  [D:\fyp_manish_shyam\archive\mimic-cxr-dataset...   
4  p10000898  s50771383  [D:\fyp_manish_shyam\archive\mimic-cxr-dataset...   

                                         report_text  \
0                                   FINAL REPORT\...   
1                                   FINAL REPORT\...   
2                                   FINAL REPORT\...   
3                                   FINAL REPORT\...   
4                                   FINAL REPORT\...   

                                         report_path  \
0  D:\fyp_manish_shyam\archive\mimic-cxr-dataset\...   
1  D:\fyp_manish_shyam\archive\mimic-cxr-dataset\...   
2  D:\fyp_manish_shyam\arc

In [32]:
# ==============================
# BUILD MODEL
# ==============================

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
embed_dim = 256
hidden_dim = 512

# The GCN input width must match the width of the node feature matrix.
encoder = CombinedEncoder(
    embed_dim=embed_dim,
    gcn_hidden=128,
    gcn_out=256,
    node_feat_dim=X_nodes.shape[1]
).to(device)

# The decoder consumes the encoder's embed_dim-wide fused feature.
# (Its embedding weights are trainable by default; no extra call is needed.)
decoder = ReportDecoderRNN(
    embed_dim=embed_dim,
    vocab_size=vocab_size,
    hidden_dim=hidden_dim
).to(device)

# ==============================
# OPTIMIZER + LOSS
# ==============================
# Ignore padded target positions. The id is read from the tokenizer so the loss
# always agrees with the value collate_fn pads with.
criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)

# Parameter order (encoder first, then decoder) is kept as is: optimizer state
# stored in checkpoints is matched to parameters by position.
optimizer = torch.optim.Adam(
    list(encoder.parameters()) + list(decoder.parameters()),
    lr=1e-4
)

A_hat = A_hat.to(device)
X_nodes = X_nodes.to(device)

# ==============================
# SPLIT: 80% TRAIN, 20% TEST
# ==============================
from sklearn.model_selection import train_test_split

# Fixed random_state keeps the split identical between runs.
train_df, test_df = train_test_split(df_new, test_size=0.20, random_state=42)

train_dataset = MIMICCXRDataset(train_df, transform=transform)
test_dataset  = MIMICCXRDataset(test_df,  transform=transform)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True,
                          collate_fn=lambda x: collate_fn(x, tokenizer))

test_loader  = DataLoader(test_dataset, batch_size=8, shuffle=False,
                          collate_fn=lambda x: collate_fn(x, tokenizer))


print(f"Train batches: {len(train_loader)}, Test batches: {len(test_loader)}")


Using device: cuda




Train batches: 5244, Test batches: 1311


In [33]:
# import re
# import pandas as pd

# def clean_report(report: str) -> str:
#     if not isinstance(report, str):
#         return ""

#     report = re.sub(r'_', ' ', report)          # Replace underscores with space
#     report = re.sub(r'\s+', ' ', report).strip()  # Normalize whitespace
#     report = report.lower()                      # Convert to lowercase
#     return report

In [34]:
# # Example usage with a DataFrame
# df_new['findings'] = df_new['findings'].apply(clean_report)

In [35]:
# # Example usage with a DataFrame
# df_new['history'] = df_new['history'].apply(clean_report)
# df_new['history'].head()

In [36]:
# # Example usage with a DataFrame
# df_new['impression'] = df_new['impression'].apply(clean_report)
# df_new['impression'].head()

In [37]:
# df_new.to_pickle(r"D:\fyp_manish_shyam\df_new.pkl")

In [38]:
# from tqdm import tqdm

# num_epochs = 10

# for epoch in range(num_epochs):
#     encoder.train()
#     decoder.train()

#     total_train_loss = 0
#     total_tokens = 0

#     # tqdm loader
#     train_pbar = tqdm(train_loader, desc=f"Epoch {epoch+1} Training", leave=True)

    
#     for batch in train_pbar:
#         img1 = batch["img1"].to(device)
#         img2 = batch["img2"].to(device)
#         tokens = batch["tokens"].to(device)   # (B, T)

#         inputs = tokens[:, :-1]
#         targets = tokens[:, 1:]

#         optimizer.zero_grad()

#         # ===== ENCODER =====
#         features = encoder(img1, img2, A_hat.to(device), X_nodes.to(device))  # (B, embed_dim)

#         # ===== DECODER =====
#         logits = decoder(features, inputs)  # (B, T-1, vocab)

#         loss = criterion(
#             logits.reshape(-1, vocab_size),
#             targets.reshape(-1)
#         )

#         loss.backward()
#         optimizer.step()

#         total_train_loss += loss.item() * targets.numel()
#         total_tokens += targets.numel()

#         # update progress bar
#         train_pbar.set_postfix({
#             "loss": f"{loss.item():.4f}"
#         })

#     avg_train_loss = total_train_loss / total_tokens
#     print(f"\n[Epoch {epoch+1}] Average Train Loss = {avg_train_loss:.4f}\n")


In [39]:
print(tokenizer.bos_token_id, tokenizer.eos_token_id)
print(tokenizer.cls_token_id, tokenizer.sep_token_id)


30522 30523
101 102


In [40]:
# save_path = r"D:\fyp_manish_shyam\saved_models\final_model.pth"

# checkpoint = {
#     "encoder_state_dict": encoder.state_dict(),
#     "decoder_state_dict": decoder.state_dict(),
#     "optimizer_state_dict": optimizer.state_dict(),
#     "vocab_size": vocab_size,
#     "epoch": epoch + 1
# }

# torch.save(checkpoint, save_path)

# print(f"Model saved successfully at: {save_path}")


In [41]:
save_path = r"D:\fyp_manish_shyam\saved_models\final_model.pth"

# map_location: lets a checkpoint written from a GPU session load on whatever
#   device this session runs on (including CPU-only machines).
# weights_only: restricts unpickling to tensors and plain containers, which is
#   all this checkpoint holds, and avoids torch.load's unsafe-default warning.
checkpoint = torch.load(save_path, map_location=device, weights_only=True)

encoder.load_state_dict(checkpoint["encoder_state_dict"])
decoder.load_state_dict(checkpoint["decoder_state_dict"])
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])


  checkpoint = torch.load(save_path)


In [62]:
@torch.no_grad()
def generate_report(
    encoder, decoder, img1, img2, A_hat, X_nodes,
    max_len=80, temperature=1.0, min_len=10, top_p=0.9
):
    """Generate one report with nucleus (top-p) sampling.

    Args:
        encoder: module called as ``encoder(img1, img2, A_hat, X_nodes)``.
        decoder: module called as ``decoder(features, token_ids)`` returning
            (1, T, vocab) logits.
        img1, img2: single-sample image batches, shape (1, C, H, W).
        A_hat, X_nodes: graph adjacency and node features for the encoder.
        max_len: maximum number of tokens generated after BOS.
        temperature: logits are divided by this before the softmax (> 0).
        min_len: for the first ``min_len`` steps the EOS token and the first
            word piece of "none" are suppressed. With ``min_len >= max_len``
            EOS can never be produced and exactly ``max_len`` tokens come out.
        top_p: cumulative-probability threshold of the nucleus.

    Returns:
        The decoded text with special tokens removed.

    Raises:
        ValueError: if ``temperature`` is not positive.

    Sampling is random; seed torch beforehand for reproducible output. Uses
    the module-level ``tokenizer``.
    """
    if temperature <= 0:
        raise ValueError("temperature must be > 0")

    BOS = tokenizer.bos_token_id
    EOS = tokenizer.eos_token_id
    NONE_ID = tokenizer.encode("none", add_special_tokens=False)[0]

    # Image/graph features are computed once and reused at every step.
    feat = encoder(img1, img2, A_hat, X_nodes)

    generated = [BOS]

    for step in range(max_len):
        # Feed the whole prefix again; the tensor lives on the same device as
        # the features, so no global `device` is needed.
        inp = torch.tensor(generated, dtype=torch.long, device=feat.device).unsqueeze(0)

        # Logits of the last position only -> shape (vocab,)
        logits = decoder(feat, inp)[0, -1, :] / temperature

        if step < min_len:
            # Drive these probabilities to zero so they cannot be sampled.
            logits[EOS] = -1e9
            logits[NONE_ID] = -1e9
        probs = torch.softmax(logits, dim=-1)

        # ----- Nucleus sampling: keep the smallest set of most likely tokens
        # whose cumulative probability exceeds top_p, renormalize, sample. -----
        sorted_probs, sorted_idx = torch.sort(probs, descending=True)
        cumulative = torch.cumsum(sorted_probs, dim=0)
        beyond = (cumulative > top_p).nonzero(as_tuple=True)[0]
        if beyond.numel() > 0:
            keep = int(beyond[0]) + 1  # include the token that crosses top_p
            sorted_probs = sorted_probs[:keep]
            sorted_idx = sorted_idx[:keep]
        sorted_probs = sorted_probs / sorted_probs.sum()
        next_id = sorted_idx[torch.multinomial(sorted_probs, 1)].item()

        generated.append(next_id)
        if next_id == EOS:
            break

    return tokenizer.decode(generated, skip_special_tokens=True)


In [63]:
from tqdm import tqdm

# Inference mode: disables dropout and freezes batch-norm statistics.
encoder.eval()
decoder.eval()

generated_list = []
ground_truth_list = []
img1_paths = []
img2_paths = []
count = 0

# The graph inputs are identical for every sample; place them once.
graph_adj = A_hat.to(device)
graph_feats = X_nodes.to(device)

with torch.no_grad():

    # Single tqdm bar for batches
    for batch in tqdm(test_loader, total=len(test_loader), desc="Generating Reports"):

        img1 = batch["img1"].to(device)
        img2 = batch["img2"].to(device)

        # Reports are sampled one study at a time.
        for i in range(img1.size(0)):
            img1_paths.append(batch["img1_path"][i])
            img2_paths.append(batch["img2_path"][i])
            # Reference text is the raw report string carried through the batch.
            ground_truth_list.append(batch["report"][i])

            # ===== Generate report =====
            # NOTE(review): min_len=120 exceeds generate_report's default
            # max_len=80, so EOS stays suppressed for the whole generation and
            # every report is exactly 80 tokens long. Value kept to reproduce
            # the recorded run — confirm the intended limits.
            gen_report = generate_report(
                encoder,
                decoder,
                img1[i].unsqueeze(0),
                img2[i].unsqueeze(0),
                graph_adj,
                graph_feats,
                min_len=120
            )

            generated_list.append(gen_report)

            count += 1

print(f"\nGenerated reports for {count} samples.")


Generating Reports: 100%|██████████████████████████████████████████████████████████| 1311/1311 [38:08<00:00,  1.75s/it]


Generated reports for 10487 samples.





In [58]:
# generated_list

In [55]:
# Snapshot of the first 100 generated reports next to their references.
df_results_raw_100 = pd.DataFrame({
    "img1_path": img1_paths[:100],
    "img2_path": img2_paths[:100],
    "ground_truth": ground_truth_list[:100],
    "generated_report": generated_list[:100]
})
save_path = r"D:\fyp_manish_shyam\results\raw_output_100.csv"
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
df_results_raw_100.to_csv(save_path, index=False)

print("CSV saved at:", save_path)


CSV saved at: D:\fyp_manish_shyam\results\raw_output_100.csv


In [64]:
# Full table of generated reports next to their reference reports.
df_results_raw = pd.DataFrame({
    "img1_path": img1_paths,
    "img2_path": img2_paths,
    "ground_truth": ground_truth_list,
    "generated_report": generated_list
})

save_path = r"D:\fyp_manish_shyam\results\raw_output_fixed_bug.csv"
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
df_results_raw.to_csv(save_path, index=False)

print("CSV saved at:", save_path)


CSV saved at: D:\fyp_manish_shyam\results\raw_output_fixed_bug.csv


In [45]:
!pip install google-genai





In [46]:
!pip install -U google-genai


Collecting google-genai
  Downloading google_genai-1.55.0-py3-none-any.whl.metadata (47 kB)
Collecting distro<2,>=1.7.0 (from google-genai)
  Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Downloading google_genai-1.55.0-py3-none-any.whl (703 kB)
   ---------------------------------------- 0.0/703.4 kB ? eta -:--:--
   ---------------------------------------- 703.4/703.4 kB ?  0:00:00
Downloading distro-1.9.0-py3-none-any.whl (20 kB)
Installing collected packages: distro, google-genai

  Attempting uninstall: google-genai

    Found existing installation: google-genai 1.52.0

    Uninstalling google-genai-1.52.0:

   -------------------- ------------------- 1/2 [google-genai]
      Successfully uninstalled google-genai-1.52.0
   -------------------- ------------------- 1/2 [google-genai]
   -------------------- ------------------- 1/2 [google-genai]
   -------------------- ------------------- 1/2 [google-genai]
   -------------------- ------------------- 1/2 [google-genai]



In [47]:
import os
from getpass import getpass

# Never store the key in the notebook: reuse it when the environment already
# provides one, otherwise ask for it interactively (input is not echoed).
if not os.environ.get("GEMINI_API_KEY"):
    os.environ["GEMINI_API_KEY"] = getpass("Gemini API key: ")


In [48]:
from google import genai

# The client reads the key from the GEMINI_API_KEY environment variable, so no
# credential is written into the notebook.
client = genai.Client()

# Deliberately not called `models`: that name is torchvision's `models` module,
# which SingleImageEncoder needs when its cell is re-run.
available_models = client.models.list()
for m in available_models:
    print(m.name)
    print(m.display_name)

models/embedding-gecko-001
Embedding Gecko
models/gemini-2.5-flash
Gemini 2.5 Flash
models/gemini-2.5-pro
Gemini 2.5 Pro
models/gemini-2.0-flash-exp
Gemini 2.0 Flash Experimental
models/gemini-2.0-flash
Gemini 2.0 Flash
models/gemini-2.0-flash-001
Gemini 2.0 Flash 001
models/gemini-2.0-flash-exp-image-generation
Gemini 2.0 Flash (Image Generation) Experimental
models/gemini-2.0-flash-lite-001
Gemini 2.0 Flash-Lite 001
models/gemini-2.0-flash-lite
Gemini 2.0 Flash-Lite
models/gemini-2.0-flash-lite-preview-02-05
Gemini 2.0 Flash-Lite Preview 02-05
models/gemini-2.0-flash-lite-preview
Gemini 2.0 Flash-Lite Preview
models/gemini-exp-1206
Gemini Experimental 1206
models/gemini-2.5-flash-preview-tts
Gemini 2.5 Flash Preview TTS
models/gemini-2.5-pro-preview-tts
Gemini 2.5 Pro Preview TTS
models/gemma-3-1b-it
Gemma 3 1B
models/gemma-3-4b-it
Gemma 3 4B
models/gemma-3-12b-it
Gemma 3 12B
models/gemma-3-27b-it
Gemma 3 27B
models/gemma-3n-e4b-it
Gemma 3n E4B
models/gemma-3n-e2b-it
Gemma 3n E2B
mod

In [49]:

def refine_report_gemini(raw_report: str, client=None, model: str = "models/gemini-2.5-pro") -> str:
    """Rewrite a generated report into readable English with a Gemini model.

    Args:
        raw_report: text to clean up.
        client: optional google-genai client. When omitted, one client is
            created on first use (it reads GEMINI_API_KEY from the
            environment) and reused by later calls.
        model: model name passed to ``generate_content``.

    Returns:
        The refined report, stripped of surrounding whitespace. An empty or
        whitespace-only ``raw_report`` returns "" without calling the API.

    Raises:
        ValueError: if the model returns no text.
    """
    # Nothing to refine: skip the API call instead of asking the model to
    # rewrite an empty quote.
    if not raw_report or not raw_report.strip():
        return ""

    if client is None:
        # Create the default client once; building one per call is wasteful
        # when thousands of reports are refined in a loop.
        client = getattr(refine_report_gemini, "_default_client", None)
        if client is None:
            client = genai.Client()   # Reads GEMINI_API_KEY automatically
            refine_report_gemini._default_client = client

    prompt = f"""
You are a clinical radiology report editor.

Rewrite the following garbled medical report into readable English:

"{raw_report}"

Rules:
- Fix grammar.
- Do NOT add new clinical findings.
- Only reorganize and clean what is already present.
- Return only the refined report.
"""

    response = client.models.generate_content(
        model=model,   # or gemini-2.5-flash
        contents=prompt
    )

    text = response.text
    if text is None:
        # Surface an explicit error rather than an AttributeError on None.
        raise ValueError("Gemini returned no text for this report")

    return text.strip()


In [66]:
refined_reports = []
failed_indices = []

In [None]:
from tqdm import tqdm

# Resume after the reports already refined. Starting again at index 0 after an
# interruption would append duplicates and misalign refined_reports with
# generated_list.
start_idx = len(refined_reports)

for idx in tqdm(
    range(start_idx, len(generated_list)),
    desc="Refining reports",
    initial=start_idx,
    total=len(generated_list),
):
    gen = generated_list[idx]
    try:
        refined = refine_report_gemini(gen)
    except Exception as e:
        # Best effort: keep the unrefined text and remember which index failed.
        print(f"[Error] Refinement failed at index {idx}: {e}")
        refined = gen
        failed_indices.append(idx)

    refined_reports.append(refined)

In [51]:
import json
import os

# Persist the indices whose refinement failed, creating the log folder first.
log_dir = r"D:\fyp_manish_shyam\logs"
file_path = os.path.join(log_dir, "failed_indices.json")
os.makedirs(log_dir, exist_ok=True)

with open(file_path, "w") as f:
    f.write(json.dumps(failed_indices))

print("Failed indices saved to failed_indices.json")


Failed indices saved to failed_indices.json


In [None]:
# Export at most the first 50 samples. The row count is capped by the shortest
# list, so an unfinished refinement run still gives equal-length columns
# instead of a DataFrame construction error.
n_rows = min(
    50,
    len(img1_paths),
    len(img2_paths),
    len(ground_truth_list),
    len(refined_reports),
)

df_results = pd.DataFrame({
    "img1_path": img1_paths[:n_rows],
    "img2_path": img2_paths[:n_rows],
    "ground_truth": ground_truth_list[:n_rows],
    "generated_report": refined_reports[:n_rows]
})

save_path = r"D:\fyp_manish_shyam\results\final_output.csv"
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
df_results.to_csv(save_path, index=False)

print("CSV saved at:", save_path)
