In [33]:
import os
import re
import xml.etree.ElementTree as ET
import time
import glob
from PIL import Image
import numpy as np
import torch
import faiss # Using CPU version now
from tqdm import tqdm
import gc
import math
import random
from torch.utils.data import DataLoader as TorchDataLoader
import pandas as pd
import base64
import io
import json # For embedding JS data and parsing LLM output


In [34]:

# Ollama Client Library
try:
    import ollama
    OLLAMA_AVAILABLE = True
except ImportError:
    print("WARNING: Ollama library not found. Triple extraction/generation skipped. Install with: pip install ollama")
    ollama = None
    OLLAMA_AVAILABLE = False

# Hugging Face Libraries
from transformers import (
    AutoProcessor, AutoModel, AutoTokenizer,
    pipeline, BitsAndBytesConfig
)
# Sentence Transformers no longer needed for fine-tuning

# Scikit-learn for cosine similarity
from sklearn.metrics.pairwise import cosine_similarity


# Configuration
# ------------------------------------------
CONFIG = {
    # --- Data locations ---
    # The defaults are machine-specific Windows paths. Set the matching
    # environment variable to run on another machine without editing this cell.
    "scan_dir": os.environ.get("RADIOLOGY_SCAN_DIR", r"D:\NLP apps\Scans"),
    "report_dir": os.environ.get("RADIOLOGY_REPORT_DIR", r"D:\NLP apps\Reports"),
    "num_reports_to_process": 150,  # Reports embedded into the retrieval index; start small (e.g., 50-100) on a first run
    "max_reports_total": 3999,  # Upper bound applied to the number of reports loaded
    "output_dir": os.environ.get("RADIOLOGY_OUTPUT_DIR", r"D:\NLP apps\radiology_rag_kg_vis_output_v2"),  # New output dir

    # --- Evaluation ---
    "num_reports_to_evaluate": 100,  # Number of reports to run evaluation on
    "eval_graph_sim_floor": 0.6,  # Lower bound used when clamping the graph similarity score

    # --- Triple Extraction (using Ollama Llama3) ---
    "triple_extractor_model": "llama3",  # Ollama model for extraction
    "ollama_base_url": "http://localhost:11434",  # Default Ollama API endpoint

    # --- Embedding Model (CLIP for Retrieval & Evaluation) ---
    "embedding_model_name": "openai/clip-vit-base-patch32",  # Use CLIP

    # --- RAG Components ---
    # Generator Model (Using Ollama)
    "generator_type": "ollama",
    "ollama_generator_model": "llava-llama3",  # Separate model for generation
    "ollama_num_ctx": 4096,  # Context window passed to Ollama as the 'num_ctx' option
    # Retrieval
    "top_k_retrieval": 3,  # Number of reports retrieved per query image

    # --- Evaluation Metrics ---
    "eval_embedding_weight": 0.7,  # Weight for embedding similarity in combined score
    "eval_graph_similarity_weight": 0.3,  # Weight for graph similarity

    # --- Hardware ---
    "use_gpu": torch.cuda.is_available(),
    "embedding_device": "cuda" if torch.cuda.is_available() else "cpu",
    "faiss_use_gpu": False,  # Sticking to CPU Faiss
    "generator_device": "cpu",  # Ollama runs externally
    "triple_extractor_device": "cpu",  # Ollama runs externally
}
# ------------------------------------------


In [35]:

# --- Helper Functions ---
def cleanup_memory():
    """Run a garbage-collection pass, then empty the CUDA cache when CUDA is available."""
    gc.collect()
    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.empty_cache()

def extract_text_from_xml(xml_path):
    """Extract the report text from an XML file as one whitespace-normalized string.

    Text is first collected from the known report tags (in the order listed
    below). If none of them yields text, the text of every element in the
    document is used instead.

    Args:
        xml_path: Path of the XML report to read.

    Returns:
        The collected text with all whitespace runs collapsed to single spaces
        (an empty string when the document holds no text), or None when the
        file cannot be read or parsed.
    """
    preferred_tags = ('AbstractText', 'FINDINGS', 'IMPRESSION', 'REPORT_TEXT', 'paragraph')
    try:
        root = ET.parse(xml_path).getroot()
    except (ET.ParseError, OSError, ValueError, TypeError) as e:
        # Best-effort loader: report the reason instead of silently dropping the file.
        print(f"Warning: could not read report XML {xml_path}: {e}")
        return None

    texts = []
    for tag in preferred_tags:
        for elem in root.findall(f'.//{tag}'):
            # elem.text is only the text before the element's first child.
            if elem.text:
                cleaned_text = re.sub(r'\s+', ' ', elem.text.strip())
                if cleaned_text:
                    texts.append(cleaned_text)

    if not texts:
        # Fallback: no known tag carried text, so gather text from every element.
        all_text = ' '.join(node.text.strip() for node in root.iter() if node.text and node.text.strip())
        if all_text:
            texts.append(re.sub(r'\s+', ' ', all_text).strip())

    # The newline join is collapsed by the final substitution; sections end up space-separated.
    return re.sub(r'\s+', ' ', "\n".join(texts)).strip()

def find_images_for_report(report_id, scan_dir):
    """Locate the front and side image files that belong to a report.

    Lookup order:
      1. PNG files named ``CXR<id>_*_IM-*-4*`` (front) and ``CXR<id>_*_IM-*-3*`` (side).
      2. If neither matched: any ``CXR<id>_*`` PNG, first as front, second as side.
      3. If still nothing: any ``CXR<id>_*`` JPG/JPEG, first as front, second as side.

    Every candidate list is sorted before picking, so the choice is
    deterministic regardless of the order the filesystem lists files in.

    Args:
        report_id: Report identifier embedded in the image file names.
        scan_dir: Directory that holds the image files.

    Returns:
        Tuple ``(front_image_path, side_image_path)``; either entry may be None.
    """
    # Escape the directory so glob metacharacters in the path are taken literally.
    base_dir = glob.escape(scan_dir)
    prefix = f"CXR{report_id}_"

    def _sorted_matches(*name_patterns):
        found = []
        for name_pattern in name_patterns:
            found.extend(glob.glob(os.path.join(base_dir, name_pattern)))
        return sorted(found)

    front_images = _sorted_matches(f"{prefix}*_IM-*-4*.[Pp][Nn][Gg]")
    side_images = _sorted_matches(f"{prefix}*_IM-*-3*.[Pp][Nn][Gg]")
    front_image_path = front_images[0] if front_images else None
    side_image_path = side_images[0] if side_images else None
    if front_image_path or side_image_path:
        return front_image_path, side_image_path

    # Fallbacks: any PNG for the report first, JPG/JPEG only when no PNG exists.
    candidates = _sorted_matches(f"{prefix}*.[Pp][Nn][Gg]")
    if not candidates:
        candidates = _sorted_matches(f"{prefix}*.[Jj][Pp][Gg]", f"{prefix}*.[Jj][Pp][Ee][Gg]")
    if len(candidates) >= 1:
        front_image_path = candidates[0]
    if len(candidates) >= 2:
        side_image_path = candidates[1]
    return front_image_path, side_image_path

def encode_image_to_base64(image_path):
    """Return the image at `image_path` re-encoded as JPEG and base64-encoded.

    Images that are not already in RGB mode are converted to RGB first.
    Any failure is printed and reported to the caller as None.
    """
    try:
        with Image.open(image_path) as source_img:
            rgb_img = source_img if source_img.mode == 'RGB' else source_img.convert('RGB')
            jpeg_buffer = io.BytesIO()
            rgb_img.save(jpeg_buffer, format="JPEG")
            return base64.b64encode(jpeg_buffer.getvalue()).decode('utf-8')
    except Exception as e:
        print(f"Error encoding image {image_path}: {e}")
        return None

def calculate_graph_similarity(triples1, triples2):
    """Score two triple lists by the mean of entity-set and predicate-set Jaccard overlap.

    Entities are every subject and object; predicates are the middle element
    of each (subject, predicate, object) triple. Returns 0.0 when either
    list is empty.
    """
    if not triples1 or not triples2:
        return 0.0

    def _jaccard(left, right):
        union_size = len(left | right)
        if union_size > 0:
            return len(left & right) / union_size
        return 0

    def _entities(triples):
        subjects = {subj for subj, _pred, _obj in triples}
        objects = {obj for _subj, _pred, obj in triples}
        return subjects | objects

    def _predicates(triples):
        return {pred for _subj, pred, _obj in triples}

    entity_sim = _jaccard(_entities(triples1), _entities(triples2))
    predicate_sim = _jaccard(_predicates(triples1), _predicates(triples2))
    return (entity_sim + predicate_sim) / 2.0


In [36]:

# --- Core Classes ---
class DataLoader:
    """Loads reports and associated images."""

    def __init__(self, report_dir, scan_dir, num_to_load, max_total):
        """Store the source directories and cap the requested count at `max_total`."""
        self.report_dir = report_dir
        self.scan_dir = scan_dir
        self.max_total = max_total
        self.num_to_load = min(num_to_load, max_total)

    def load_data(self):
        """Load report texts, IDs, and image paths.

        Walks the XML reports in sorted order and keeps a report only when its
        text could be extracted and a front image was found. Stops once
        `num_to_load` reports have been kept.

        Returns:
            List of dicts with keys report_id, report_path, report_text,
            front_image_path and side_image_path.

        Raises:
            FileNotFoundError: if the report directory holds no XML files.
        """
        xml_glob = os.path.join(self.report_dir, "*.[Xx][Mm][Ll]")
        report_files = sorted(glob.glob(xml_glob))
        if not report_files:
            raise FileNotFoundError(f"No XML reports found in {self.report_dir}")
        print(f"Found {len(report_files)} reports. Processing up to {self.num_to_load}...")

        data = []
        skipped_count = 0
        for report_path in tqdm(report_files, desc="Loading Reports"):
            # Every kept report is appended to `data`, so its length is the processed count.
            if len(data) >= self.num_to_load:
                break

            report_filename = os.path.basename(report_path)
            report_id_match = re.match(r"(\d+)\.[Xx][Mm][Ll]", report_filename, re.IGNORECASE)
            if not report_id_match:
                # Files without a numeric name are ignored and not counted as skipped.
                continue
            report_id = report_id_match.group(1)

            report_text = extract_text_from_xml(report_path)
            if not report_text:
                skipped_count += 1
                continue

            front_img, side_img = find_images_for_report(report_id, self.scan_dir)
            if not front_img:
                # Require at least front image
                skipped_count += 1
                continue

            data.append({
                "report_id": report_id,
                "report_path": report_path,
                "report_text": report_text,
                "front_image_path": front_img,
                "side_image_path": side_img,
            })

        print(f"Successfully loaded {len(data)} reports with front images. Skipped {skipped_count}.")
        if len(data) < self.num_to_load:
            print(f"Warning: Loaded fewer reports ({len(data)}) than requested.")
        return data


In [37]:

class TripleExtractor:
    """Extracts (subject, predicate, object) triples from report text using an Ollama model.

    `self.client` stays None when the Ollama library is missing or the server
    cannot be reached; `extract_triples` then returns an empty list.
    """

    def __init__(self, config):
        """Read the model name and server URL from `config` and connect to Ollama."""
        self.config = config
        self.model_name = config["triple_extractor_model"]
        self.base_url = config.get("ollama_base_url", "http://localhost:11434")
        self.client = None
        self._initialize_client()

    @staticmethod
    def _model_names(listing):
        """Return the model identifiers from a `client.list()` response.

        Entries are read via 'model' first and 'name' second, so either
        field layout is accepted.
        """
        names = []
        for entry in listing['models']:
            name = entry.get('model') or entry.get('name')
            if name:
                names.append(name)
        return names

    def _initialize_client(self):
        """Initializes Ollama client.

        Only a failure to create the client or reach the server disables the
        extractor. The model-availability check is advisory: if it fails, a
        warning is printed and the working client is kept.
        """
        if not OLLAMA_AVAILABLE:
            print("Ollama library not available for Triple Extractor.")
            return
        try:
            print(f"Initializing Ollama client for Triple Extraction (Model: '{self.model_name}') at {self.base_url}...")
            client = ollama.Client(host=self.base_url)
            listing = client.list()  # doubles as the connectivity check
        except Exception as e:
            print(f"Error initializing Ollama client for triples: {e}")
            self.client = None
            return
        self.client = client
        print("Ollama client for Triple Extraction initialized.")
        try:
            available_models = self._model_names(listing)
            if not any(m.startswith(self.model_name) for m in available_models):
                print(f"Warning: Triple extractor model '{self.model_name}' not found in Ollama. Run `ollama pull {self.model_name}`.")
        except Exception as e:
            print(f"Warning: could not verify that model '{self.model_name}' is available in Ollama: {e}")

    def _create_extraction_prompt(self, text):
        """ Creates the prompt for asking Llama3 to extract triples. """
        prompt = f"""
Analyze the following radiology report text. Extract factual relationships relevant ONLY to clinical findings, anatomy, and explicitly mentioned medical concepts.
Present the relationships as a JSON list of lists, where each inner list is a triple: [Subject, Predicate, Object].

Rules:
- Subjects and Objects MUST be specific clinical entities found in the text (e.g., 'lungs', 'pneumothorax', 'cardiac silhouette', 'right upper lobe', 'opacity', 'catheter'). Normalize terms (e.g., lowercase).
- Predicates SHOULD reflect the action or state described in the text, using verbs or short descriptive phrases where possible (e.g., 'ARE_CLEAR', 'SHOWS_ENLARGEMENT', 'CONTAINS_GRANULOMA', 'SUGGESTS_ATELECTASIS', 'HAS_NO_EFFUSION'). Use uppercase snake_case. Prefer predicates derived from the text's verbs.
- Extract ONLY relationships explicitly stated or strongly implied in the text. Do not infer relationships not present.
- Focus ONLY on medical facts relevant to the patient's condition as described. Ignore dates, comparisons to previous studies unless they describe a current finding, and general descriptive text.
- If a finding is explicitly negated (e.g., "no pneumothorax"), use a predicate reflecting negation (e.g., ['chest', 'HAS_NO_PNEUMOTHORAX', 'pneumothorax'] or ['pneumothorax', 'IS_ABSENT', '']).
- Output ONLY the JSON list of lists, nothing else. If no relevant triples are found, output an empty list [].

Radiology Report Text:
\"\"\"
{text}
\"\"\"

JSON Output:
"""
        return prompt

    @staticmethod
    def _parse_triples(raw_output):
        """Parse the model's raw reply into a list of normalized triples.

        The span from the first '[' to the last ']' is decoded as JSON.
        Subjects and objects are lowercased; predicates are uppercased with
        spaces turned into underscores. Parse problems are printed and
        whatever was collected so far is returned.
        """
        triples = []
        try:
            json_start = raw_output.find('[')
            json_end = raw_output.rfind(']') + 1
            if json_start != -1 and json_end > json_start:
                parsed_output = json.loads(raw_output[json_start:json_end])
                if isinstance(parsed_output, list):
                    for item in parsed_output:
                        if not (isinstance(item, list) and len(item) == 3):
                            continue
                        subj = str(item[0]).lower().strip()
                        pred = str(item[1]).upper().strip().replace(" ", "_")
                        obj = str(item[2]).lower().strip()
                        # Only multi-word (underscore-containing) predicates with non-empty
                        # subject and object are kept.
                        # NOTE(review): the prompt's ['pneumothorax', 'IS_ABSENT', ''] example
                        # has an empty object, so a triple of that form is dropped here.
                        if subj and pred and obj and pred.isupper() and '_' in pred:
                            triples.append((subj, pred, obj))
        except json.JSONDecodeError:
            print(f"Warning: Failed to parse JSON from LLM output: {raw_output}")
        except Exception as parse_e:
            print(f"Error parsing triples from LLM output: {parse_e}\nOutput: {raw_output}")
        return triples

    def extract_triples(self, text):
        """Extract triples from `text` using the Ollama model.

        Args:
            text: Report text to analyze.

        Returns:
            List of (subject, predicate, object) tuples. Empty when `text` is
            empty, the client is unavailable, the Ollama call fails, or the
            reply cannot be parsed.
        """
        if not text or self.client is None:
            return []
        prompt = self._create_extraction_prompt(text)
        try:
            response = self.client.generate(
                model=self.model_name,
                prompt=prompt,
                stream=False,
                # Read from the instance config (not the module-level CONFIG) so the
                # extractor honors whatever configuration it was constructed with.
                options={'temperature': 0.1, 'num_ctx': self.config.get('ollama_num_ctx', 2048)},
            )
            raw_output = response.get('response', '').strip()
        except Exception as e:
            print(f"Error during Ollama call for triple extraction: {e}")
            return []
        return self._parse_triples(raw_output)

    def unload_pipeline(self):
        """No-op: this class holds no local model to unload."""
        pass


In [38]:

class EmbeddingManager:
    """Handles CLIP embeddings for retrieval and evaluation.

    Owns the CLIP model/processor/tokenizer (loaded lazily), the Faiss index
    over report-text embeddings, and `report_id_map`, whose position i holds
    the report ID of index row i.
    """

    def __init__(self, config):
        """Store `config`, pick the embedding device, and start with nothing loaded."""
        self.config = config
        self.device = config.get("embedding_device", "cpu")
        print(f"EmbeddingManager (CLIP) using device: {self.device}")
        self.model = None
        self.processor = None
        self.tokenizer = None
        self.faiss_index = None
        self.report_id_map = []
        self.loaded_model_path = None

    def _load_clip_model(self):
        """Loads CLIP model, processor, tokenizer.

        Does nothing when the configured model is already loaded. On failure
        all components are reset to None and the exception is re-raised.
        """
        model_name_or_path = self.config['embedding_model_name']
        if self.model is not None and self.loaded_model_path == model_name_or_path:
            return
        print(f"Loading CLIP model/processor/tokenizer: {model_name_or_path}...")
        try:
            self.processor = AutoProcessor.from_pretrained(model_name_or_path)
            self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
            self.model = AutoModel.from_pretrained(model_name_or_path).to(self.device)
            self.model.eval()
            self.loaded_model_path = model_name_or_path
            print("CLIP components loaded.")
        except Exception as e:
            print(f"Error loading CLIP {model_name_or_path}: {e}")
            self.model = None
            self.processor = None
            self.tokenizer = None
            self.loaded_model_path = None
            raise

    def create_report_text_embeddings(self, reports_data):
        """Generates CLIP embeddings for full report texts (for RAG index).

        Also sets `report_id_map` to the report IDs in `reports_data` order.

        Args:
            reports_data: Sequence of dicts with "report_text" and "report_id".

        Returns:
            float32 numpy array of L2-normalized embeddings, one row per
            report, or None when embedding fails.

        Raises:
            RuntimeError: if the CLIP model or tokenizer is not available.
        """
        self._load_clip_model()
        if self.model is None or self.tokenizer is None:
            raise RuntimeError("CLIP model/tokenizer failed.")
        print(f"Generating CLIP report text embeddings using: {self.loaded_model_path}")
        report_texts = [item["report_text"] for item in reports_data]
        self.report_id_map = [item["report_id"] for item in reports_data]
        batch_size = 128
        all_embeddings_list = []
        try:
            self.model.eval()
            for i in tqdm(range(0, len(report_texts), batch_size), desc="Embedding Reports (CLIP)"):
                batch_texts = report_texts[i:i + batch_size]
                # truncation=True cuts texts that exceed the tokenizer's maximum length.
                inputs = self.tokenizer(batch_texts, padding=True, truncation=True, return_tensors="pt").to(self.device)
                with torch.no_grad():
                    text_features = self.model.get_text_features(**inputs)
                    text_features = torch.nn.functional.normalize(text_features, p=2, dim=1)
                    all_embeddings_list.append(text_features.cpu())
            if not all_embeddings_list:
                raise ValueError("No embeddings generated.")
            embeddings_tensor = torch.cat(all_embeddings_list, dim=0)
            print(f"Generated {embeddings_tensor.shape[0]} CLIP report text embeddings.")
            return embeddings_tensor.numpy().astype('float32')
        except Exception as e:
            print(f"Error during report text embedding: {e}")
            cleanup_memory()
            return None

    def create_single_text_embedding(self, text: str):
        """Generates CLIP embedding for a single piece of text.

        Returns:
            float32 numpy array with one L2-normalized row, or None when
            `text` is empty, the model is unavailable, or embedding fails.
        """
        if not text:
            return None
        self._load_clip_model()
        if self.model is None or self.tokenizer is None:
            print("Error: CLIP model/tokenizer not loaded.")
            return None
        try:
            self.model.eval()
            inputs = self.tokenizer([text], padding=True, truncation=True, return_tensors="pt").to(self.device)
            with torch.no_grad():
                text_features = self.model.get_text_features(**inputs)
                text_features = torch.nn.functional.normalize(text_features, p=2, dim=1)
            return text_features.cpu().numpy().astype('float32')
        except Exception as e:
            print(f"Error generating single text embedding: {e}")
            return None

    def embed_query_image(self, image_path):
        """Generates query image embedding using CLIP.

        Returns:
            float32 numpy array with one L2-normalized row, or None when the
            model is unavailable, the path is invalid, or embedding fails.
        """
        self._load_clip_model()
        if self.model is None or self.processor is None:
            print("Error: CLIP model/processor unavailable.")
            return None
        if not image_path or not os.path.exists(image_path):
            print(f"Error: Invalid image path: {image_path}")
            return None
        try:
            # Context manager closes the underlying file; convert() returns an
            # independent in-memory copy that remains usable afterwards.
            with Image.open(image_path) as opened:
                image = opened.convert("RGB")
            with torch.no_grad():
                inputs = self.processor(images=image, return_tensors="pt").to(self.device)
                image_features = self.model.get_image_features(**inputs)
                image_features = torch.nn.functional.normalize(image_features, p=2, dim=1)
            return image_features.cpu().numpy().astype('float32')
        except Exception as e:
            print(f"Error embedding query image {image_path}: {e}")
            return None

    def build_faiss_index(self, embeddings):
        """Builds Faiss index.

        Builds a CPU inner-product index over `embeddings`. The rows are
        L2-normalized before being added; when `embeddings` is already a
        contiguous float32 array this normalization happens in place.

        Returns:
            The new index (also stored on `self.faiss_index`), or None when
            `embeddings` is None or empty.
        """
        if embeddings is None or embeddings.shape[0] == 0:
            print("Error: No embeddings for Faiss.")
            return None
        # Faiss needs C-contiguous float32 input; this is a no-op for arrays already in that form.
        embeddings = np.ascontiguousarray(embeddings, dtype='float32')
        dimension = embeddings.shape[1]
        num_embeddings = embeddings.shape[0]
        print(f"Building Faiss index for {num_embeddings} embeddings (Dim: {dimension})...")
        self.faiss_index = faiss.IndexFlatIP(dimension)
        print("Using CPU for Faiss index.")
        self.config["faiss_use_gpu"] = False
        faiss.normalize_L2(embeddings)
        self.faiss_index.add(embeddings)
        print(f"Faiss index built. Size: {self.faiss_index.ntotal}")
        return self.faiss_index

    def save_index(self, index_path, map_path):
        """Saves Faiss index and map."""
        if self.faiss_index is not None and self.report_id_map:
            print(f"Saving Faiss index ({self.faiss_index.ntotal}) to {index_path}")
            faiss.write_index(self.faiss_index, index_path)
            print(f"Saving report ID map ({len(self.report_id_map)}) to {map_path}")
            np.save(map_path, np.array(self.report_id_map, dtype=object))
        else:
            print("Index or map empty, nothing to save.")

    def load_index(self, index_path, map_path):
        """Loads Faiss index and map.

        Returns:
            True when both files were loaded and their sizes agree; False when
            a file is missing or the index and map sizes differ (in which case
            both are reset).
        """
        if not (os.path.exists(index_path) and os.path.exists(map_path)):
            print(f"Index/map file not found.")
            return False
        print(f"Loading Faiss index from {index_path}")
        self.faiss_index = faiss.read_index(index_path)
        print(f"Loading report ID map from {map_path}")
        # SECURITY: allow_pickle=True unpickles the file, which can execute arbitrary
        # code. Only load map files produced by save_index() from a trusted location.
        self.report_id_map = np.load(map_path, allow_pickle=True).tolist()
        print(f"Loaded index ({self.faiss_index.ntotal}) and map ({len(self.report_id_map)}).")
        # Hard-coded expected embedding width; a mismatch only produces a warning.
        expected_dim = 512
        if self.faiss_index.d != expected_dim:
            print(f"WARNING: Index dim ({self.faiss_index.d}) != expected ({expected_dim}).")
        if self.faiss_index.ntotal != len(self.report_id_map):
            print(f"FATAL: Index size != map size.")
            self.faiss_index = None
            self.report_id_map = []
            return False
        print("Keeping loaded Faiss index on CPU.")
        self.config["faiss_use_gpu"] = False
        return True

    def unload_model(self):
        """Unloads loaded models"""
        if self.model is not None:
            print("Unloading CLIP AutoModel components...")
            self.model = None
            self.processor = None
            self.tokenizer = None
        self.loaded_model_path = None
        cleanup_memory()


In [39]:

class Retriever:
    """Retrieves relevant reports using CLIP embeddings."""

    def __init__(self, embedding_manager, report_lookup_dict):
        """Keep the embedding manager and the report lookup dict (used directly, not copied).

        Prints a warning when the manager already has an ID map whose IDs
        differ from the lookup keys.
        """
        self.embed_manager = embedding_manager
        self.report_lookup = report_lookup_dict
        indexed_ids = self.embed_manager.report_id_map
        lookup_ids = set(self.report_lookup.keys())
        if len(indexed_ids) > 0 and lookup_ids != set(indexed_ids):
            print(f"Retriever Warning: Mismatch between report_lookup keys ({len(self.report_lookup)}) and embed_manager map ({len(self.embed_manager.report_id_map)}). Check data consistency.")

    def retrieve(self, query_image_embedding, k):
        """Find the top-k reports for an image embedding (image-to-text search).

        The query embedding is L2-normalized in place before searching.

        Returns:
            Tuple `(reports, ids)`: `reports` are copies of the lookup entries
            with a 'retrieval_score' added, sorted by descending score; `ids`
            are the matching report IDs in the order the index returned them.
            Both are empty lists when the search cannot be run or finds nothing.
        """
        index = self.embed_manager.faiss_index
        if index is None:
            print("Error: Faiss index not ready.")
            return [], []
        if query_image_embedding is None:
            print("Error: Invalid query embedding.")
            return [], []
        if index.ntotal == 0:
            print("Error: Faiss index empty.")
            return [], []

        k_actual = min(k, self.embed_manager.faiss_index.ntotal)
        faiss.normalize_L2(query_image_embedding)
        try:
            distances, indices = self.embed_manager.faiss_index.search(query_image_embedding, k_actual)
        except Exception as e:
            print(f"Error during Faiss search: {e}")
            return [], []

        if len(indices) == 0 or len(distances) == 0 or len(indices[0]) == 0:
            print("Warning: Faiss search returned empty results.")
            return [], []
        if indices[0][0] == -1:
            print("Warning: Faiss search returned no valid neighbors (-1 index).")
            return [], []

        retrieved_reports_data = []
        retrieved_ids = []
        neighbor_row = indices[0]
        for position in range(len(neighbor_row)):
            idx = neighbor_row[position]
            # Ignore rows that fall outside the ID map (including -1 placeholders).
            if not (0 <= idx < len(self.embed_manager.report_id_map)):
                continue
            report_id = self.embed_manager.report_id_map[idx]
            if report_id not in self.report_lookup:
                continue
            hit = self.report_lookup[report_id].copy()
            hit['retrieval_score'] = float(distances[0][position])
            retrieved_reports_data.append(hit)
            retrieved_ids.append(report_id)

        retrieved_reports_data.sort(key=lambda entry: entry['retrieval_score'], reverse=True)
        return retrieved_reports_data, retrieved_ids


In [40]:

class Generator:
    """Generates radiology reports using Ollama.

    `self.client` stays None when the Ollama library is missing or the server
    cannot be reached; `generate` then returns an "Error: ..." string.
    """

    def __init__(self, config):
        """Read the generator model name and server URL from `config` and connect to Ollama."""
        self.config = config
        self.model_name = config["ollama_generator_model"]  # Use specific key
        self.base_url = config.get("ollama_base_url", "http://localhost:11434")
        self.client = None
        self._initialize_client()

    @staticmethod
    def _model_names(listing):
        """Return the model identifiers from a `client.list()` response.

        Entries are read via 'model' first and 'name' second, so either
        field layout is accepted.
        """
        names = []
        for entry in listing['models']:
            name = entry.get('model') or entry.get('name')
            if name:
                names.append(name)
        return names

    def _initialize_client(self):
        """Initializes Ollama client.

        Only a failure to create the client or reach the server disables the
        generator. The model-availability check is advisory: if it fails, a
        warning is printed and the working client is kept.
        """
        if not OLLAMA_AVAILABLE:
            print("Ollama library not available for Generator.")
            return
        try:
            client = ollama.Client(host=self.base_url)
            listing = client.list()  # doubles as the connectivity check
        except Exception as e:
            print(f"Error initializing Ollama client for generation: {e}")
            self.client = None
            return
        self.client = client
        try:
            available_models = self._model_names(listing)
            if not any(m.startswith(self.model_name) for m in available_models):
                print(f"Warning: Generator model '{self.model_name}' not found in Ollama.")
        except Exception as e:
            print(f"Warning: could not verify that model '{self.model_name}' is available in Ollama: {e}")

    def format_prompt(self, image_path, retrieved_reports, retrieved_triples_map):
        """Creates text prompt for Ollama Generator.

        Args:
            image_path: Unused here; the image is attached separately in `generate`.
            retrieved_reports: Dicts with 'report_id', 'report_text' and
                optionally 'retrieval_score'. The caller's list is not modified.
            retrieved_triples_map: Maps report_id to a list of (s, p, o) triples.

        Returns:
            The prompt: optional context (120-character report snippets and up
            to five triples per report, best-scoring report first) followed by
            the fixed generation instructions.
        """
        context_str = ""
        if retrieved_reports:
            # sorted() leaves the caller's list untouched (the original sorted it in place).
            ranked_reports = sorted(retrieved_reports, key=lambda x: x.get('retrieval_score', -1), reverse=True)
            context_str += "Context from similar reports (higher score is more similar):\n"
            texts = [f"- {r['report_text'][:120]}..." for r in ranked_reports]
            context_str += "\n".join(texts) + "\n"
            # Add triples context
            context_str += "Extracted facts from similar reports:\n"
            for i, r in enumerate(ranked_reports):
                triples = retrieved_triples_map.get(r['report_id'], [])
                if triples:
                    # Show top 5
                    context_str += f"  Report {i+1} Facts: {'; '.join([f'({s}-{p}->{o})' for s, p, o in triples[:5]])}\n"

        final_prompt = (
            f"{context_str}\nGiven the provided chest X-ray image, and using the context above (report snippets and extracted facts) internally if helpful, "
            "generate a radiology report. DO NOT mention the context reports, scores, or facts explicitly in your response. "
            "The report should contain ONLY a 'Findings:' section and an 'Impression:' section. "
            "Start the report directly with 'Findings:'."
        )
        return final_prompt

    def generate(self, image_path, retrieved_reports, retrieved_triples_map):
        """Generates report text using Ollama API.

        Returns:
            The generated report text, or a string starting with "Error" when
            the client is unavailable, the image is invalid or cannot be
            encoded, or the Ollama call fails. Ordinary exceptions from the
            call are turned into such strings; KeyboardInterrupt and
            SystemExit propagate to the caller.
        """
        if self.client is None:
            print("Error: Ollama generator client not initialized.")
            return "Error: Ollama client not available."
        if not image_path or not os.path.exists(image_path):
            print(f"Error: Invalid image path: {image_path}")
            return "Error: Invalid image path."
        base64_image = encode_image_to_base64(image_path)
        if base64_image is None:
            return "Error: Failed to encode image."
        prompt_text = self.format_prompt(image_path, retrieved_reports, retrieved_triples_map)
        # No `finally: return` here: returning from a finally block would also
        # swallow KeyboardInterrupt/SystemExit raised during a long generation call.
        try:
            ollama_options = {'num_ctx': self.config.get('ollama_num_ctx', 2048)}
            response = self.client.chat(
                model=self.model_name,
                messages=[{'role': 'user', 'content': prompt_text, 'images': [base64_image]}],
                options=ollama_options,
            )
            if response and 'message' in response and 'content' in response['message']:
                return response['message']['content'].strip()
            print(f"Error: Unexpected response from Ollama: {response}")
            return "Error: Unexpected Ollama response."
        except Exception as e:
            print(f"Error during Ollama API call: {e}")
            if "connection refused" in str(e).lower():
                print(">>> Is the Ollama server running? <<<")
            return f"Error during Ollama API call: {e}"

# --- Visualization Function (REMOVED) ---
# def create_kg_visualization_html(...)


In [41]:

# --- Main Execution ---
if __name__ == "__main__":
    print("Starting Radiology RAG System Batch Evaluation...")
    os.makedirs(CONFIG["output_dir"], exist_ok=True)
    cleanup_memory()

    # --- Check Dependencies ---
    if not OLLAMA_AVAILABLE: print("Ollama not available. Cannot proceed.") ; exit()

    # 1. Load Data
    print("\n--- Stage 1: Loading Data ---")
    num_to_load = max(CONFIG["num_reports_to_process"], CONFIG["num_reports_to_evaluate"])
    data_loader = DataLoader(CONFIG["report_dir"], CONFIG["scan_dir"], num_to_load, CONFIG["max_reports_total"])
    all_data = data_loader.load_data();
    if not all_data: print("No data loaded. Exiting."); exit()
    ground_truth_lookup = {item['report_id']: item['report_text'] for item in all_data}

    # 2. Initialize Managers (Load models ONCE before the loop)
    print("\n--- Stage 2: Initializing Managers ---")
    embed_manager = EmbeddingManager(CONFIG)
    triple_extractor = TripleExtractor(CONFIG)
    generator = None
    if OLLAMA_AVAILABLE:
        generator = Generator(CONFIG)
        if generator.client is None: print("WARNING: Ollama Generator client failed.")
    if triple_extractor.client is None: print("WARNING: Ollama Triple Extractor client failed.")

    # Load embedding model components needed for the loop
    embed_manager._load_clip_model()
    if embed_manager.model is None: print("FATAL: Failed to load embedding model. Exiting."); exit()

    # 3. Fine-tuning is REMOVED
    print("\n--- Stage 3a: Skipping Fine-Tuning ---")

    # 4. Create/Load RAG Index using BASE CLIP model
    print("\n--- Stage 3b: Creating/Loading RAG Index (Base CLIP) ---")
    index_file = os.path.join(CONFIG["output_dir"], f"radiology_clip_index_{CONFIG['num_reports_to_process']}.faiss")
    map_file = os.path.join(CONFIG["output_dir"], f"radiology_clip_map_{CONFIG['num_reports_to_process']}.npy")
    model_path_for_indexing = CONFIG['embedding_model_name']
    print(f"Using embedding model path for index: {model_path_for_indexing}")
    if not embed_manager.load_index(index_file, map_file):
        print("Building RAG index from scratch...")
        data_for_index = all_data[:CONFIG["num_reports_to_process"]]
        report_text_embeddings_np = embed_manager.create_report_text_embeddings(data_for_index)
        if report_text_embeddings_np is not None and report_text_embeddings_np.shape[0] > 0:
            embed_manager.build_faiss_index(report_text_embeddings_np)
            embed_manager.save_index(index_file, map_file)
        else: print("Failed to create report text embeddings. Cannot build RAG index. Exiting."); exit()
    else: print("Loaded existing RAG index and map.")

    # 5. Initialize Retriever
    print("\n--- Stage 4: Initializing Retriever ---")
    retriever_data_lookup = {item['report_id']: item for item in all_data[:CONFIG["num_reports_to_process"]]}
    retriever = Retriever(embed_manager, retriever_data_lookup)


    # --- Batch Evaluation Loop ---
    print(f"\n--- Running Evaluation on {CONFIG['num_reports_to_evaluate']} Reports ---")
    evaluation_results_list = []
    num_to_evaluate = min(CONFIG['num_reports_to_evaluate'], len(all_data))
    print(f"Will evaluate on {num_to_evaluate} reports.")

    # --- Initialize models needed inside the loop ONCE ---
    if generator is None or generator.client is None: print("Generator not available. Skipping generation loop.")
    if triple_extractor is None or triple_extractor.client is None: print("Triple extractor not available. Skipping triple extraction.")

    # Settings that do not depend on the individual report are resolved once, before the
    # slow LLM-bound loop, so a missing CONFIG key fails immediately instead of after the
    # first generation call.
    top_k = CONFIG["top_k_retrieval"]
    embedding_weight = CONFIG["eval_embedding_weight"]
    graph_weight = CONFIG["eval_graph_similarity_weight"]
    graph_sim_floor = CONFIG.get("eval_graph_sim_floor", 0.35)  # 0.35 only applies when the key is absent
    graph_sim_ceil = 0.7
    # NOTE(review): the clamp replaces any raw graph similarity below the floor with the
    # floor itself before it enters the combined score. The recorded run reports a raw
    # average of 0.0880 against a clamp of [0.60, 0.70], so the graph term is close to a
    # constant there. The raw value is still stored per report; confirm the clamp is intended.

    for i in tqdm(range(num_to_evaluate), desc="Evaluating Reports"):
        query_item = all_data[i]
        query_report_id = query_item["report_id"]
        query_image_path = query_item.get("front_image_path")
        ground_truth_text = ground_truth_lookup.get(query_report_id, "")

        # Reports without a readable image or without reference text cannot be scored.
        # They are skipped silently and leave no row in evaluation_results_list.
        if not query_image_path or not os.path.exists(query_image_path):
            continue
        if not ground_truth_text:
            continue

        # a. Embed query image
        query_embedding = embed_manager.embed_query_image(query_image_path)
        if query_embedding is None:
            continue

        # b. Retrieve relevant reports
        # NOTE(review): the recorded run loads an index of 150 entries next to 150 loaded
        # reports, so the query's own report may be among the retrieved context. If so, the
        # generator sees the reference text it is scored against. Confirm and exclude it.
        retrieved_reports_data, retrieved_ids = retriever.retrieve(query_embedding, k=top_k)

        # c. Extract triples for retrieved reports
        retrieved_triples_map = {}
        if retrieved_ids and triple_extractor and triple_extractor.client:
            for report_data in retrieved_reports_data:
                triples = triple_extractor.extract_triples(report_data["report_text"])
                if triples:
                    retrieved_triples_map[report_data["report_id"]] = triples

        # d. Generate report
        generated_report_text = "Error: Generator not available."
        if generator and generator.client:
            generated_report_text = generator.generate(query_image_path, retrieved_reports_data, retrieved_triples_map)

        # A non-string result is treated like an "Error:" result instead of raising on
        # .startswith(); the report is then recorded with all scores left as None.
        generation_ok = isinstance(generated_report_text, str) and not generated_report_text.startswith("Error:")

        current_eval = {"report_id": query_report_id, "embedding_similarity": None, "graph_similarity": None, "combined_similarity": None}

        if generation_ok:
            # e. Extract triples from generated and ground truth.
            # Done only after a successful generation: the ground-truth triples are used
            # solely for the graph similarity below, so extracting them for a failed
            # generation would spend an LLM call on a result nobody reads.
            generated_triples = []
            ground_truth_triples = []
            if triple_extractor and triple_extractor.client:
                generated_triples = triple_extractor.extract_triples(generated_report_text)
            if triple_extractor and triple_extractor.client:
                ground_truth_triples = triple_extractor.extract_triples(ground_truth_text)

            # f. Perform Evaluation
            # Embedding similarity: cosine between the two text embeddings. It stays None
            # when either embedding could not be produced.
            gen_embedding = embed_manager.create_single_text_embedding(generated_report_text)
            gt_embedding = embed_manager.create_single_text_embedding(ground_truth_text)
            if gen_embedding is not None and gt_embedding is not None:
                current_eval["embedding_similarity"] = float(cosine_similarity(gen_embedding, gt_embedding)[0][0])

            # Graph similarity: the unclamped value is what gets stored and later averaged.
            graph_sim = calculate_graph_similarity(generated_triples, ground_truth_triples)
            current_eval["graph_similarity"] = graph_sim

            # Combined metric: weighted sum of the embedding similarity and the clamped
            # graph similarity. It stays None unless both inputs are available.
            # NOTE(review): whether calculate_graph_similarity can return None is not
            # visible here; the guard only prevents min()/max() from raising if it does.
            embedding_sim = current_eval["embedding_similarity"]
            if embedding_sim is not None and graph_sim is not None:
                adjusted_graph_sim = max(graph_sim_floor, min(graph_sim, graph_sim_ceil))
                current_eval["combined_similarity"] = (embedding_weight * embedding_sim +
                                                       graph_weight * adjusted_graph_sim)

            combined_sim = current_eval["combined_similarity"]
            combined_label = f"{combined_sim:.4f}" if combined_sim is not None else "N/A"
            print(f"  Report {query_report_id}: Combined Similarity Score: {combined_label}")

        evaluation_results_list.append(current_eval)

    # --- Calculate Average Scores ---
    print("\n--- Calculating Average Evaluation Scores ---")

    # Collect, per metric, only the values that were actually computed (None means the
    # metric was not produced for that report). The graph list holds the ORIGINAL,
    # unclamped similarities; the combined list holds scores computed with the clamped one.
    valid_embed_scores = []
    valid_graph_scores = []
    valid_combined_scores = []
    for result in evaluation_results_list:
        if result['embedding_similarity'] is not None:
            valid_embed_scores.append(result['embedding_similarity'])
        if result['graph_similarity'] is not None:
            valid_graph_scores.append(result['graph_similarity'])
        if result['combined_similarity'] is not None:
            valid_combined_scores.append(result['combined_similarity'])

    # A metric with no valid values is reported as 0.0 rather than left undefined.
    avg_embed_sim = 0.0
    if valid_embed_scores:
        avg_embed_sim = np.mean(valid_embed_scores)
    avg_graph_sim = 0.0
    if valid_graph_scores:
        avg_graph_sim = np.mean(valid_graph_scores)
    avg_combined_sim = 0.0
    if valid_combined_scores:
        avg_combined_sim = np.mean(valid_combined_scores)

    print(f"Average Embedding Similarity: {avg_embed_sim:.4f} (over {len(valid_embed_scores)} reports)")
    print(f"Average Graph Similarity (Original): {avg_graph_sim:.4f} (over {len(valid_graph_scores)} reports)")
    print(f"Average Combined Similarity: {avg_combined_sim:.4f} (over {len(valid_combined_scores)} reports - calculated using graph sim clamped to [{CONFIG.get('eval_graph_sim_floor', 0.35):.2f}, 0.70])")

    # --- Save Average Results ---
    # Writes a plain-text summary (averages, weights, clamp range and one row per
    # evaluated report) into the configured output directory. Saving is best-effort:
    # any failure is reported on stdout and the run continues to the cleanup step.
    eval_summary_filename = os.path.join(CONFIG["output_dir"], f"evaluation_summary_{num_to_evaluate}_reports.txt")
    try:
        # Render everything in memory first, so a formatting problem cannot leave a
        # truncated summary file behind.
        summary_lines = [
            f"Evaluation Summary for {num_to_evaluate} Reports\n",
            "="*40 + "\n",
            f"Average Embedding Similarity (Cosine): {avg_embed_sim:.4f} (from {len(valid_embed_scores)} valid reports)\n",
            f"Average Graph Similarity (Jaccard - Original): {avg_graph_sim:.4f} (from {len(valid_graph_scores)} valid reports)\n",
            f"Average Combined Similarity: {avg_combined_sim:.4f} (from {len(valid_combined_scores)} valid reports)\n",
            "\nWeights Used for Combined Score:\n",
            f"  Embedding Weight: {CONFIG['eval_embedding_weight']}\n",
            f"  Graph Similarity Weight: {CONFIG['eval_graph_similarity_weight']}\n",
            f"Graph Similarity Clamped to [{CONFIG.get('eval_graph_sim_floor', 0.35):.2f}, 0.70] for Combined Score Calculation\n", # Clarify clamping
            "\nIndividual Report Scores (Original Graph Sim):\n",
            "ReportID | EmbedSim | GraphSim | CombinedSim (Clamped)\n",
            "---------|----------|----------|----------------------\n",
        ]
        for res in evaluation_results_list:
            # Scores that were not computed are shown as N/A; the graph column is the
            # original (unclamped) similarity.
            es = f"{res['embedding_similarity']:.4f}" if res['embedding_similarity'] is not None else "N/A"
            gs = f"{res['graph_similarity']:.4f}" if res['graph_similarity'] is not None else "N/A"
            cs = f"{res['combined_similarity']:.4f}" if res['combined_similarity'] is not None else "N/A"
            summary_lines.append(f"{res['report_id']:<8} | {es:<8} | {gs:<8} | {cs:<21}\n")

        # Make sure the target directory exists. An empty dirname means the current
        # working directory, which needs no creation.
        summary_dir = os.path.dirname(eval_summary_filename)
        if summary_dir:
            os.makedirs(summary_dir, exist_ok=True)

        # Explicit encoding so the file does not depend on the platform's locale default.
        with open(eval_summary_filename, "w", encoding="utf-8") as f:
            f.writelines(summary_lines)

        print(f"Saved evaluation summary to: {eval_summary_filename}")
    except Exception as e:
        print(f"Error saving evaluation summary: {e}")


    # Release model resources once all evaluation output has been produced.
    print("Unloading models...")
    # The triple extractor is released only when the name is bound in this scope
    # and the object itself is truthy.
    if 'triple_extractor' in locals():
        if triple_extractor:
            triple_extractor.unload_pipeline()
    embed_manager.unload_model()
    cleanup_memory()

    print("\nRadiology RAG Batch Evaluation Finished.")


Starting Radiology RAG System Batch Evaluation...

--- Stage 1: Loading Data ---
Found 393 reports. Processing up to 150...


Loading Reports:  39%|███▉      | 154/393 [00:00<00:01, 155.86it/s]


Successfully loaded 150 reports with front images. Skipped 4.

--- Stage 2: Initializing Managers ---
EmbeddingManager (CLIP) using device: cuda
Initializing Ollama client for Triple Extraction (Model: 'llama3') at http://localhost:11434...
Ollama client for Triple Extraction initialized.
Loading CLIP model/processor/tokenizer: openai/clip-vit-base-patch32...
CLIP components loaded.

--- Stage 3a: Skipping Fine-Tuning ---

--- Stage 3b: Creating/Loading RAG Index (Base CLIP) ---
Using embedding model path for index: openai/clip-vit-base-patch32
Loading Faiss index from D:\NLP apps\radiology_rag_kg_vis_output_v2\radiology_clip_index_150.faiss
Loading report ID map from D:\NLP apps\radiology_rag_kg_vis_output_v2\radiology_clip_map_150.npy
Loaded index (150) and map (150).
Keeping loaded Faiss index on CPU.
Loaded existing RAG index and map.

--- Stage 4: Initializing Retriever ---

--- Running Evaluation on 100 Reports ---
Will evaluate on 100 reports.


Evaluating Reports:   1%|          | 1/100 [00:34<57:19, 34.74s/it]

  Report 1: Combined Similarity Score: 0.6730


Evaluating Reports:   2%|▏         | 2/100 [01:01<49:06, 30.07s/it]

  Report 10: Combined Similarity Score: 0.7333


Evaluating Reports:   3%|▎         | 3/100 [01:28<46:06, 28.52s/it]

  Report 100: Combined Similarity Score: 0.5771


Evaluating Reports:   4%|▍         | 4/100 [01:57<45:58, 28.73s/it]

  Report 101: Combined Similarity Score: 0.7148


Evaluating Reports:   5%|▌         | 5/100 [02:25<45:10, 28.53s/it]

  Report 102: Combined Similarity Score: 0.6432


Evaluating Reports:   6%|▌         | 6/100 [02:51<43:29, 27.76s/it]

  Report 103: Combined Similarity Score: 0.7092


Evaluating Reports:   7%|▋         | 7/100 [03:20<43:42, 28.20s/it]

  Report 104: Combined Similarity Score: 0.7091


Evaluating Reports:   8%|▊         | 8/100 [03:46<41:54, 27.33s/it]

  Report 105: Combined Similarity Score: 0.6987


Evaluating Reports:   9%|▉         | 9/100 [04:12<41:03, 27.07s/it]

  Report 106: Combined Similarity Score: 0.7055


Evaluating Reports:  10%|█         | 10/100 [04:42<41:55, 27.95s/it]

  Report 107: Combined Similarity Score: 0.7557


Evaluating Reports:  11%|█         | 11/100 [05:10<41:30, 27.98s/it]

  Report 108: Combined Similarity Score: 0.6632


Evaluating Reports:  12%|█▏        | 12/100 [05:35<39:35, 26.99s/it]

  Report 11: Combined Similarity Score: 0.8330


Evaluating Reports:  13%|█▎        | 13/100 [06:02<39:04, 26.95s/it]

  Report 110: Combined Similarity Score: 0.5881


Evaluating Reports:  14%|█▍        | 14/100 [06:29<38:30, 26.87s/it]

  Report 111: Combined Similarity Score: 0.6501


Evaluating Reports:  15%|█▌        | 15/100 [06:55<37:57, 26.79s/it]

  Report 112: Combined Similarity Score: 0.7217


Evaluating Reports:  16%|█▌        | 16/100 [07:22<37:36, 26.86s/it]

  Report 113: Combined Similarity Score: 0.7379


Evaluating Reports:  17%|█▋        | 17/100 [07:52<38:33, 27.87s/it]

  Report 114: Combined Similarity Score: 0.6762


Evaluating Reports:  18%|█▊        | 18/100 [08:21<38:33, 28.21s/it]

  Report 115: Combined Similarity Score: 0.6209


Evaluating Reports:  19%|█▉        | 19/100 [08:55<40:16, 29.83s/it]

  Report 116: Combined Similarity Score: 0.6645


Evaluating Reports:  20%|██        | 20/100 [09:23<39:11, 29.40s/it]

  Report 117: Combined Similarity Score: 0.6353


Evaluating Reports:  21%|██        | 21/100 [09:52<38:17, 29.08s/it]

  Report 118: Combined Similarity Score: 0.7024


Evaluating Reports:  22%|██▏       | 22/100 [10:21<37:47, 29.07s/it]

  Report 119: Combined Similarity Score: 0.6384


Evaluating Reports:  23%|██▎       | 23/100 [10:51<37:43, 29.40s/it]

  Report 12: Combined Similarity Score: 0.6806


Evaluating Reports:  24%|██▍       | 24/100 [11:16<35:45, 28.23s/it]

  Report 120: Combined Similarity Score: 0.7033


Evaluating Reports:  25%|██▌       | 25/100 [11:44<35:02, 28.04s/it]

  Report 121: Combined Similarity Score: 0.6793


Evaluating Reports:  26%|██▌       | 26/100 [12:10<33:48, 27.42s/it]

  Report 122: Combined Similarity Score: 0.6704


Evaluating Reports:  27%|██▋       | 27/100 [12:37<33:10, 27.27s/it]

  Report 123: Combined Similarity Score: 0.7291


Evaluating Reports:  28%|██▊       | 28/100 [13:07<33:39, 28.05s/it]

  Report 124: Combined Similarity Score: 0.6049


Evaluating Reports:  29%|██▉       | 29/100 [13:32<32:18, 27.31s/it]

  Report 125: Combined Similarity Score: 0.6240


Evaluating Reports:  30%|███       | 30/100 [13:59<31:44, 27.20s/it]

  Report 126: Combined Similarity Score: 0.7269


Evaluating Reports:  31%|███       | 31/100 [14:27<31:19, 27.25s/it]

  Report 127: Combined Similarity Score: 0.6984


Evaluating Reports:  32%|███▏      | 32/100 [14:57<32:03, 28.28s/it]

  Report 128: Combined Similarity Score: 0.7203


Evaluating Reports:  33%|███▎      | 33/100 [15:26<31:50, 28.52s/it]

  Report 129: Combined Similarity Score: 0.7338


Evaluating Reports:  34%|███▍      | 34/100 [15:49<29:23, 26.72s/it]

  Report 130: Combined Similarity Score: 0.7806


Evaluating Reports:  35%|███▌      | 35/100 [16:16<28:55, 26.69s/it]

  Report 131: Combined Similarity Score: 0.7433


Evaluating Reports:  36%|███▌      | 36/100 [16:46<29:34, 27.72s/it]

  Report 132: Combined Similarity Score: 0.7167


Evaluating Reports:  37%|███▋      | 37/100 [17:12<28:46, 27.40s/it]

  Report 133: Combined Similarity Score: 0.5923


Evaluating Reports:  38%|███▊      | 38/100 [17:40<28:29, 27.57s/it]

  Report 134: Combined Similarity Score: 0.6472


Evaluating Reports:  39%|███▉      | 39/100 [18:09<28:15, 27.80s/it]

  Report 135: Combined Similarity Score: 0.7200


Evaluating Reports:  40%|████      | 40/100 [18:38<28:13, 28.23s/it]

  Report 136: Combined Similarity Score: 0.6562


Evaluating Reports:  41%|████      | 41/100 [19:07<28:06, 28.59s/it]

  Report 137: Combined Similarity Score: 0.7135


Evaluating Reports:  42%|████▏     | 42/100 [19:33<26:53, 27.82s/it]

  Report 138: Combined Similarity Score: 0.6430


Evaluating Reports:  43%|████▎     | 43/100 [20:02<26:39, 28.07s/it]

  Report 139: Combined Similarity Score: 0.7113


Evaluating Reports:  44%|████▍     | 44/100 [20:29<25:50, 27.69s/it]

  Report 14: Combined Similarity Score: 0.7203


Evaluating Reports:  45%|████▌     | 45/100 [20:58<25:54, 28.26s/it]

  Report 141: Combined Similarity Score: 0.6986


Evaluating Reports:  46%|████▌     | 46/100 [21:27<25:31, 28.37s/it]

  Report 142: Combined Similarity Score: 0.7408


Evaluating Reports:  47%|████▋     | 47/100 [21:58<25:46, 29.19s/it]

  Report 143: Combined Similarity Score: 0.6356


Evaluating Reports:  48%|████▊     | 48/100 [22:24<24:26, 28.20s/it]

  Report 144: Combined Similarity Score: 0.7463


Evaluating Reports:  49%|████▉     | 49/100 [22:52<23:55, 28.16s/it]

  Report 145: Combined Similarity Score: 0.6788


Evaluating Reports:  50%|█████     | 50/100 [23:21<23:45, 28.50s/it]

  Report 146: Combined Similarity Score: 0.6257


Evaluating Reports:  51%|█████     | 51/100 [23:48<22:50, 27.98s/it]

  Report 147: Combined Similarity Score: 0.7445


Evaluating Reports:  52%|█████▏    | 52/100 [24:15<22:03, 27.57s/it]

  Report 149: Combined Similarity Score: 0.5914


Evaluating Reports:  53%|█████▎    | 53/100 [24:42<21:31, 27.48s/it]

  Report 15: Combined Similarity Score: 0.6843


Evaluating Reports:  54%|█████▍    | 54/100 [25:12<21:45, 28.37s/it]

  Report 150: Combined Similarity Score: 0.7828


Evaluating Reports:  55%|█████▌    | 55/100 [25:39<20:54, 27.87s/it]

  Report 151: Combined Similarity Score: 0.6492


Evaluating Reports:  56%|█████▌    | 56/100 [26:06<20:19, 27.71s/it]

  Report 152: Combined Similarity Score: 0.6691


Evaluating Reports:  57%|█████▋    | 57/100 [26:36<20:15, 28.26s/it]

  Report 153: Combined Similarity Score: 0.6476


Evaluating Reports:  58%|█████▊    | 58/100 [27:05<19:51, 28.37s/it]

  Report 154: Combined Similarity Score: 0.6333


Evaluating Reports:  59%|█████▉    | 59/100 [27:32<19:14, 28.16s/it]

  Report 155: Combined Similarity Score: 0.6478


Evaluating Reports:  60%|██████    | 60/100 [28:00<18:37, 27.94s/it]

  Report 157: Combined Similarity Score: 0.5832


Evaluating Reports:  61%|██████    | 61/100 [28:29<18:23, 28.29s/it]

  Report 158: Combined Similarity Score: 0.6667


Evaluating Reports:  62%|██████▏   | 62/100 [28:57<17:55, 28.30s/it]

  Report 159: Combined Similarity Score: 0.6702


Evaluating Reports:  63%|██████▎   | 63/100 [29:24<17:09, 27.84s/it]

  Report 16: Combined Similarity Score: 0.7003


Evaluating Reports:  64%|██████▍   | 64/100 [29:52<16:39, 27.77s/it]

  Report 160: Combined Similarity Score: 0.7013


Evaluating Reports:  65%|██████▌   | 65/100 [30:16<15:39, 26.85s/it]

  Report 161: Combined Similarity Score: 0.7442


Evaluating Reports:  66%|██████▌   | 66/100 [30:43<15:16, 26.96s/it]

  Report 162: Combined Similarity Score: 0.6604


Evaluating Reports:  67%|██████▋   | 67/100 [31:10<14:48, 26.92s/it]

  Report 163: Combined Similarity Score: 0.7325


Evaluating Reports:  68%|██████▊   | 68/100 [31:40<14:44, 27.63s/it]

  Report 164: Combined Similarity Score: 0.6197


Evaluating Reports:  69%|██████▉   | 69/100 [32:08<14:21, 27.79s/it]

  Report 165: Combined Similarity Score: 0.6717


Evaluating Reports:  70%|███████   | 70/100 [32:38<14:13, 28.45s/it]

  Report 166: Combined Similarity Score: 0.7513


Evaluating Reports:  71%|███████   | 71/100 [33:07<13:54, 28.77s/it]

  Report 167: Combined Similarity Score: 0.6664


Evaluating Reports:  72%|███████▏  | 72/100 [33:43<14:25, 30.89s/it]

  Report 168: Combined Similarity Score: 0.6861


Evaluating Reports:  73%|███████▎  | 73/100 [34:11<13:26, 29.88s/it]

  Report 169: Combined Similarity Score: 0.6584


Evaluating Reports:  74%|███████▍  | 74/100 [34:45<13:32, 31.23s/it]

  Report 17: Combined Similarity Score: 0.7014


Evaluating Reports:  75%|███████▌  | 75/100 [35:13<12:33, 30.13s/it]

  Report 170: Combined Similarity Score: 0.6340


Evaluating Reports:  76%|███████▌  | 76/100 [35:40<11:41, 29.23s/it]

  Report 171: Combined Similarity Score: 0.7025


Evaluating Reports:  77%|███████▋  | 77/100 [36:08<11:05, 28.93s/it]

  Report 172: Combined Similarity Score: 0.7131


Evaluating Reports:  78%|███████▊  | 78/100 [36:32<10:07, 27.59s/it]

  Report 173: Combined Similarity Score: 0.7693


Evaluating Reports:  79%|███████▉  | 79/100 [37:00<09:37, 27.49s/it]

  Report 174: Combined Similarity Score: 0.7129


Evaluating Reports:  80%|████████  | 80/100 [37:28<09:13, 27.69s/it]

  Report 175: Combined Similarity Score: 0.7066


Evaluating Reports:  81%|████████  | 81/100 [37:57<08:54, 28.12s/it]

  Report 176: Combined Similarity Score: 0.6241


Evaluating Reports:  82%|████████▏ | 82/100 [38:25<08:25, 28.09s/it]

  Report 177: Combined Similarity Score: 0.6586


Evaluating Reports:  83%|████████▎ | 83/100 [38:54<08:04, 28.48s/it]

  Report 178: Combined Similarity Score: 0.7438


Evaluating Reports:  84%|████████▍ | 84/100 [39:20<07:23, 27.73s/it]

  Report 179: Combined Similarity Score: 0.7759


Evaluating Reports:  85%|████████▌ | 85/100 [39:46<06:47, 27.14s/it]

  Report 18: Combined Similarity Score: 0.6372


Evaluating Reports:  86%|████████▌ | 86/100 [40:13<06:18, 27.06s/it]

  Report 181: Combined Similarity Score: 0.7245


Evaluating Reports:  87%|████████▋ | 87/100 [40:41<05:55, 27.32s/it]

  Report 182: Combined Similarity Score: 0.6768


Evaluating Reports:  88%|████████▊ | 88/100 [41:08<05:27, 27.28s/it]

  Report 183: Combined Similarity Score: 0.7164


Evaluating Reports:  89%|████████▉ | 89/100 [41:35<04:59, 27.25s/it]

  Report 184: Combined Similarity Score: 0.6798


Evaluating Reports:  90%|█████████ | 90/100 [42:02<04:32, 27.21s/it]

  Report 185: Combined Similarity Score: 0.7685


Evaluating Reports:  91%|█████████ | 91/100 [42:30<04:05, 27.23s/it]

  Report 186: Combined Similarity Score: 0.6994


Evaluating Reports:  92%|█████████▏| 92/100 [42:56<03:34, 26.84s/it]

  Report 187: Combined Similarity Score: 0.6928


Evaluating Reports:  93%|█████████▎| 93/100 [43:22<03:06, 26.61s/it]

  Report 188: Combined Similarity Score: 0.6922


Evaluating Reports:  94%|█████████▍| 94/100 [43:47<02:37, 26.30s/it]

  Report 189: Combined Similarity Score: 0.7358


Evaluating Reports:  95%|█████████▌| 95/100 [44:15<02:14, 26.80s/it]

  Report 19: Combined Similarity Score: 0.7008


Evaluating Reports:  96%|█████████▌| 96/100 [44:41<01:45, 26.39s/it]

  Report 190: Combined Similarity Score: 0.6605


Evaluating Reports:  97%|█████████▋| 97/100 [45:10<01:21, 27.23s/it]

  Report 191: Combined Similarity Score: 0.6609


Evaluating Reports:  98%|█████████▊| 98/100 [45:40<00:55, 27.99s/it]

  Report 192: Combined Similarity Score: 0.5917


Evaluating Reports:  99%|█████████▉| 99/100 [46:06<00:27, 27.55s/it]

  Report 193: Combined Similarity Score: 0.6692


Evaluating Reports: 100%|██████████| 100/100 [46:34<00:00, 27.94s/it]

  Report 194: Combined Similarity Score: 0.7478

--- Calculating Average Evaluation Scores ---
Average Embedding Similarity: 0.7236 (over 100 reports)
Average Graph Similarity (Original): 0.0880 (over 100 reports)
Average Combined Similarity: 0.6875 (over 100 reports - calculated using graph sim clamped to [0.60, 0.70])
Saved evaluation summary to: D:\NLP apps\radiology_rag_kg_vis_output_v2\evaluation_summary_100_reports.txt
Unloading models...
Unloading CLIP AutoModel components...






Radiology RAG Batch Evaluation Finished.
