In [None]:
import sys
# Add the parent directory to the import search path.
# NOTE(review): presumably this is what lets the local `embeddings` module be imported below — confirm.
sys.path.append('..')

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import classification_report

In [None]:
from embeddings import get_embeddings, compute_similarity

In [None]:
# Model directories to compare, written relative to this notebook.
# Every entry lives under ../models/; the gte-Qwen2-7B-instruct entry is commented out.
model_paths = [
    f"../models/{model_id}"
    for model_id in (
        # 'Alibaba-NLP/gte-Qwen2-7B-instruct',
        "intfloat/e5-base-v2",
        "nomic-ai/nomic-embed-text-v1.5",
        "sentence-transformers/all-MiniLM-L6-v2",
        "sentence-transformers/all-MiniLM-L12-v2",
    )
]

In [None]:
# Model used by the single-model cells below: the first entry of `model_paths`.
MODEL_PATH = model_paths[0]

## Load models

We will load our model from disk. To download a new sentence transformer model, use `download_sentence_transformer_model.py` in the root of the project.

In [None]:
from sentence_transformers import SentenceTransformer

def load_sentence_transformer_model_from_disk(save_directory):
    """
    Build a Sentence Transformer from a model directory stored on disk.

    A confirmation message naming the directory is printed once the model
    object has been constructed.

    Args:
    save_directory (str): Path of the directory that holds the saved model.

    Returns:
    model: The Sentence Transformer created from ``save_directory``.
    """
    loaded_model = SentenceTransformer(save_directory)
    print(f"Sentence Transformer model loaded from {save_directory}")
    return loaded_model

In [None]:
# Load the model selected by MODEL_PATH; the cells below encode text with it.
model = load_sentence_transformer_model_from_disk(save_directory=MODEL_PATH)

## Get embeddings

In [None]:
# Encode two short example sentences in a single call.
embedding_1 = model.encode(['This is a sentence.', 'This is another sentence.'])

In [None]:
# Inspect the shape of the encoded result.
embedding_1.shape

## Calculate the similarity between vectors

In [None]:
# With a single argument, cosine_similarity compares the rows of `embedding_1` with each other.
similarity = cosine_similarity(embedding_1)

In [None]:
# Display the pairwise similarity matrix for the two example sentences.
similarity

## Import radiology data

In [None]:
# Read the reports dataset; the cells below use its 'Text' column.
df = pd.read_csv('../data/ReportsDATASET.csv')

In [None]:
# Take the 'Text' value of the row labelled 12 as a worked example.
sample_report = df.loc[12, 'Text']

In [None]:
# Display the selected report text.
sample_report

In [None]:
# Single label to compare against the sample report.
sample_label = 'cardiomegaly'

In [None]:
# Encode the report and the label together so they can be compared with each other.
sample_embedding = model.encode([sample_report, sample_label])

In [None]:
# Pairwise similarity between the two encoded inputs (report, label).
cosine_similarity(sample_embedding)

## Compare models on reports

In [None]:
# Candidate labels that every report is scored against; the order here fixes
# the label order used by the evaluation cells.
labels = [
    "pulmonary_edema",
    "consolidation",
    "pleural_effusion",
    "pneumothorax",
    "cardiomegaly",
]

In [None]:
# Ten chest-radiograph report texts used as the evaluation set.
# `ground_truth` (defined right after this list) gives the expected labels for
# each report, in the same order.
reports = [
    """
    RADIOLOGY REPORT

    Exam
    PA and lateral chest radiograph (2 views) (2 images) Date: XXXX, XXXX at XXXX hours Indication: Chest pain. Comparison: Chest radiograph from XXXX, XXXX. Findings: The cardiac silhouette is borderline enlarged. Otherwise, there is no focal opacity. Mediastinal contours are within normal limits. There is no large pleural effusion. No pneumothorax. Transcribed by - PSCB Transcription Date - XXXX

    IMPRESSION
    Borderline enlargement of the cardiac silhouette without acute pulmonary disease. DICTATED BY : Dr. XXXX XXXX XXXX XXXX XXXX ELECTRONICALLY SIGNED XXXX. XXXX XXXX XXXX XXXX XXXX TRANSCRIBED XXXX 11 XXXX XXXX  RADRES XXXX

    SIGNATURE
    XXXX
    """,
    """
    RADIOLOGY REPORT

    Exam
    PA and lateral chest radiograph (2 views) (2 images) Date: XXXX, XXXX at XXXX hours Indication: Shortness of breath. Comparison: Chest radiograph from XXXX, XXXX. Findings: There is evidence of bilateral pulmonary edema. The cardiac silhouette is normal. No pleural effusion or pneumothorax. Transcribed by - PSCB Transcription Date - XXXX

    IMPRESSION
    Bilateral pulmonary edema. No evidence of pleural effusion or pneumothorax. DICTATED BY : Dr. XXXX XXXX XXXX XXXX XXXX ELECTRONICALLY SIGNED XXXX. XXXX XXXX XXXX XXXX XXXX TRANSCRIBED XXXX 11 XXXX XXXX  RADRES XXXX

    SIGNATURE
    XXXX
    """,
    """
    RADIOLOGY REPORT

    Exam
    PA and lateral chest radiograph (2 views) (2 images) Date: XXXX, XXXX at XXXX hours Indication: Cough and fever. Comparison: Chest radiograph from XXXX, XXXX. Findings: There is consolidation in the right lower lobe. The cardiac silhouette is normal. No pleural effusion or pneumothorax. Transcribed by - PSCB Transcription Date - XXXX

    IMPRESSION
    Right lower lobe consolidation. No pleural effusion or pneumothorax. DICTATED BY : Dr. XXXX XXXX XXXX XXXX XXXX ELECTRONICALLY SIGNED XXXX. XXXX XXXX XXXX XXXX XXXX TRANSCRIBED XXXX 11 XXXX XXXX  RADRES XXXX

    SIGNATURE
    XXXX
    """,
    """
    RADIOLOGY REPORT

    Exam
    PA and lateral chest radiograph (2 views) (2 images) Date: XXXX, XXXX at XXXX hours Indication: Chest pain. Comparison: Chest radiograph from XXXX, XXXX. Findings: There is a small left pleural effusion. The cardiac silhouette is normal. No pneumothorax. Transcribed by - PSCB Transcription Date - XXXX

    IMPRESSION
    Small left pleural effusion. No pneumothorax. DICTATED BY : Dr. XXXX XXXX XXXX XXXX XXXX ELECTRONICALLY SIGNED XXXX. XXXX XXXX XXXX XXXX XXXX TRANSCRIBED XXXX 11 XXXX XXXX  RADRES XXXX

    SIGNATURE
    XXXX
    """,
    """
    RADIOLOGY REPORT

    Exam
    PA and lateral chest radiograph (2 views) (2 images) Date: XXXX, XXXX at XXXX hours Indication: Trauma. Comparison: Chest radiograph from XXXX, XXXX. Findings: There is a right-sided pneumothorax. The cardiac silhouette is normal. No pleural effusion. Transcribed by - PSCB Transcription Date - XXXX

    IMPRESSION
    Right-sided pneumothorax. No pleural effusion. DICTATED BY : Dr. XXXX XXXX XXXX XXXX XXXX ELECTRONICALLY SIGNED XXXX. XXXX XXXX XXXX XXXX XXXX TRANSCRIBED XXXX 11 XXXX XXXX  RADRES XXXX

    SIGNATURE
    XXXX
    """,
    """
    RADIOLOGY REPORT

    Exam
    PA and lateral chest radiograph (2 views) (2 images) Date: XXXX, XXXX at XXXX hours Indication: Shortness of breath and leg swelling. Comparison: Chest radiograph from XXXX, XXXX. Findings: There is moderate pulmonary edema and bilateral pleural effusion. The cardiac silhouette is enlarged. No pneumothorax. Transcribed by - PSCB Transcription Date - XXXX

    IMPRESSION
    Moderate pulmonary edema and bilateral pleural effusion. Cardiomegaly. No pneumothorax. DICTATED BY : Dr. XXXX XXXX XXXX XXXX XXXX ELECTRONICALLY SIGNED XXXX. XXXX XXXX XXXX XXXX XXXX TRANSCRIBED XXXX 11 XXXX XXXX  RADRES XXXX

    SIGNATURE
    XXXX
    """,
    """
    RADIOLOGY REPORT

    Exam
    PA and lateral chest radiograph (2 views) (2 images) Date: XXXX, XXXX at XXXX hours Indication: Fever and cough. Comparison: Chest radiograph from XXXX, XXXX. Findings: There is a consolidation in the left upper lobe. The cardiac silhouette is normal. No pleural effusion or pneumothorax. Transcribed by - PSCB Transcription Date - XXXX

    IMPRESSION
    Left upper lobe consolidation. No pleural effusion or pneumothorax. DICTATED BY : Dr. XXXX XXXX XXXX XXXX XXXX ELECTRONICALLY SIGNED XXXX. XXXX XXXX XXXX XXXX XXXX TRANSCRIBED XXXX 11 XXXX XXXX  RADRES XXXX

    SIGNATURE
    XXXX
    """,
    """
    RADIOLOGY REPORT

    Exam
    PA and lateral chest radiograph (2 views) (2 images) Date: XXXX, XXXX at XXXX hours Indication: Routine check-up. Comparison: Chest radiograph from XXXX, XXXX. Findings: The cardiac silhouette is normal. No focal opacity. Mediastinal contours are within normal limits. There is no pleural effusion or pneumothorax. Transcribed by - PSCB Transcription Date - XXXX

    IMPRESSION
    Normal chest radiograph. No abnormalities detected. DICTATED BY : Dr. XXXX XXXX XXXX XXXX XXXX ELECTRONICALLY SIGNED XXXX. XXXX XXXX XXXX XXXX XXXX TRANSCRIBED XXXX 11 XXXX XXXX  RADRES XXXX

    SIGNATURE
    XXXX
    """,
    """
    RADIOLOGY REPORT

    Exam
    PA and lateral chest radiograph (2 views) (2 images) Date: XXXX, XXXX at XXXX hours Indication: Dyspnea. Comparison: Chest radiograph from XXXX, XXXX. Findings: There is mild cardiomegaly. Bilateral pleural effusions are present. No evidence of pneumothorax. Transcribed by - PSCB Transcription Date - XXXX

    IMPRESSION
    Mild cardiomegaly with bilateral pleural effusions. No pneumothorax. DICTATED BY : Dr. XXXX XXXX XXXX XXXX XXXX ELECTRONICALLY SIGNED XXXX. XXXX XXXX XXXX XXXX XXXX TRANSCRIBED XXXX 11 XXXX XXXX  RADRES XXXX

    SIGNATURE
    XXXX
    """,
    """
    RADIOLOGY REPORT

    Exam
    PA and lateral chest radiograph (2 views) (2 images) Date: XXXX, XXXX at XXXX hours Indication: Trauma. Comparison: Chest radiograph from XXXX, XXXX. Findings: There is a left-sided pneumothorax. The cardiac silhouette is normal. No pleural effusion. Transcribed by - PSCB Transcription Date - XXXX

    IMPRESSION
    Left-sided pneumothorax. No pleural effusion. DICTATED BY : Dr. XXXX XXXX XXXX XXXX XXXX ELECTRONICALLY SIGNED XXXX. XXXX XXXX XXXX XXXX XXXX TRANSCRIBED XXXX 11 XXXX XXXX  RADRES XXXX

    SIGNATURE
    XXXX
    """
]

# Ground Truth Labels for each report
# Entry i holds the expected labels for reports[i]; an empty list means that
# none of the labels applies to that report.
ground_truth = [
    ["cardiomegaly"],  # 0: borderline enlarged cardiac silhouette
    ["pulmonary_edema"],  # 1: bilateral pulmonary edema
    ["consolidation"],  # 2: right lower lobe consolidation
    ["pleural_effusion"],  # 3: small left pleural effusion
    ["pneumothorax"],  # 4: right-sided pneumothorax
    ["pulmonary_edema", "pleural_effusion", "cardiomegaly"],  # 5: edema, bilateral effusion, cardiomegaly
    ["consolidation"],  # 6: left upper lobe consolidation
    [],  # 7: normal chest radiograph
    ["cardiomegaly", "pleural_effusion"],  # 8: mild cardiomegaly with bilateral effusions
    ["pneumothorax"]  # 9: left-sided pneumothorax
]


In [None]:
def compute_similarity(report_embeddings, label_embeddings):
    """
    Score every report embedding against every label embedding.

    Note: this definition shadows the ``compute_similarity`` name imported
    from ``embeddings`` earlier in the notebook.

    Args:
    report_embeddings: Embeddings of the reports, passed as the first argument
        of ``cosine_similarity``.
    label_embeddings: Embeddings of the labels, passed as the second argument
        of ``cosine_similarity``.

    Returns:
    The cosine-similarity result; ``evaluate_model`` reads it as one row per
    report with one score per label.
    """
    pairwise_scores = cosine_similarity(report_embeddings, label_embeddings)
    return pairwise_scores

def evaluate_model(report_embeddings, label_embeddings, labels, ground_truth, threshold=0.5):
    """
    Evaluates the model by comparing predicted labels to ground truth.

    A label is predicted for a report when the cosine similarity between the
    report embedding and that label's embedding is strictly greater than
    ``threshold``. Ground truth and predictions are encoded as multi-label
    indicator matrices (one row per report, one column per label), so the
    printed classification report contains one row per entry of ``labels``.

    Args:
    report_embeddings: Embeddings of the reports, one per report.
    label_embeddings: Embeddings of the labels, in the same order as ``labels``.
    labels (list): Label names; position i corresponds to column i of the
        similarity matrix.
    ground_truth (list): For each report, the list of labels that apply to it
        (an empty list when none applies).
    threshold (float): Similarity above which a label counts as predicted.
        Defaults to 0.5, an example value that can be tuned.

    Returns:
    None. The indicator matrices and the classification report are printed.

    Raises:
    ValueError: If ``ground_truth`` does not contain exactly one entry per report.
    """
    similarities = np.asarray(compute_similarity(report_embeddings, label_embeddings))

    # zip() would silently drop the surplus entries, hiding a misaligned dataset.
    if len(ground_truth) != similarities.shape[0]:
        raise ValueError(
            f"ground_truth has {len(ground_truth)} entries but "
            f"{similarities.shape[0]} reports were embedded"
        )

    # One row per report, one column per label. The layout must stay 2-D:
    # flattening it into a single 0/1 vector leaves only two classes, which
    # cannot be matched against the label names passed as target_names.
    y_pred = (similarities > threshold).astype(int)
    y_true = np.array(
        [[1 if label in true_labels else 0 for label in labels] for true_labels in ground_truth],
        dtype=int,
    ).reshape(len(ground_truth), len(labels))

    print(y_true.tolist(), y_pred.tolist(), labels)
    # zero_division=0 reports 0 for a label with no predicted or true samples
    # instead of emitting a warning for each such label.
    print(classification_report(y_true, y_pred, target_names=labels, zero_division=0))

In [None]:
# Evaluate every entry of `model_paths` on the same reports, labels and ground truth.
# NOTE(review): the loop rebinds the notebook-level `model`, so once it finishes
# `model` refers to the last entry of `model_paths`, not the model loaded from MODEL_PATH.
for model_path in model_paths:
    model = load_sentence_transformer_model_from_disk(model_path)
    # `get_embeddings` is imported from the local `embeddings` module; presumably it
    # encodes the given texts with `model` — confirm against that module.
    report_embeddings = get_embeddings(model, reports)
    label_embeddings = get_embeddings(model, labels)
    
    print(f"Evaluating model: {model_path}")
    evaluate_model(report_embeddings, label_embeddings, labels, ground_truth)