In [None]:
import sys

# Add the parent directory (relative to the current working directory) to the
# import search path. The membership check keeps the cell idempotent: running
# it again does not pile up duplicate '..' entries in sys.path.
if '..' not in sys.path:
    sys.path.append('..')

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
from embeddings import get_embeddings, compute_similarity

In [None]:
# Relative path of the saved sentence-transformer model directory; passed to
# load_sentence_transformer_model_from_disk() further down.
MODEL_PATH = '../models/intfloat/e5-base-v2'

## Load models

We will load our model from disk. To download a new sentence transformer model, run `download_sentence_transformer_model.py`, located in the root of the project.

In [None]:
from sentence_transformers import SentenceTransformer

def load_sentence_transformer_model_from_disk(save_directory):
    """
    Load a pre-trained Sentence Transformer model from a local directory.

    Args:
    save_directory (str): The directory where the model is saved.

    Returns:
    model: The loaded pre-trained Sentence Transformer model.

    Raises:
    FileNotFoundError: If save_directory is not an existing directory.
    """
    import os  # local import so this cell does not depend on another cell's imports

    # Fail fast with a clear message. Without this check a missing directory is
    # passed straight to SentenceTransformer, which (per its documentation)
    # interprets a non-path string as a model name rather than a local path.
    if not os.path.isdir(save_directory):
        raise FileNotFoundError(
            f"Sentence Transformer model directory not found: {save_directory}"
        )

    # Load the model from the verified directory
    model = SentenceTransformer(save_directory)

    print(f"Sentence Transformer model loaded from {save_directory}")
    return model

In [None]:
# Load the embedding model once; the cells below reuse `model`.
model = load_sentence_transformer_model_from_disk(MODEL_PATH)

## Get embeddings

In [None]:
# Encode two short example sentences in a single call.
embedding_1 = model.encode(
    [
        'This is a sentence.',
        'This is another sentence.',
    ]
)

In [None]:
# Show the dimensions of the encoded result
# (presumably one row per input sentence; confirm against the displayed output).
embedding_1.shape

## Calculate the similarity between vectors

In [None]:
# Only X is supplied, so cosine_similarity compares the rows of embedding_1
# with each other (pairwise).
similarity = cosine_similarity(X=embedding_1)

In [None]:
# Display the cosine_similarity result computed in the previous cell.
similarity

## Import radiology data

In [None]:
# Read the reports CSV into a DataFrame (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='../data/ReportsDATASET.csv')

In [None]:
# Pick the 'Text' value of the row labelled 12 with a single explicit
# label-based lookup instead of chained indexing.
sample_report = df.loc[12, 'Text']

In [None]:
# Display the selected report text.
sample_report

In [None]:
# Label text that is encoded alongside the sample report in the next cell.
sample_label = 'cardiomegaly'

In [None]:
# Encode the report and the label in one call: report first, label second.
sample_embedding = model.encode(
    [sample_report, sample_label]
)

In [None]:
# Pairwise cosine similarity across the rows of sample_embedding
# (the report and label encodings produced in the previous cell).
cosine_similarity(sample_embedding)