# Evaluation of the cosine similarities computed by different types of models

In [None]:
from sentence_transformers import SentenceTransformer, util

## Loading models
*requires setting the HF_TOKEN secret*

In [None]:
# Sentence Transformer baseline
sent_model = SentenceTransformer("all-MiniLM-L6-v2")

# NOTE(review): the two SimCSE checkpoints below are not published in the
# sentence-transformers format; presumably the library wraps them with its
# default pooling, which may differ from SimCSE's own pooling -- confirm
# before drawing conclusions from their scores.
# SimCSE unsupervised
unsup_simcse = SentenceTransformer("princeton-nlp/unsup-simcse-roberta-base")
# SimCSE supervised
sup_simcse = SentenceTransformer("princeton-nlp/sup-simcse-roberta-base")

# E5
# E5 models are trained on prefixed inputs and their model card states that
# every text must start with "query: " or "passage: "; for symmetric tasks
# such as semantic similarity, "query: " is used on both sides. Registering it
# as the default prompt makes every encode() call on this model prepend the
# prefix, so corpus and query are treated consistently without changing the
# calling code. (The prompts/default_prompt_name arguments require
# sentence-transformers >= 2.4.0.)
e5_model = SentenceTransformer(
    "intfloat/multilingual-e5-base",
    prompts={"query": "query: "},
    default_prompt_name="query",
)

## Initializations

In [None]:
# Registry of the loaded encoders, keyed by a short identifier. The same keys
# are reused by every per-model dictionary built further down.
dict_models = dict(
    sent_model=sent_model,
    unsup_simcse=unsup_simcse,
    sup_simcse=sup_simcse,
    e5_model=e5_model,
)

# Human-readable titles used when printing results; keys mirror dict_models.
dict_model_labels = dict(
    sent_model="Classical SentenceTransformer",
    unsup_simcse="SimCSE - unsupervised",
    sup_simcse="SimCSE - supervised",
    e5_model="E5",
)

# Candidate sentences that the query is compared against. Positions matter:
# search hits refer back to entries of this list by index.
list_corpus: list[str] = [
    # English, explicit subject and instrument
    "A man is slicing a cake carefully with a knife.",
    # English, generic subject and no instrument
    "Someone cuts a dessert with precision.",
    # English, short variant with a different subject
    "A woman is slicing a cake.",
    # French sentence ("She carefully cuts the tart into equal parts.")
    "Elle découpe soigneusement la tarte en parts égales.",
]

## Embedding corpus

In [None]:
# Encode the whole corpus once per model. The result maps each model key to
# whatever that model's encode() returns for the full sentence list
# (requested as a tensor via convert_to_tensor=True).
dict_corpus_embeddings = {
    model_key: dict_models[model_key].encode(
        list_corpus,
        convert_to_tensor=True,
    )
    for model_key in dict_models
}

## Embedding query

In [None]:
# The single sentence every corpus entry is scored against.
query = "Someone cuts a dessert with a knife."

# Encode the query with each model, keyed the same way as dict_models so the
# result can be paired with the matching corpus embeddings.
dict_query_embeddings = dict(
    (model_key, encoder.encode(query, convert_to_tensor=True))
    for model_key, encoder in dict_models.items()
)

## Processing

In [None]:
# Run the search per model, always pairing a model's query embedding with the
# corpus embeddings produced by that same model. semantic_search returns one
# result list per query; only one query is passed, hence the [0].
dict_hits = {
    model_key: util.semantic_search(
        dict_query_embeddings[model_key],
        dict_corpus_embeddings[model_key],
    )[0]
    for model_key in dict_query_embeddings
}

## Displaying results

In [None]:
# Report header: the query being evaluated.
print(f"\nQuery: {query}\n\nCosine similarity for each model:")

# One section per model, in the order the hits were computed.
for model_key, model_hits in dict_hits.items():
    # Fall back to the raw key if no display label was registered for it.
    section_title = dict_model_labels.get(model_key, model_key)
    print(f"\n{section_title}:")

    for entry in model_hits:
        # Each hit carries a score and the index of the matched corpus entry.
        similarity = entry['score']
        sentence = list_corpus[entry['corpus_id']]
        print(f"  {similarity:.3f} - {sentence}")