In [1]:
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import BinaryClassificationEvaluator
from datasets import load_dataset

# Load a model
model = SentenceTransformer('all-mpnet-base-v2')

# Load a dataset with two text columns and a class label column (https://huggingface.co/datasets/sentence-transformers/quora-duplicates)
eval_dataset = load_dataset("sentence-transformers/quora-duplicates", "pair-class", split="train[-1000:]")

# Initialize the evaluator
binary_acc_evaluator = BinaryClassificationEvaluator(
    sentences1=eval_dataset["sentence1"],
    sentences2=eval_dataset["sentence2"],
    labels=eval_dataset["label"],
    name="quora-duplicates-dev",
)
results = binary_acc_evaluator(model)
'''
Binary Accuracy Evaluation of the model on the quora-duplicates-dev dataset:
Accuracy with Cosine-Similarity:           81.60    (Threshold: 0.8352)
F1 with Cosine-Similarity:                 75.27    (Threshold: 0.7715)
Precision with Cosine-Similarity:          65.81
Recall with Cosine-Similarity:             87.89
Average Precision with Cosine-Similarity:  76.03

Accuracy with Dot-Product:           81.60  (Threshold: 0.8352)
F1 with Dot-Product:                 75.27  (Threshold: 0.7715)
Precision with Dot-Product:          65.81
Recall with Dot-Product:             87.89
Average Precision with Dot-Product:  76.03

Accuracy with Manhattan-Distance:           81.50   (Threshold: 12.0727)
F1 with Manhattan-Distance:                 74.97   (Threshold: 15.2269)
Precision with Manhattan-Distance:          63.89
Recall with Manhattan-Distance:             90.68
Average Precision with Manhattan-Distance:  75.66

Accuracy with Euclidean-Distance:           81.60   (Threshold: 0.5741)
F1 with Euclidean-Distance:                 75.27   (Threshold: 0.6760)
Precision with Euclidean-Distance:          65.81
Recall with Euclidean-Distance:             87.89
Average Precision with Euclidean-Distance:  76.03
'''
print(binary_acc_evaluator.primary_metric)
# => "quora-duplicates-dev_max_ap"
print(results[binary_acc_evaluator.primary_metric])
# => 0.760277070888393

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/4.78k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/35.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/404290 [00:00<?, ? examples/s]

NotImplementedError: The operator 'aten::cumsum.out' is not current implemented for the MPS device. If you want this op to be added in priority during the prototype phase of this feature, please comment on https://github.com/pytorch/pytorch/issues/77764. As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op. WARNING: this will be slower than running natively on MPS.

In [2]:
from sentence_transformers import CrossEncoder
import torch

model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", default_activation_function=torch.nn.Sigmoid())
scores = model.predict([
    ("How many people live in Berlin?", "Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers."),
    ("How many people live in Berlin?", "Berlin is well known for its museums."),
])
# => array([0.9998173 , 0.01312432], dtype=float32)

config.json:   0%|          | 0.00/794 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [3]:
scores

array([0.9998173 , 0.01312433], dtype=float32)

In [4]:
scores = model.predict([
    (Dec, "Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers."),
    ("How many people live in Berlin?", "Berlin is well known for its museums."),
])
print(scores)

[0.9998173  0.01312433]


In [5]:
# Sample sentence
sentences = ["The data appeared to be corrupted",
                        "We used statistical analysis to find a good value",
                        "The line was straight", "  "]


test = "I learned how to apply regression models to data."
print('Test sentence:',test)

test_vec = model.predict([test])[0]


for sent in sentences:
        similarity_score = 1-distance.cosine(test_vec, model.encode([sent])[0])
        print(f'\nFor {sent}\nSimilarity Score = {similarity_score} ')

Test sentence: I learned how to apply regression models to data.


IndexError: invalid index to scalar variable.