In [1]:
from sentence_transformers import SentenceTransformer, util


def test_sentence_transformer(embedding_model: str) -> None:
    """
    Load an embedding model and print dot-product scores for a few sample queries.

    A fixed reference sentence is encoded once and its embedding length is printed.
    Each sample query (the first one is the reference sentence itself) is then encoded
    and scored against the reference embedding with ``util.dot_score``.

    The embedding model is downloaded automatically to ~/.cache/huggingface/hub if it
    does not already exist there.
    The printed scores are meant to be confirmed/compared against those produced by
    pgvector's max_inner_product.

    Args:
        embedding_model: Name of the model, passed as-is to ``SentenceTransformer``.
    """
    encoder = SentenceTransformer(embedding_model)
    # Optional: encoder.save(f"sentence_transformers/{embedding_model}") keeps a local copy.
    reference_text = "Curiosity inspires creative, innovative communities worldwide."
    reference_embedding = encoder.encode(reference_text)
    print("=== ", embedding_model, len(reference_embedding))

    sample_queries = [
        reference_text,
        "How does curiosity inspire communities?",
        "What's the best pet?",
        "What's the meaning of life?",
    ]
    for query in sample_queries:
        # Scoring adapted from https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-cos-v1
        similarity = util.dot_score(reference_embedding, encoder.encode(query))
        print("Score:", similarity.item(), "for:", query)


# Run the same scoring exercise on each model so their printed scores can be compared.
embedding_models = [
    "multi-qa-mpnet-base-cos-v1",
    "multi-qa-mpnet-base-dot-v1",
]
for model in embedding_models:
    # Print the model name first so it heads that model's download/progress output.
    print(model)
    test_sentence_transformer(model)


  from tqdm.autonotebook import tqdm, trange


multi-qa-mpnet-base-cos-v1


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/9.25k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

===  multi-qa-mpnet-base-cos-v1 768
Score: 0.9999998211860657 for: Curiosity inspires creative, innovative communities worldwide.
Score: 0.8557828664779663 for: How does curiosity inspire communities?
Score: 0.06295974552631378 for: What's the best pet?
Score: 0.3046571910381317 for: What's the meaning of life?
multi-qa-mpnet-base-dot-v1


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/8.71k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

===  multi-qa-mpnet-base-dot-v1 768
Score: 37.346778869628906 for: Curiosity inspires creative, innovative communities worldwide.
Score: 30.06268310546875 for: How does curiosity inspire communities?
Score: 10.358479499816895 for: What's the best pet?
Score: 17.485326766967773 for: What's the meaning of life?
