In [64]:
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

# Single source of truth for the checkpoint so the tokenizer and the model cannot drift apart.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME).cuda()


# Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    """Average token embeddings along dim 1, counting only unmasked positions.

    Args:
        model_output: indexable whose element 0 holds the token embeddings.
        attention_mask: mask tensor; it is given a trailing axis and expanded
            to the shape of the token embeddings before use.

    Returns:
        The mask-weighted sum over dim 1 divided by the per-row mask total.
        The divisor is clamped to at least 1e-9, so a fully masked row yields
        zeros instead of dividing by zero.
    """
    hidden_states = model_output[0]
    # Broadcast the mask over the embedding axis and make it float for the arithmetic.
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    masked_total = (hidden_states * mask).sum(dim=1)
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return masked_total / token_counts


def get_sentence_embedding(sentence: str):
    """Embed one sentence as a flat, L2-normalized tensor.

    The sentence is tokenized with the module-level ``tokenizer``, run through
    the module-level ``model`` without gradient tracking, mean-pooled with
    ``mean_pooling`` using the attention mask, flattened to 1-D, and scaled to
    unit L2 norm.

    Args:
        sentence: the text to embed.

    Returns:
        A 1-D tensor with unit L2 norm, on the same device as ``model``.
    """
    # Follow the model's own device instead of hard-coding "cuda", so the inputs
    # always land where the weights are.
    encoded_input = tokenizer(sentence, padding=True, truncation=True, return_tensors="pt").to(model.device)

    with torch.no_grad():
        model_output = model(**encoded_input)

    sentence_embeddings = mean_pooling(model_output, encoded_input["attention_mask"])
    # Flatten so the normalization below runs over a single vector (dim=0).
    # NOTE(review): this assumes a single sentence; a batch would be flattened
    # into one long vector and normalized jointly.
    sentence_embeddings = sentence_embeddings.flatten()
    return F.normalize(sentence_embeddings, p=2, dim=0)

# Embed a sample sentence with the hand-rolled transformers pipeline and show the vector.
emb1 = get_sentence_embedding("Hello, my dog is cute")
print(emb1)

tensor([-1.8234e-02, -3.3270e-02,  1.1110e-01,  8.0928e-02, -6.7070e-02,
        -7.7351e-02,  5.9226e-02, -7.6657e-02,  2.0832e-02,  5.0144e-02,
         2.3562e-02, -2.9504e-02,  2.1711e-02,  2.7174e-02,  5.5318e-02,
        -4.8389e-03,  1.1614e-02, -2.1740e-02, -4.6921e-02, -4.5097e-02,
        -5.8431e-02,  4.9420e-02,  1.0028e-02,  4.4050e-03, -1.1355e-01,
        -1.7065e-02,  6.5421e-02, -1.7996e-02,  3.6328e-02, -2.1533e-02,
        -5.8041e-02,  9.9074e-03,  5.0377e-02,  1.3503e-02, -3.7831e-03,
        -7.2319e-03,  1.5691e-03, -2.5259e-02,  3.2062e-02,  5.8019e-02,
        -7.5156e-03, -7.5619e-03,  1.6762e-02, -5.2282e-02, -3.4996e-02,
        -2.2992e-02, -5.4908e-02, -3.4377e-02,  1.6029e-02, -6.5723e-04,
        -9.3028e-02,  2.1957e-02, -1.2922e-02,  2.3898e-02,  1.6532e-03,
        -6.4742e-03, -1.7111e-02,  3.9563e-03,  5.0674e-02, -7.2320e-03,
        -1.1371e-02,  1.0544e-01,  2.9574e-02, -1.1621e-02,  4.2336e-02,
        -8.1642e-02, -3.3025e-02, -3.4053e-03, -6.0

In [65]:
from sentence_transformers import SentenceTransformer

# Bind the SentenceTransformer to its own name. Reusing `model` would replace the
# Hugging Face model that get_sentence_embedding() reads as a global, so any later
# call to that function would run against the wrong object.
st_model = SentenceTransformer("all-MiniLM-L6-v2", device="cuda")
text = "Hello, my dog is cute"

# convert_to_tensor=True returns a torch tensor, so it can be compared with emb1 directly.
text_embedding = st_model.encode(text, convert_to_tensor=True)

print(text_embedding.shape)

torch.Size([384])


In [45]:
# Count the components where the two embeddings are exactly equal (element-wise ==).
print(torch.eq(emb1, text_embedding).sum())

tensor(384, device='cuda:0')


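Conclusion: both methods produce identical embeddings (all 384 components are equal). Method 1 is believed to be faster, but no timing is measured in this notebook, so that claim still needs to be verified (e.g. with `%timeit`).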