In [None]:
from transformers import AutoTokenizer, AutoModel
import torch

# Pick the compute device once: use the GPU when PyTorch reports CUDA as
# available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
# The sentences to embed are defined in the tokenization cell further down.

In [None]:
# Fetch the pretrained tokenizer and encoder for the same checkpoint from the
# HuggingFace Hub.
tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-small-en-v1.5')
# `Module.to` returns the module itself, so the device move is chained onto the load.
model = AutoModel.from_pretrained('BAAI/bge-small-en-v1.5').to(DEVICE)
# Switch to evaluation mode for inference. As the cell's last expression, this
# also displays the model in the notebook output.
model.eval()

In [None]:
# Sentences to embed; replace with your own list of strings.
sentences = ["Hello", "Hi"]

# Tokenize the sentences as one padded batch of PyTorch tensors, then move the
# batch to the same device as the model. Without `.to(DEVICE)` the inputs stay
# on the CPU while the model was moved to DEVICE, and the forward pass raises a
# device-mismatch error whenever DEVICE is 'cuda'.
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt').to(DEVICE)

# For an s2p (short query to long passage) retrieval task, prepend an instruction
# to each query (do not add the instruction to passages):
# encoded_input = tokenizer([instruction + q for q in queries], padding=True, truncation=True, return_tensors='pt').to(DEVICE)

In [None]:
# Forward pass under inference mode: no autograd state is recorded.
with torch.inference_mode():
    model_output = model(**encoded_input)
    # The first element of the model output holds the token embeddings.
    token_embeddings = model_output[0]
    # CLS pooling: keep only the embedding at position 0 of each sequence.
    cls_embeddings = token_embeddings[:, 0]

# L2-normalize along dim 1 so every sentence embedding has unit length.
sentence_embeddings = torch.nn.functional.normalize(cls_embeddings, p=2, dim=1)

In [None]:
# Print the normalized sentence embeddings as plain text.
embeddings_label = "Sentence embeddings:"
print(embeddings_label, sentence_embeddings)