In [8]:
import torch
from transformers import AutoModel, AutoTokenizer
import numpy as np

In [9]:
# Checkpoint identifier; the tokenizer and the model are both loaded from this
# same name so that the vocabulary and the weights match.
model_name = "sentence-transformers/bert-base-nli-mean-tokens"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

In [10]:
def get_sentence_embedding(sentence, tokenizer, model):
    """Embed a sentence (or a list of sentences) by mean-pooling the last hidden layer.

    The tokenizer is called with ``padding=True``, so when several sentences of
    different lengths are passed at once the shorter ones are padded. Padding
    positions are excluded from the average by weighting each token with the
    tokenizer's attention mask; for a single sentence the mask is all ones and
    the result equals a plain mean over tokens.

    Args:
        sentence: A string or a list of strings, passed straight to ``tokenizer``.
        tokenizer: Callable returning a mapping of PyTorch tensors. Its
            ``attention_mask`` entry, when present, marks the real tokens.
        model: Callable that accepts the tokenizer output as keyword arguments
            and returns an object exposing a ``last_hidden_state`` tensor.

    Returns:
        A NumPy array with one embedding row per input sentence.
    """
    inputs = tokenizer(sentence, return_tensors='pt', padding=True, truncation=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)
        token_embeddings = outputs.last_hidden_state
        attention_mask = inputs.get('attention_mask')
        if attention_mask is None:
            # No mask available: fall back to averaging over every position.
            embeddings = token_embeddings.mean(dim=1)
        else:
            # Add a trailing axis so the mask broadcasts over the hidden dimension.
            mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)
            summed = (token_embeddings * mask).sum(dim=1)
            # Clamp so a row with no real tokens yields zeros instead of NaN.
            token_counts = mask.sum(dim=1).clamp(min=1.0)
            embeddings = summed / token_counts
    return embeddings.numpy()


In [11]:
# Smoke test: embed a few sample sentences one at a time and preview each vector.
sentences = [
    "Fetch Rewards is a good company!",
    "I need this job.",
    "Deep learning models are powerful.",
]

for sentence in sentences:
    embedding = get_sentence_embedding(sentence, tokenizer, model)
    # Only the first 10 values of the first (and only) row are shown.
    leading_values = embedding[0][:10]
    print(f"Sentence: {sentence}")
    print("Embedding:", leading_values, "...")


Sentence: Fetch Rewards is a good company!
Embedding: [-0.09339077 -0.05491878  1.6760554   0.22114281  0.03420132 -0.6127432
 -0.4563969   0.31616974  0.5116602  -0.22488236] ...
Sentence: I need this job.
Embedding: [ 0.22050771  0.09618075  2.0571465  -0.02566287 -0.1751951   0.36857465
  0.26772994  0.2326076   0.915058   -0.8603219 ] ...
Sentence: Deep learning models are powerful.
Embedding: [-0.30292642 -0.42733595  1.2662299   0.7802493  -0.03143926 -1.0125474
 -0.6239908   0.12163259  1.2622325  -0.6915277 ] ...
