Model card for the checkpoint used in this notebook: https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2

In [4]:
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

def mean_pooling(model_output, attention_mask):
    """Average token embeddings along axis 1, ignoring masked-out positions.

    Args:
        model_output: Indexable object whose element ``0`` is the tensor of
            per-token embeddings (presumably ``(batch, seq_len, dim)`` -- the
            mask is unsqueezed on its last axis and expanded to this size).
        attention_mask: Tensor that is non-zero for positions to keep and zero
            for positions to ignore (presumably ``(batch, seq_len)``).

    Returns:
        Tensor with axis 1 reduced away: the mask-weighted sum of the token
        embeddings divided by the mask total for each row.
    """
    hidden_states = model_output[0]

    # Broadcast the mask across the embedding axis so it can weight every feature.
    weights = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()

    weighted_total = (hidden_states * weights).sum(1)
    # The lower bound keeps the division finite when a row's mask is all zeros.
    weight_total = weights.sum(1).clamp(min=1e-9)

    return weighted_total / weight_total


# Input batch: the sentences to embed (also reused by a later cell).
sentences = [
    'This is an example sentence',
    'This is an instance of a sentence',
]

# Tokenizer and encoder are both fetched from the same Hugging Face Hub checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    'sentence-transformers/all-MiniLM-L12-v2'
)
model = AutoModel.from_pretrained(
    'sentence-transformers/all-MiniLM-L12-v2'
)

# Encode the whole batch at once with padding and truncation enabled,
# asking for PyTorch tensors back.
encoded_input = tokenizer(
    sentences,
    padding=True,
    truncation=True,
    return_tensors='pt',
)

# Forward pass with gradient tracking switched off.
with torch.no_grad():
    model_output = model(**encoded_input)

# Reduce the per-token vectors to one vector per sentence (mask-aware mean).
sentence_embeddings = mean_pooling(
    model_output,
    encoded_input['attention_mask'],
)

# L2 normalisation is left commented out in this cell, so the vectors above
# are NOT unit length; uncomment the next line to normalise them.
#sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)




2024-03-15 06:31:32.134548: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
class Embedder():
    """Sentence embedder built from a Hugging Face tokenizer and encoder.

    The instance owns both objects it needs (``self.tokenizer`` and
    ``self.model``), so ``compute_embeddings`` does not rely on any variable
    defined elsewhere in the notebook.

    Attributes:
        model_name: Identifier the encoder was loaded from.
        tokenizer_name: Identifier the tokenizer was loaded from.
        tokenizer: Object returned by ``AutoTokenizer.from_pretrained``.
        model: Object returned by ``AutoModel.from_pretrained``.
    """

    def __init__(self, model_name, tokenizer_name=None):
        """Load the tokenizer and the encoder.

        Args:
            model_name: Identifier passed to ``AutoModel.from_pretrained``.
            tokenizer_name: Identifier passed to
                ``AutoTokenizer.from_pretrained``. Falls back to
                ``model_name`` when omitted.
        """
        if tokenizer_name is None:
            tokenizer_name = model_name

        self.model_name = model_name
        self.tokenizer_name = tokenizer_name
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        # Keep the loaded network on ``self.model`` so inference uses the
        # checkpoint requested here rather than a same-named notebook global.
        self.model = AutoModel.from_pretrained(model_name)

    @staticmethod
    def _mean_pooling(model_output, attention_mask):
        """Average token embeddings along axis 1, ignoring masked-out positions.

        Args:
            model_output: Indexable object whose element ``0`` is the tensor
                of per-token embeddings (presumably ``(batch, seq_len, dim)``).
            attention_mask: Tensor that is non-zero for positions to keep
                (presumably ``(batch, seq_len)``).

        Returns:
            The mask-weighted sum of token embeddings divided by the mask
            total, with axis 1 reduced away.
        """
        token_embeddings = model_output[0]
        # Broadcast the mask across the embedding axis.
        input_mask_expanded = (
            attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        )

        summed = torch.sum(token_embeddings * input_mask_expanded, 1)
        # The lower bound keeps the division finite when a row's mask is all zeros.
        counts = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
        return summed / counts

    def compute_embeddings(self, sentences):
        """Return L2-normalised embeddings for ``sentences``.

        Args:
            sentences: Input forwarded unchanged to the tokenizer.

        Returns:
            Tensor of mean-pooled embeddings, normalised with ``p=2`` along
            ``dim=1``.
        """
        encoded_input = self.tokenizer(sentences, padding=True,
                                       truncation=True, return_tensors='pt')

        # Inference only: no gradient tracking needed.
        with torch.no_grad():
            model_output = self.model(**encoded_input)

        sentence_embeddings = self._mean_pooling(
            model_output, encoded_input['attention_mask']
        )
        return F.normalize(sentence_embeddings, p=2, dim=1)
        

        
        


In [6]:
# Build the embedder; tokenizer and encoder are loaded from the same checkpoint id.
embedder = Embedder(
    model_name='sentence-transformers/all-MiniLM-L12-v2',
    tokenizer_name='sentence-transformers/all-MiniLM-L12-v2',
)

In [8]:
# Embed the demo sentences and display element 0 of the result
# (the embedding of the first sentence) as the cell output.
embedder.compute_embeddings(sentences)[0]

tensor([-2.0270e-04,  8.1480e-02,  3.1362e-02,  2.9206e-03,  2.6156e-02,
         2.9074e-02,  7.8262e-02, -1.8042e-03,  1.0134e-01, -4.5171e-02,
         5.8435e-02, -1.5320e-02,  5.4996e-02, -9.8643e-02, -3.5025e-02,
         8.4567e-03,  1.5861e-02,  1.0563e-02, -3.4271e-02, -4.7506e-03,
         9.9902e-02, -2.0602e-02, -4.4784e-02,  3.1214e-02, -1.1924e-02,
        -5.1502e-02, -1.3361e-02,  1.8962e-02,  9.7681e-02, -5.4411e-02,
        -3.4331e-02,  8.1291e-02,  4.8812e-02, -1.1028e-02,  2.1352e-02,
         1.2719e-02, -1.4397e-02,  3.6286e-02, -7.6123e-02,  3.2329e-02,
         2.0810e-02, -4.2202e-02,  9.1291e-02,  2.0853e-02, -3.0802e-02,
        -8.3851e-02,  1.3089e-02, -3.0063e-02,  4.1123e-02, -1.2750e-01,
        -7.7803e-02, -3.9341e-02,  1.5260e-03, -2.8011e-02,  3.4166e-02,
         1.4671e-02, -7.7165e-02,  1.6362e-01,  4.1129e-02, -5.2446e-02,
        -4.1877e-02,  1.8053e-02, -1.3892e-02, -3.6819e-02,  6.9498e-02,
        -2.5709e-02,  3.5855e-02,  2.1019e-02, -3.8