In [1]:
from typing import Dict
import numpy as np
from transformers import AutoModel, AutoTokenizer

def transform_query(query: str) -> str:
    """Prefix *query* with the retrieval instruction used for search queries.

    Only the query side gets this prompt; the example passages below are
    embedded as-is.
    """
    template = 'Represent this sentence for searching relevant passages: {}'
    return template.format(query)

def pooling_np(outputs, attention_mask, strategy='cls'):
    """Reduce per-token embeddings to a single vector per sequence.

    Args:
        outputs: Array indexed as ``[sequence, token, feature]``.
        attention_mask: Array indexed as ``[sequence, token]``; non-zero
            entries mark tokens that take part in mean pooling. It is not
            read by the ``'cls'`` strategy.
        strategy: ``'cls'`` returns the first token of every sequence;
            ``'mean'`` returns the mask-weighted average over tokens.

    Returns:
        Array indexed as ``[sequence, feature]``.

    Raises:
        NotImplementedError: If ``strategy`` is neither ``'cls'`` nor
            ``'mean'``.
    """
    if strategy == 'cls':
        # Taking the first token (CLS token) for each sequence
        return outputs[:, 0]
    if strategy == 'mean':
        # Zero out masked tokens, then average over the unmasked ones.
        masked_sum = np.sum(outputs * attention_mask[:, :, None], axis=1)
        token_counts = np.sum(attention_mask, axis=1)[:, None]
        # A fully masked sequence has a zero count; clamping the denominator
        # yields a zero vector for it instead of NaN from 0/0.
        return masked_sum / np.maximum(token_counts, 1e-9)
    raise NotImplementedError(f'Unsupported pooling strategy: {strategy!r}')

def cos_sim_np(a, b):
    """Return the pairwise cosine similarity between the rows of *a* and *b*.

    Args:
        a: Array of row vectors, or a single 1-D vector (treated as one row).
        b: Array of row vectors, or a single 1-D vector (treated as one row).

    Returns:
        2-D array whose ``[i, j]`` entry is the cosine similarity between
        row ``i`` of *a* and row ``j`` of *b*. Pairs involving a zero-length
        vector get similarity 0 rather than NaN.
    """
    # Promote single vectors to one-row matrices so axis=1 is always valid.
    a = np.atleast_2d(a)
    b = np.atleast_2d(b)
    dot_product = np.dot(a, b.T)
    norm_a = np.linalg.norm(a, axis=1, keepdims=True)  # column: one per row of a
    norm_b = np.linalg.norm(b, axis=1)                 # row: one per row of b
    denom = norm_a * norm_b                            # broadcasts to dot_product's shape
    # A zero norm means the matching dot products are already 0, so dividing
    # by 1 there gives 0 and avoids the 0/0 NaN.
    safe_denom = np.where(denom == 0, np.ones_like(denom), denom)
    return dot_product / safe_denom

# Load the model and tokenizer
import torch  # local to this cell: only needed to switch off autograd below

model_id = 'mixedbread-ai/mxbai-embed-large-v1'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id)  # Running on CPU

# Example documents: the first entry is the query (with the retrieval prompt
# prepended); the remaining entries are the passages it is compared against.
docs = [transform_query('A man is eating a piece of bread')] + [
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# Tokenize and process with the model
inputs = tokenizer(docs, padding=True, return_tensors='pt')
# Inference only: disabling gradient tracking avoids building an autograd
# graph, so the result can be converted to NumPy without a detach step.
with torch.no_grad():
    outputs = model(**inputs).last_hidden_state.numpy()  # Convert to NumPy array
attention_mask = inputs['attention_mask'].numpy()  # Convert attention mask to NumPy array

# Pool embeddings using NumPy
embeddings = pooling_np(outputs, attention_mask, 'cls')

# Calculate cosine similarities with NumPy: query (row 0) against every passage
similarities_np = cos_sim_np(embeddings[0:1], embeddings[1:])
print('Similarities:', similarities_np)


  from .autonotebook import tqdm as notebook_tqdm


Similarities: [[0.7919583  0.6369279  0.16512007 0.36207786]]
