In [1]:
from typing import Dict
import numpy as np
from transformers import AutoModel, AutoTokenizer

def transform_query(query: str) -> str:
    return f'Represent this sentence for searching relevant passages: {query}'

def pooling_np(outputs, attention_mask, strategy='cls'):
    if strategy == 'cls':
        # Taking the first token (CLS token) for each sequence
        return outputs[:, 0]
    elif strategy == 'mean':
        # Applying attention mask and computing mean pooling
        outputs_masked = outputs * attention_mask[:, :, None]
        return np.sum(outputs_masked, axis=1) / np.sum(attention_mask, axis=1)[:, None]
    else:
        raise NotImplementedError

def cos_sim_np(a, b):
    dot_product = np.dot(a, b.T)
    norm_a = np.linalg.norm(a, axis=1, keepdims=True)
    norm_b = np.linalg.norm(b, axis=1)
    return dot_product / (norm_a * norm_b)

# Load the model and tokenizer
model_id = 'mixedbread-ai/mxbai-embed-large-v1'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id)  # Running on CPU

# Example documents
docs = [transform_query('A man is eating a piece of bread')] + [
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# Tokenize and process with the model
inputs = tokenizer(docs, padding=True, return_tensors='pt')
outputs = model(**inputs).last_hidden_state.detach().numpy()  # Convert to NumPy array
attention_mask = inputs['attention_mask'].numpy()  # Convert attention mask to NumPy array

# Pool embeddings using NumPy
embeddings = pooling_np(outputs, attention_mask, 'cls')

# Calculate cosine similarities with NumPy
similarities_np = cos_sim_np(embeddings[0:1], embeddings[1:])
print('Similarities:', similarities_np)


  from .autonotebook import tqdm as notebook_tqdm


Similarities: [[0.791958   0.63692826 0.16512099 0.3620784 ]]


In [5]:
from typing import Dict

import torch
import numpy as np
from transformers import AutoModel, AutoTokenizer
from sentence_transformers.util import cos_sim

# For retrieval you need to pass this prompt. Please find our more in our blog post.
def transform_query(query: str) -> str:
    """ For retrieval, add the prompt for query (not for documents).
    """
    return f'Represent this sentence for searching relevant passages: {query}'

# The model works really well with cls pooling (default) but also with mean poolin.
def pooling(outputs: torch.Tensor, inputs: Dict,  strategy: str = 'cls') -> np.ndarray:
    if strategy == 'cls':
        outputs = outputs[:, 0]
    elif strategy == 'mean':
        outputs = torch.sum(
            outputs * inputs["attention_mask"][:, :, None], dim=1) / torch.sum(inputs["attention_mask"])
    else:
        raise NotImplementedError
    return outputs.detach().cpu().numpy()

# 1. load model
model_id = 'mixedbread-ai/mxbai-embed-large-v1'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id)


docs = [
    transform_query('A man is eating a piece of bread'),
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# 2. encode
inputs = tokenizer(docs, padding=True, return_tensors='pt')
for k, v in inputs.items():
    inputs[k] = v
outputs = model(**inputs).last_hidden_state
embeddings = pooling(outputs, inputs, 'cls')

similarities = cos_sim(embeddings[0], embeddings[1:])
print('similarities:', similarities)


similarities: tensor([[0.7920, 0.6369, 0.1651, 0.3621]])


In [8]:
from sentence_transformers import SentenceTransformer
from sentence_transformers.quantization import quantize_embeddings

# 1. Load an embedding model
model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")

# 2. Encode some text and select MRL dimensions
mrl_embeddings = model.encode(
    ["Who is german and likes bread?", "Everybody in Germany."], normalize_embeddings=True)[..., :512] 

# 3. Apply binary quantization
binary_embeddings = quantize_embeddings(mrl_embeddings, precision="binary")
print(binary_embeddings)
print(binary_embeddings.shape)

[[ -41 -118 -100  111   42    0   58  -50  114 -124   59   51  109  -44
     5  -67   41  -39  100 -120  -41   48   33   89  -10   -5  -94  -42
   -48  -37  -11  100  -21   96   27  127  -27  -49  -84   29 -108   70
   103  -37  118   81    4   42   80    8   63   55   -9   30 -118  -19
    45   39  -36   61 -120  -86   -6   43]
 [ -46  -88 -120  124   10   38   54  -62  106   22   27   -5   93  -40
    53  -15 -127   77   86   77   -6  -33   55 -126   84   -5   62  -58
   -40  -54   38   29   70  -40   15  124   21  -50   -3   29  -94    4
   -29 -109   86  -45   36  111  114 -118  127   31  -67 -114 -120   73
    46  -10  -36  101    0  115  -52    2]]
(2, 64)


In [16]:
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

# 1. load model
model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")

# For retrieval you need to pass this prompt.
query = 'Represent this sentence for searching relevant passages: A man is eating a piece of bread'

docs = [
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

docs_plus = [query] + docs
print(docs_plus)

# 2. Encode
docs_embeddings = model.encode(docs, normalize_embeddings=True)[..., :512]
query_embedding = model.encode(query, normalize_embeddings=True)[..., :512]

docs_binary_embeds = np.packbits(docs_embeddings > 0, axis=-1)
query_binary_embed = np.packbits(query_embedding > 0, axis=-1)

hamming_distances = np.sum(docs_binary_embeds != query_binary_embed, axis=1)
max_distance = docs_binary_embeds.shape[1] * 8  # Maximum possible Hamming distance
scores = 1 - hamming_distances / max_distance
# docs_embeddings = model.encode(docs, normalize_embeddings=True)[..., :512] 
# query_embedding = model.encode(query, normalize_embeddings=True)[..., :512]

# docs_binary_embeds = (np.packbits(docs_embeddings > 0).reshape(docs_embeddings.shape[0], -1) - 128).astype(np.int8)
# query_binary_embed = (np.packbits(query_embedding > 0).reshape(query_embedding.shape[0], -1) - 128).astype(np.int8)

# 3. calculate similarities with hamming distance between binary embeddings
# Calculate the Hamming distances
# hamming_distances = np.sum(docs_binary_embeds != query_binary_embed, axis=1)

# Convert Hamming distances to similarities (optional)
# similarities = -hamming_distances

print('Hamming distances:', hamming_distances)
print('Normalized scores:', scores)

#use transform_query with binary embeddings
docsplus_embeddings = model.encode(docs_plus, normalize_embeddings=True)[..., :512]
docsplus_binary_embeds = np.packbits(docsplus_embeddings > 0, axis=-1)
hamming_distances = np.sum(docsplus_binary_embeds[1:] != docsplus_binary_embeds[0], axis=1)
max_distance = docsplus_binary_embeds.shape[1] * 8  # Maximum possible Hamming distance
scores = 1 - hamming_distances / max_distance

print('binplus Hamming distances:', hamming_distances)
print('binplus Normalized scores:', scores)

# USE NORMAL EMBS
docs_int_embeddings = model.encode(docs)
query_int_embedding = model.encode(query)

similarities = cos_sim(query_int_embedding, docs_int_embeddings)
print('int similarities:', similarities)

# USE WITH QUERY   
querydocs_embedding = model.encode(docs_plus)
similarities = cos_sim(querydocs_embedding[0], querydocs_embedding[1:])
print('query_docs similarities:', similarities)


['Represent this sentence for searching relevant passages: A man is eating a piece of bread', 'A man is eating food.', 'A man is eating pasta.', 'The girl is carrying a baby.', 'A man is riding a horse.']
Hamming distances: [57 62 63 64]
Normalized scores: [0.88867188 0.87890625 0.87695312 0.875     ]
binplus Hamming distances: [57 62 63 64]
binplus Normalized scores: [0.88867188 0.87890625 0.87695312 0.875     ]
int similarities: tensor([[0.7920, 0.6369, 0.1651, 0.3621]])
query_docs similarities: tensor([[0.7920, 0.6369, 0.1651, 0.3621]])
