In [7]:
from transformers import AutoTokenizer
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings, LlamaCppEmbeddings, OpenAIEmbeddings

model_name = "intfloat/multilingual-e5-large" #"thenlper/gte-large" #"menadsa/S-BioELECTRA"
# Get embeddings.

embeddings = HuggingFaceEmbeddings(model_name=model_name)
#embeddings = LlamaCppEmbeddings(model_path = "/home/antonkulaga/sources/longevity-genie/data/models/stablebeluga-13b.ggmlv3.q2_K.bin")
texts = [
    "Basquetball is a great sport.",
    "Fly me to the moon is one of my favourite songs.",
    "The Celtics are my favourite team.",
    "This is a document about the Boston Celtics",
    "I simply love going to the movies",
    "The Boston Celtics won the game by 20 points",
    "This is just a random text.",
    "Elden Ring is one of the best games in the last 15 years.",
    "L. Kornet is one of the best Celtics players.",
    "Larry Bird was an iconic NBA player.",
]
# Create a retriever
retriever = Chroma.from_texts(texts, embedding=embeddings).as_retriever(
    search_kwargs={"k": 10}
)
query = "What can you tell me about the Celtics?"

# Get relevant documents ordered by relevance score
docs = retriever.get_relevant_documents(query)
docs

Downloading (…)8d1f6/.gitattributes:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/201 [00:00<?, ?B/s]

Downloading (…)5ae7c8d1f6/README.md:   0%|          | 0.00/160k [00:00<?, ?B/s]

Downloading (…)e7c8d1f6/config.json:   0%|          | 0.00/690 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

Downloading (…)1f6/onnx/config.json:   0%|          | 0.00/688 [00:00<?, ?B/s]

Downloading model.onnx:   0%|          | 0.00/546k [00:00<?, ?B/s]

Downloading model.onnx_data:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/418 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/418 [00:00<?, ?B/s]

Downloading (…)7c8d1f6/modules.json:   0%|          | 0.00/387 [00:00<?, ?B/s]

[Document(page_content='This is a document about the Boston Celtics', metadata={}),
 Document(page_content='The Celtics are my favourite team.', metadata={}),
 Document(page_content='The Boston Celtics won the game by 20 points', metadata={}),
 Document(page_content='L. Kornet is one of the best Celtics players.', metadata={}),
 Document(page_content='Basquetball is a great sport.', metadata={}),
 Document(page_content='Elden Ring is one of the best games in the last 15 years.', metadata={}),
 Document(page_content='This is just a random text.', metadata={}),
 Document(page_content='Fly me to the moon is one of my favourite songs.', metadata={}),
 Document(page_content='I simply love going to the movies', metadata={}),
 Document(page_content='Larry Bird was an iconic NBA player.', metadata={})]

In [14]:
text = "This is a sentence with 10 tokens. Can we find more?"
tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-large")
len(tokenizer.tokenize(text))

14

Compute speed

In [8]:
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel

def average_pool(last_hidden_states: Tensor,
                 attention_mask: Tensor) -> Tensor:
    last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
    return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]

input_texts = [
    "what is the capital of China?",
    "how to implement quick sort in python?",
    "Beijing",
    "sorting algorithms"
]

tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-large")
model = AutoModel.from_pretrained("thenlper/gte-large")

# Tokenize the input texts
batch_dict = tokenizer(input_texts, max_length=512, padding=True, truncation=True, return_tensors='pt')

outputs = model(**batch_dict)
embeddings = average_pool(outputs.last_hidden_state, batch_dict['attention_mask'])


Downloading (…)lve/main/config.json:   0%|          | 0.00/619 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/670M [00:00<?, ?B/s]

In [9]:
import time

def compute_embeddings(input_texts: list[str]):
    start_time = time.perf_counter()  # Start the timer

    # Tokenize the input texts
    batch_dict = tokenizer(input_texts, max_length=512, padding=True, truncation=True, return_tensors='pt')

    outputs = model(**batch_dict)
    embeddings = average_pool(outputs.last_hidden_state, batch_dict['attention_mask'])

    end_time = time.perf_counter()  # End the timer

    execution_time = end_time - start_time  # Calculate the execution time

    return execution_time

In [10]:
compute_embeddings(input_texts)

0.2508821910014376

In [23]:
from langchain.text_splitter import CharacterTextSplitter
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-large")
text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(tokenizer, chunk_size=512, chunk_overlap=0)

texts = text_splitter.split_text(text)
texts

['This is a sentence with 10 tokens. Can we find more?']

In [12]:
import sentence_transformers as st
# Load the sentence transformer model
model = st.SentenceTransformer()

# Get the text
text = "This is a sentence with 10 tokens. Can we find more?"

# Tokenize the text
tokens = model.tokenize(text)

# Count the number of tokens
token_count = len(tokens)

# Print the number of tokens
print(f"The number of tokens in the text is {token_count}")

AttributeError: 'NoneType' object has no attribute 'tokenize'

In [14]:
# Create a retriever
retriever = Chroma.from_texts(texts, embedding=embeddings).as_retriever(
    search_kwargs={"k": 10}
)
query = "What can you tell me about the Celtics?"

# Get relevant documents ordered by relevance score
docs = retriever.get_relevant_documents(query)
docs


llama_print_timings:        load time =  5457.98 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =  5747.48 ms /     8 tokens (  718.44 ms per token,     1.39 tokens per second)
llama_print_timings:        eval time =  1103.68 ms /     1 runs   ( 1103.68 ms per token,     0.91 tokens per second)
llama_print_timings:       total time =  6868.26 ms

llama_print_timings:        load time =  5457.98 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time = 10178.43 ms /    15 tokens (  678.56 ms per token,     1.47 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time = 10186.59 ms

llama_print_timings:        load time =  5457.98 ms
llama_print_timings:   

[Document(page_content='The Celtics are my favourite team.', metadata={}),
 Document(page_content='L. Kornet is one of the best Celtics players.', metadata={}),
 Document(page_content='Basquetball is a great sport.', metadata={}),
 Document(page_content='Fly me to the moon is one of my favourite songs.', metadata={}),
 Document(page_content='Larry Bird was an iconic NBA player.', metadata={}),
 Document(page_content='This is just a random text.', metadata={}),
 Document(page_content='Elden Ring is one of the best games in the last 15 years.', metadata={}),
 Document(page_content='I simply love going to the movies', metadata={}),
 Document(page_content='This is a document about the Boston Celtics', metadata={}),
 Document(page_content='The Boston Celtics won the game by 20 points', metadata={})]

In [15]:
retriever = Chroma.from_texts(texts, embedding=embeddings).as_retriever(
    search_kwargs={"k": 10}
)


llama_print_timings:        load time =  5457.98 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =  2510.96 ms /     8 tokens (  313.87 ms per token,     3.19 tokens per second)
llama_print_timings:        eval time =   338.84 ms /     1 runs   (  338.84 ms per token,     2.95 tokens per second)
llama_print_timings:       total time =  2852.88 ms

llama_print_timings:        load time =  5457.98 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =  5234.82 ms /    15 tokens (  348.99 ms per token,     2.87 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  5237.32 ms

llama_print_timings:        load time =  5457.98 ms
llama_print_timings:   