## General batching

### Sentence Transformers

In [1]:
import os
# Point the Hugging Face cache (HF_HOME) at a directory one level above the
# notebook. The assignment sits before the sentence_transformers import --
# presumably so the library sees it when it initialises; keep this order.
os.environ["HF_HOME"] = "../.cache"
from sentence_transformers import SentenceTransformer
from concurrent.futures import ThreadPoolExecutor
# Load the embedding model that the encode helpers below call into.
model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
# Encode a single sentence; as the last expression of the cell, the resulting
# embedding is what the cell displays.
model.encode("Hello, world!")

  from tqdm.autonotebook import tqdm, trange


README.md:   0%|          | 0.00/114k [00:00<?, ?B/s]

array([ 0.30596843,  0.7907292 ,  0.00980733, ...,  0.0644001 ,
       -0.45898223, -0.01831897], dtype=float32)

In [2]:
def simple_encode(text: list[str]):
    """Embed a list of texts with the notebook-level ``model``.

    Args:
        text: The strings to embed; the list is handed to ``model.encode``
            as a single call.

    Returns:
        Whatever ``model.encode`` returns for that list.
    """
    embeddings = model.encode(text)
    return embeddings

In [3]:
import timeit

def run_benchmark(num_requests=1000, max_workers=128, encode_fn=None):
    """Fire ``num_requests`` single-sentence encode calls from a thread pool.

    Called with no arguments (as ``timeit.timeit`` does) this behaves like the
    original hard-coded benchmark: 1000 requests, 128 worker threads, each
    request encoding ``["Hello, world!"]`` through ``simple_encode``.

    Args:
        num_requests: Number of requests to submit. ``0`` submits nothing.
        max_workers: Size of the ``ThreadPoolExecutor``; must be positive.
        encode_fn: Callable invoked once per request with a one-element list
            of strings. Defaults to the notebook's ``simple_encode``, looked
            up at call time so the function can be defined before it.

    Returns:
        None. The encode results are collected and then discarded.
    """
    if encode_fn is None:
        encode_fn = simple_encode
    requests = [["Hello, world!"] for _ in range(num_requests)]
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # list() drains the lazy map iterator, so every request has finished
        # (and any worker exception has been re-raised) before we return.
        list(executor.map(encode_fn, requests))

# Run the benchmark once (1k requests) and report the elapsed time in seconds
execution_time = timeit.timeit(run_benchmark, number=1)
print(f"Execution time: {execution_time:.2f} seconds")

Execution time: 20.67 seconds


In [4]:
import batched
import timeit

# Wrap simple_encode with batched.dynamically (batch_size=128). Presumably this
# merges the inputs of concurrent callers into shared simple_encode calls of up
# to 128 items -- confirm against the batched documentation.
dynamic_encode = batched.dynamically(simple_encode, batch_size=128)

def run_benchmark(num_requests=10000, max_workers=128, encode_fn=None):
    """Fire ``num_requests`` single-sentence encode calls from a thread pool.

    Called with no arguments (as ``timeit.timeit`` does) this behaves like the
    original hard-coded benchmark: 10000 requests, 128 worker threads, each
    request encoding ``["Hello, world!"]`` through ``dynamic_encode``.

    Args:
        num_requests: Number of requests to submit. ``0`` submits nothing.
        max_workers: Size of the ``ThreadPoolExecutor``; must be positive.
        encode_fn: Callable invoked once per request with a one-element list
            of strings. Defaults to the notebook's ``dynamic_encode``, looked
            up at call time.

    Returns:
        None. The encode results are collected and then discarded.
    """
    if encode_fn is None:
        encode_fn = dynamic_encode
    requests = [["Hello, world!"] for _ in range(num_requests)]
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # list() drains the lazy map iterator, so every request has finished
        # (and any worker exception has been re-raised) before we return.
        list(executor.map(encode_fn, requests))


# Run the benchmark once (10k requests) and report the elapsed time in seconds
execution_time = timeit.timeit(run_benchmark, number=1)
print(f"Execution time: {execution_time:.2f} seconds")

Execution time: 6.10 seconds
