In [11]:
import time

import batch


class TextProcessor:
    """Toy text transformer used to demonstrate ``batch.dynamically``.

    Keeps a running total of how many texts it has handled in
    ``processed_count`` and stamps that total onto every result.
    """

    def __init__(self):
        # Number of texts handled so far, across all calls.
        self.processed_count = 0

    @batch.dynamically(batch_size=32, timeout_ms=10.0)
    def process_text(self, texts: list[str]) -> list[str]:
        """Upper-case every text and tag it with the running counter.

        Args:
            texts: The texts to transform.

        Returns:
            One string per input, in input order, formatted as
            ``"<UPPERCASED TEXT> (Processed: <count>)"``.

        Note:
            The decorator is configured with ``batch_size=32`` and
            ``timeout_ms=10.0``; presumably it groups incoming items into
            batches under those limits before this body runs — confirm
            against the ``batch`` library documentation.
        """
        # Stand-in for real work: a fixed 0.1 s cost per invocation.
        time.sleep(0.1)

        def _tag(text: str) -> str:
            # Bump the counter first so the label reflects this item.
            self.processed_count += 1
            return f"{text.upper()} (Processed: {self.processed_count})"

        return [_tag(text) for text in texts]
        

# Create an instance of TextProcessor (its counter starts at 0)
processor = TextProcessor()

# Process single items.
# NOTE(review): process_text is declared over list[str] but is called here
# with a bare str; presumably the @batch.dynamically wrapper accepts single
# items and hands back a single result — confirm against the batch library.
result1 = processor.process_text("Hello, world!")
result2 = processor.process_text("Python is awesome")

print("Single item results:")
print(result1)
print(result2)

# Process a batch of items: 100 generated texts passed in one call
batch_texts = [f"Text {i}" for i in range(100)]
batch_results = processor.process_text(batch_texts)

# Show only the first five results to keep the output short
print("\nBatch processing results (first 5):")
for result in batch_results[:5]:
    print(result)

# Check the statistics exposed as `.stats` on the decorated method
print("\nBatch processing statistics:")
print(processor.process_text.stats)


Single item results:
HELLO, WORLD! (Processed: 1)
PYTHON IS AWESOME (Processed: 2)

Batch processing results (first 5):
TEXT 0 (Processed: 3)
TEXT 1 (Processed: 101)
TEXT 2 (Processed: 4)
TEXT 3 (Processed: 102)
TEXT 4 (Processed: 100)

Batch processing statistics:
BatchProcessorStats(queue_size=0, total_processed=102, total_batches=6, avg_batch_size=17.0, avg_processing_time=0.10255853335062663)


In [12]:
from sentence_transformers import SentenceTransformer
import numpy as np
import time

class SentenceEmbedder:
    """Sentence-embedding wrapper around a ``SentenceTransformer`` model.

    Args:
        model_name: Model identifier passed straight to
            ``SentenceTransformer``; defaults to ``'all-MiniLM-L6-v2'``.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        # The model is constructed once and reused for every call.
        self.model = SentenceTransformer(model_name)

    @batch.dynamically(batch_size=32, timeout_ms=50.0)
    def embed_sentences(self, sentences: list[str]) -> list[np.ndarray]:
        """Encode ``sentences`` and return one embedding per sentence.

        Args:
            sentences: The sentences to encode.

        Returns:
            A list holding each element yielded by iterating the result of
            ``self.model.encode(sentences)``, in order.
        """
        encoded = self.model.encode(sentences)
        # Split the encoder output into a list of per-sentence entries.
        return list(encoded)

# Create an instance of SentenceEmbedder (uses the default model name)
embedder = SentenceEmbedder()

# Embed single sentences.
# NOTE(review): embed_sentences is declared over list[str] but is called with
# a bare str, and the result is used as a single array (`.shape` below);
# presumably the @batch.dynamically wrapper supports single items — confirm
# against the batch library.
single_sentence1 = "This is a test sentence."
single_sentence2 = "Another example sentence."
embedding1 = embedder.embed_sentences(single_sentence1)
embedding2 = embedder.embed_sentences(single_sentence2)

print("Single sentence embedding shapes:")
print(f"Embedding 1 shape: {embedding1.shape}")
print(f"Embedding 2 shape: {embedding2.shape}")

# Embed a batch of 1000 sentences
batch_sentences = [f"This is test sentence number {i}." for i in range(1000)]

# Time the call with perf_counter(): it is monotonic and high-resolution,
# so the measured interval cannot be skewed by system clock adjustments
# the way a time.time() difference can.
start_time = time.perf_counter()
batch_embeddings = embedder.embed_sentences(batch_sentences)
end_time = time.perf_counter()

print("\nBatch embedding shapes:")
for i, embedding in enumerate(batch_embeddings[:5]):  # Print only first 5 for brevity
    print(f"Embedding {i+1} shape: {embedding.shape}")

print(f"\nTime taken to embed 1000 sentences: {end_time - start_time:.4f} seconds")

# Check the statistics exposed as `.stats` on the decorated method
print("\nBatch processing statistics:")
print(embedder.embed_sentences.stats)


Single sentence embedding shapes:
Embedding 1 shape: (384,)
Embedding 2 shape: (384,)

Batch embedding shapes:
Embedding 1 shape: (384,)
Embedding 2 shape: (384,)
Embedding 3 shape: (384,)
Embedding 4 shape: (384,)
Embedding 5 shape: (384,)

Time taken to embed 1000 sentences: 0.5730 seconds

Batch processing statistics:
BatchProcessorStats(queue_size=0, total_processed=1002, total_batches=34, avg_batch_size=29.470588235294116, avg_processing_time=0.017600543358746695)
