In [2]:
import torch
import faiss
import numpy as np
from langchain.embeddings import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer


In [None]:
# Tunable settings for the embedding pipeline
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
BATCH_SIZE = 128  # number of texts handed to the encoder per batch
DIMENSION = 384  # width of the vectors produced by all-MiniLM-L6-v2

# Load the encoder onto the GPU and cast its weights to FP16 to save memory.
# .half() hands back the same module, so chaining it is equivalent to calling
# it in a separate statement.
model = SentenceTransformer(MODEL_NAME, device="cuda").half()

def generate_embeddings(texts: list[str]) -> np.ndarray:
    """Encode texts on the GPU in batches and return them as a float32 matrix.

    Args:
        texts: Strings to embed. May be empty.

    Returns:
        A float32 NumPy array on the CPU with one row per input text. For an
        empty ``texts`` the result has shape ``(0, DIMENSION)`` so that the
        output is always two-dimensional.
    """
    # encode() yields a zero-element 1-D tensor when given no sentences, which
    # would lose the (n, dim) layout callers rely on; short-circuit instead.
    if len(texts) == 0:
        return np.empty((0, DIMENSION), dtype=np.float32)

    embeddings = model.encode(
        texts,
        batch_size=BATCH_SIZE,
        convert_to_tensor=True,
        convert_to_numpy=False,
        device="cuda",
    )
    # The model was cast to FP16; upcast to FP32 before copying to host memory
    # so downstream consumers receive a float32 array.
    return embeddings.float().cpu().numpy()


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:

# Toy corpus for the demo; append your own texts to this list.
documents = [
    "The cat sits on the mat.",
    "A dog is running in the park.",
    "Birds are singing in the trees.",
]

# Embed the whole corpus in a single call.
embeddings_np = generate_embeddings(texts=documents)


In [None]:
# Build a flat L2 index of width DIMENSION on the CPU first ...
index = faiss.IndexFlatL2(DIMENSION)

# ... then copy it to GPU device 0. `res` holds the GPU resources FAISS uses
# and is kept as a notebook-level name alongside the index.
res = faiss.StandardGpuResources()
gpu_index = faiss.index_cpu_to_gpu(res, 0, index)

# Load the corpus embeddings into the GPU index.
gpu_index.add(embeddings_np)

In [None]:
def similarity_search(query: str, k: int = 3) -> tuple[list[int], list[float]]:
    """Find the indexed vectors closest to ``query`` using the GPU index.

    Args:
        query: Text to search for.
        k: Maximum number of neighbours to return. It is capped at the number
            of vectors currently stored in ``gpu_index``.

    Returns:
        ``(indices, distances)``: two parallel lists ordered as returned by
        the index search. ``indices`` are row positions in the index and
        ``distances`` are the corresponding distances reported by
        ``gpu_index``. Both lists are empty when the index holds no vectors.
    """
    total = gpu_index.ntotal
    if total == 0:
        return [], []

    # FAISS pads missing neighbours with index -1 when k exceeds the number of
    # stored vectors; -1 would silently address the last document if used as a
    # Python list index, so never ask for more results than exist.
    k = min(k, total)

    # Reuse the shared helper so queries are embedded exactly like the corpus;
    # passing a one-element list yields the (1, dim) float32 matrix that
    # search() expects.
    query_embedding = generate_embeddings([query])

    distances, indices = gpu_index.search(query_embedding, k)
    return indices[0].tolist(), distances[0].tolist()

In [None]:
# Demo lookup against the sample corpus using the default number of neighbours.
query = "animals resting at home"
indices, distances = similarity_search(query=query)

# Show which documents matched and how far away they are.
print("Most similar indices:", indices)
print("Distances:", distances)

Most similar indices: [0, 2, 1]
Distances: [1.4387826919555664, 1.4573981761932373, 1.582641363143921]
