__sentence-transformers__ is a Python library that provides transformer models trained to generate <u>sentence embeddings</u>.

It extends HuggingFace’s Transformers library with pooling strategies, efficient training routines, and loss functions tailored for sentence-level tasks such as:
- semantic search 
- clustering 
- dense retrieval 
- paraphrase detection

In [None]:
%pip install -U sentence-transformers

In [None]:
%pip install puccinialin

In [7]:
%pip install faiss-cpu

Collecting faiss-cpu
  Using cached faiss_cpu-1.11.0.tar.gz (70 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting numpy<3.0,>=1.25.0 (from faiss-cpu)
  Using cached numpy-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl.metadata (62 kB)
Using cached numpy-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl (21.2 MB)
Building wheels for collected packages: faiss-cpu
  Building wheel for faiss-cpu (pyproject.toml) ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for faiss-cpu [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[105 lines of output][0m
  [31m   [0m running bdist_wheel
  [31m   [0m running build
  [31m   [0m running build_py
  [31m   [0m running build_ext
  [31m   [0m building 'faiss._swigfaiss' extension
 

In [None]:
from sentence_transformers import SentenceTransformer

import faiss
import numpy as np

# 1. Load a pre-trained sentence-embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

# 2. Example corpus of documents
corpus = [
    "The Eiffel Tower is located in Paris.",
    "Machine learning enables computers to learn from data.",
    "Python is a popular programming language.",
    "Paris is the capital of France.",
    "Transformers are powerful models for NLP tasks.",
]

# 3. Encode the corpus into dense vectors (one row per document)
corpus_embeddings = model.encode(corpus, convert_to_numpy=True, show_progress_bar=True)

# 4. Build a FAISS index
dimension = corpus_embeddings.shape[1]
# IndexFlatL2 does exact (brute-force) search ranked by L2 distance, so a
# smaller distance means a closer match. For cosine similarity, L2-normalize
# the embeddings and use an inner-product index (faiss.IndexFlatIP) instead.
index = faiss.IndexFlatL2(dimension)
index.add(corpus_embeddings)

# 5. Encode the user query (wrapped in a list so the result is a 2-D batch)
query = "What city is the Eiffel Tower in?"
query_embedding = model.encode([query], convert_to_numpy=True)

# 6. Search for top-k most similar documents
top_k = 3
# Never ask for more neighbours than the index holds: FAISS pads missing
# results with index -1, which would silently resolve to corpus[-1].
k = min(top_k, index.ntotal)
distances, indices = index.search(query_embedding, k)

# 7. Print the results (row 0 holds the hits for the single query)
print(f"\nQuery: {query}")
print("Top results:")
for rank, (idx, dist) in enumerate(zip(indices[0], distances[0]), start=1):
    print(f"{rank}. {corpus[idx]} (distance: {dist:.4f})")
