# Setup: imports and embedding helpers

In [1]:
from langchain_huggingface import HuggingFaceEmbeddings
from sklearn.metrics.pairwise import cosine_similarity

from rag_project.constants import SENTENCE_TRANSFORMERS_MODEL_NAME

  from .autonotebook import tqdm as notebook_tqdm


| Cenário                           | `model_kwargs`                                                                 | `encode_kwargs`                                                                                  | Observações                                                                 |
|----------------------------------|--------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------|
| **CPU Local (notebook pessoal)** | `{"device": "cpu"}`                                                            | `{"batch_size": 16, "normalize_embeddings": True, "convert_to_numpy": True}`                     | Uso simples, consome menos memória. Mais lento, mas confiável.             |
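| **GPU Única (desenvolvimento)**  | `{"device": "cuda:0"}` | `{"batch_size": 32, "normalize_embeddings": True, "convert_to_numpy": True}` | Muito mais rápido. Ajustar `batch_size` conforme VRAM disponível. Passe `cache_folder="./.cache"` e `show_progress=True` diretamente ao construtor de `HuggingFaceEmbeddings`: dentro de `model_kwargs` ou `encode_kwargs` essas chaves colidem com argumentos que a classe já repassa e geram `TypeError`. |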
| **Servidor com múltiplas GPUs**  | `{"device": "cuda", "local_files_only": True}` | `{"batch_size": 64, "normalize_embeddings": True, "convert_to_numpy": True}` | Escalável. Defina GPU via `CUDA_VISIBLE_DEVICES`. Cache compartilhado útil: passe `cache_folder="/mnt/cache"` diretamente ao construtor de `HuggingFaceEmbeddings` (dentro de `model_kwargs` a chave fica duplicada e gera `TypeError`). |
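| **Deploy em Produção (API)**     | `{"device": "cpu", "local_files_only": True}` | `{"batch_size": 64, "normalize_embeddings": True, "convert_to_numpy": True}` | Sem progress bar: `show_progress=False` já é o padrão do construtor de `HuggingFaceEmbeddings` (não passe `show_progress_bar` em `encode_kwargs`, pois a chave fica duplicada e gera `TypeError`). Foca em previsibilidade. Cache local obrigatório. |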
| **Ambiente com pouca RAM (<8GB)**| `{"device": "cpu"}`                                                            | `{"batch_size": 8, "normalize_embeddings": True, "convert_to_numpy": True}`                      | Reduz `batch_size` para evitar *out of memory*. Mais lento, mas estável.   |
| **Treinamento/Fine-tuning**      | `{"device": "cuda:0", "trust_remote_code": True}` | `{"batch_size": 16, "convert_to_tensor": True, "normalize_embeddings": False}` | Retorna tensores (PyTorch). Não normaliza embeddings durante treino. Passe `cache_folder="./.cache"` diretamente ao construtor de `HuggingFaceEmbeddings` (dentro de `model_kwargs` a chave fica duplicada e gera `TypeError`). |


In [2]:
def init_embeddings(device: str = "cpu", batch_size: int = 8) -> HuggingFaceEmbeddings:
    """Build the HuggingFace embeddings model used by this notebook.

    The defaults reproduce the previous hard-coded configuration (CPU,
    batch size 8), so calling ``init_embeddings()`` with no arguments
    behaves exactly as before.

    Args:
        device: Device string forwarded as ``model_kwargs["device"]``,
            for example ``"cpu"`` or ``"cuda:0"``.
        batch_size: Value forwarded as ``encode_kwargs["batch_size"]``.
            Must be a positive integer.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured for
        ``SENTENCE_TRANSFORMERS_MODEL_NAME``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # Fail early with a clear message instead of a late error inside the encoder.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    return HuggingFaceEmbeddings(
        model_name=SENTENCE_TRANSFORMERS_MODEL_NAME,
        model_kwargs={"device": device},
        encode_kwargs={
            "batch_size": batch_size,
            # Ask the encoder for normalized embeddings as NumPy arrays.
            "normalize_embeddings": True,
            "convert_to_numpy": True,
        },
    )

In [3]:
def get_embedding(text: str, model: HuggingFaceEmbeddings) -> list:
    """Embed a single text and return its vector.

    The text is wrapped in a one-element batch for ``model.embed_documents``,
    and the first (only) entry of the returned batch is handed back.
    """
    return model.embed_documents([text])[0]

In [4]:
def compare_embeddings(text1: str, text2: str, model: HuggingFaceEmbeddings) -> float:
    """Return the cosine similarity between the embeddings of two texts.

    Each text is embedded separately through ``get_embedding`` (``text1``
    first, then ``text2``); the two vectors are scored with
    ``cosine_similarity`` and the single resulting entry is returned as a
    plain ``float``.
    """
    vec_a, vec_b = (get_embedding(item, model) for item in (text1, text2))
    # cosine_similarity works on 2-D inputs, so each vector is wrapped in a list.
    score_matrix = cosine_similarity([vec_a], [vec_b])
    return float(score_matrix[0][0])

# Model

In [5]:
# Display the model name imported from rag_project.constants, so the reader
# can see which model init_embeddings() will load.
SENTENCE_TRANSFORMERS_MODEL_NAME

'sentence-transformers/all-MiniLM-L6-v2'

In [6]:
# Build the embeddings model once; the test cells below reuse this instance.
model = init_embeddings()

# Test Vector

In [7]:
# Smoke test: embed one sample string, then print the raw vector and its length.
vector = get_embedding("ararara", model)
print(f"Vector for 'ararara': {vector}")
print(f"Vector length: {len(vector)}")

Vector for 'ararara': [-0.027658434584736824, 0.030850345268845558, -0.13303080201148987, 0.037607040256261826, -0.04273787513375282, -0.05145422741770744, 0.07088905572891235, 0.038816697895526886, 0.015591012313961983, -0.02220756560564041, -0.011408737860620022, -0.008057289756834507, -0.07453787326812744, 0.06826901435852051, 0.023302776739001274, -0.0012418633559718728, -0.01842336915433407, 0.051092181354761124, -0.054916270077228546, -0.008841163478791714, -0.015197123400866985, -0.04311760514974594, 0.04897509887814522, -0.04720856249332428, -0.05399173125624657, 0.1062997356057167, 0.04329536110162735, -0.005579659249633551, 0.09255728870630264, -0.09092096239328384, 0.016210751608014107, 0.018388843163847923, 0.07274480909109116, -0.012804645113646984, -0.0682835727930069, 0.05473126843571663, -0.10667318105697632, -0.01116626150906086, -0.01530216820538044, 0.034413717687129974, 0.003748370334506035, 0.03633091598749161, -0.08491870760917664, -0.060378074645996094, -0.121188

In [8]:
# Score one pair of sample words by the cosine similarity of their embeddings.
words = ("ararara", "guarana")
similarity = compare_embeddings(words[0], words[1], model)
print(f"Comparing ({words[0]}, {words[1]}): {similarity}")

Comparing (ararara, guarana): 0.4554741162008914
