In [5]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv

In [6]:
load_dotenv()

True

In [7]:
class GoogleEmbeddingFunction:
    """Classe para adaptar GoogleGenerativeAIEmbeddings ao formato esperado pelo LangChain."""
    def __init__(self, model_name="models/text-embedding-004"):
        self.model = GoogleGenerativeAIEmbeddings(model=model_name)

    def embed_documents(self, texts):
        """Gera embeddings para múltiplos documentos."""
        return [self.model.embed_query(text) for text in texts]

    def embed_query(self, text):
        """Gera embeddings para uma única consulta."""
        return self.model.embed_query(text)
    


In [15]:
google = GoogleEmbeddingFunction()

In [37]:

documents = [
    "4. Retrieval and Generation: Retrieve",
    "Now let’s write the actual application logic. We want to create a simple application that takes a user question, searches for documents relevant to that question, passes the retrieved documents and initial question to a model, and returns an answer.",
    "First we need to define our logic for searching over documents. LangChain defines a Retriever interface which wraps an index that can return relevant Documents given a string query.",
    "The most common type of Retriever is the VectorStoreRetriever, which uses the similarity search capabilities of a vector store to facilitate retrieval. Any VectorStore can easily be turned into a Retriever with VectorStore.as_retriever():"
]

In [38]:
embeddings_docs=google.embed_documents(documents)

In [47]:
query = "the most commom type of Retriever and search capabilities"

In [48]:
embedding_query = google.embed_query(query)

In [49]:
def cosine_similarity(vec1, vec2):
    """Calcula a similaridade do cosseno entre dois vetores."""
    vec1 = [float(x) for x in vec1]  # Garante que todos os elementos sejam float
    vec2 = [float(x) for x in vec2]

    dot_product = sum(a * b for a, b in zip(vec1, vec2))
    norm_vec1 = sum(a ** 2 for a in vec1) ** 0.5
    norm_vec2 = sum(b ** 2 for b in vec2) ** 0.5

    if norm_vec1 == 0 or norm_vec2 == 0:
        return 0  # Evita divisão por zero

    return dot_product / (norm_vec1 * norm_vec2)

In [50]:
def compare_embeddings(reference_embedding, embeddings_list):
    """Compara um embedding de referência com uma lista de embeddings e retorna os scores."""
    scores = [cosine_similarity(reference_embedding, emb) for emb in embeddings_list]
    return scores


print(compare_embeddings(embedding_query, embeddings_docs))

[0.5122531594068312, 0.35609404988662946, 0.4548649893726172, 0.5556514022787851]
