Skip to content

Commit

Permalink
feat: Updated agents-api/agents_api/models/docs/embed_docs.py
Browse files Browse the repository at this point in the history
  • Loading branch information
sweep-ai[bot] committed Apr 13, 2024
1 parent 5e53710 commit bfde768
Showing 1 changed file with 16 additions and 0 deletions.
16 changes: 16 additions & 0 deletions agents-api/agents_api/models/docs/embed_docs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Module for embedding documents in the cozodb database. Contains functions to update document embeddings."""

from uuid import UUID

import pandas as pd
Expand All @@ -6,20 +8,34 @@
from ...clients.cozo import client


"""Embeds document snippets in the cozodb database.
Parameters:
doc_id (UUID): The unique identifier for the document.
snippet_indices (list[int]): Indices of the snippets in the document. Must have the same length as `embeddings`.
embeddings (list[list[float]]): Embedding vectors for the snippets, one per entry in `snippet_indices`.
client (CozoClient, optional): The Cozo client used to interact with the database. Defaults to the `client` instance imported from `...clients.cozo`.
Returns:
pd.DataFrame: A DataFrame containing the results of the embedding operation.
"""
def embed_docs_snippets_query(
doc_id: UUID,
snippet_indices: list[int],
embeddings: list[list[float]],
client: CozoClient = client,
) -> pd.DataFrame:
doc_id = str(doc_id)
# Ensure the number of snippet indices matches the number of embeddings.
assert len(snippet_indices) == len(embeddings)

# Prepare records for the database query by combining doc_id, snippet indices, and embeddings.
records = [
[doc_id, snippet_idx, embedding]
for snippet_idx, embedding in zip(snippet_indices, embeddings)
]

# Define the datalog query for updating document snippet embeddings in the database.
query = """
{
?[doc_id, snippet_idx, embedding] <- $records
Expand Down

0 comments on commit bfde768

Please sign in to comment.