Add similar search (#959)

* Add similar search * Update tests * Make extra args named only
sanders41 · Jun 11, 2024 · 32d55ba · 32d55ba
1 parent 9ff0c65
commit 32d55ba
Show file tree

Hide file tree

Showing 4 changed files with 167 additions and 1 deletion.
diff --git a/meilisearch_python_sdk/index.py b/meilisearch_python_sdk/index.py
@@ -20,7 +20,12 @@
 from meilisearch_python_sdk.errors import InvalidDocumentError, MeilisearchError
 from meilisearch_python_sdk.models.documents import DocumentsInfo
 from meilisearch_python_sdk.models.index import IndexStats
-from meilisearch_python_sdk.models.search import FacetSearchResults, Hybrid, SearchResults
+from meilisearch_python_sdk.models.search import (
+    FacetSearchResults,
+    Hybrid,
+    SearchResults,
+    SimilarSearchResults,
+)
 from meilisearch_python_sdk.models.settings import (
     Embedders,
     Faceting,
@@ -1216,6 +1221,75 @@ async def facet_search(
 
         return result
 
+    async def search_similar_documents(
+        self,
+        id: str,
+        *,
+        offset: int | None = None,
+        limit: int | None = None,
+        filter: str | None = None,
+        embedder: str = "default",
+        attributes_to_retrieve: list[str] | None = None,
+        show_ranking_score: bool = False,
+        show_ranking_score_details: bool = False,
+        ranking_score_threshold: float | None = None,
+    ) -> SimilarSearchResults:
+        """Search the index.
+
+        Args:
+            id: The id for the target document that is being used to find similar documents.
+            offset: Number of documents to skip. Defaults to 0.
+            limit: Maximum number of documents returned. Defaults to 20.
+            filter: Filter queries by an attribute value. Defaults to None.
+            embedder: The vector DB to use for the search.
+            attributes_to_retrieve: Attributes to display in the returned documents.
+                Defaults to ["*"].
+            show_ranking_score: If set to True the ranking score will be returned with each document
+                in the search. Defaults to False.
+            show_ranking_score_details: If set to True the ranking details will be returned with
+                each document in the search. Defaults to False.
+            ranking_score_threshold: If set, no document whose _rankingScore is under the
+                rankingScoreThreshold is returned. The value must be between 0.0 and 1.0. Defaults
+                to None.
+
+        Returns:
+
+            Results of the search
+
+        Raises:
+
+            MeilisearchCommunicationError: If there was an error communicating with the server.
+            MeilisearchApiError: If the Meilisearch API returned an error.
+
+        Examples:
+
+            >>> from meilisearch_python_sdk import AsyncClient
+            >>> async with AsyncClient("http://localhost.com", "masterKey") as client:
+            >>>     index = client.index("movies")
+            >>>     search_results = await index.search_similar_documents("123")
+        """
+        payload = {
+            "id": id,
+            "filter": filter,
+            "embedder": embedder,
+            "attributesToRetrieve": attributes_to_retrieve,
+            "showRankingScore": show_ranking_score,
+            "showRankingScoreDetails": show_ranking_score_details,
+            "rankingScoreThreshold": ranking_score_threshold,
+        }
+
+        if offset:
+            payload["offset"] = offset
+
+        if limit:
+            payload["limit"] = limit
+
+        response = await self._http_requests.post(
+            f"{self._base_url_with_uid}/similar", body=payload
+        )
+
+        return SimilarSearchResults(**response.json())
+
     async def get_document(self, document_id: str) -> JsonDict:
         """Get one document with given document identifier.
 
@@ -5214,6 +5288,73 @@ def facet_search(
 
         return result
 
+    def search_similar_documents(
+        self,
+        id: str,
+        *,
+        offset: int | None = None,
+        limit: int | None = None,
+        filter: str | None = None,
+        embedder: str = "default",
+        attributes_to_retrieve: list[str] | None = None,
+        show_ranking_score: bool = False,
+        show_ranking_score_details: bool = False,
+        ranking_score_threshold: float | None = None,
+    ) -> SimilarSearchResults:
+        """Search the index.
+
+        Args:
+            id: The id for the target document that is being used to find similar documents.
+            offset: Number of documents to skip. Defaults to 0.
+            limit: Maximum number of documents returned. Defaults to 20.
+            filter: Filter queries by an attribute value. Defaults to None.
+            embedder: The vector DB to use for the search.
+            attributes_to_retrieve: Attributes to display in the returned documents.
+                Defaults to ["*"].
+            show_ranking_score: If set to True the ranking score will be returned with each document
+                in the search. Defaults to False.
+            show_ranking_score_details: If set to True the ranking details will be returned with
+                each document in the search. Defaults to False.
+            ranking_score_threshold: If set, no document whose _rankingScore is under the
+                rankingScoreThreshold is returned. The value must be between 0.0 and 1.0. Defaults
+                to None.
+
+        Returns:
+
+            Results of the search
+
+        Raises:
+
+            MeilisearchCommunicationError: If there was an error communicating with the server.
+            MeilisearchApiError: If the Meilisearch API returned an error.
+
+        Examples:
+
+            >>> from meilisearch_python_sdk import Client
+            >>> client = Client("http://localhost.com", "masterKey")
+            >>> index = client.index("movies")
+            >>> search_results = index.search_similar_documents("123")
+        """
+        payload = {
+            "id": id,
+            "filter": filter,
+            "embedder": embedder,
+            "attributesToRetrieve": attributes_to_retrieve,
+            "showRankingScore": show_ranking_score,
+            "showRankingScoreDetails": show_ranking_score_details,
+            "rankingScoreThreshold": ranking_score_threshold,
+        }
+
+        if offset:
+            payload["offset"] = offset
+
+        if limit:
+            payload["limit"] = limit
+
+        response = self._http_requests.post(f"{self._base_url_with_uid}/similar", body=payload)
+
+        return SimilarSearchResults(**response.json())
+
     def get_document(self, document_id: str) -> JsonDict:
         """Get one document with given document identifier.
 

diff --git a/meilisearch_python_sdk/models/search.py b/meilisearch_python_sdk/models/search.py
@@ -94,3 +94,12 @@ class SearchResults(CamelBase):
 
 class SearchResultsWithUID(SearchResults):
     index_uid: str
+
+
+class SimilarSearchResults(CamelBase):  # built from the JSON body of an index's /similar response
+    hits: List[JsonDict]
+    id: str
+    processing_time_ms: int
+    limit: Optional[int] = None
+    offset: Optional[int] = None
+    estimated_total_hits: Optional[int] = None
diff --git a/tests/test_async_search.py b/tests/test_async_search.py
@@ -451,3 +451,11 @@ async def test_multi_search_ranking_score_threshold(async_client, async_index_wi
         ]
     )
     assert len(result[0].hits) > 0
+
+
+@pytest.mark.parametrize("limit, offset", ((1, 1), (None, None)))  # explicit and omitted paging
+@pytest.mark.usefixtures("enable_vector_search")
+async def test_similar_search(limit, offset, async_index_with_documents_and_vectors):
+    index = await async_index_with_documents_and_vectors()
+    response = await index.search_similar_documents("287947", limit=limit, offset=offset)
+    assert len(response.hits) >= 1  # at least one similar document must be returned
diff --git a/tests/test_search.py b/tests/test_search.py
@@ -452,3 +452,11 @@ def test_multi_search_ranking_score_threshold(client, index_with_documents):
         ]
     )
     assert len(result[0].hits) > 0
+
+
+@pytest.mark.parametrize("limit, offset", ((1, 1), (None, None)))  # explicit and omitted paging
+@pytest.mark.usefixtures("enable_vector_search")
+def test_similar_search(limit, offset, index_with_documents_and_vectors):
+    index = index_with_documents_and_vectors()
+    response = index.search_similar_documents("287947", limit=limit, offset=offset)
+    assert len(response.hits) >= 1  # at least one similar document must be returned