Skip to content

Commit

Permalink
Add similar search (#959)
Browse files Browse the repository at this point in the history
* Add similar search

* Update tests

* Make extra args named only
  • Loading branch information
sanders41 authored Jun 11, 2024
1 parent 9ff0c65 commit 32d55ba
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 1 deletion.
143 changes: 142 additions & 1 deletion meilisearch_python_sdk/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@
from meilisearch_python_sdk.errors import InvalidDocumentError, MeilisearchError
from meilisearch_python_sdk.models.documents import DocumentsInfo
from meilisearch_python_sdk.models.index import IndexStats
from meilisearch_python_sdk.models.search import FacetSearchResults, Hybrid, SearchResults
from meilisearch_python_sdk.models.search import (
FacetSearchResults,
Hybrid,
SearchResults,
SimilarSearchResults,
)
from meilisearch_python_sdk.models.settings import (
Embedders,
Faceting,
Expand Down Expand Up @@ -1216,6 +1221,75 @@ async def facet_search(

return result

async def search_similar_documents(
    self,
    id: str,
    *,
    offset: int | None = None,
    limit: int | None = None,
    filter: str | None = None,
    embedder: str = "default",
    attributes_to_retrieve: list[str] | None = None,
    show_ranking_score: bool = False,
    show_ranking_score_details: bool = False,
    ranking_score_threshold: float | None = None,
) -> SimilarSearchResults:
    """Search the index for documents similar to a target document.

    Args:
        id: The id for the target document that is being used to find similar documents.
        offset: Number of documents to skip. If None the value is not sent and the server
            default (0) applies. Defaults to None.
        limit: Maximum number of documents returned. If None the value is not sent and the
            server default (20) applies. An explicit 0 is sent as-is. Defaults to None.
        filter: Filter queries by an attribute value. Defaults to None.
        embedder: Name of the embedder to use for the search. Defaults to "default".
        attributes_to_retrieve: Attributes to display in the returned documents.
            Defaults to ["*"].
        show_ranking_score: If set to True the ranking score will be returned with each document
            in the search. Defaults to False.
        show_ranking_score_details: If set to True the ranking details will be returned with
            each document in the search. Defaults to False.
        ranking_score_threshold: If set, no document whose _rankingScore is under the
            rankingScoreThreshold is returned. The value must be between 0.0 and 1.0. Defaults
            to None.

    Returns:
        Results of the similar documents search.

    Raises:
        MeilisearchCommunicationError: If there was an error communicating with the server.
        MeilisearchApiError: If the Meilisearch API returned an error.

    Examples:
        >>> from meilisearch_python_sdk import AsyncClient
        >>> async with AsyncClient("http://localhost.com", "masterKey") as client:
        >>>     index = client.index("movies")
        >>>     search_results = await index.search_similar_documents("123")
    """
    payload = {
        "id": id,
        "filter": filter,
        "embedder": embedder,
        "attributesToRetrieve": attributes_to_retrieve,
        "showRankingScore": show_ranking_score,
        "showRankingScoreDetails": show_ranking_score_details,
        "rankingScoreThreshold": ranking_score_threshold,
    }

    # Compare against None rather than relying on truthiness: 0 is a value the caller
    # may pass deliberately (e.g. limit=0) and must not be silently dropped.
    if offset is not None:
        payload["offset"] = offset

    if limit is not None:
        payload["limit"] = limit

    response = await self._http_requests.post(
        f"{self._base_url_with_uid}/similar", body=payload
    )

    return SimilarSearchResults(**response.json())

async def get_document(self, document_id: str) -> JsonDict:
"""Get one document with given document identifier.
Expand Down Expand Up @@ -5214,6 +5288,73 @@ def facet_search(

return result

def search_similar_documents(
    self,
    id: str,
    *,
    offset: int | None = None,
    limit: int | None = None,
    filter: str | None = None,
    embedder: str = "default",
    attributes_to_retrieve: list[str] | None = None,
    show_ranking_score: bool = False,
    show_ranking_score_details: bool = False,
    ranking_score_threshold: float | None = None,
) -> SimilarSearchResults:
    """Search the index for documents similar to a target document.

    Args:
        id: The id for the target document that is being used to find similar documents.
        offset: Number of documents to skip. If None the value is not sent and the server
            default (0) applies. Defaults to None.
        limit: Maximum number of documents returned. If None the value is not sent and the
            server default (20) applies. An explicit 0 is sent as-is. Defaults to None.
        filter: Filter queries by an attribute value. Defaults to None.
        embedder: Name of the embedder to use for the search. Defaults to "default".
        attributes_to_retrieve: Attributes to display in the returned documents.
            Defaults to ["*"].
        show_ranking_score: If set to True the ranking score will be returned with each document
            in the search. Defaults to False.
        show_ranking_score_details: If set to True the ranking details will be returned with
            each document in the search. Defaults to False.
        ranking_score_threshold: If set, no document whose _rankingScore is under the
            rankingScoreThreshold is returned. The value must be between 0.0 and 1.0. Defaults
            to None.

    Returns:
        Results of the similar documents search.

    Raises:
        MeilisearchCommunicationError: If there was an error communicating with the server.
        MeilisearchApiError: If the Meilisearch API returned an error.

    Examples:
        >>> from meilisearch_python_sdk import Client
        >>> client = Client("http://localhost.com", "masterKey")
        >>> index = client.index("movies")
        >>> search_results = index.search_similar_documents("123")
    """
    payload = {
        "id": id,
        "filter": filter,
        "embedder": embedder,
        "attributesToRetrieve": attributes_to_retrieve,
        "showRankingScore": show_ranking_score,
        "showRankingScoreDetails": show_ranking_score_details,
        "rankingScoreThreshold": ranking_score_threshold,
    }

    # Compare against None rather than relying on truthiness: 0 is a value the caller
    # may pass deliberately (e.g. limit=0) and must not be silently dropped.
    if offset is not None:
        payload["offset"] = offset

    if limit is not None:
        payload["limit"] = limit

    response = self._http_requests.post(f"{self._base_url_with_uid}/similar", body=payload)

    return SimilarSearchResults(**response.json())

def get_document(self, document_id: str) -> JsonDict:
"""Get one document with given document identifier.
Expand Down
9 changes: 9 additions & 0 deletions meilisearch_python_sdk/models/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,12 @@ class SearchResults(CamelBase):

class SearchResultsWithUID(SearchResults):
    """``SearchResults`` extended with an ``index_uid`` field."""

    # presumably the uid of the index these results came from -- TODO confirm against callers
    index_uid: str


class SimilarSearchResults(CamelBase):
    """Result model for a similar-documents search.

    ``search_similar_documents`` builds this from the JSON body returned by the
    index's ``/similar`` route.
    """

    # Documents returned by the search.
    hits: List[JsonDict]
    # presumably the id of the target document the search was run for -- TODO confirm
    id: str
    # NOTE(review): looks like the processing time in milliseconds -- confirm
    processing_time_ms: int
    # Optional paging / count fields; each defaults to None.
    limit: Optional[int] = None
    offset: Optional[int] = None
    estimated_total_hits: Optional[int] = None
8 changes: 8 additions & 0 deletions tests/test_async_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,3 +451,11 @@ async def test_multi_search_ranking_score_threshold(async_client, async_index_wi
]
)
assert len(result[0].hits) > 0


@pytest.mark.parametrize("limit, offset", ((1, 1), (None, None)))
@pytest.mark.usefixtures("enable_vector_search")
async def test_similar_search(limit, offset, async_index_with_documents_and_vectors):
    """A similar search returns at least one hit, with and without explicit paging."""
    populated_index = await async_index_with_documents_and_vectors()
    similar = await populated_index.search_similar_documents(
        "287947", offset=offset, limit=limit
    )
    assert len(similar.hits) > 0
8 changes: 8 additions & 0 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,3 +452,11 @@ def test_multi_search_ranking_score_threshold(client, index_with_documents):
]
)
assert len(result[0].hits) > 0


@pytest.mark.parametrize("limit, offset", ((1, 1), (None, None)))
@pytest.mark.usefixtures("enable_vector_search")
def test_similar_search(limit, offset, index_with_documents_and_vectors):
    """A similar search returns at least one hit, with and without explicit paging."""
    populated_index = index_with_documents_and_vectors()
    similar = populated_index.search_similar_documents(
        "287947", offset=offset, limit=limit
    )
    assert len(similar.hits) > 0

0 comments on commit 32d55ba

Please sign in to comment.