diff --git a/meilisearch_python_sdk/_version.py b/meilisearch_python_sdk/_version.py index 267fcbda..e80dd862 100644 --- a/meilisearch_python_sdk/_version.py +++ b/meilisearch_python_sdk/_version.py @@ -1 +1 @@ -VERSION = "2.10.1" +VERSION = "2.11.0" diff --git a/meilisearch_python_sdk/index.py b/meilisearch_python_sdk/index.py index 3f564134..d67c8e69 100644 --- a/meilisearch_python_sdk/index.py +++ b/meilisearch_python_sdk/index.py @@ -6,7 +6,7 @@ from datetime import datetime from functools import cached_property, partial from pathlib import Path -from typing import TYPE_CHECKING, Any, Generator, MutableMapping, Sequence +from typing import TYPE_CHECKING, Any, Generator, Literal, MutableMapping, Sequence from urllib.parse import urlencode from warnings import warn @@ -20,7 +20,12 @@ from meilisearch_python_sdk.errors import InvalidDocumentError, MeilisearchError from meilisearch_python_sdk.models.documents import DocumentsInfo from meilisearch_python_sdk.models.index import IndexStats -from meilisearch_python_sdk.models.search import FacetSearchResults, Hybrid, SearchResults +from meilisearch_python_sdk.models.search import ( + FacetSearchResults, + Hybrid, + SearchResults, + SimilarSearchResults, +) from meilisearch_python_sdk.models.settings import ( Embedders, Faceting, @@ -737,6 +742,7 @@ async def search( attributes_to_search_on: list[str] | None = None, show_ranking_score: bool = False, show_ranking_score_details: bool = False, + ranking_score_threshold: float | None = None, vector: list[float] | None = None, hybrid: Hybrid | None = None, ) -> SearchResults: @@ -777,6 +783,9 @@ async def search( Because this feature is experimental it may be removed or updated causing breaking changes in this library without a major version bump so use with caution. This feature became stable in Meiliseach v1.7.0. + ranking_score_threshold: If set, no document whose _rankingScore is under the + rankingScoreThreshold is returned. The value must be between 0.0 and 1.0. Defaults + to None. vector: List of vectors for vector search. Defaults to None. Note: This parameter can only be used with Meilisearch >= v1.3.0, and is experimental in Meilisearch v1.3.0. In order to use this feature in Meilisearch v1.3.0 you first need to enable the @@ -808,6 +817,9 @@ async def search( >>> index = client.index("movies") >>> search_results = await index.search("Tron") """ + if ranking_score_threshold: + _validate_ranking_score_threshold(ranking_score_threshold) + body = _process_search_parameters( q=query, offset=offset, @@ -831,6 +843,7 @@ async def search( show_ranking_score_details=show_ranking_score_details, vector=vector, hybrid=hybrid, + ranking_score_threshold=ranking_score_threshold, ) search_url = f"{self._base_url_with_uid}/search" @@ -981,12 +994,13 @@ async def facet_search( highlight_pre_tag: str = "", highlight_post_tag: str = "", crop_marker: str = "...", - matching_strategy: str = "all", + matching_strategy: Literal["all", "last", "frequency"] = "all", hits_per_page: int | None = None, page: int | None = None, attributes_to_search_on: list[str] | None = None, show_ranking_score: bool = False, show_ranking_score_details: bool = False, + ranking_score_threshold: float | None = None, vector: list[float] | None = None, ) -> FacetSearchResults: """Search the index. @@ -1028,6 +1042,9 @@ async def facet_search( Because this feature is experimental it may be removed or updated causing breaking changes in this library without a major version bump so use with caution. This feature became stable in Meiliseach v1.7.0. + ranking_score_threshold: If set, no document whose _rankingScore is under the + rankingScoreThreshold is returned. The value must be between 0.0 and 1.0. Defaults + to None. vector: List of vectors for vector search. Defaults to None. Note: This parameter can only be used with Meilisearch >= v1.3.0, and is experimental in Meilisearch v1.3.0. In order to use this feature in Meilisearch v1.3.0 you first need to enable the @@ -1056,6 +1073,9 @@ async def facet_search( >>> facet_query="Sci-fi" >>> ) """ + if ranking_score_threshold: + _validate_ranking_score_threshold(ranking_score_threshold) + body = _process_search_parameters( q=query, facet_name=facet_name, @@ -1079,6 +1099,7 @@ async def facet_search( attributes_to_search_on=attributes_to_search_on, show_ranking_score=show_ranking_score, show_ranking_score_details=show_ranking_score_details, + ranking_score_threshold=ranking_score_threshold, vector=vector, ) search_url = f"{self._base_url_with_uid}/facet-search" @@ -1107,6 +1128,7 @@ async def facet_search( attributes_to_search_on=attributes_to_search_on, show_ranking_score=show_ranking_score, show_ranking_score_details=show_ranking_score_details, + ranking_score_threshold=ranking_score_threshold, vector=vector, ) @@ -1138,6 +1160,7 @@ async def facet_search( attributes_to_search_on=attributes_to_search_on, show_ranking_score=show_ranking_score, show_ranking_score_details=show_ranking_score_details, + ranking_score_threshold=ranking_score_threshold, vector=vector, ) ) @@ -1180,6 +1203,7 @@ async def facet_search( attributes_to_search_on=attributes_to_search_on, show_ranking_score=show_ranking_score, show_ranking_score_details=show_ranking_score_details, + ranking_score_threshold=ranking_score_threshold, vector=vector, ) ) @@ -1208,6 +1232,75 @@ async def facet_search( return result + async def search_similar_documents( + self, + id: str, + *, + offset: int | None = None, + limit: int | None = None, + filter: str | None = None, + embedder: str = "default", + attributes_to_retrieve: list[str] | None = None, + show_ranking_score: bool = False, + show_ranking_score_details: bool = False, + ranking_score_threshold: float | None = None, + ) -> SimilarSearchResults: + """Search the index. + + Args: + id: The id for the target document that is being used to find similar documents. + offset: Number of documents to skip. Defaults to 0. + limit: Maximum number of documents returned. Defaults to 20. + filter: Filter queries by an attribute value. Defaults to None. + embedder: The vector DB to use for the search. + attributes_to_retrieve: Attributes to display in the returned documents. + Defaults to ["*"]. + show_ranking_score: If set to True the ranking score will be returned with each document + in the search. Defaults to False. + show_ranking_score_details: If set to True the ranking details will be returned with + each document in the search. Defaults to False. + ranking_score_threshold: If set, no document whose _rankingScore is under the + rankingScoreThreshold is returned. The value must be between 0.0 and 1.0. Defaults + to None. + + Returns: + + Results of the search + + Raises: + + MeilisearchCommunicationError: If there was an error communicating with the server. + MeilisearchApiError: If the Meilisearch API returned an error. + + Examples: + + >>> from meilisearch_python_sdk import AsyncClient + >>> async with AsyncClient("http://localhost.com", "masterKey") as client: + >>> index = client.index("movies") + >>> search_results = await index.search_similar_documents("123") + """ + payload = { + "id": id, + "filter": filter, + "embedder": embedder, + "attributesToRetrieve": attributes_to_retrieve, + "showRankingScore": show_ranking_score, + "showRankingScoreDetails": show_ranking_score_details, + "rankingScoreThreshold": ranking_score_threshold, + } + + if offset: + payload["offset"] = offset + + if limit: + payload["limit"] = limit + + response = await self._http_requests.post( + f"{self._base_url_with_uid}/similar", body=payload + ) + + return SimilarSearchResults(**response.json()) + async def get_document(self, document_id: str) -> JsonDict: """Get one document with given document identifier. @@ -5008,12 +5101,13 @@ def search( highlight_pre_tag: str = "", highlight_post_tag: str = "", crop_marker: str = "...", - matching_strategy: str = "all", + matching_strategy: Literal["all", "last", "frequency"] = "all", hits_per_page: int | None = None, page: int | None = None, attributes_to_search_on: list[str] | None = None, show_ranking_score: bool = False, show_ranking_score_details: bool = False, + ranking_score_threshold: float | None = None, vector: list[float] | None = None, hybrid: Hybrid | None = None, ) -> SearchResults: @@ -5054,6 +5148,9 @@ def search( Because this feature is experimental it may be removed or updated causing breaking changes in this library without a major version bump so use with caution. This feature became stable in Meiliseach v1.7.0. + ranking_score_threshold: If set, no document whose _rankingScore is under the + rankingScoreThreshold is returned. The value must be between 0.0 and 1.0. Defaults + to None. vector: List of vectors for vector search. Defaults to None. Note: This parameter can only be used with Meilisearch >= v1.3.0, and is experimental in Meilisearch v1.3.0. In order to use this feature in Meilisearch v1.3.0 you first need to enable the @@ -5085,6 +5182,9 @@ def search( >>> index = client.index("movies") >>> search_results = index.search("Tron") """ + if ranking_score_threshold: + _validate_ranking_score_threshold(ranking_score_threshold) + body = _process_search_parameters( q=query, offset=offset, @@ -5108,6 +5208,7 @@ def search( show_ranking_score_details=show_ranking_score_details, vector=vector, hybrid=hybrid, + ranking_score_threshold=ranking_score_threshold, ) if self._pre_search_plugins: @@ -5172,6 +5273,7 @@ def facet_search( attributes_to_search_on: list[str] | None = None, show_ranking_score: bool = False, show_ranking_score_details: bool = False, + ranking_score_threshold: float | None = None, vector: list[float] | None = None, ) -> FacetSearchResults: """Search the index. @@ -5213,6 +5315,9 @@ def facet_search( Because this feature is experimental it may be removed or updated causing breaking changes in this library without a major version bump so use with caution. This feature became stable in Meiliseach v1.7.0. + ranking_score_threshold: If set, no document whose _rankingScore is under the + rankingScoreThreshold is returned. The value must be between 0.0 and 1.0. Defaults + to None. vector: List of vectors for vector search. Defaults to None. Note: This parameter can only be used with Meilisearch >= v1.3.0, and is experimental in Meilisearch v1.3.0. In order to use this feature in Meilisearch v1.3.0 you first need to enable the @@ -5241,6 +5346,9 @@ def facet_search( >>> facet_query="Sci-fi" >>> ) """ + if ranking_score_threshold: + _validate_ranking_score_threshold(ranking_score_threshold) + body = _process_search_parameters( q=query, facet_name=facet_name, @@ -5264,6 +5372,7 @@ def facet_search( attributes_to_search_on=attributes_to_search_on, show_ranking_score=show_ranking_score, show_ranking_score_details=show_ranking_score_details, + ranking_score_threshold=ranking_score_threshold, vector=vector, ) @@ -5291,6 +5400,7 @@ def facet_search( attributes_to_search_on=attributes_to_search_on, show_ranking_score=show_ranking_score, show_ranking_score_details=show_ranking_score_details, + ranking_score_threshold=ranking_score_threshold, vector=vector, ) @@ -5303,6 +5413,73 @@ def facet_search( return result + def search_similar_documents( + self, + id: str, + *, + offset: int | None = None, + limit: int | None = None, + filter: str | None = None, + embedder: str = "default", + attributes_to_retrieve: list[str] | None = None, + show_ranking_score: bool = False, + show_ranking_score_details: bool = False, + ranking_score_threshold: float | None = None, + ) -> SimilarSearchResults: + """Search the index. + + Args: + id: The id for the target document that is being used to find similar documents. + offset: Number of documents to skip. Defaults to 0. + limit: Maximum number of documents returned. Defaults to 20. + filter: Filter queries by an attribute value. Defaults to None. + embedder: The vector DB to use for the search. + attributes_to_retrieve: Attributes to display in the returned documents. + Defaults to ["*"]. + show_ranking_score: If set to True the ranking score will be returned with each document + in the search. Defaults to False. + show_ranking_score_details: If set to True the ranking details will be returned with + each document in the search. Defaults to False. + ranking_score_threshold: If set, no document whose _rankingScore is under the + rankingScoreThreshold is returned. The value must be between 0.0 and 1.0. Defaults + to None. + + Returns: + + Results of the search + + Raises: + + MeilisearchCommunicationError: If there was an error communicating with the server. + MeilisearchApiError: If the Meilisearch API returned an error. + + Examples: + + >>> from meilisearch_python_sdk import Client + >>> client = Client("http://localhost.com", "masterKey") + >>> index = client.index("movies") + >>> search_results = index.search_similar_documents("123") + """ + payload = { + "id": id, + "filter": filter, + "embedder": embedder, + "attributesToRetrieve": attributes_to_retrieve, + "showRankingScore": show_ranking_score, + "showRankingScoreDetails": show_ranking_score_details, + "rankingScoreThreshold": ranking_score_threshold, + } + + if offset: + payload["offset"] = offset + + if limit: + payload["limit"] = limit + + response = self._http_requests.post(f"{self._base_url_with_uid}/similar", body=payload) + + return SimilarSearchResults(**response.json()) + def get_document(self, document_id: str) -> JsonDict: """Get one document with given document identifier. @@ -8192,6 +8369,7 @@ def _process_search_parameters( attributes_to_search_on: list[str] | None = None, show_ranking_score: bool = False, show_ranking_score_details: bool = False, + ranking_score_threshold: float | None = None, vector: list[float] | None = None, hybrid: Hybrid | None = None, ) -> JsonDict: @@ -8218,6 +8396,7 @@ def _process_search_parameters( "page": page, "attributesToSearchOn": attributes_to_search_on, "showRankingScore": show_ranking_score, + "rankingScoreThreshold": ranking_score_threshold, } if facet_name: @@ -8311,3 +8490,8 @@ def _embedder_json_to_settings_model( # pragma: no cover def _validate_file_type(file_path: Path) -> None: if file_path.suffix not in (".json", ".csv", ".ndjson"): raise MeilisearchError("File must be a json, ndjson, or csv file") + + +def _validate_ranking_score_threshold(ranking_score_threshold: float) -> None: + if not 0.0 <= ranking_score_threshold <= 1.0: + raise MeilisearchError("ranking_score_threshold must be between 0.0 and 1.0") diff --git a/meilisearch_python_sdk/models/search.py b/meilisearch_python_sdk/models/search.py index ccc7085a..65958f72 100644 --- a/meilisearch_python_sdk/models/search.py +++ b/meilisearch_python_sdk/models/search.py @@ -1,8 +1,11 @@ from typing import List, Optional +from warnings import warn +import pydantic from camel_converter.pydantic_base import CamelBase -from pydantic import Field +from meilisearch_python_sdk._utils import is_pydantic_2 +from meilisearch_python_sdk.errors import MeilisearchError from meilisearch_python_sdk.types import Filter, JsonDict @@ -24,7 +27,7 @@ class Hybrid(CamelBase): class SearchParams(CamelBase): index_uid: str - query: Optional[str] = Field(None, alias="q") + query: Optional[str] = pydantic.Field(None, alias="q") offset: int = 0 limit: int = 20 filter: Optional[Filter] = None @@ -44,9 +47,35 @@ class SearchParams(CamelBase): attributes_to_search_on: Optional[List[str]] = None show_ranking_score: bool = False show_ranking_score_details: bool = False + ranking_score_threshold: Optional[float] = None vector: Optional[List[float]] = None hybrid: Optional[Hybrid] = None + if is_pydantic_2(): + + @pydantic.field_validator("ranking_score_threshold", mode="before") # type: ignore[attr-defined] + @classmethod + def validate_ranking_score_threshold(cls, v: Optional[float]) -> Optional[float]: + if v and not 0.0 <= v <= 1.0: + raise MeilisearchError("ranking_score_threshold must be between 0.0 and 1.0") + + return v + + else: # pragma: no cover + warn( + "The use of Pydantic less than version 2 is depreciated and will be removed in a future release", + DeprecationWarning, + stacklevel=2, + ) + + @pydantic.validator("ranking_score_threshold", pre=True) + @classmethod + def validate_expires_at(cls, v: Optional[float]) -> Optional[float]: + if v and not 0.0 <= v <= 1.0: + raise MeilisearchError("ranking_score_threshold must be between 0.0 and 1.0") + + return v + class SearchResults(CamelBase): hits: List[JsonDict] @@ -65,3 +94,12 @@ class SearchResults(CamelBase): class SearchResultsWithUID(SearchResults): index_uid: str + + +class SimilarSearchResults(CamelBase): + hits: List[JsonDict] + id: str + processing_time_ms: int + limit: Optional[int] = None + offset: Optional[int] = None + estimated_total_hits: Optional[int] = None diff --git a/pyproject.toml b/pyproject.toml index 168083b9..c48734b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "meilisearch-python-sdk" -version = "2.10.1" +version = "2.11.0" description = "A Python client providing both async and sync support for the Meilisearch API" authors = ["Paul Sanders "] license = "MIT" diff --git a/tests/test_async_search.py b/tests/test_async_search.py index d2312616..d5f5fa44 100644 --- a/tests/test_async_search.py +++ b/tests/test_async_search.py @@ -4,7 +4,7 @@ from meilisearch_python_sdk import AsyncClient from meilisearch_python_sdk._task import async_wait_for_task -from meilisearch_python_sdk.errors import MeilisearchApiError +from meilisearch_python_sdk.errors import MeilisearchApiError, MeilisearchError from meilisearch_python_sdk.models.search import Hybrid, SearchParams @@ -113,12 +113,12 @@ async def test_custom_search_params_with_simple_string(async_index_with_document async def test_custom_search_params_with_string_list(async_index_with_documents): index = await async_index_with_documents() response = await index.search( - "a", + "Shazam!", limit=5, attributes_to_retrieve=["title", "overview"], attributes_to_highlight=["title"], ) - assert len(response.hits) == 5 + assert len(response.hits) == 1 assert "title" in response.hits[0] assert "overview" in response.hits[0] assert "release_date" not in response.hits[0] @@ -404,3 +404,82 @@ async def test_custom_facet_search(async_index_with_documents): ) assert response.facet_hits[0].value == "cartoon" assert response.facet_hits[0].count == 1 + + +@pytest.mark.parametrize("ranking_score_threshold", (-0.1, 1.1)) +async def test_search_invalid_ranking_score_threshold( + ranking_score_threshold, async_index_with_documents +): + index = await async_index_with_documents() + with pytest.raises(MeilisearchError) as e: + await index.search("", ranking_score_threshold=ranking_score_threshold) + assert "ranking_score_threshold must be between 0.0 and 1.0" in str(e.value) + + +async def test_search_ranking_score_threshold(async_index_with_documents_and_vectors): + index = await async_index_with_documents_and_vectors() + result = await index.search("", ranking_score_threshold=0.5) + assert len(result.hits) > 0 + + +@pytest.mark.parametrize("ranking_score_threshold", (-0.1, 1.1)) +async def test_multi_search_invalid_ranking_score_threshold( + ranking_score_threshold, async_client, async_index_with_documents +): + index1 = await async_index_with_documents() + with pytest.raises(MeilisearchError) as e: + await async_client.multi_search( + [ + SearchParams( + index_uid=index1.uid, query="", ranking_score_threshold=ranking_score_threshold + ), + ] + ) + assert "ranking_score_threshold must be between 0.0 and 1.0" in str(e.value) + + +async def test_multi_search_ranking_score_threshold(async_client, async_index_with_documents): + index1 = await async_index_with_documents() + result = await async_client.multi_search( + [ + SearchParams(index_uid=index1.uid, query="", ranking_score_threshold=0.5), + ] + ) + assert len(result[0].hits) > 0 + + +async def test_facet_search_ranking_score_threshold(async_index_with_documents_and_vectors): + index = await async_index_with_documents_and_vectors() + update = await index.update_filterable_attributes(["genre"]) + await async_wait_for_task(index.http_client, update.task_uid) + response = await index.facet_search( + "How to Train Your Dragon", + facet_name="genre", + facet_query="cartoon", + ranking_score_threshold=0.5, + ) + assert len(response.facet_hits) > 0 + + +@pytest.mark.parametrize("ranking_score_threshold", (-0.1, 1.1)) +async def test_facet_search_invalid_ranking_score_threshold( + ranking_score_threshold, async_index_with_documents_and_vectors +): + index = await async_index_with_documents_and_vectors() + update = await index.update_filterable_attributes(["genre"]) + await async_wait_for_task(index.http_client, update.task_uid) + with pytest.raises(MeilisearchError) as e: + await index.facet_search( + "How to Train Your Dragon", + facet_name="genre", + facet_query="cartoon", + ranking_score_threshold=ranking_score_threshold, + ) + assert "ranking_score_threshold must be between 0.0 and 1.0" in str(e.value) + + +@pytest.mark.parametrize("limit, offset", ((1, 1), (None, None))) +async def test_similar_search(limit, offset, async_index_with_documents_and_vectors): + index = await async_index_with_documents_and_vectors() + response = await index.search_similar_documents("287947", limit=limit, offset=offset) + assert len(response.hits) >= 1 diff --git a/tests/test_search.py b/tests/test_search.py index b430e573..b37e2ca9 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -4,7 +4,7 @@ from meilisearch_python_sdk import Client from meilisearch_python_sdk._task import wait_for_task -from meilisearch_python_sdk.errors import MeilisearchApiError +from meilisearch_python_sdk.errors import MeilisearchApiError, MeilisearchError from meilisearch_python_sdk.models.search import Hybrid, SearchParams @@ -113,12 +113,12 @@ def test_custom_search_params_with_simple_string(index_with_documents): def test_custom_search_params_with_string_list(index_with_documents): index = index_with_documents() response = index.search( - "a", + "Shazam!", limit=5, attributes_to_retrieve=["title", "overview"], attributes_to_highlight=["title"], ) - assert len(response.hits) == 5 + assert len(response.hits) == 1 assert "title" in response.hits[0] assert "overview" in response.hits[0] assert "release_date" not in response.hits[0] @@ -405,3 +405,82 @@ def test_custom_facet_search(index_with_documents): ) assert response.facet_hits[0].value == "cartoon" assert response.facet_hits[0].count == 1 + + +@pytest.mark.parametrize("ranking_score_threshold", (-0.1, 1.1)) +def test_search_invalid_ranking_score_threshold( + ranking_score_threshold, index_with_documents_and_vectors +): + index = index_with_documents_and_vectors() + with pytest.raises(MeilisearchError) as e: + index.search("", ranking_score_threshold=ranking_score_threshold) + assert "ranking_score_threshold must be between 0.0 and 1.0" in str(e.value) + + +def test_search_ranking_score_threshold(index_with_documents_and_vectors): + index = index_with_documents_and_vectors() + result = index.search("", ranking_score_threshold=0.5) + assert len(result.hits) > 0 + + +@pytest.mark.parametrize("ranking_score_threshold", (-0.1, 1.1)) +def test_multi_search_invalid_ranking_score_threshold( + ranking_score_threshold, client, index_with_documents +): + index1 = index_with_documents() + with pytest.raises(MeilisearchError) as e: + client.multi_search( + [ + SearchParams( + index_uid=index1.uid, query="", ranking_score_threshold=ranking_score_threshold + ), + ] + ) + assert "ranking_score_threshold must be between 0.0 and 1.0" in str(e.value) + + +def test_multi_search_ranking_score_threshold(client, index_with_documents): + index1 = index_with_documents() + result = client.multi_search( + [ + SearchParams(index_uid=index1.uid, query="", ranking_score_threshold=0.5), + ] + ) + assert len(result[0].hits) > 0 + + +def test_facet_search_ranking_score_threshold(index_with_documents_and_vectors): + index = index_with_documents_and_vectors() + update = index.update_filterable_attributes(["genre"]) + wait_for_task(index.http_client, update.task_uid) + response = index.facet_search( + "How to Train Your Dragon", + facet_name="genre", + facet_query="cartoon", + ranking_score_threshold=0.5, + ) + assert len(response.facet_hits) > 0 + + +@pytest.mark.parametrize("ranking_score_threshold", (-0.1, 1.1)) +def test_facet_search_invalid_ranking_score_threshold( + ranking_score_threshold, index_with_documents_and_vectors +): + index = index_with_documents_and_vectors() + update = index.update_filterable_attributes(["genre"]) + wait_for_task(index.http_client, update.task_uid) + with pytest.raises(MeilisearchError) as e: + index.facet_search( + "How to Train Your Dragon", + facet_name="genre", + facet_query="cartoon", + ranking_score_threshold=ranking_score_threshold, + ) + assert "ranking_score_threshold must be between 0.0 and 1.0" in str(e.value) + + +@pytest.mark.parametrize("limit, offset", ((1, 1), (None, None))) +def test_similar_search(limit, offset, index_with_documents_and_vectors): + index = index_with_documents_and_vectors() + response = index.search_similar_documents("287947", limit=limit, offset=offset) + assert len(response.hits) >= 1