Support text separator customization #871

Merged
12 changes: 12 additions & 0 deletions .code-samples.meilisearch.yaml
@@ -683,6 +683,18 @@ facet_search_2: |-
client.index('books').update_faceting_settings({ 'sortFacetValuesBy': { 'genres': 'count' } })
facet_search_3: |-
client.index('books').facet_search('genres', 'c')
get_separator_tokens_1: |-
client.index('articles').get_separator_tokens()
update_separator_tokens_1: |-
client.index('articles').update_separator_tokens(["|", "…"])
reset_separator_tokens_1: |-
client.index('articles').reset_separator_tokens()
get_non_separator_tokens_1: |-
client.index('articles').get_non_separator_tokens()
update_non_separator_tokens_1: |-
client.index('articles').update_non_separator_tokens(["@", "#"])
reset_non_separator_tokens_1: |-
client.index('articles').reset_non_separator_tokens()
get_dictionary_1: |-
client.index('books').get_dictionary()
update_dictionary_1: |-
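Read as client calls, the new samples run end to end as in the sketch below, which assumes a local Meilisearch server, a master key, and an existing 'articles' index (none of which are part of this change):

import meilisearch

client = meilisearch.Client("http://localhost:7700", "masterKey")
index = client.index("articles")

# Register "|" and "…" as additional word separators, then read the setting back.
task = index.update_separator_tokens(["|", "…"])
index.wait_for_task(task.task_uid)
print(index.get_separator_tokens())  # ["|", "…"]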
2 changes: 2 additions & 0 deletions meilisearch/config.py
@@ -34,6 +34,8 @@ class Paths:
pagination = "pagination"
faceting = "faceting"
dictionary = "dictionary"
separator_tokens = "separator-tokens"
non_separator_tokens = "non-separator-tokens"
swap = "swap-indexes"

def __init__(
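The two new constants follow the existing settings sub-route naming, so, assuming __settings_url_for in index.py below simply appends them to the index settings URL, the endpoints reached would look roughly like this:

# Illustrative only: the uid and the exact URL assembly are assumptions,
# based on the path constants added above.
index_uid = "articles"
separator_tokens_url = f"indexes/{index_uid}/settings/separator-tokens"
non_separator_tokens_url = f"indexes/{index_uid}/settings/non-separator-tokens"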
118 changes: 118 additions & 0 deletions meilisearch/index.py
@@ -1637,6 +1637,124 @@ def reset_dictionary(self) -> TaskInfo:

return TaskInfo(**task)

# TEXT SEPARATOR SUB-ROUTES

def get_separator_tokens(self) -> List[str]:
"""Get the additional text separator tokens set on this index.

Returns
-------
settings:
List containing the separator tokens of the index.

Raises
------
MeilisearchApiError
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
"""
return self.http.get(self.__settings_url_for(self.config.paths.separator_tokens))

def get_non_separator_tokens(self) -> List[str]:
"""Get the list of disabled text separator tokens on this index.

Returns
-------
settings:
List containing the disabled separator tokens of the index.

Raises
------
MeilisearchApiError
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
"""
return self.http.get(self.__settings_url_for(self.config.paths.non_separator_tokens))

def update_separator_tokens(self, body: Union[List[str], None]) -> TaskInfo:
"""Update the additional separator tokens of the index.

Parameters
----------
body:
List of the new separator tokens.

Returns
-------
task_info:
TaskInfo instance containing information about a task to track the progress of an asynchronous process.
https://www.meilisearch.com/docs/reference/api/tasks#get-one-task

Raises
------
MeilisearchApiError
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
"""
task = self.http.put(self.__settings_url_for(self.config.paths.separator_tokens), body)

return TaskInfo(**task)

def update_non_separator_tokens(self, body: Union[List[str], None]) -> TaskInfo:
"""Update the disabled separator tokens of the index.

Parameters
----------
body:
List of the newly disabled separator tokens.

Returns
-------
task_info:
TaskInfo instance containing information about a task to track the progress of an asynchronous process.
https://www.meilisearch.com/docs/reference/api/tasks#get-one-task

Raises
------
MeilisearchApiError
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
"""
task = self.http.put(self.__settings_url_for(self.config.paths.non_separator_tokens), body)

return TaskInfo(**task)

def reset_separator_tokens(self) -> TaskInfo:
"""Clear all additional separator tokens

Returns
-------
task_info:
TaskInfo instance containing information about a task to track the progress of an asynchronous process.
https://www.meilisearch.com/docs/reference/api/tasks#get-one-task

Raises
------
MeilisearchApiError
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
"""
task = self.http.delete(
self.__settings_url_for(self.config.paths.separator_tokens),
)

return TaskInfo(**task)

def reset_non_separator_tokens(self) -> TaskInfo:
"""Clear all disabled separator tokens

Returns
-------
task_info:
TaskInfo instance containing information about a task to track the progress of an asynchronous process.
https://www.meilisearch.com/docs/reference/api/tasks#get-one-task

Raises
------
MeilisearchApiError
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
"""
task = self.http.delete(
self.__settings_url_for(self.config.paths.non_separator_tokens),
)

return TaskInfo(**task)

@staticmethod
def _batch(
documents: List[Dict[str, Any]], batch_size: int
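Together the six methods cover the full lifecycle of both settings. A short round-trip sketch for the non-separator side follows; the server address, index uid, and token values are illustrative, not part of the diff:

import meilisearch

client = meilisearch.Client("http://localhost:7700", "masterKey")
index = client.index("articles")

# Stop "@" and "#" from being treated as word separators during tokenization.
task = index.update_non_separator_tokens(["@", "#"])
index.wait_for_task(task.task_uid)
assert "@" in index.get_non_separator_tokens()

# Resetting returns a task like any other settings update.
reset = index.reset_non_separator_tokens()
index.wait_for_task(reset.task_uid)
assert index.get_non_separator_tokens() == []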
76 changes: 76 additions & 0 deletions tests/settings/test_settings_text_separators_meilisearch.py
@@ -0,0 +1,76 @@
NEW_SEPARATOR_TOKENS = ["|", "…"]
NEW_NON_SEPARATOR_TOKENS = ["@", "#"]


def test_get_separator_tokens_default(empty_index):
"""Tests getting the default value of separator tokens."""
separator_tokens = empty_index().get_separator_tokens()
assert separator_tokens == []


def test_get_non_separator_tokens_default(empty_index):
"""Tests getting the default value of separator tokens."""
non_separator_tokens = empty_index().get_separator_tokens()
assert non_separator_tokens == []


def test_update_separator_tokens(empty_index):
"""Tests updating the separator tokens."""
index = empty_index()
task = index.update_separator_tokens(NEW_SEPARATOR_TOKENS)
task = index.wait_for_task(task.task_uid)
assert task.status == "succeeded"

separator_tokens = index.get_separator_tokens()
for token in NEW_SEPARATOR_TOKENS:
assert token in separator_tokens


def test_update_non_separator_tokens(empty_index):
"""Tests updating the non separator tokens."""
index = empty_index()
task = index.update_non_separator_tokens(NEW_NON_SEPARATOR_TOKENS)
task = index.wait_for_task(task.task_uid)
assert task.status == "succeeded"

non_separator_tokens = index.get_non_separator_tokens()
for token in NEW_NON_SEPARATOR_TOKENS:
assert token in non_separator_tokens


def test_reset_separator_tokens(empty_index):
"""Tests resetting the separator tokens to its default empty list."""
index = empty_index()
task = index.update_separator_tokens(NEW_SEPARATOR_TOKENS)
task = index.wait_for_task(task.task_uid)
assert task.status == "succeeded"

separator_tokens = index.get_separator_tokens()
for token in NEW_SEPARATOR_TOKENS:
assert token in separator_tokens

reset_task = index.reset_separator_tokens()
reset_task = index.wait_for_task(reset_task.task_uid)
assert reset_task.status == "succeeded"

separator_tokens = index.get_separator_tokens()
assert separator_tokens == []


def test_reset_non_separator_tokens(empty_index):
"""Tests resetting the non separator tokens to their default empty list."""
index = empty_index()
task = index.update_non_separator_tokens(NEW_NON_SEPARATOR_TOKENS)
task = index.wait_for_task(task.task_uid)
assert task.status == "succeeded"

non_separator_tokens = index.get_non_separator_tokens()
for token in NEW_NON_SEPARATOR_TOKENS:
assert token in non_separator_tokens

reset_task = index.reset_non_separator_tokens()
reset_task = index.wait_for_task(reset_task.task_uid)
assert reset_task.status == "succeeded"

non_separator_tokens = index.get_non_separator_tokens()
assert non_separator_tokens == []
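The tests rely on an empty_index fixture defined elsewhere in the repository and not shown in this diff; a hypothetical sketch of such a fixture, with the client URL, key, and uid generation all assumed, could look like this:

import uuid

import meilisearch
import pytest


@pytest.fixture
def empty_index():
    # Hypothetical stand-in for the repository's real fixture, which may differ.
    client = meilisearch.Client("http://localhost:7700", "masterKey")

    def index_maker(uid=None):
        # Create a fresh, empty index and wait until the creation task finishes.
        uid = uid or f"test-{uuid.uuid4().hex}"
        task = client.create_index(uid)
        client.wait_for_task(task.task_uid)
        return client.index(uid)

    return index_maker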