
Merge #871
871: Support text separator customization r=sanders41 a=ellnix

# Pull Request

## Related issue
Fixes #852 

## PR checklist
Please check if your PR fulfills the following requirements:
- [X] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [X] Have you read the contributing guidelines?
- [X] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: ellnix <103502144+ellnix@users.noreply.github.com>
meili-bors[bot] and ellnix authored Oct 24, 2023
2 parents 8d8b4ad + 98168cf commit 1c4ce20
Showing 4 changed files with 208 additions and 0 deletions.
12 changes: 12 additions & 0 deletions .code-samples.meilisearch.yaml
@@ -683,6 +683,18 @@ facet_search_2: |-
client.index('books').update_faceting_settings({ 'sortFacetValuesBy': { 'genres': 'count' } })
facet_search_3: |-
client.index('books').facet_search('genres', 'c')
get_separator_tokens_1: |-
client.index('articles').get_separator_tokens()
update_separator_tokens_1: |-
client.index('articles').update_separator_tokens(["|", "&hellip;"])
reset_separator_tokens_1: |-
client.index('articles').reset_separator_tokens()
get_non_separator_tokens_1: |-
client.index('articles').get_non_separator_tokens()
update_non_separator_tokens_1: |-
client.index('articles').update_non_separator_tokens(["@", "#"])
reset_non_separator_tokens_1: |-
client.index('articles').reset_non_separator_tokens()
get_dictionary_1: |-
client.index('books').get_dictionary()
update_dictionary_1: |-
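The one-liner samples above assume an already-configured client and an existing index. A minimal setup sketch for running them (the instance URL, API key, and the `articles` index uid are placeholders, not part of this change):

```python
import meilisearch

# Placeholder connection details; adjust for your deployment.
client = meilisearch.Client("http://localhost:7700", "masterKey")

# Assuming an index with uid "articles" already exists, the samples can be run directly:
index = client.index("articles")
print(index.get_separator_tokens())      # [] on a fresh index
print(index.get_non_separator_tokens())  # [] on a fresh index
```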
2 changes: 2 additions & 0 deletions meilisearch/config.py
@@ -34,6 +34,8 @@ class Paths:
pagination = "pagination"
faceting = "faceting"
dictionary = "dictionary"
separator_tokens = "separator-tokens"
non_separator_tokens = "non-separator-tokens"
swap = "swap-indexes"

def __init__(
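These path constants correspond to the per-index settings sub-routes of the Meilisearch REST API (`/indexes/{uid}/settings/separator-tokens` and `/indexes/{uid}/settings/non-separator-tokens`). A rough sketch of the raw HTTP calls they target, assuming a local instance at http://localhost:7700 and an index uid of `articles` (both placeholders):

```python
import requests

BASE = "http://localhost:7700"                    # placeholder instance URL
HEADERS = {"Authorization": "Bearer masterKey"}   # placeholder API key

# Read the current custom separator tokens.
resp = requests.get(f"{BASE}/indexes/articles/settings/separator-tokens", headers=HEADERS)
print(resp.json())

# Replace the custom separator tokens; the response is an asynchronous task summary.
resp = requests.put(
    f"{BASE}/indexes/articles/settings/separator-tokens",
    json=["|", "&hellip;"],
    headers=HEADERS,
)
print(resp.json())  # contains a taskUid that can be polled via GET /tasks/{taskUid}
```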
118 changes: 118 additions & 0 deletions meilisearch/index.py
@@ -1637,6 +1637,124 @@ def reset_dictionary(self) -> TaskInfo:

return TaskInfo(**task)

# TEXT SEPARATOR SUB-ROUTES

def get_separator_tokens(self) -> List[str]:
"""Get the additional text separator tokens set on this index.
Returns
-------
settings:
List containing the separator tokens of the index.
Raises
------
MeilisearchApiError
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
"""
return self.http.get(self.__settings_url_for(self.config.paths.separator_tokens))

def get_non_separator_tokens(self) -> List[str]:
"""Get the list of disabled text separator tokens on this index.
Returns
-------
settings:
List containing the disabled separator tokens of the index.
Raises
------
MeilisearchApiError
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
"""
return self.http.get(self.__settings_url_for(self.config.paths.non_separator_tokens))

def update_separator_tokens(self, body: Union[List[str], None]) -> TaskInfo:
"""Update the additional separator tokens of the index.
Parameters
----------
body:
List of the new separator tokens.
Returns
-------
task_info:
TaskInfo instance containing information about a task to track the progress of an asynchronous process.
https://www.meilisearch.com/docs/reference/api/tasks#get-one-task
Raises
------
MeilisearchApiError
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
"""
task = self.http.put(self.__settings_url_for(self.config.paths.separator_tokens), body)

return TaskInfo(**task)

def update_non_separator_tokens(self, body: Union[List[str], None]) -> TaskInfo:
"""Update the disabled separator tokens of the index.
Parameters
----------
body:
List of the newly disabled separator tokens.
Returns
-------
task_info:
TaskInfo instance containing information about a task to track the progress of an asynchronous process.
https://www.meilisearch.com/docs/reference/api/tasks#get-one-task
Raises
------
MeilisearchApiError
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
"""
task = self.http.put(self.__settings_url_for(self.config.paths.non_separator_tokens), body)

return TaskInfo(**task)

def reset_separator_tokens(self) -> TaskInfo:
"""Clear all additional separator tokens
Returns
-------
task_info:
TaskInfo instance containing information about a task to track the progress of an asynchronous process.
https://www.meilisearch.com/docs/reference/api/tasks#get-one-task
Raises
------
MeilisearchApiError
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
"""
task = self.http.delete(
self.__settings_url_for(self.config.paths.separator_tokens),
)

return TaskInfo(**task)

def reset_non_separator_tokens(self) -> TaskInfo:
"""Clear all disabled separator tokens
Returns
-------
task_info:
TaskInfo instance containing information about a task to track the progress of an asynchronous process.
https://www.meilisearch.com/docs/reference/api/tasks#get-one-task
Raises
------
MeilisearchApiError
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
"""
task = self.http.delete(
self.__settings_url_for(self.config.paths.non_separator_tokens),
)

return TaskInfo(**task)

@staticmethod
def _batch(
documents: List[Dict[str, Any]], batch_size: int
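Taken together, the new `Index` methods follow the same task-based pattern as the other settings routes. A usage sketch combining them (the connection details and the `articles` index are placeholders):

```python
import meilisearch

client = meilisearch.Client("http://localhost:7700", "masterKey")  # placeholders
index = client.index("articles")

# Register "|" and the HTML entity "&hellip;" as additional separators.
task = index.update_separator_tokens(["|", "&hellip;"])
index.wait_for_task(task.task_uid)        # settings updates are asynchronous
print(index.get_separator_tokens())       # ['|', '&hellip;']

# Stop treating "@" and "#" as separators.
task = index.update_non_separator_tokens(["@", "#"])
index.wait_for_task(task.task_uid)
print(index.get_non_separator_tokens())   # ['@', '#']

# Restore both lists to their default (empty) values.
index.wait_for_task(index.reset_separator_tokens().task_uid)
index.wait_for_task(index.reset_non_separator_tokens().task_uid)
```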
76 changes: 76 additions & 0 deletions tests/settings/test_settings_text_separators_meilisearch.py
@@ -0,0 +1,76 @@
NEW_SEPARATOR_TOKENS = ["|", "&hellip;"]
NEW_NON_SEPARATOR_TOKENS = ["@", "#"]


def test_get_separator_tokens_default(empty_index):
"""Tests getting the default value of separator tokens."""
separator_tokens = empty_index().get_separator_tokens()
assert separator_tokens == []


def test_get_non_separator_tokens_default(empty_index):
"""Tests getting the default value of separator tokens."""
non_separator_tokens = empty_index().get_separator_tokens()
assert non_separator_tokens == []


def test_update_separator_tokens(empty_index):
"""Tests updating the separator tokens."""
index = empty_index()
task = index.update_separator_tokens(NEW_SEPARATOR_TOKENS)
task = index.wait_for_task(task.task_uid)
assert task.status == "succeeded"

separator_tokens = index.get_separator_tokens()
for token in NEW_SEPARATOR_TOKENS:
assert token in separator_tokens


def test_update_non_separator_tokens(empty_index):
"""Tests updating the non separator tokens."""
index = empty_index()
task = index.update_non_separator_tokens(NEW_NON_SEPARATOR_TOKENS)
task = index.wait_for_task(task.task_uid)
assert task.status == "succeeded"

non_separator_tokens = index.get_non_separator_tokens()
for token in NEW_NON_SEPARATOR_TOKENS:
assert token in non_separator_tokens


def test_reset_separator_tokens(empty_index):
"""Tests resetting the separator tokens to its default empty list."""
index = empty_index()
task = index.update_separator_tokens(NEW_SEPARATOR_TOKENS)
task = index.wait_for_task(task.task_uid)
assert task.status == "succeeded"

separator_tokens = index.get_separator_tokens()
for token in NEW_SEPARATOR_TOKENS:
assert token in separator_tokens

reset_task = index.reset_separator_tokens()
reset_task = index.wait_for_task(reset_task.task_uid)
assert reset_task.status == "succeeded"

separator_tokens = index.get_separator_tokens()
assert separator_tokens == []


def test_reset_non_separator_tokens(empty_index):
"""Tests resetting the non separator tokens to the default empty list."""
index = empty_index()
task = index.update_non_separator_tokens(NEW_NON_SEPARATOR_TOKENS)
task = index.wait_for_task(task.task_uid)
assert task.status == "succeeded"

non_separator_tokens = index.get_non_separator_tokens()
for token in NEW_NON_SEPARATOR_TOKENS:
assert token in non_separator_tokens

reset_task = index.reset_non_separator_tokens()
reset_task = index.wait_for_task(reset_task.task_uid)
assert reset_task.status == "succeeded"

non_separator_tokens = index.get_non_separator_tokens()
assert non_separator_tokens == []
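The `empty_index` fixture used throughout these tests is provided by the test suite's shared conftest rather than this file. A minimal stand-in sketch, purely illustrative of what the tests rely on (the real fixture lives in the repository's tests/conftest.py and may differ):

```python
import uuid

import meilisearch
import pytest


@pytest.fixture
def empty_index():
    """Return a factory that creates a fresh, empty index per call (illustrative only)."""
    client = meilisearch.Client("http://localhost:7700", "masterKey")  # placeholders

    def _create(uid=None):
        uid = uid or f"test-{uuid.uuid4().hex}"
        task = client.create_index(uid)
        client.wait_for_task(task.task_uid)  # index creation is asynchronous
        return client.index(uid)

    return _create
```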
