Skip to content

Commit

Permalink
OpenSearch: Add Support for Boolean Filter with ANN search (#3038)
Browse files Browse the repository at this point in the history
### Description
Add support for Boolean filters with approximate nearest neighbor (ANN) search.
Documentation:
https://opensearch.org/docs/latest/search-plugins/knn/filter-search-knn/#boolean-filter-with-ann-search

### Issues Resolved
#2924

Signed-off-by: Naveen Tatikonda <navtat@amazon.com>
  • Loading branch information
naveentatikonda committed Apr 18, 2023
1 parent 5420a0e commit 3453b74
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 3 deletions.
41 changes: 38 additions & 3 deletions langchain/vectorstores/opensearch_vector_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,28 @@ def _default_approximate_search_query(
}


def _approximate_search_query_with_boolean_filter(
query_vector: List[float],
boolean_filter: Dict,
size: int = 4,
k: int = 4,
vector_field: str = "vector_field",
subquery_clause: str = "must",
) -> Dict:
"""For Approximate k-NN Search, with Boolean Filter."""
return {
"size": size,
"query": {
"bool": {
"filter": boolean_filter,
subquery_clause: [
{"knn": {vector_field: {"vector": query_vector, "k": k}}}
],
}
},
}


def _default_script_query(
query_vector: List[float],
space_type: str = "l2",
Expand Down Expand Up @@ -317,6 +339,11 @@ def similarity_search(
size: number of results the query actually returns; default: 4
boolean_filter: A Boolean filter consists of a Boolean query that
contains a k-NN query and a filter
subquery_clause: Query clause on the knn vector field; default: "must"
Optional Args for Script Scoring Search:
search_type: "script_scoring"; default: "approximate_search"
Expand All @@ -339,11 +366,19 @@ def similarity_search(
text_field = _get_kwargs_value(kwargs, "text_field", "text")
metadata_field = _get_kwargs_value(kwargs, "metadata_field", "metadata")
vector_field = _get_kwargs_value(kwargs, "vector_field", "vector_field")

if search_type == "approximate_search":
size = _get_kwargs_value(kwargs, "size", 4)
search_query = _default_approximate_search_query(
embedding, size, k, vector_field
)
boolean_filter = _get_kwargs_value(kwargs, "boolean_filter", {})
subquery_clause = _get_kwargs_value(kwargs, "subquery_clause", "must")
if boolean_filter != {}:
search_query = _approximate_search_query_with_boolean_filter(
embedding, boolean_filter, size, k, vector_field, subquery_clause
)
else:
search_query = _default_approximate_search_query(
embedding, size, k, vector_field
)
elif search_type == SCRIPT_SCORING_SEARCH:
space_type = _get_kwargs_value(kwargs, "space_type", "l2")
pre_filter = _get_kwargs_value(kwargs, "pre_filter", MATCH_ALL_QUERY)
Expand Down
14 changes: 14 additions & 0 deletions tests/integration_tests/vectorstores/test_opensearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,17 @@ def test_opensearch_embedding_size_zero() -> None:
OpenSearchVectorSearch.from_texts(
[], FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
)


def test_appx_search_with_boolean_filter() -> None:
    """Approximate search combined with a Boolean filter returns only "bar"."""
    # Index the module-level sample texts with the fake embedding model.
    store = OpenSearchVectorSearch.from_texts(
        texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL
    )

    # Term filter on the "text" field; the knn sub-query is sent under the
    # "should" clause instead of the default.
    term_on_bar = {"term": {"text": "bar"}}
    only_bar = {"bool": {"must": [term_on_bar]}}
    hits = store.similarity_search(
        "foo",
        k=3,
        boolean_filter=only_bar,
        subquery_clause="should",
    )

    assert hits == [Document(page_content="bar")]

0 comments on commit 3453b74

Please sign in to comment.