Skip to content

Commit

Permalink
Use fulltext search for products (#9344)
Browse files Browse the repository at this point in the history
Does not support substring matches but gives us proper result ranking
with different weights assigned to product names, attributes, and
descriptions.

Also supports websearch expressions:
- `"foo bar"` for word distance
- `foo -bar` for excluding terms
- `foo OR bar` for alternatives

Co-authored-by: Filip Owczarek <filip.owczarek@saleor.io>

Co-authored-by: Filip Owczarek <filip.owczarek@saleor.io>
  • Loading branch information
patrys and fowczarek committed Apr 26, 2022
1 parent 170efdc commit 4b6f259
Show file tree
Hide file tree
Showing 32 changed files with 389 additions and 443 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ All notable, unreleased changes to this project will be documented in this file.
- Allow plugins to create their custom error code - #9300 by @LeOndaz

#### Other
- Use full-text search for products search API - #9344 by @patrys

- Include required permission in mutations' descriptions - #9363 by @maarcingebala
- Make GraphQL list items non-nullable - #9391 by @maarcingebala
Expand Down
38 changes: 37 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ documentation = "https://docs.saleor.io/"
django-stubs = "1.8.0"
pytest-socket = "^0.5.1"
before_after = "^1.0.1"
types-certifi = "^2021.10.8"
types-freezegun = "^1.1.7"
types-six = "^1.16.12"

[tool.black]
target_version = [ "py35", "py36", "py37", "py38" ]
Expand Down
3 changes: 3 additions & 0 deletions requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,13 @@ toml==0.10.2; python_version >= "3.7" and python_full_version < "3.0.0" or pytho
tomli==2.0.1; python_version < "3.11" and python_full_version >= "3.6.2" and python_version >= "3.7" and (python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.7")
tornado==6.1; python_version >= "3.7"
tox==3.25.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
types-certifi==2021.10.8.1
types-freezegun==1.1.9
types-pkg-resources==0.1.3
types-python-dateutil==2.8.12
types-pytz==2021.3.6
types-requests==2.27.20
types-six==1.16.15
types-urllib3==1.26.13
typing-extensions==4.2.0; python_version < "3.10" and python_full_version >= "3.6.2" and python_version >= "3.7"
urllib3==1.26.9; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "3.7"
Expand Down
12 changes: 3 additions & 9 deletions saleor/core/management/commands/update_search_indexes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
from django.core.management.base import BaseCommand

from ....account.models import User
from ....order.models import Order
from ....product.models import Product
from ...search_tasks import (
set_order_search_document_values,
set_product_search_document_values,
Expand All @@ -15,16 +12,13 @@ class Command(BaseCommand):

def handle(self, *args, **options):
# Update products
products_total_count = Product.objects.filter(search_document="").count()
self.stdout.write(f"Updating products: {products_total_count}")
self.stdout.write("Updating products")
set_product_search_document_values.delay()

# Update orders
orders_total_count = Order.objects.filter(search_document="").count()
self.stdout.write(f"Updating orders: {orders_total_count}")
self.stdout.write("Updating orders")
set_order_search_document_values.delay()

# Update users
users_total_count = User.objects.filter(search_document="").count()
self.stdout.write(f"Updating users: {users_total_count}")
self.stdout.write("Updating users")
set_user_search_document_values.delay()
23 changes: 19 additions & 4 deletions saleor/core/search_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ..product.models import Product
from ..product.search import (
PRODUCT_FIELDS_TO_PREFETCH,
prepare_product_search_document_value,
prepare_product_search_vector_value,
)

task_logger = get_task_logger(__name__)
Expand Down Expand Up @@ -82,7 +82,7 @@ def set_order_search_document_values(updated_count: int = 0) -> None:
@app.task
def set_product_search_document_values(updated_count: int = 0) -> None:
products = list(
Product.objects.filter(search_document="")
Product.objects.filter(search_vector=None)
.prefetch_related(*PRODUCT_FIELDS_TO_PREFETCH)[:BATCH_SIZE]
.iterator()
)
Expand All @@ -91,8 +91,9 @@ def set_product_search_document_values(updated_count: int = 0) -> None:
task_logger.info("No products to update.")
return

updated_count += set_search_document_values(
products, prepare_product_search_document_value
updated_count += set_search_vector_values(
products,
prepare_product_search_vector_value,
)

task_logger.info("Updated %d products", updated_count)
Expand All @@ -117,3 +118,17 @@ def set_search_document_values(instances: List, prepare_search_document_func):
Model.objects.bulk_update(instances, ["search_document"])

return len(instances)


def set_search_vector_values(
    instances,
    prepare_search_vector_func,
):
    """Recompute ``search_vector`` for the given instances and save them.

    Args:
        instances: Sequence of model instances. The model class used for the
            bulk update is taken from the first element.
        prepare_search_vector_func: Callable invoked as
            ``func(instance, already_prefetched=True)``; its result is
            assigned to ``instance.search_vector``.

    Returns:
        The number of instances that were processed (0 for an empty batch).
    """
    # An empty batch has no first element to read the model from; return
    # early instead of failing with IndexError on ``instances[0]``.
    if not instances:
        return 0

    Model = instances[0]._meta.model
    for instance in instances:
        instance.search_vector = prepare_search_vector_func(
            instance, already_prefetched=True
        )
    # Persist every recomputed vector with a single bulk_update call.
    Model.objects.bulk_update(instances, ["search_vector"])

    return len(instances)
6 changes: 6 additions & 0 deletions saleor/core/tests/test_postgresql_search.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import pytest
from django.contrib.postgres.search import SearchVector
from django.db.models import Value
from django.utils.text import slugify

from ...account.models import Address
Expand All @@ -24,6 +26,10 @@ def gen_product(name, description):
product_type=product_type,
category=category,
search_document=f"{name}{description}",
search_vector=(
SearchVector(Value(name), weight="A")
+ SearchVector(Value(description), weight="C")
),
)
ProductChannelListing.objects.create(
product=product,
Expand Down
4 changes: 2 additions & 2 deletions saleor/core/utils/random_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
ProductVariantChannelListing,
VariantMedia,
)
from ...product.search import update_products_search_document
from ...product.search import update_products_search_vector
from ...product.tasks import update_products_discounted_prices_of_discount_task
from ...product.thumbnails import (
create_category_background_image_thumbnails,
Expand Down Expand Up @@ -472,7 +472,7 @@ def create_products_by_schema(placeholder_dir, create_images):
assign_products_to_collections(associations=types["product.collectionproduct"])

all_products_qs = Product.objects.all()
update_products_search_document(all_products_qs)
update_products_search_vector(all_products_qs)
update_products_discounted_prices(all_products_qs)


Expand Down
6 changes: 3 additions & 3 deletions saleor/graphql/attribute/bulk_mutations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from ...attribute import models
from ...core.permissions import PageTypePermissions
from ...product import models as product_models
from ...product.search import update_products_search_document
from ...product.search import update_products_search_vector
from ..core.mutations import ModelBulkDeleteMutation
from ..core.types import AttributeError, NonNullList
from ..utils import resolve_global_ids_to_primary_keys
Expand Down Expand Up @@ -32,7 +32,7 @@ def perform_mutation(cls, _root, info, ids, **data):
_, attribute_pks = resolve_global_ids_to_primary_keys(ids, "Attribute")
product_ids = cls.get_product_ids_to_update(attribute_pks)
response = super().perform_mutation(_root, info, ids, **data)
update_products_search_document(
update_products_search_vector(
product_models.Product.objects.filter(id__in=product_ids)
)
return response
Expand Down Expand Up @@ -86,7 +86,7 @@ def perform_mutation(cls, _root, info, ids, **data):
_, attribute_pks = resolve_global_ids_to_primary_keys(ids, "AttributeValue")
product_ids = cls.get_product_ids_to_update(attribute_pks)
response = super().perform_mutation(_root, info, ids, **data)
update_products_search_document(
update_products_search_vector(
product_models.Product.objects.filter(id__in=product_ids)
)
return response
Expand Down
6 changes: 3 additions & 3 deletions saleor/graphql/attribute/mutations.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from ...core.tracing import traced_atomic_transaction
from ...core.utils import generate_unique_slug
from ...product import models as product_models
from ...product.search import update_products_search_document
from ...product.search import update_products_search_vector
from ..core.enums import MeasurementUnitsEnum
from ..core.fields import JSONString
from ..core.inputs import ReorderInput
Expand Down Expand Up @@ -734,7 +734,7 @@ def post_save_action(cls, info, instance, cleaned_input):
Q(Exists(instance.productassignments.filter(product_id=OuterRef("id"))))
| Q(Exists(variants.filter(product_id=OuterRef("id"))))
)
update_products_search_document(products)
update_products_search_vector(products)


class AttributeValueDelete(ModelDeleteMutation):
Expand All @@ -757,7 +757,7 @@ def perform_mutation(cls, _root, info, **data):
instance = cls.get_node_or_error(info, node_id, only_type=AttributeValue)
product_ids = cls.get_product_ids_to_update(instance)
response = super().perform_mutation(_root, info, **data)
update_products_search_document(
update_products_search_vector(
product_models.Product.objects.filter(id__in=product_ids)
)
return response
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,6 @@ def test_delete_attribute_value_product_search_document_updated(
with pytest.raises(value._meta.model.DoesNotExist):
value.refresh_from_db()

product.refresh_from_db()
assert product.search_document
assert name.lower() not in product.search_document


def test_delete_attribute_value_product_search_document_updated_variant_attribute(
staff_api_client,
Expand Down Expand Up @@ -121,7 +117,3 @@ def test_delete_attribute_value_product_search_document_updated_variant_attribut
# then
with pytest.raises(value._meta.model.DoesNotExist):
value.refresh_from_db()

product.refresh_from_db()
assert product.search_document
assert name.lower() not in product.search_document
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,6 @@ def test_update_attribute_value_product_search_document_updated(
value["node"]["name"] for value in data["attribute"]["choices"]["edges"]
]

product.refresh_from_db()
assert name.lower() in product.search_document


def test_update_attribute_value_product_search_document_updated_variant_attribute(
staff_api_client,
Expand Down Expand Up @@ -167,9 +164,6 @@ def test_update_attribute_value_product_search_document_updated_variant_attribut
value["node"]["name"] for value in data["attribute"]["choices"]["edges"]
]

product.refresh_from_db()
assert name.lower() in product.search_document


def test_update_swatch_attribute_value(
staff_api_client,
Expand Down
24 changes: 2 additions & 22 deletions saleor/graphql/attribute/tests/mutations/test_bulk_delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,16 +104,6 @@ def test_delete_attributes_products_search_document_updated(
id__in=[attr.id for attr in product_type_attribute_list]
).exists()

product_1.refresh_from_db()
product_2.refresh_from_db()
assert product_1.search_document
assert attr_1_name not in product_1.search_document
assert color_attribute_value.name.lower() in product_1.search_document
assert attr_3_name not in product_1.search_document

assert product_2.search_document
assert attr_2_name not in product_2.search_document


ATTRIBUTE_VALUE_BULK_DELETE_MUTATION = """
mutation attributeValueBulkDelete($ids: [ID!]!) {
Expand Down Expand Up @@ -160,11 +150,6 @@ def test_delete_attribute_values_search_document_updated(
slug="orange", name="Orange", attribute=attribute, value="#ABCD"
)

val_1_name = value_1.name
val_2_name = value_2.name
val_3_name = value_3.name
val_4_name = value_4.name

product_1 = product_list[0]
product_2 = product_list[1]
variant_1 = product_1.variants.first()
Expand Down Expand Up @@ -195,10 +180,5 @@ def test_delete_attribute_values_search_document_updated(

product_1.refresh_from_db()
product_2.refresh_from_db()
assert product_1.search_document
assert val_1_name not in product_1.search_document
assert val_4_name.lower() in product_1.search_document
assert val_3_name not in product_1.search_document

assert product_2.search_document
assert val_2_name not in product_2.search_document
assert product_1.search_vector
assert product_2.search_vector
27 changes: 26 additions & 1 deletion saleor/graphql/core/connection.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json
from decimal import Decimal
from decimal import Decimal, InvalidOperation
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast

import graphene
Expand Down Expand Up @@ -51,6 +51,28 @@ def get_field_value(instance: DjangoModel, field_name: str):
return attr


def _prepare_filter_by_rank_expression(
cursor: List[str],
sorting_direction: str,
) -> Q:
try:
rank = Decimal(cursor[0])
int(cursor[1])
except (InvalidOperation, ValueError, TypeError, KeyError):
raise ValueError("Invalid cursor for sorting by rank.")

# Because rank is float number, it gets mangled by PostgreSQL's query parser
# making equal comparisons impossible. Instead we compare rank against small
# range of values, constructed using epsilon.
if sorting_direction == "gt":
return Q(
search_rank__range=(rank - EPSILON, rank + EPSILON), id__lt=cursor[1]
) | Q(search_rank__gt=rank + EPSILON)
return Q(search_rank__range=(rank - EPSILON, rank + EPSILON), id__gt=cursor[1]) | Q(
search_rank__lt=rank - EPSILON
)


def _prepare_filter_expression(
field_name: str,
index: int,
Expand Down Expand Up @@ -92,6 +114,9 @@ def _prepare_filter(
('first_field', 'first_value_from_cursor'))
)
"""
if sorting_fields == ["search_rank", "id"]:
# Fast path for filtering by rank
return _prepare_filter_by_rank_expression(cursor, sorting_direction)
filter_kwargs = Q()
for index, field_name in enumerate(sorting_fields):
if cursor[index] is None and sorting_direction == "gt":
Expand Down

0 comments on commit 4b6f259

Please sign in to comment.