Navigation Menu

Skip to content

Commit

Permalink
Add full-text search to Collection filterset
Browse files Browse the repository at this point in the history
With this the user can submit their full-text querystring as the 'q'
parameter. The results are rank sorted. It provides a weighted result
with the following weights:

- Namespace name (weight A)
- Collection name (weight A)
- Collection tags (weight B)
- Collection content names (weight C)
- Collection description (weight D)

https://pulp.plan.io/issues/5075
closes #5075
  • Loading branch information
Brian Bouterse committed Aug 6, 2019
1 parent b69488b commit 32f00b0
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 6 deletions.
2 changes: 2 additions & 0 deletions CHANGES/5075.feature
@@ -0,0 +1,2 @@
Full-text Collection search is available with the ``q`` filter argument. A migration creates
database indexes to speed up the search.
95 changes: 95 additions & 0 deletions pulp_ansible/app/migrations/0004_add_fulltext_search_indexes.py
@@ -0,0 +1,95 @@
from django.contrib.postgres import search as psql_search
from django.db import migrations


# SQL expression that builds the full-text search vector for a single CollectionVersion row.
# It must be embedded in a query where the alias ``cv`` refers to a row of
# ``ansible_collectionversion``; both subqueries below are correlated on that alias.
#
# The vector is based on the following data, ranked from A to D:
#   - Namespace name (weight A)
#   - Collection name (weight A)
#   - Collection tags (weight B)
#   - Collection content names (weight C)
#   - Collection description (weight D)
#
# The tags subquery is restricted to the rows of the many-to-many table that belong to ``cv``.
# Without that restriction every collection version would be indexed with the tags of *all*
# collection versions, so a tag search would match unrelated content.
TS_VECTOR_SELECT = '''
setweight(to_tsvector(coalesce(namespace,'')), 'A')
|| setweight(to_tsvector(coalesce(name, '')), 'A')
|| (
SELECT
setweight(to_tsvector(
coalesce(string_agg("ansible_tag"."name", ' '), '')
), 'B')
FROM
"ansible_tag" INNER JOIN "ansible_collectionversion_tags" ON ("ansible_tag"."_id" = "ansible_collectionversion_tags"."tag_id")
WHERE "ansible_collectionversion_tags"."collectionversion_id" = cv.content_ptr_id
)
|| (
SELECT
setweight(to_tsvector(
coalesce(string_agg(cvc ->> 'name', ' '), '')
), 'C')
FROM jsonb_array_elements(cv.contents) AS cvc
)
|| setweight(to_tsvector(coalesce(description, '')), 'D')
'''

# Generates the search vector for CollectionVersion rows that already exist in the database.
#
# The scalar subquery is correlated to the row being updated (``c``) through its primary key.
# Without the WHERE clause the subquery would return one row per collection version, and
# PostgreSQL rejects a multi-row subquery used as an expression.
POPULATE_COLLECTIONS_TS_VECTOR = f'''
UPDATE ansible_collectionversion AS c
SET search_vector = (
SELECT {TS_VECTOR_SELECT}
FROM ansible_collectionversion cv
WHERE cv.content_ptr_id = c.content_ptr_id
)
'''


# Creates the PL/pgSQL function ``update_collection_ts_vector`` and the row-level trigger
# ``update_ts_vector`` that calls it BEFORE UPDATE on ``ansible_collectionversion``.
#
# On every updated row the function recomputes ``NEW.search_vector`` by evaluating
# TS_VECTOR_SELECT against the ``ansible_collectionversion`` row whose ``content_ptr_id``
# matches the row being updated.
#
# Only an `ON UPDATE` trigger is installed; there is no `ON INSERT` counterpart. The original
# rationale was that a separate UPDATE query always follows the insert of a collection version.
# NOTE(review): that rationale referred to a `latest_version_id` column that is not visible in
# this migration -- confirm that an UPDATE really follows every insert, otherwise freshly
# inserted rows keep the default (empty) search vector.
# NOTE(review): because this is a BEFORE trigger, the subquery presumably sees the values
# stored prior to the update rather than the NEW ones -- confirm this is intended.
CREATE_COLLECTIONS_TS_VECTOR_TRIGGER = f'''
CREATE OR REPLACE FUNCTION update_collection_ts_vector()
RETURNS TRIGGER AS
$$
BEGIN
NEW.search_vector := (
SELECT {TS_VECTOR_SELECT}
FROM ansible_collectionversion cv
WHERE cv.content_ptr_id = NEW.content_ptr_id
);
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER update_ts_vector
BEFORE UPDATE
ON ansible_collectionversion
FOR EACH ROW
EXECUTE PROCEDURE update_collection_ts_vector();
'''

# Reverse of CREATE_COLLECTIONS_TS_VECTOR_TRIGGER: drops the trigger first, then the function
# it calls. Both statements use IF EXISTS, so they do not fail when the objects are absent.
DROP_COLLECTIONS_TS_VECTOR_TRIGGER = '''
DROP TRIGGER IF EXISTS update_ts_vector ON ansible_collectionversion;
DROP FUNCTION IF EXISTS update_collection_ts_vector();
'''


class Migration(migrations.Migration):
    """Add the ``search_vector`` column to CollectionVersion and keep it up to date.

    The operations run in order: add the column, fill it for the rows that already exist,
    then install the database trigger that refreshes it on later updates.
    """

    dependencies = [('ansible', '0003_add_tags_and_collectionversion_fields')]

    operations = [
        # Step 1: schema change -- new tsvector column with an empty default.
        migrations.AddField(
            model_name='collectionversion',
            name='search_vector',
            field=psql_search.SearchVectorField(default=''),
        ),
        # Step 2: backfill existing rows. Reversing is a no-op.
        migrations.RunSQL(
            sql=POPULATE_COLLECTIONS_TS_VECTOR,
            reverse_sql=migrations.RunSQL.noop,
        ),
        # Step 3: install the trigger and its function; reversing drops both.
        migrations.RunSQL(
            sql=CREATE_COLLECTIONS_TS_VECTOR_TRIGGER,
            reverse_sql=DROP_COLLECTIONS_TS_VECTOR_TRIGGER,
        ),
    ]
8 changes: 7 additions & 1 deletion pulp_ansible/app/models.py
Expand Up @@ -2,6 +2,7 @@

from django.db import models
from django.contrib.postgres import fields as psql_fields
from django.contrib.postgres import search as psql_search

from pulpcore.plugin.models import Content, Model, Remote, RepositoryVersionDistribution

Expand Down Expand Up @@ -56,7 +57,7 @@ class Tag(Model):

class CollectionVersion(Content):
"""
A content type representing a Collection.
A content type representing a CollectionVersion.
This model is primarily designed to adhere to the data format for Collection content. That spec
is here: https://docs.ansible.com/ansible/devel/dev_guide/collections_galaxy_meta.html
Expand Down Expand Up @@ -87,6 +88,7 @@ class CollectionVersion(Content):

TYPE = "collection_version"

# Data Fields
authors = psql_fields.ArrayField(models.CharField(max_length=64), default=list, editable=False)
contents = psql_fields.JSONField(default=list, editable=False)
dependencies = psql_fields.JSONField(default=dict, editable=False)
Expand All @@ -101,11 +103,15 @@ class CollectionVersion(Content):
repository = models.URLField(default="", blank=True, max_length=128, editable=False)
version = models.CharField(max_length=32, editable=False)

# Foreign Key Fields
collection = models.ForeignKey(
Collection, on_delete=models.CASCADE, related_name="versions", editable=False
)
tags = models.ManyToManyField(Tag, editable=False)

# Search Fields
search_vector = psql_search.SearchVectorField(default="")

@property
def relative_path(self):
"""
Expand Down
2 changes: 0 additions & 2 deletions pulp_ansible/app/tasks/collections.py
Expand Up @@ -152,8 +152,6 @@ def import_collection(artifact_pk):
collection, created = Collection.objects.get_or_create(
namespace=collection_info["namespace"], name=collection_info["name"]
)
if created:
CreatedResource.objects.create(content_object=collection)

tags = collection_info.pop("tags")

Expand Down
33 changes: 30 additions & 3 deletions pulp_ansible/app/viewsets.py
Expand Up @@ -2,14 +2,16 @@
from gettext import gettext as _
from packaging.version import parse

from django.contrib.postgres.search import SearchQuery
from django.db import IntegrityError
from django.db.models import fields as db_fields
from django.db.models.expressions import F, Func
from django_filters import filters
from drf_yasg.utils import swagger_auto_schema
from rest_framework import mixins, serializers, viewsets
from rest_framework.decorators import action
from rest_framework.parsers import FormParser, MultiPartParser


from pulpcore.plugin.exceptions import DigestValidationError
from pulpcore.plugin.models import Artifact
from pulpcore.plugin.serializers import (
Expand Down Expand Up @@ -74,9 +76,34 @@ class CollectionVersionFilter(ContentFilter):
FilterSet for Ansible Collections.
"""

namespace = filters.CharFilter(field_name="collection__namespace")
name = filters.CharFilter(field_name="collection__name")
namespace = filters.CharFilter(field_name="namespace")
name = filters.CharFilter(field_name="name")
latest = filters.BooleanFilter(field_name="latest", method="filter_latest")
q = filters.CharFilter(field_name="q", method="filter_by_q")

def filter_by_q(self, queryset, name, value):
    """
    Apply the full-text search requested through the 'q' option.

    Rows whose ``search_vector`` matches the search string are kept, annotated with a ``rank``
    computed by the database ``ts_rank`` function, and ordered from highest to lowest rank.

    Args:
        queryset: The queryset to narrow down with the full-text search
        name: The name of the option specified, i.e. 'q'
        value: The string to search on

    Returns:
        The Django queryset that was passed in, additionally filtered by full-text search
        and sorted by descending rank.

    """
    query = SearchQuery(value)
    # The third ts_rank argument is the normalization option (RANK_NORMALIZATION = 32).
    rank_expression = Func(
        F("search_vector"),
        query,
        32,
        function="ts_rank",
        output_field=db_fields.FloatField(),
    )
    matching = queryset.filter(search_vector=query)
    ranked = matching.annotate(rank=rank_expression)
    return ranked.order_by("-rank")

def filter_latest(self, queryset, name, value):
"""
Expand Down

0 comments on commit 32f00b0

Please sign in to comment.