Skip to content

Commit

Permalink
records: improve indexing
Browse files Browse the repository at this point in the history
* Adds kibana container to docker compose file.
* Creates CLI command to index resources managed by `invenio-records-resources`.
* Stores full name of user in index.
* Standardizes the storage of suggestion fields.
* Moves the autocomplete filter and analyzer into the record template.
* Removes old mapping for projects.
* Adds a method `bulk_reindex` in record's service base class.
* Creates a specific `RecordIdProvider` for project resource to specify the right `pid_type`.
* Stores the `pid_type` in Record class to be able to retrieve it later.

Co-Authored-by: Sébastien Délèze <sebastien.deleze@rero.ch>
  • Loading branch information
Sébastien Délèze committed Mar 17, 2021
1 parent 3b5a84d commit 3f679f8
Show file tree
Hide file tree
Showing 16 changed files with 226 additions and 195 deletions.
6 changes: 6 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ services:
extends:
file: docker-services.yml
service: es
kibana:
extends:
file: docker-services.yml
service: kibana
links:
- es
grobid:
extends:
file: docker-services.yml
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@
sonar.modules.documents.cli.oaiharvester:oaiharvester',
'utils = sonar.modules.cli:utils',
'es = sonar.elasticsearch.cli:es',
'heg = sonar.heg.cli:heg'
'heg = sonar.heg.cli:heg',
'resources = sonar.resources.cli:resources'
],
'invenio_base.apps': [
'sonar = sonar.ext:Sonar',
Expand Down
15 changes: 15 additions & 0 deletions sonar/es_templates/v7/record.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
"index_patterns": "*",
"settings": {
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 20
}
},
"tokenizer": {
"char_group_tokenizer": {
"type": "char_group",
Expand Down Expand Up @@ -29,6 +36,14 @@
"icu_folding",
"german_normalization"
]
},
"autocomplete": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
}
}
Expand Down
6 changes: 5 additions & 1 deletion sonar/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@
from flask_security import user_registered
from flask_wiki import Wiki
from invenio_files_rest.signals import file_deleted, file_uploaded
from invenio_indexer.signals import before_record_index

from sonar.modules.permissions import has_admin_access, has_submitter_access, \
has_superuser_access
from sonar.modules.receivers import file_deleted_listener, \
file_uploaded_listener
from sonar.modules.users.api import current_user_record
from sonar.modules.users.signals import user_registered_handler
from sonar.modules.users.signals import add_full_name, user_registered_handler
from sonar.modules.utils import get_specific_theme, get_switch_aai_providers, \
get_view_code
from sonar.resources.projects.resource import \
Expand Down Expand Up @@ -95,6 +96,9 @@ def init_app(self, app):
file_uploaded.connect(file_uploaded_listener, weak=False)
file_deleted.connect(file_deleted_listener, weak=False)

# Add user's full name before record index
before_record_index.connect(add_full_name, weak=False)

def init_config(self, app):
"""Initialize configuration."""
for k in dir(config_sonar):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -927,7 +927,7 @@
"form": {
"remoteTypeahead": {
"type": "projects",
"field": "autocomplete_name",
"field": "metadata.name.suggest",
"label": "name"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1734,7 +1734,7 @@
"form": {
"remoteTypeahead": {
"type": "projects",
"field": "autocomplete_name",
"field": "metadata.name.suggest",
"label": "name"
}
}
Expand Down
161 changes: 0 additions & 161 deletions sonar/modules/projects/mappings/v7/projects/project-v1.0.0.json

This file was deleted.

10 changes: 10 additions & 0 deletions sonar/modules/users/mappings/v7/users/user-v1.0.0.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,16 @@
"last_name": {
"type": "text"
},
"full_name": {
"type": "text",
"fields": {
"suggest": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "standard"
}
}
},
"birth_date": {
"type": "date"
},
Expand Down
15 changes: 15 additions & 0 deletions sonar/modules/users/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,18 @@ def user_registered_handler(app, user, confirm_token):
role = datastore.find_role(UserRecord.ROLE_USER)
datastore.add_role_to_user(user, role)
datastore.commit()


def add_full_name(sender=None, record=None, json=None, index=None, **kwargs):
    """Add full name field in index.

    Receiver for the `before_record_index` signal. It only touches data that
    is sent to an index whose name starts with ``users``; every other index
    is left untouched.

    :param sender: Sender of the signal.
    :param record: Record to index.
    :param json: Indexed data, modified in place.
    :param index: Index where data is sent.
    """
    # Takes care only about users indexing. `index` defaults to None, so it
    # must be checked before calling `startswith` on it.
    if not index or not index.startswith('users'):
        return

    # Nothing to enrich when no payload is given.
    if json is None:
        return

    # Build the name from the parts that are actually present, so that a
    # partial user document does not abort indexing with a KeyError.
    name_parts = (json.get('first_name'), json.get('last_name'))
    full_name = ' '.join(str(part) for part in name_parts if part)

    if full_name:
        json['full_name'] = full_name
50 changes: 50 additions & 0 deletions sonar/resources/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#
# Swiss Open Access Repository
# Copyright (C) 2021 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Resources CLI commands."""

import click
from flask.cli import with_appcontext
from invenio_indexer.cli import abort_if_false

from sonar.proxies import sonar


# Root click group for resource commands; sub-commands attach themselves with
# `@resources.command(...)`. setup.py registers it as
# `resources = sonar.resources.cli:resources`.
@click.group()
def resources():
    """Resources CLI commands."""


@resources.command('reindex')
@click.argument('record-type')
@click.option(
    '--yes-i-know',
    is_flag=True,
    callback=abort_if_false,
    expose_value=False,
    prompt='Do you really want to reindex all records?',
)
@with_appcontext
def reindex(record_type):
    """Reindex every record of the given type.

    Applies to records managed by `invenio-records-resources`. The work is
    delegated to the `bulk_reindex` method of the service registered for
    the type.

    :param record_type: Record type.
    """
    start_message = f'Indexing records of type "{record_type}"'
    click.secho(start_message)

    # Look up the service for this record type, then trigger the reindexing.
    service = sonar.service(record_type)
    service.bulk_reindex()

    click.secho('Record indexed successfully!', fg='green')
14 changes: 12 additions & 2 deletions sonar/resources/projects/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@

"""API for projects resources."""

from invenio_pidstore.providers.recordid import RecordIdProvider
from invenio_pidstore.providers.recordid import \
RecordIdProvider as BaseRecordIdProvider
from invenio_records.dumpers import ElasticsearchDumper, ElasticsearchDumperExt
from invenio_records.systemfields import ConstantField
from invenio_records_resources.records.api import Record as BaseRecord
Expand All @@ -31,6 +32,10 @@

from . import models

# Project-specific provider: identical to the base record ID provider except
# for the PID type it declares.
class RecordIdProvider(BaseRecordIdProvider):
    pid_type = 'proj'


class ElasticsearchDumperObjectsExt(ElasticsearchDumperExt):
"""Interface for Elasticsearch dumper extensions."""
Expand Down Expand Up @@ -64,7 +69,12 @@ class Record(BaseRecord):

index = IndexField('projects-project-v1.0.0', search_alias='projects')

pid = PIDField('id', pid_type='proj', provider=RecordIdProvider)
# The `pid_type` must not be filled as argument in this constructor.
# Instead it is guessed from RecordIdProvider.
pid = PIDField('id', provider=RecordIdProvider)

# PID type retrieved from provider
pid_type = RecordIdProvider.pid_type

dumper = ElasticsearchDumper(extensions=[ElasticsearchDumperObjectsExt()])

Expand Down
Loading

0 comments on commit 3f679f8

Please sign in to comment.