-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4368 from safwanrahman/comman
[Fix #4333] Implement asynchronous search reindex functionality using celery
- Loading branch information
Showing
20 changed files
with
382 additions
and
93 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
58 changes: 0 additions & 58 deletions
58
readthedocs/core/management/commands/reindex_elasticsearch.py
This file was deleted.
Oops, something went wrong.
22 changes: 22 additions & 0 deletions
22
readthedocs/projects/migrations/0028_importedfile_modified_date.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# -*- coding: utf-8 -*- | ||
# Generated by Django 1.9.13 on 2018-07-27 09:54 | ||
from __future__ import unicode_literals | ||
|
||
from django.db import migrations, models | ||
import django.utils.timezone | ||
|
||
|
||
class Migration(migrations.Migration): | ||
|
||
dependencies = [ | ||
('projects', '0027_add_htmlfile_model'), | ||
] | ||
|
||
operations = [ | ||
migrations.AddField( | ||
model_name='importedfile', | ||
name='modified_date', | ||
field=models.DateTimeField(auto_now=True, default=django.utils.timezone.now, verbose_name='Modified date'), | ||
preserve_default=False, | ||
), | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
default_app_config = 'readthedocs.search.apps.SearchConfig' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
"""Project app config""" | ||
|
||
from django.apps import AppConfig | ||
|
||
|
||
class SearchConfig(AppConfig): | ||
name = 'readthedocs.search' | ||
|
||
def ready(self): | ||
from .signals import index_html_file, remove_html_file |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
101 changes: 101 additions & 0 deletions
101
readthedocs/search/management/commands/reindex_elasticsearch.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import datetime | ||
import logging | ||
|
||
from celery import chord, chain | ||
from django.apps import apps | ||
from django.conf import settings | ||
from django.core.management import BaseCommand | ||
from django.utils import timezone | ||
from django_elasticsearch_dsl.registries import registry | ||
|
||
from ...tasks import (index_objects_to_es, switch_es_index, create_new_es_index, | ||
index_missing_objects) | ||
from ...utils import chunk_queryset | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
class Command(BaseCommand): | ||
|
||
@staticmethod | ||
def _get_indexing_tasks(app_label, model_name, queryset, document_class, index_name): | ||
queryset = queryset.values_list('id', flat=True) | ||
chunked_queryset = chunk_queryset(queryset, settings.ES_TASK_CHUNK_SIZE) | ||
|
||
for chunk in chunked_queryset: | ||
data = { | ||
'app_label': app_label, | ||
'model_name': model_name, | ||
'document_class': document_class, | ||
'index_name': index_name, | ||
'objects_id': list(chunk) | ||
} | ||
yield index_objects_to_es.si(**data) | ||
|
||
def _run_reindex_tasks(self, models): | ||
for doc in registry.get_documents(models): | ||
queryset = doc().get_queryset() | ||
# Get latest object from the queryset | ||
index_time = timezone.now() | ||
|
||
app_label = queryset.model._meta.app_label | ||
model_name = queryset.model.__name__ | ||
|
||
index_name = doc._doc_type.index | ||
timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S') | ||
new_index_name = "{}_{}".format(index_name, timestamp) | ||
|
||
pre_index_task = create_new_es_index.si(app_label=app_label, | ||
model_name=model_name, | ||
index_name=index_name, | ||
new_index_name=new_index_name) | ||
|
||
indexing_tasks = self._get_indexing_tasks(app_label=app_label, model_name=model_name, | ||
queryset=queryset, | ||
document_class=str(doc), | ||
index_name=new_index_name) | ||
|
||
post_index_task = switch_es_index.si(app_label=app_label, model_name=model_name, | ||
index_name=index_name, | ||
new_index_name=new_index_name) | ||
|
||
# Task to run in order to add the objects | ||
# that has been inserted into database while indexing_tasks was running | ||
# We pass the creation time of latest object, so its possible to index later items | ||
missed_index_task = index_missing_objects.si(app_label=app_label, | ||
model_name=model_name, | ||
document_class=str(doc), | ||
index_generation_time=index_time) | ||
|
||
# http://celery.readthedocs.io/en/latest/userguide/canvas.html#chords | ||
chord_tasks = chord(header=indexing_tasks, body=post_index_task) | ||
# http://celery.readthedocs.io/en/latest/userguide/canvas.html#chain | ||
chain(pre_index_task, chord_tasks, missed_index_task).apply_async() | ||
|
||
message = ("Successfully issued tasks for {}.{}, total {} items" | ||
.format(app_label, model_name, queryset.count())) | ||
log.info(message) | ||
|
||
def add_arguments(self, parser): | ||
parser.add_argument( | ||
'--models', | ||
dest='models', | ||
type=str, | ||
nargs='*', | ||
help=("Specify the model to be updated in elasticsearch." | ||
"The format is <app_label>.<model_name>") | ||
) | ||
|
||
def handle(self, *args, **options): | ||
""" | ||
Index models into Elasticsearch index asynchronously using celery. | ||
You can specify model to get indexed by passing | ||
`--model <app_label>.<model_name>` parameter. | ||
Otherwise, it will reindex all the models | ||
""" | ||
models = None | ||
if options['models']: | ||
models = [apps.get_model(model_name) for model_name in options['models']] | ||
|
||
self._run_reindex_tasks(models=models) |
Oops, something went wrong.