@staticmethod
def _get_indexing_tasks(app_label, model_name, index_name, queryset, document_class):
    """
    Yield one ``index_objects_to_es.si(...)`` signature per chunk of primary keys.

    The primary keys of ``queryset`` are streamed and grouped into lists of at
    most ``settings.ES_TASK_CHUNK_SIZE`` items. Every yielded signature receives
    ``app_label``, ``model_name``, ``document_class`` and ``index_name`` plus the
    ``objects_id`` list for its chunk.

    No signature is yielded for an empty chunk, so an empty queryset produces
    nothing and a row count that is an exact multiple of the chunk size does
    not produce a trailing task with ``objects_id=[]``.

    :param app_label: forwarded unchanged to ``index_objects_to_es``
    :param model_name: forwarded unchanged to ``index_objects_to_es``
    :param index_name: forwarded unchanged to ``index_objects_to_es``
    :param queryset: queryset whose objects' ``pk`` values are chunked
    :param document_class: forwarded unchanged to ``index_objects_to_es``
    :raises ValueError: if ``settings.ES_TASK_CHUNK_SIZE`` is smaller than 1
        (raised when the generator is first advanced)
    """
    chunk_size = settings.ES_TASK_CHUNK_SIZE
    if chunk_size < 1:
        # A non-positive chunk size can never fill a chunk; fail loudly
        # instead of looping forever or silently building one giant chunk.
        raise ValueError('ES_TASK_CHUNK_SIZE must be a positive integer')

    # Emit a progress line every `log_every` processed objects.
    log_every = 5000

    # Keyword arguments shared by every task; `objects_id` is added per chunk.
    data = {
        'app_label': app_label,
        'model_name': model_name,
        'document_class': document_class,
        'index_name': index_name,
    }

    objects_id = []
    # Only the primary key is read from each object, so restrict the loaded
    # fields to it and iterate lazily rather than materialising the queryset.
    for count, obj in enumerate(queryset.only('pk').iterator(), start=1):
        objects_id.append(obj.pk)

        # Progress is based on how many objects were seen, not on the pk
        # value, so it also works with non-integer or unordered keys.
        if count % log_every == 0:
            log.info('Total: %s', count)

        if len(objects_id) == chunk_size:
            yield index_objects_to_es.si(objects_id=objects_id, **data)
            # Start a fresh list: the yielded signature keeps the old one.
            objects_id = []

    # Flush the final, partially filled chunk (if any).
    if objects_id:
        yield index_objects_to_es.si(objects_id=objects_id, **data)