Merge pull request #4615 from safwanrahman/search_fix
fixing the indexing
ericholscher committed Sep 7, 2018
2 parents cbc5322 + c09aced commit 3ca137c
Showing 4 changed files with 25 additions and 14 deletions.
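
In short, the reindex pipeline stops serializing full lists of primary keys into each Celery task and instead passes (start, end) offsets that the worker applies as a queryset slice. A minimal before/after sketch of the task payload shape, assuming a chunk size of 5000 (the values are illustrative, not taken from the diff):

# Before this commit: each task message carried every primary key in the chunk.
old_payload = {
    'objects_id': list(range(1, 5001)),   # 5000 ids serialized into the broker
}

# After this commit: each task message carries only an offset pair.
new_payload = {
    'chunk': (0, 5000),                   # the worker slices queryset[0:5000] itself
}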
2 changes: 1 addition & 1 deletion readthedocs/projects/tasks.py
@@ -59,7 +59,7 @@
 from .exceptions import RepositoryError
 from .models import Domain, Feature, ImportedFile, Project, HTMLFile
 from .signals import (
-    after_build, after_vcs, before_build, before_vcs,
+    after_build, after_vcs, before_build, before_vcs,
     files_changed, bulk_post_create, bulk_post_delete)

 log = logging.getLogger(__name__)
10 changes: 5 additions & 5 deletions readthedocs/search/management/commands/reindex_elasticsearch.py
@@ -10,7 +10,7 @@

 from ...tasks import (index_objects_to_es, switch_es_index, create_new_es_index,
                       index_missing_objects)
-from ...utils import chunk_queryset
+from ...utils import get_chunk

 log = logging.getLogger(__name__)

@@ -19,16 +19,16 @@ class Command(BaseCommand):

     @staticmethod
     def _get_indexing_tasks(app_label, model_name, queryset, document_class, index_name):
-        queryset = queryset.values_list('id', flat=True)
-        chunked_queryset = chunk_queryset(queryset, settings.ES_TASK_CHUNK_SIZE)
+        total = queryset.count()
+        chunks = get_chunk(total, settings.ES_TASK_CHUNK_SIZE)

-        for chunk in chunked_queryset:
+        for chunk in chunks:
             data = {
                 'app_label': app_label,
                 'model_name': model_name,
                 'document_class': document_class,
                 'index_name': index_name,
-                'objects_id': list(chunk)
+                'chunk': chunk
             }
             yield index_objects_to_es.si(**data)

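
The command itself only yields immutable Celery signatures (.si); how they are executed is up to the caller. A hedged sketch of one way to fan them out with Celery's standard group primitive — the orchestration below is an assumption for illustration, not code from this PR, and the document_class/index_name values are placeholders:

from celery import group

from readthedocs.projects.models import HTMLFile
from readthedocs.search.management.commands.reindex_elasticsearch import Command

tasks = Command._get_indexing_tasks(
    app_label='projects',
    model_name='HTMLFile',
    queryset=HTMLFile.objects.all(),
    document_class='PageDocument',    # placeholder
    index_name='new_page_index',      # placeholder
)
group(tasks).apply_async()            # run the per-chunk indexing tasks in parallel

Because each signature now carries only a (start, end) pair, the number of queued tasks can grow with the table size without bloating the broker messages.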
20 changes: 16 additions & 4 deletions readthedocs/search/tasks.py
@@ -68,14 +68,26 @@ def switch_es_index(app_label, model_name, index_name, new_index_name):


 @app.task(queue='web')
-def index_objects_to_es(app_label, model_name, document_class, index_name, objects_id):
+def index_objects_to_es(app_label, model_name, document_class, index_name,
+                        chunk=None, objects_id=None):
+
+    assert not (chunk and objects_id), "You can not pass both chunk and objects_id"
+
     model = apps.get_model(app_label, model_name)
     document = _get_document(model=model, document_class=document_class)

     # Use queryset from model as the ids are specific
-    queryset = model.objects.all().filter(id__in=objects_id).iterator()
-    log.info("Indexing model: {}, id:'{}'".format(model.__name__, objects_id))
-    document().update(queryset, index_name=index_name)
+    queryset = model.objects.all()
+    if chunk:
+        # Chunk is a tuple with start and end index of queryset
+        start = chunk[0]
+        end = chunk[1]
+        queryset = queryset[start:end]
+    elif objects_id:
+        queryset = queryset.filter(id__in=objects_id)
+
+    log.info("Indexing model: {}, '{}' objects".format(model.__name__, queryset.count()))
+    document().update(queryset.iterator(), index_name=index_name)


 @app.task(queue='web')
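
With the new signature the task accepts either an offset chunk or an explicit id list, and the assert rejects calls that pass both. A hedged call sketch, assuming the task is dispatched from project code; the document_class and index_name values are placeholders, not taken from this diff:

from readthedocs.search.tasks import index_objects_to_es

# Offset-based path: the worker evaluates queryset[0:500] itself (LIMIT/OFFSET SQL),
# so the broker message stays tiny no matter how large the chunk is.
index_objects_to_es.delay(
    app_label='projects',
    model_name='HTMLFile',
    document_class='PageDocument',    # placeholder
    index_name='page_index',          # placeholder
    chunk=(0, 500),
)

# The id-based path is kept for small, targeted updates.
index_objects_to_es.delay(
    app_label='projects',
    model_name='HTMLFile',
    document_class='PageDocument',    # placeholder
    index_name='page_index',          # placeholder
    objects_id=[10, 42, 97],
)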
7 changes: 3 additions & 4 deletions readthedocs/search/utils.py
@@ -323,10 +323,9 @@ def get_project_list_or_404(project_slug, user):
     return project_list


-def chunk_queryset(queryset, chunk_size):
-    """Yield successive `chunk_size` chunks of queryset."""
+def get_chunk(total, chunk_size):
+    """Yield successive `chunk_size` chunks"""
     # Based on https://stackoverflow.com/a/312464
     # licensed under cc by-sa 3.0
-    total = queryset.count()
     for i in range(0, total, chunk_size):
-        yield queryset[i:i + chunk_size]
+        yield (i, i + chunk_size)
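
get_chunk now only does offset arithmetic; the final pair may overshoot total, which is harmless because slicing clamps the end index. A quick illustration, assuming it is run in a Django shell of the project:

from readthedocs.search.utils import get_chunk

list(get_chunk(10, 4))        # -> [(0, 4), (4, 8), (8, 12)]

# Applying a pair to a queryset becomes a LIMIT/OFFSET query instead of a huge
# id__in clause; the overshooting end of the last pair is simply clamped.
start, end = (8, 12)
list(range(10))[start:end]    # -> [8, 9]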
