
Commit

Merge pull request readthedocs#4615 from safwanrahman/search_fix
fixing the indexing
ericholscher authored and safwanrahman committed Sep 15, 2018
2 parents 9a78698 + ad2d174 commit a508020
Showing 4 changed files with 28 additions and 22 deletions.
13 changes: 4 additions & 9 deletions readthedocs/projects/tasks.py
@@ -29,11 +29,6 @@
from django.utils.translation import ugettext_lazy as _
from slumber.exceptions import HttpClientError

from .constants import LOG_TEMPLATE
from .exceptions import RepositoryError
from .models import ImportedFile, Project, Domain, Feature, HTMLFile
from .signals import before_vcs, after_vcs, before_build, after_build, files_changed, \
bulk_post_create, bulk_post_delete
from readthedocs.builds.constants import (
BUILD_STATE_BUILDING, BUILD_STATE_CLONING, BUILD_STATE_FINISHED,
BUILD_STATE_INSTALLING, LATEST, LATEST_VERBOSE_NAME, STABLE_VERBOSE_NAME)
@@ -59,12 +54,12 @@
from readthedocs.search.parse_json import process_all_json_files
from readthedocs.vcs_support import utils as vcs_support_utils
from readthedocs.worker import app

from .constants import LOG_TEMPLATE
from .exceptions import RepositoryError
from .models import Domain, Feature, ImportedFile, Project
from .signals import (
after_build, after_vcs, before_build, before_vcs, files_changed)
from .models import Domain, ImportedFile, Project
from .models import HTMLFile
from .signals import (after_build, after_vcs, before_build, before_vcs,
bulk_post_create, bulk_post_delete, files_changed)

log = logging.getLogger(__name__)

10 changes: 5 additions & 5 deletions readthedocs/search/management/commands/reindex_elasticsearch.py
@@ -10,7 +10,7 @@

from ...tasks import (index_objects_to_es, switch_es_index, create_new_es_index,
index_missing_objects)
from ...utils import chunk_queryset
from ...utils import get_chunk

log = logging.getLogger(__name__)

@@ -19,16 +19,16 @@ class Command(BaseCommand):

@staticmethod
def _get_indexing_tasks(app_label, model_name, queryset, document_class, index_name):
queryset = queryset.values_list('id', flat=True)
chunked_queryset = chunk_queryset(queryset, settings.ES_TASK_CHUNK_SIZE)
total = queryset.count()
chunks = get_chunk(total, settings.ES_TASK_CHUNK_SIZE)

for chunk in chunked_queryset:
for chunk in chunks:
data = {
'app_label': app_label,
'model_name': model_name,
'document_class': document_class,
'index_name': index_name,
'objects_id': list(chunk)
'chunk': chunk
}
yield index_objects_to_es.si(**data)

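With this change each generated Celery signature carries only a small (start, end) pair instead of a full list of primary keys, so the task payload stays small even for large tables. Below is a minimal, self-contained sketch of the idea; generate_indexing_tasks and make_signature are illustrative stand-ins for _get_indexing_tasks and index_objects_to_es.si, and the string arguments are example values only.

def get_chunk(total, chunk_size):
    """Yield successive (start, end) index pairs, as in readthedocs/search/utils.py."""
    for i in range(0, total, chunk_size):
        yield (i, i + chunk_size)


def generate_indexing_tasks(app_label, model_name, document_class,
                            index_name, total, chunk_size, make_signature):
    # make_signature stands in for index_objects_to_es.si(**data): it receives
    # the kwargs for one chunk and returns a task signature.
    for chunk in get_chunk(total, chunk_size):
        data = {
            'app_label': app_label,
            'model_name': model_name,
            'document_class': document_class,
            'index_name': index_name,
            'chunk': chunk,
        }
        yield make_signature(**data)


# 2,500 rows with a chunk size of 1,000 -> three signatures.
tasks = list(generate_indexing_tasks(
    'projects', 'HTMLFile', 'PageDocument', 'page-index',
    total=2500, chunk_size=1000,
    make_signature=lambda **kwargs: kwargs['chunk']))
assert tasks == [(0, 1000), (1000, 2000), (2000, 3000)]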
20 changes: 16 additions & 4 deletions readthedocs/search/tasks.py
@@ -68,14 +68,26 @@ def switch_es_index(app_label, model_name, index_name, new_index_name):


@app.task(queue='web')
def index_objects_to_es(app_label, model_name, document_class, index_name, objects_id):
def index_objects_to_es(app_label, model_name, document_class, index_name,
chunk=None, objects_id=None):

assert not (chunk and objects_id), "You can not pass both chunk and objects_id"

model = apps.get_model(app_label, model_name)
document = _get_document(model=model, document_class=document_class)

# Use queryset from model as the ids are specific
queryset = model.objects.all().filter(id__in=objects_id).iterator()
log.info("Indexing model: {}, id:'{}'".format(model.__name__, objects_id))
document().update(queryset, index_name=index_name)
queryset = model.objects.all()
if chunk:
# Chunk is a tuple with start and end index of queryset
start = chunk[0]
end = chunk[1]
queryset = queryset[start:end]
elif objects_id:
queryset = queryset.filter(id__in=objects_id)

log.info("Indexing model: {}, '{}' objects".format(model.__name__, queryset.count()))
document().update(queryset.iterator(), index_name=index_name)


@app.task(queue='web')
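A note on the chunk branch above: slicing a Django queryset with chunk[0]:chunk[1] is lazy and translates to a SQL LIMIT/OFFSET, so each task only fetches its own window of rows. The following sketch mimics that branching with a plain list standing in for the queryset; index_chunk and its arguments are illustrative names, not part of the commit.

def index_chunk(objects, chunk=None, objects_id=None):
    # Mirrors the branching in index_objects_to_es: either a (start, end)
    # chunk or an explicit list of ids, never both.
    assert not (chunk and objects_id), "You can not pass both chunk and objects_id"
    if chunk:
        start, end = chunk
        return objects[start:end]  # queryset slicing -> LIMIT/OFFSET
    if objects_id:
        return [obj for obj in objects if obj in objects_id]  # like id__in
    return objects


fake_queryset = list(range(7))
assert index_chunk(fake_queryset, chunk=(0, 5)) == [0, 1, 2, 3, 4]
assert index_chunk(fake_queryset, objects_id=[2, 6]) == [2, 6]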
7 changes: 3 additions & 4 deletions readthedocs/search/utils.py
@@ -323,10 +323,9 @@ def get_project_list_or_404(project_slug, user):
return project_list


def chunk_queryset(queryset, chunk_size):
"""Yield successive `chunk_size` chunks of queryset."""
def get_chunk(total, chunk_size):
"""Yield successive `chunk_size` chunks"""
# Based on https://stackoverflow.com/a/312464
# licensed under cc by-sa 3.0
total = queryset.count()
for i in range(0, total, chunk_size):
yield queryset[i:i + chunk_size]
yield (i, i + chunk_size)
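
For reference, the pairs this generator produces look like the following; the last pair may overshoot total, which is harmless because slicing a queryset (or a list) past its end simply returns the remaining items. A quick standalone sketch, not part of the commit:

def get_chunk(total, chunk_size):
    # Copy of the function above, repeated so this snippet runs on its own.
    for i in range(0, total, chunk_size):
        yield (i, i + chunk_size)


print(list(get_chunk(25, 10)))   # [(0, 10), (10, 20), (20, 30)]
print(list(range(25))[20:30])    # [20, 21, 22, 23, 24] -- overshoot is safe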
