Commit 7f9778e

Merge pull request #7439 from readthedocs/revert-7429-revert-7408-update-es-dep

Revert "Revert ES: update dependencies"
stsewd committed Oct 8, 2020
2 parents 80976bf + 1187fc0 commit 7f9778e
Showing 8 changed files with 50 additions and 54 deletions.
14 changes: 7 additions & 7 deletions readthedocs/search/documents.py
@@ -1,7 +1,7 @@
import logging

from django.conf import settings
from django_elasticsearch_dsl import DocType, Index, fields
from django_elasticsearch_dsl import Document, Index, fields
from elasticsearch import Elasticsearch

from readthedocs.projects.models import HTMLFile, Project
@@ -28,8 +28,8 @@ def update(self, *args, **kwargs):
super().update(*args, **kwargs)


@project_index.doc_type
class ProjectDocument(RTDDocTypeMixin, DocType):
@project_index.document
class ProjectDocument(RTDDocTypeMixin, Document):

# Metadata
url = fields.TextField(attr='get_absolute_url')
@@ -43,14 +43,14 @@ class ProjectDocument(RTDDocTypeMixin, DocType):

modified_model_field = 'modified_date'

class Meta:
class Django:
model = Project
fields = ('name', 'slug', 'description')
ignore_signals = True


@page_index.doc_type
class PageDocument(RTDDocTypeMixin, DocType):
@page_index.document
class PageDocument(RTDDocTypeMixin, Document):

# Metadata
project = fields.KeywordField(attr='project.slug')
@@ -88,7 +88,7 @@ class PageDocument(RTDDocTypeMixin, DocType):

modified_model_field = 'modified_date'

class Meta:
class Django:
model = HTMLFile
fields = ('commit', 'build')
ignore_signals = True
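
This file follows the rename introduced in django-elasticsearch-dsl 6.x: `DocType` becomes `Document`, documents are attached to an index with the `Index.document` decorator, and the model binding moves from an inner `Meta` class to an inner `Django` class. A minimal sketch of the new-style declaration, using an illustrative index and class name that are not part of this commit:

    from django_elasticsearch_dsl import Document, Index, fields

    from readthedocs.projects.models import Project

    example_index = Index('example_project_index')  # hypothetical index name

    @example_index.document
    class ExampleProjectDocument(Document):
        url = fields.TextField(attr='get_absolute_url')

        class Django:  # formerly ``class Meta``
            model = Project                            # Django model to index
            fields = ('name', 'slug', 'description')   # model fields mapped automatically
            ignore_signals = True                      # do not auto-index on model signals
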
4 changes: 2 additions & 2 deletions readthedocs/search/faceted_search.py
@@ -148,7 +148,7 @@ def query(self, search, query):
class ProjectSearchBase(RTDFacetedSearch):
facets = {'language': TermsFacet(field='language')}
doc_types = [ProjectDocument]
index = ProjectDocument._doc_type.index
index = ProjectDocument._index._name
fields = ('name^10', 'slug^5', 'description')
operators = ['and', 'or']

@@ -163,7 +163,7 @@ class PageSearchBase(RTDFacetedSearch):
),
}
doc_types = [PageDocument]
index = PageDocument._doc_type.index
index = PageDocument._index._name

# boosting for these fields need to be close enough
# to be re-boosted by the page rank.
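
With the updated library, the index a document class writes to is exposed through the class-level `_index` object rather than `_doc_type.index`, so the faceted search classes now read the name from `_index._name`. A small sketch of the accessor, assuming the document classes defined above:

    from readthedocs.search.documents import PageDocument, ProjectDocument

    # ``_index`` is the elasticsearch-dsl Index bound to the Document class;
    # ``_name`` holds the configured index name (see ES_INDEXES in settings below).
    page_index_name = PageDocument._index._name
    project_index_name = ProjectDocument._index._name
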
14 changes: 9 additions & 5 deletions readthedocs/search/management/commands/reindex_elasticsearch.py
@@ -1,15 +1,19 @@
import datetime
import logging

from celery import chord, chain
from celery import chain, chord
from django.apps import apps
from django.conf import settings
from django.core.management import BaseCommand
from django.utils import timezone
from django_elasticsearch_dsl.registries import registry

from ...tasks import (index_objects_to_es, switch_es_index, create_new_es_index,
index_missing_objects)
from ...tasks import (
create_new_es_index,
index_missing_objects,
index_objects_to_es,
switch_es_index,
)

log = logging.getLogger(__name__)

@@ -64,11 +68,11 @@ def _run_reindex_tasks(self, models, queue):
app_label = queryset.model._meta.app_label
model_name = queryset.model.__name__

index_name = doc._doc_type.index
index_name = doc._index._name
new_index_name = "{}_{}".format(index_name, timestamp)
# Set index temporarily for indexing,
# this will only get set during the running of this command
doc._doc_type.index = new_index_name
doc._index._name = new_index_name

pre_index_task = create_new_es_index.si(app_label=app_label,
model_name=model_name,
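
The reindex command keeps the same zero-downtime pattern: build a timestamped index name, point the document class at it for the duration of the command, index into it, then switch. Only the patched attribute changes (`_index._name` instead of `_doc_type.index`). A hedged sketch of that step; the timestamp format is illustrative, not taken from this commit:

    import datetime

    from readthedocs.search.documents import PageDocument

    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')  # illustrative format
    index_name = PageDocument._index._name
    new_index_name = "{}_{}".format(index_name, timestamp)
    # Set the index temporarily for indexing; only in effect while the command runs.
    PageDocument._index._name = new_index_name
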
20 changes: 10 additions & 10 deletions readthedocs/search/serializers.py
@@ -130,7 +130,7 @@ def get_blocks(self, obj):

sorted_results = sorted(
itertools.chain(sections, domains),
key=attrgetter('_score'),
key=attrgetter('meta.score'),
reverse=True,
)
sorted_results = [
@@ -157,11 +157,11 @@ def get_content(self, obj):
class DomainSearchSerializer(serializers.Serializer):

type = serializers.CharField(default='domain', source=None, read_only=True)
role = serializers.CharField(source='_source.role_name')
name = serializers.CharField(source='_source.name')
id = serializers.CharField(source='_source.anchor')
content = serializers.CharField(source='_source.docstrings')
highlights = DomainHighlightSerializer(source='highlight', default=dict)
role = serializers.CharField(source='role_name')
name = serializers.CharField()
id = serializers.CharField(source='anchor')
content = serializers.CharField(source='docstrings')
highlights = DomainHighlightSerializer(source='meta.highlight', default=dict)


class SectionHighlightSerializer(serializers.Serializer):
@@ -181,7 +181,7 @@ def get_content(self, obj):
class SectionSearchSerializer(serializers.Serializer):

type = serializers.CharField(default='section', source=None, read_only=True)
id = serializers.CharField(source='_source.id')
title = serializers.CharField(source='_source.title')
content = serializers.CharField(source='_source.content')
highlights = SectionHighlightSerializer(source='highlight', default=dict)
id = serializers.CharField()
title = serializers.CharField()
content = serializers.CharField()
highlights = SectionHighlightSerializer(source='meta.highlight', default=dict)
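
The serializer changes reflect how the updated elasticsearch-dsl returns results: hits are attribute-style objects whose document fields are plain attributes, while score and highlight metadata live under `hit.meta`, so the `_source.*`, `_score`, and `highlight` lookups are no longer needed. A brief sketch under that assumption:

    from operator import attrgetter

    def sort_hits_by_score(hits):
        # ``meta.score`` replaces the raw ``_score`` key used before
        return sorted(hits, key=attrgetter('meta.score'), reverse=True)

    # Field access on a hit, as the serializers above now expect:
    #   hit.role_name, hit.anchor, hit.docstrings, hit.meta.highlight
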
15 changes: 9 additions & 6 deletions readthedocs/search/tasks.py
@@ -9,7 +9,8 @@
from readthedocs.projects.models import Project
from readthedocs.search.models import SearchQuery
from readthedocs.worker import app
from .utils import _get_index, _get_document

from .utils import _get_document, _get_index

log = logging.getLogger(__name__)

@@ -44,17 +45,19 @@ def index_objects_to_es(

if index_name:
# Hack the index name temporarily for reindexing tasks
old_index_name = document._doc_type.index
document._doc_type.index = index_name
old_index_name = document._index._name
document._index._name = index_name
log.info('Replacing index name %s with %s', old_index_name, index_name)

log.info("Indexing model: %s, '%s' objects", model.__name__, queryset.count())
doc_obj.update(queryset.iterator())

if index_name:
log.info('Undoing index replacement, settings %s with %s',
document._doc_type.index, old_index_name)
document._doc_type.index = old_index_name
log.info(
'Undoing index replacement, settings %s with %s',
document._index._name, old_index_name,
)
document._index._name = old_index_name


@app.task(queue='web')
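
`index_objects_to_es` keeps its temporary-index hack; again only the patched attribute changes. A hedged sketch of the pattern, wrapped in try/finally here for safety and using a placeholder index name rather than the one generated by the reindex command:

    from readthedocs.projects.models import HTMLFile
    from readthedocs.search.documents import PageDocument

    new_index_name = 'page_index_20201008120000'  # placeholder
    old_index_name = PageDocument._index._name
    PageDocument._index._name = new_index_name
    try:
        # Bulk-index the queryset into the temporary index.
        PageDocument().update(HTMLFile.objects.all().iterator())
    finally:
        # Undo the index replacement.
        PageDocument._index._name = old_index_name
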
9 changes: 6 additions & 3 deletions readthedocs/search/utils.py
@@ -89,7 +89,7 @@ def _get_index(indices, index_name):
:return: DED Index
"""
for index in indices:
if str(index) == index_name:
if index._name == index_name:
return index


@@ -116,7 +116,10 @@ def _indexing_helper(html_objs_qs, wipe=False):
else, html_objs are indexed.
"""
from readthedocs.search.documents import PageDocument
from readthedocs.search.tasks import index_objects_to_es, delete_objects_in_es
from readthedocs.search.tasks import (
delete_objects_in_es,
index_objects_to_es,
)

if html_objs_qs:
obj_ids = []
@@ -148,7 +151,7 @@ def _get_sorted_results(results, source_key='_source'):
source_key: hit._source.to_dict(),
'highlight': hit.highlight.to_dict() if hasattr(hit, 'highlight') else {}
}
for hit in sorted(results, key=attrgetter('_score'), reverse=True)
for hit in sorted(results, key=attrgetter('meta.score'), reverse=True)
]

return sorted_results
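
`_get_index` and `_get_sorted_results` follow the same API shift: the index is matched by its `_name` attribute rather than its string form, and scores are read from `hit.meta.score`. A compact sketch of the lookup helper, written as an illustrative copy rather than the module itself:

    def get_index_by_name(indices, index_name):
        """Return the elasticsearch-dsl Index whose ``_name`` matches ``index_name``."""
        for index in indices:
            if index._name == index_name:
                return index
        return None
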
9 changes: 5 additions & 4 deletions readthedocs/settings/base.py
@@ -581,16 +581,17 @@ def DOCKER_LIMITS(self):
ES_INDEXES = {
'project': {
'name': 'project_index',
'settings': {'number_of_shards': 1,
'number_of_replicas': 1
}
'settings': {
'number_of_shards': 1,
'number_of_replicas': 1
},
},
'page': {
'name': 'page_index',
'settings': {
'number_of_shards': 1,
'number_of_replicas': 1,
}
},
},
}

19 changes: 2 additions & 17 deletions requirements/pip.txt
@@ -47,23 +47,8 @@ GitPython==3.1.8

# Search
elasticsearch==6.8.1 # pyup: <7.0.0


# elasticsearch-dsl==6.3.1 produces this error
# File "/home/travis/build/rtfd/readthedocs.org/.tox/py36/lib/python3.6/site-packages/django_elasticsearch_dsl/documents.py", line 8, in <module>
# from elasticsearch_dsl.document import DocTypeMeta as DSLDocTypeMeta
# ImportError: cannot import name 'DocTypeMeta'
#
# Commit 97e3f75 adds the NestedFacet
git+https://github.com/elastic/elasticsearch-dsl-py@97e3f756a8cacd1c863d3ced3d17abcafbb0f85e#egg=elasticsearch-dsl==6.1.1

# django-elasticsearch-dsl==6.4.1 produces this error
# File "/home/travis/build/readthedocs/readthedocs.org/.tox/py36/lib/python3.6/site-packages/django_elasticsearch_dsl/__init__.py", line 3, in <module>
# from .documents import DocType # noqa
# File "/home/travis/build/readthedocs/readthedocs.org/.tox/py36/lib/python3.6/site-packages/django_elasticsearch_dsl/documents.py", line 7, in <module>
# from elasticsearch_dsl import Document as DSLDocument
# ImportError: cannot import name 'Document'
django-elasticsearch-dsl==0.5.1 # pyup: ignore
elasticsearch-dsl==6.4.0 # pyup: <7.0
django-elasticsearch-dsl==6.4.2 # pyup: <7.0
selectolax==0.2.7

# NOTE: this dep can be removed in python 3.7 in favor of ``date.fromisoformat``