diff --git a/kuma/scrape/sources/base.py b/kuma/scrape/sources/base.py
index 1f43cd634ec..7e9e25bc952 100644
--- a/kuma/scrape/sources/base.py
+++ b/kuma/scrape/sources/base.py
@@ -223,7 +223,10 @@ def __init__(self, path, **options):
         super(DocumentBaseSource, self).__init__(path, **options)
         if path != unquote(path):
             raise ValueError('URL-encoded path "%s"' % path)
-        self.locale, self.slug = self.locale_and_slug(path)
+        try:
+            self.locale, self.slug = self.locale_and_slug(path)
+        except ValueError:
+            self.locale, self.slug = None, None
 
     def locale_and_slug(self, path):
         """Extract a document locale and slug from a path."""
diff --git a/kuma/scrape/sources/document.py b/kuma/scrape/sources/document.py
index 61f7141942d..511538798ce 100644
--- a/kuma/scrape/sources/document.py
+++ b/kuma/scrape/sources/document.py
@@ -17,11 +17,10 @@ class DocumentSource(DocumentBaseSource):
 
     def load_and_validate_existing(self, storage):
         """Load the document from storage in simple cases."""
-
         just_this_doc = (not self.translations and
                          self.depth == 0 and
                          self.revisions == 1)
-        if not self.force and just_this_doc:
+        if not self.force and just_this_doc and self.locale and self.slug:
             document = storage.get_document(self.locale, self.slug)
             if document:
                 return True, []
@@ -31,6 +30,9 @@ def load_prereqs(self, requester, storage):
         """Load the data needed for a document."""
         data = {'needs': []}
 
+        if self.locale is None and self.slug is None:
+            raise self.SourceError('Not a document path "%s"', self.path)
+
         # Load data, gathering further source needs
         self.load_prereq_parent_topic(storage, data)
         self.load_prereq_redirect_check(storage, data)
diff --git a/kuma/scrape/tests/test_source_document.py b/kuma/scrape/tests/test_source_document.py
index bfdf9be900c..424f2364ad4 100644
--- a/kuma/scrape/tests/test_source_document.py
+++ b/kuma/scrape/tests/test_source_document.py
@@ -126,6 +126,17 @@ def test_gather_standard_doc_empty_history_is_error():
     assert source.state == source.STATE_ERROR
 
 
+def test_gather_document_zone_url_is_error():
+    """Old vanity zone URLs are not loaded."""
+    doc_path = "/en-US/Firefox/Releases/22"
+    source = DocumentSource(doc_path)
+    storage = mock_storage(spec=[])  # Storage is skipped
+    resources = source.gather(None, storage)
+    assert resources == []
+    assert source.state == source.STATE_ERROR
+    assert source.freshness == source.FRESH_UNKNOWN
+
+
 def test_gather_standard_doc_all_prereqs():
     path = '/en-US/docs/Test'
     source = DocumentSource(path, force=True)