diff --git a/kuma/scrape/scraper.py b/kuma/scrape/scraper.py index d2b260659aa..86d590e8d61 100644 --- a/kuma/scrape/scraper.py +++ b/kuma/scrape/scraper.py @@ -9,8 +9,8 @@ from .sources import ( DocumentChildrenSource, DocumentCurrentSource, DocumentHistorySource, - DocumentMetaSource, DocumentRenderedSource, DocumentSource, LinksSource, - RevisionSource, Source, UserSource, ZoneRootSource) + DocumentMetaSource, DocumentRedirectSource, DocumentSource, LinksSource, + RevisionSource, Source, UserSource) from .storage import Storage logger = logging.getLogger('kuma.scraper') @@ -35,9 +35,9 @@ def session(self): self._session = requests.Session() return self._session - def request(self, path, raise_for_status=True): + def request(self, path, raise_for_status=True, method='GET'): url = self.base_url + path - logger.debug("GET %s", url) + logger.debug("%s %s", method, url) attempts = 0 response = None retry = True @@ -46,8 +46,9 @@ def request(self, path, raise_for_status=True): attempts += 1 err = None retry = False + request_function = getattr(self.session, method.lower()) try: - response = self.session.get(url, timeout=timeout) + response = request_function(url, timeout=timeout) except requests.exceptions.Timeout as err: logger.warn("Timeout on request %d for %s", attempts, url) time.sleep(timeout) @@ -90,11 +91,10 @@ class Scraper(object): 'document_current': DocumentCurrentSource, 'document_history': DocumentHistorySource, 'document_meta': DocumentMetaSource, - 'document_rendered': DocumentRenderedSource, + 'document_redirect': DocumentRedirectSource, 'links': LinksSource, 'revision': RevisionSource, 'user': UserSource, - 'zone_root': ZoneRootSource, } def __init__(self, host='developer.mozilla.org', ssl=True): diff --git a/kuma/scrape/sources/__init__.py b/kuma/scrape/sources/__init__.py index eb36afd1444..a330cd3e087 100644 --- a/kuma/scrape/sources/__init__.py +++ b/kuma/scrape/sources/__init__.py @@ -7,11 +7,10 @@ from .document_current import DocumentCurrentSource from .document_history import DocumentHistorySource from .document_meta import DocumentMetaSource -from .document_rendered import DocumentRenderedSource +from .document_redirect import DocumentRedirectSource from .links import LinksSource from .revision import RevisionSource from .user import UserSource -from .zone_root import ZoneRootSource __all__ = [ DocumentBaseSource, @@ -19,11 +18,10 @@ DocumentCurrentSource, DocumentHistorySource, DocumentMetaSource, - DocumentRenderedSource, + DocumentRedirectSource, DocumentSource, LinksSource, RevisionSource, Source, UserSource, - ZoneRootSource, ] diff --git a/kuma/scrape/sources/base.py b/kuma/scrape/sources/base.py index df506bd7bdf..1f43cd634ec 100644 --- a/kuma/scrape/sources/base.py +++ b/kuma/scrape/sources/base.py @@ -223,12 +223,7 @@ def __init__(self, path, **options): super(DocumentBaseSource, self).__init__(path, **options) if path != unquote(path): raise ValueError('URL-encoded path "%s"' % path) - try: - self.locale, self.slug = self.locale_and_slug(path) - except ValueError: - self.locale, self.slug, self.normalized_path = None, None, None - else: - self.normalized_path = path + self.locale, self.slug = self.locale_and_slug(path) def locale_and_slug(self, path): """Extract a document locale and slug from a path.""" diff --git a/kuma/scrape/sources/document.py b/kuma/scrape/sources/document.py index 3bb929dae26..61f7141942d 100644 --- a/kuma/scrape/sources/document.py +++ b/kuma/scrape/sources/document.py @@ -20,8 +20,7 @@ def load_and_validate_existing(self, storage): just_this_doc = (not self.translations and self.depth == 0 and - self.revisions == 1 and - self.normalized_path) + self.revisions == 1) if not self.force and just_this_doc: document = storage.get_document(self.locale, self.slug) if document: @@ -33,41 +32,20 @@ def load_prereqs(self, requester, storage): data = {'needs': []} # Load data, gathering further source needs - self.load_prereq_normalized_path(storage, data) - if self.normalized_path: - self.load_prereq_parent_topic(storage, data) - self.load_prereq_rendered(storage, data) - if data.get('has_rendered'): - self.load_prereq_redirect(storage, data) - if data.get('is_standard_page'): - self.load_prereq_metadata(storage, data) - self.load_prereq_english_parent(storage, data) - self.load_prereq_history(storage, data) - self.load_prereq_children(storage, data) + self.load_prereq_parent_topic(storage, data) + self.load_prereq_redirect_check(storage, data) + if data.get('has_redirect_check'): + self.load_prereq_redirect(storage, data) + if data.get('is_standard_page'): + self.load_prereq_metadata(storage, data) + self.load_prereq_english_parent(storage, data) + self.load_prereq_history(storage, data) + self.load_prereq_children(storage, data) return not data['needs'], data - def load_prereq_normalized_path(self, storage, data): - """Load zone data to normalize path, if needed.""" - if self.normalized_path: - return # Already normalized, done - - # Determine the standard path associated with the zone - zone_data = storage.get_zone_root(self.path) - if zone_data is None: - data['needs'].append(('zone_root', self.path, {})) - elif zone_data.get('errors'): - raise self.SourceError( - 'Unable to load zone root for %s', self.path) - else: - self.normalized_path = self.path.replace( - zone_data['zone_path'], zone_data['doc_path']) - self.locale, self.slug = self.locale_and_slug( - self.normalized_path) - def load_prereq_parent_topic(self, storage, data): """Load the parent topic, if a child page.""" - assert self.normalized_path if not self.parent_slug: return # No parent to load @@ -77,63 +55,37 @@ def load_prereq_parent_topic(self, storage, data): else: data['parent_topic'] = parent_topic - def load_prereq_rendered(self, storage, data): - """Load the rendered page, to detect redirects and zones.""" - assert self.normalized_path - rendered = storage.get_document_rendered(self.locale, self.slug) - if rendered is None: - data['needs'].append( - ('document_rendered', self.normalized_path, {})) + def load_prereq_redirect_check(self, storage, data): + """Check the URL for redirects.""" + redirect = storage.get_document_redirect(self.locale, self.slug) + if redirect is None: + data['needs'].append(('document_redirect', self.path, {})) else: - data['has_rendered'] = True - data['redirect_to'] = rendered.get('redirect_to') - data['is_zone_root'] = rendered.get('is_zone_root', False) - data['zone_css_slug'] = rendered.get('zone_css_slug', '') + data['has_redirect_check'] = True + data['redirect_to'] = redirect.get('redirect_to') def load_prereq_redirect(self, storage, data): - """Load the zone or standard redirect.""" - assert self.normalized_path - data['is_standard_page'] = data.get('has_rendered') + """Load the destination of a redirect.""" + data['is_standard_page'] = data.get('has_redirect_check') redirect_to = data.get('redirect_to') if not redirect_to: return # Not a redirect, don't follow - # Is it a zoned URL or a moved page? - try: - rd_locale, rd_slug = self.locale_and_slug(redirect_to) - except ValueError: - # Zoned URL - zone_redirect = storage.get_zone_root(redirect_to) - if zone_redirect is None: - data['needs'].append(('zone_root', redirect_to, {})) - elif zone_redirect.get('errors'): - raise self.SourceError('Unable to get zone_root "%s"', - redirect_to) - else: - data['zone_redirect_path'] = zone_redirect['zone_path'] - z_path = zone_redirect['doc_path'] - if z_path != self.path: - z_locale, z_slug = self.locale_and_slug(z_path) - zone_root_doc = storage.get_document(z_locale, z_slug) - if zone_root_doc is None: - data['needs'].append(('document', z_path, {})) - else: - # Moved Page - redirect = storage.get_document(rd_locale, rd_slug) - data['is_standard_page'] = False - if redirect is None: - data['needs'].append(('document', redirect_to, {})) + # Load the destination page + rd_locale, rd_slug = self.locale_and_slug(redirect_to) + redirect = storage.get_document(rd_locale, rd_slug) + data['is_standard_page'] = False + if redirect is None: + data['needs'].append(('document', redirect_to, {})) def load_prereq_metadata(self, storage, data): """Load the document metadata.""" - assert self.normalized_path meta = storage.get_document_metadata(self.locale, self.slug) if meta is None: - data['needs'].append(('document_meta', self.normalized_path, + data['needs'].append(('document_meta', self.path, self.current_options())) elif 'error' in meta: - raise self.SourceError('Error getting metadata for %s', - self.normalized_path) + raise self.SourceError('Error getting metadata for %s', self.path) elif meta: data['id'] = meta['id'] data['locale'] = meta['locale'] @@ -178,11 +130,11 @@ def load_prereq_history(self, storage, data): """Load the revision history.""" history = storage.get_document_history(self.locale, self.slug) if history is None: - data['needs'].append(('document_history', self.normalized_path, + data['needs'].append(('document_history', self.path, {"revisions": self.revisions})) elif len(history) == 0: raise self.SourceError('Empty history for document "%s"', - self.normalized_path) + self.path) def load_prereq_children(self, storage, data): """Load the document children.""" @@ -191,13 +143,12 @@ def load_prereq_children(self, storage, data): children = storage.get_document_children(self.locale, self.slug) if children is None: options = self.current_options() - data['needs'].append(('document_children', self.normalized_path, - options)) + data['needs'].append(('document_children', self.path, options)) def save_data(self, storage, data): """Save the document as a redirect or full document.""" redirect_to = data.get('redirect_to') - if redirect_to and not data.get('zone_redirect_path'): + if redirect_to: # Prepare data for a redirect document doc_data = { 'locale': self.locale, @@ -208,7 +159,6 @@ def save_data(self, storage, data): # Prepare data for a full document keys = ( 'id', - 'is_zone_root', 'locale', 'modified', 'parent', @@ -217,8 +167,6 @@ def save_data(self, storage, data): 'tags', 'title', 'uuid', - 'zone_css_slug', - 'zone_redirect_path', ) doc_data = {} for key in keys: @@ -235,5 +183,5 @@ def save_data(self, storage, data): doc_data['locale'], self.path) doc_data['locale'] = self.locale storage.save_document(doc_data) - return [('document_current', self.normalized_path, + return [('document_current', self.path, {'revisions': self.revisions})] diff --git a/kuma/scrape/sources/document_redirect.py b/kuma/scrape/sources/document_redirect.py new file mode 100644 index 00000000000..738f5413f3c --- /dev/null +++ b/kuma/scrape/sources/document_redirect.py @@ -0,0 +1,36 @@ +"""DocumentRedirectSource checks if a MDN wiki document is a redirect.""" +from __future__ import absolute_import, unicode_literals + +from django.utils.six.moves.urllib.parse import urlparse + +from .base import DocumentBaseSource + + +class DocumentRedirectSource(DocumentBaseSource): + """Request the rendered document, to detect redirects.""" + + def source_path(self): + return '/%s/docs/%s' % (self.locale, self.slug) + + def load_prereqs(self, requester, storage): + """Request the document, and process the redirects and response.""" + response = requester.request(self.source_path(), + raise_for_status=False, + method='HEAD') + if response.status_code not in (200, 301, 302): + raise self.SourceError('status_code %s', response.status_code) + data = {} + + # Is this a redirect? + if response.history: + redirect_from = urlparse(response.history[0].url).path + redirect_to = urlparse(response.url).path + if redirect_to != redirect_from: + data['redirect_to'] = self.decode_href(redirect_to) + + return True, data + + def save_data(self, storage, data): + """Save the rendered document data.""" + storage.save_document_redirect(self.locale, self.slug, data) + return [] diff --git a/kuma/scrape/sources/document_rendered.py b/kuma/scrape/sources/document_rendered.py deleted file mode 100644 index 8ed5c897181..00000000000 --- a/kuma/scrape/sources/document_rendered.py +++ /dev/null @@ -1,67 +0,0 @@ -"""DocumentRenderedSource requests MDN wiki documents.""" -from __future__ import absolute_import, unicode_literals - -import re - -from django.utils.six.moves.urllib.parse import urlparse -from pyquery import PyQuery as pq - -from .base import DocumentBaseSource - - -class DocumentRenderedSource(DocumentBaseSource): - """ - Request the rendered document. - - This is used to detect zones and redirects. - """ - - # Regular expression for custom zone CSS, like zone-firefox.css - re_custom_href = re.compile("""(?x) # Verbose RE mode - .* # Match anything - \/zone- # Match '/zone-' - (?P[^.]*) # Capture the slug - (\.[0-9a-fA-F]+)? # There may be a hash in the filename - \.css # Ends in .css - """) - - def source_path(self): - return '/%s/docs/%s' % (self.locale, self.slug) - - def load_prereqs(self, requester, storage): - """Request the document, and process the redirects and response.""" - response = requester.request(self.source_path(), - raise_for_status=False) - if response.status_code not in (200, 301, 302): - raise self.SourceError('status_code %s', response.status_code) - data = {} - - # Is this a redirect? - if response.history: - redirect_from = urlparse(response.history[0].url).path - redirect_to = urlparse(response.url).path - if redirect_to != redirect_from: - data['redirect_to'] = self.decode_href(redirect_to) - - # Is this a zone root? - parsed = pq(response.content) - body = parsed('body') - if body.has_class('zone-landing'): - data['is_zone_root'] = True - - # Find the zone stylesheet - links = parsed('head link') - for link in links: - rel = link.attrib.get('rel') - href = self.decode_href(link.attrib.get('href')) - if rel == 'stylesheet' and href: - match = self.re_custom_href.match(href) - if match: - data['zone_css_slug'] = match.group('slug') - - return True, data - - def save_data(self, storage, data): - """Save the rendered document data.""" - storage.save_document_rendered(self.locale, self.slug, data) - return [] diff --git a/kuma/scrape/sources/zone_root.py b/kuma/scrape/sources/zone_root.py deleted file mode 100644 index 82fcd2f6765..00000000000 --- a/kuma/scrape/sources/zone_root.py +++ /dev/null @@ -1,86 +0,0 @@ -"""ZoneRootSource determine zone URL redirects.""" -from __future__ import absolute_import, unicode_literals - -import logging -import re - -from django.utils.six.moves.urllib.parse import unquote - -from .base import Source - -logger = logging.getLogger('kuma.scraper') - - -class ZoneRootSource(Source): - """Gather data about the root of a DocumentZone.""" - PARAM_NAME = 'path' - re_path = re.compile(r"/(?P[^/]+)/(?P[^/]+)") - - def __init__(self, path, **options): - super(ZoneRootSource, self).__init__(path, **options) - if path != unquote(path): - raise ValueError('URL-encoded path "%s"' % path) - try: - self.locale, self.slug = self.locale_and_zone(path) - except ValueError as exception: - self.locale, self.slug = None, None - logger.warn(exception) - self.state = self.STATE_ERROR - - def locale_and_zone(self, path): - """Extract a document locale and zone subpath from a path.""" - match = self.re_path.match(path) - if match: - return match.groups() - else: - raise ValueError('Not a valid zoned document path "%s"' % path) - - def raise_if_errors(self, errors): - """Raises errors to terminate zone processing.""" - if errors: - raise self.SourceError('Bad JSON data for %s$json: %s', - self.path, ', '.join(errors)) - - def load_and_validate_existing(self, storage): - """Load existing zone root data.""" - data = storage.get_zone_root(self.path) - if data: - self.raise_if_errors(data.get('errors')) - return True, [('document', data['doc_path'], {})] - else: - return False, None - - def load_prereqs(self, requester, storage): - """Scrape JSON data for the zone root.""" - response = requester.request(self.path + "$json") - data = self.extract_data(response.json()) - return True, data - - def save_data(self, storage, data): - """Save zone root for future processing.""" - storage.save_zone_root(self.path, data) - self.raise_if_errors(data.get('errors')) - return [('document', data['doc_path'], {})] - - def extract_data(self, metadata): - """Extract zone root data from JSON.""" - err = [] - url = self.decode_href(metadata['url']) - if url == self.path: - err.append('url "%s" should be the non-zone path' % url) - if metadata['locale'] != self.locale: - err.append('locale "%s" should be the same as the path locale' % - metadata['locale']) - if err: - return { - 'errors': err, - 'doc_locale': self.locale, - 'metadata_locale': metadata['locale'], - 'metadata_url': url, - 'zone_path': self.path, - } - else: - return { - 'doc_path': url, - 'zone_path': self.path, - } diff --git a/kuma/scrape/storage.py b/kuma/scrape/storage.py index 357a6fd99d6..b3ca8483586 100644 --- a/kuma/scrape/storage.py +++ b/kuma/scrape/storage.py @@ -7,8 +7,8 @@ from kuma.users.models import User, UserBan from kuma.wiki.constants import REDIRECT_CONTENT -from kuma.wiki.models import (Document, DocumentTag, DocumentZone, - LocalizationTag, ReviewTag, Revision) +from kuma.wiki.models import (Document, DocumentTag, LocalizationTag, + ReviewTag, Revision) logger = logging.getLogger('kuma.scraper') @@ -21,9 +21,8 @@ def __init__(self): 'document_children': {}, 'document_history': {}, 'document_metadata': {}, - 'document_rendered': {}, + 'document_redirect': {}, 'revision_html': {}, - 'zone_root': {}, } def sorted_tags(self, tags): @@ -73,9 +72,6 @@ def save_document(self, data): doc_id = doc_data.pop('id', None) tags = doc_data.pop('tags', []) redirect_to = doc_data.pop('redirect_to', None) - zone_redirect_path = doc_data.pop('zone_redirect_path', None) - zone_css_slug = doc_data.pop('zone_css_slug', '') - is_zone_root = doc_data.pop('is_zone_root', False) attempt = 0 document = None @@ -109,17 +105,6 @@ def save_document(self, data): assert document is not None self.safe_add_tags(tags, DocumentTag, document.tags) - if is_zone_root: - try: - dz = DocumentZone.objects.get(document=document) - except DocumentZone.DoesNotExist: - dz = DocumentZone.objects.create(document=document) - dz.css_slug = zone_css_slug - if zone_redirect_path: - url_root = zone_redirect_path.split('/')[-1] - dz.url_root = url_root - dz.save() - Document.objects.filter(pk=document.pk).update(json=None) def get_document_metadata(self, locale, slug): @@ -134,11 +119,11 @@ def get_document_history(self, locale, slug): def save_document_history(self, locale, slug, data): self.local['document_history'][(locale, slug)] = data - def get_document_rendered(self, locale, slug): - return self.local['document_rendered'].get((locale, slug), None) + def get_document_redirect(self, locale, slug): + return self.local['document_redirect'].get((locale, slug), None) - def save_document_rendered(self, locale, slug, data): - self.local['document_rendered'][(locale, slug)] = data + def save_document_redirect(self, locale, slug, data): + self.local['document_redirect'][(locale, slug)] = data def get_document_children(self, locale, slug): return self.local['document_children'].get((locale, slug), None) @@ -233,9 +218,3 @@ def save_user(self, data): ban, ban_created = UserBan.objects.get_or_create( user=user, defaults={'by': user, 'reason': 'Ban detected by scraper'}) - - def get_zone_root(self, path): - return self.local['zone_root'].get(path, None) - - def save_zone_root(self, path, data): - self.local['zone_root'][path] = data diff --git a/kuma/scrape/tests/test_source_document.py b/kuma/scrape/tests/test_source_document.py index 912574b5518..bfdf9be900c 100644 --- a/kuma/scrape/tests/test_source_document.py +++ b/kuma/scrape/tests/test_source_document.py @@ -27,23 +27,21 @@ # The data passed to Storage.save_document for this metadata doc_data = { 'id': 100, - 'is_zone_root': False, 'locale': 'en-US', 'modified': datetime(2016, 11, 8, 15, 26, 23, 807948), 'slug': 'Test', 'tags': [], 'title': 'Test Title', 'uuid': 'f9f8e807-a98e-4106-867f-4e1c99cb7f2c', - 'zone_css_slug': '', } def test_gather_root_no_prereqs(): doc_path = '/en-US/docs/RootDoc' source = DocumentSource(doc_path) - storage = mock_storage(spec=['get_document', 'get_document_rendered']) + storage = mock_storage(spec=['get_document', 'get_document_redirect']) resources = source.gather(None, storage) - assert resources == [('document_rendered', doc_path, {})] + assert resources == [('document_redirect', doc_path, {})] assert source.state == source.STATE_PREREQ assert source.freshness == source.FRESH_UNKNOWN @@ -63,10 +61,10 @@ def test_gather_forced(): """Resources are fetched if force=True.""" doc_path = '/en-US/docs/RootDoc' source = DocumentSource(doc_path, force=True) - storage = mock_storage(spec=['get_document', 'get_document_rendered']) + storage = mock_storage(spec=['get_document', 'get_document_redirect']) storage.get_document.return_value = "existing document" resources = source.gather(None, storage) - assert resources == [('document_rendered', doc_path, {})] + assert resources == [('document_redirect', doc_path, {})] assert source.state == source.STATE_PREREQ assert source.freshness == source.FRESH_UNKNOWN @@ -76,11 +74,11 @@ def test_gather_child_doc(): parent_path = '/en-US/docs/Root' child_path = parent_path + '/Child' source = DocumentSource(child_path) - storage = mock_storage(spec=['get_document', 'get_document_rendered']) + storage = mock_storage(spec=['get_document', 'get_document_redirect']) resources = source.gather(None, storage) assert resources == [ ('document', parent_path, {}), - ('document_rendered', child_path, {})] + ('document_redirect', child_path, {})] assert source.state == source.STATE_PREREQ @@ -89,10 +87,10 @@ def test_gather_child_doc_parent_in_storage(): parent_path = '/en-US/docs/Root' child_path = parent_path + '/Child' source = DocumentSource(child_path, force=True) - storage = mock_storage(spec=['get_document', 'get_document_rendered']) + storage = mock_storage(spec=['get_document', 'get_document_redirect']) storage.get_document.return_value = 'parent document' resources = source.gather(None, storage) - assert resources == [('document_rendered', child_path, {})] + assert resources == [('document_redirect', child_path, {})] storage.get_document.assert_called_once_with('en-US', 'Root') assert source.state == source.STATE_PREREQ @@ -102,9 +100,9 @@ def test_gather_standard_doc(): path = '/en-US/docs/RootDoc' source = DocumentSource(path, force=True) storage = mock_storage(spec=[ - 'get_document_rendered', 'get_document_metadata', + 'get_document_redirect', 'get_document_metadata', 'get_document_history']) - storage.get_document_rendered.return_value = {} + storage.get_document_redirect.return_value = {} resources = source.gather(None, storage) assert resources == [ ('document_meta', path, {'force': True}), @@ -118,9 +116,9 @@ def test_gather_standard_doc_empty_history_is_error(): path = '/en-US/docs/RootDoc' source = DocumentSource(path, force=True) storage = mock_storage(spec=[ - 'get_document_rendered', 'get_document_metadata', + 'get_document_redirect', 'get_document_metadata', 'get_document_history']) - storage.get_document_rendered.return_value = {} # Standard doc + storage.get_document_redirect.return_value = {} # Standard doc storage.get_document_metadata.return_value = {} # Empty for now storage.get_document_history.return_value = [] # No history resources = source.gather(None, storage) @@ -132,9 +130,9 @@ def test_gather_standard_doc_all_prereqs(): path = '/en-US/docs/Test' source = DocumentSource(path, force=True) storage = mock_storage(spec=[ - 'get_document_rendered', 'get_document_metadata', + 'get_document_redirect', 'get_document_metadata', 'get_document_history', 'save_document']) - storage.get_document_rendered.return_value = {} # Standard doc + storage.get_document_redirect.return_value = {} # Standard doc storage.get_document_metadata.return_value = doc_metadata storage.get_document_history.return_value = [ ('revisions', path + '$revision/2016', {})] @@ -149,9 +147,9 @@ def test_gather_standard_doc_metdata_loses(): path = '/en-US/docs/Test' source = DocumentSource(path, force=True) storage = mock_storage(spec=[ - 'get_document_rendered', 'get_document_metadata', + 'get_document_redirect', 'get_document_metadata', 'get_document_history', 'save_document']) - storage.get_document_rendered.return_value = {} # Standard doc + storage.get_document_redirect.return_value = {} # Standard doc metadata = doc_metadata.copy() metadata['locale'] = 'EN-US' metadata['slug'] = 'TEST' @@ -169,9 +167,9 @@ def test_gather_standard_doc_bad_metadata(): path = '/en-US/docs/Test' source = DocumentSource(path, force=True) storage = mock_storage(spec=[ - 'get_document_rendered', 'get_document_metadata', + 'get_document_redirect', 'get_document_metadata', 'get_document_history']) - storage.get_document_rendered.return_value = {} # Standard doc + storage.get_document_redirect.return_value = {} # Standard doc metadata = doc_metadata.copy() metadata['error'] = True storage.get_document_metadata.return_value = metadata @@ -186,9 +184,9 @@ def test_gather_standard_doc_no_uuid(): path = '/en-US/docs/Test' source = DocumentSource(path, force=True) storage = mock_storage(spec=[ - 'get_document_rendered', 'get_document_metadata', + 'get_document_redirect', 'get_document_metadata', 'get_document_history', 'save_document']) - storage.get_document_rendered.return_value = {} # Standard doc + storage.get_document_redirect.return_value = {} # Standard doc metadata = doc_metadata.copy() del metadata['uuid'] storage.get_document_metadata.return_value = metadata @@ -203,53 +201,11 @@ def test_gather_standard_doc_no_uuid(): storage.save_document.assert_called_once_with(expected) -def test_gather_zoned_doc_init(): - """A zone URL requests the zone doc.""" - path = '/en-US/Zone' - source = DocumentSource(path, force=True) - storage = mock_storage(spec=['get_zone_root']) - resources = source.gather(None, storage) - assert resources == [('zone_root', path, {})] - assert source.state == source.STATE_PREREQ - - -def test_gather_zoned_doc_error(): - """If the zoned document fails (isn't a zone), then the doc errors too.""" - path = '/en-US/Zone' - source = DocumentSource(path, force=True) - storage = mock_storage(spec=['get_zone_root']) - storage.get_zone_root.return_value = {'errors': ['failed']} - resources = source.gather(None, storage) - assert resources == [] - assert source.state == source.STATE_ERROR - - -def test_gather_zoned_doc_is_normalized(): - """The zoned doc is used to normalize the URL.""" - path = '/en-US/Zone' - source = DocumentSource(path, force=True) - assert not source.normalized_path - assert not source.locale - assert not source.slug - storage = mock_storage(spec=[ - 'get_zone_root', 'get_document', 'get_document_rendered']) - storage.get_zone_root.return_value = { - 'zone_path': path, 'doc_path': '/en-US/docs/Root/Zone'} - resources = source.gather(None, storage) - assert resources == [ - ('document', '/en-US/docs/Root', {}), - ('document_rendered', '/en-US/docs/Root/Zone', {})] - assert source.state == source.STATE_PREREQ - assert source.normalized_path == '/en-US/docs/Root/Zone' - assert source.locale == 'en-US' - assert source.slug == 'Root/Zone' - - -def test_gather_normalized_path_moved_page_needed(): +def test_gather_redirect_moved_page_needed(): """If a document is a redirect, request the target page.""" source = DocumentSource('/en-US/docs/Origin', force=True) - storage = mock_storage(spec=['get_document', 'get_document_rendered']) - storage.get_document_rendered.return_value = { + storage = mock_storage(spec=['get_document', 'get_document_redirect']) + storage.get_document_redirect.return_value = { 'redirect_to': '/en-US/docs/NewLocation'} resources = source.gather(None, storage) assert resources == [ @@ -258,12 +214,12 @@ def test_gather_normalized_path_moved_page_needed(): assert source.state == source.STATE_PREREQ -def test_gather_normalized_path_moved_page_followed(): +def test_gather_redirect_moved_page_followed(): """If a document is a redirect to a normal page, create a redirect.""" source = DocumentSource('/en-US/docs/Origin', force=True) storage = mock_storage(spec=[ - 'get_document', 'get_document_rendered', 'save_document']) - storage.get_document_rendered.return_value = { + 'get_document', 'get_document_redirect', 'save_document']) + storage.get_document_redirect.return_value = { 'redirect_to': '/en-US/docs/NewLocation'} storage.get_document.return_value = "Redirect Document" resources = source.gather(None, storage) @@ -278,131 +234,14 @@ def test_gather_normalized_path_moved_page_followed(): storage.save_document.assert_called_once_with(expected_data) -def test_gather_redirect_to_zone_page_first_pass(): - """If a document is a redirect to a zone, request the zone root.""" - parent_path = '/en-US/docs/Root' - path = parent_path + '/Zone' - zone_path = '/en-US/Zone' - source = DocumentSource(path, force=True) - storage = mock_storage(spec=[ - 'get_document', 'get_document_rendered', 'get_zone_root', - 'get_document_metadata', 'get_document_history']) - storage.get_document_rendered.return_value = {'redirect_to': zone_path} - resources = source.gather(None, storage) - assert resources == [ - ('document', parent_path, {}), - ('zone_root', zone_path, {}), - ('document_meta', path, {'force': True}), - ('document_history', path, {'revisions': 1})] - assert source.state == source.STATE_PREREQ - - -def test_gather_redirect_to_errored_zone_page_is_error(): - """If a document is a redirect to an errored zone, doc is also errored.""" - parent_path = '/en-US/docs/Root' - path = parent_path + '/Zone' - zone_path = '/en-US/Zone' - source = DocumentSource(path, force=True) - storage = mock_storage(spec=[ - 'get_document', 'get_document_rendered', 'get_zone_root', - 'get_document_metadata', 'get_document_history']) - storage.get_document_rendered.return_value = {'redirect_to': zone_path} - storage.get_zone_root.return_value = {'errors': 'bad zone'} - source.gather(None, storage) - assert source.state == source.STATE_ERROR - - -def test_gather_redirect_to_zone_page_complete(): - """A zoned document has more data passed to storage.save_document()""" - parent_path = '/en-US/docs/Root' - path = parent_path + '/Zone' - zone_path = '/en-US/Zone' - source = DocumentSource(path, force=True) - storage = mock_storage(spec=[ - 'get_document', 'get_document_rendered', 'get_zone_root', - 'get_document_metadata', 'get_document_history', 'save_document']) - storage.get_document.return_value = 'Root doc' - storage.get_document_rendered.return_value = {'redirect_to': zone_path} - storage.get_zone_root.return_value = { - 'doc_path': path, - 'zone_path': zone_path} - metadata = doc_metadata.copy() - storage.get_document_metadata.return_value = metadata - storage.get_document_history.return_value = [ - ('revisions', path + '$revision/2017', {})] - resources = source.gather(None, storage) - assert resources == [('document_current', path, {'revisions': 1})] - assert source.state == source.STATE_DONE - expected = doc_data.copy() - expected['slug'] = 'Root/Zone' - expected['parent_topic'] = 'Root doc' - expected['zone_redirect_path'] = zone_path - assert storage.save_document.call_count == 1 - assert storage.save_document.call_args[0][0] == expected # Better diff - storage.save_document.assert_called_once_with(expected) - - -def test_gather_redirect_to_zone_subpage(): - """If a document is a redirect to zone subpage, request the zone root.""" - parent_path = '/en-US/docs/Root/Zone' - path = parent_path + '/Child' - zone_root_path = '/en-US/Zone' - zone_path = zone_root_path + '/Child' - source = DocumentSource(path, force=True) - storage = mock_storage(spec=[ - 'get_document', 'get_document_rendered', 'get_zone_root', - 'get_document_metadata', 'get_document_history', 'save_document']) - storage.get_document_rendered.return_value = {'redirect_to': zone_path} - storage.get_zone_root.return_value = { - 'doc_path': parent_path, 'zone_path': zone_root_path} - resources = source.gather(None, storage) - assert resources == [ - ('document', parent_path, {}), # Parerent of current page - ('document', parent_path, {}), # zone_path from zone root - ('document_meta', path, {'force': True}), - ('document_history', path, {'revisions': 1}), - ] - assert source.state == source.STATE_PREREQ - - -def test_gather_redirect_to_zone_subpage_complete(): - """A zoned subpage has more data passed to storage.save_document()""" - parent_path = '/en-US/docs/Root/Zone' - path = parent_path + '/Child' - zone_root_path = '/en-US/Zone' - zone_path = zone_root_path + '/Child' - source = DocumentSource(path, force=True) - storage = mock_storage(spec=[ - 'get_document', 'get_document_rendered', 'get_zone_root', - 'get_document_metadata', 'get_document_history', 'save_document']) - storage.get_document.return_value = 'Root doc' - storage.get_document_rendered.return_value = {'redirect_to': zone_path} - storage.get_zone_root.return_value = { - 'doc_path': parent_path, 'zone_path': '/en-US/Zone'} - metadata = doc_metadata.copy() - storage.get_document_metadata.return_value = metadata - storage.get_document_history.return_value = [ - ('revisions', path + '$revision/2018', {})] - resources = source.gather(None, storage) - assert resources == [('document_current', path, {'revisions': 1})] - assert source.state == source.STATE_DONE - expected = doc_data.copy() - expected['slug'] = 'Root/Zone/Child' - expected['parent_topic'] = 'Root doc' - expected['zone_redirect_path'] = zone_root_path - assert storage.save_document.call_count == 1 - assert storage.save_document.call_args[0][0] == expected # Better diff - storage.save_document.assert_called_once_with(expected) - - def test_gather_localized_doc_without_metadata(): """A localized document will wait for metadata.""" path = '/fr/docs/Racine' source = DocumentSource(path, force=True) storage = mock_storage(spec=[ - 'get_document_rendered', 'get_document_metadata', + 'get_document_redirect', 'get_document_metadata', 'get_document_history', 'save_document']) - storage.get_document_rendered.return_value = {} + storage.get_document_redirect.return_value = {} storage.get_document_history.return_value = [ ('revisions', path + '$revision/2020', {})] resources = source.gather(None, storage) @@ -415,9 +254,9 @@ def test_gather_localized_doc_with_metadata(): path = '/fr/docs/Racine' source = DocumentSource(path, force=True) storage = mock_storage(spec=[ - 'get_document', 'get_document_rendered', 'get_document_metadata', + 'get_document', 'get_document_redirect', 'get_document_metadata', 'get_document_history', 'save_document']) - storage.get_document_rendered.return_value = {} + storage.get_document_redirect.return_value = {} metadata = doc_metadata.copy() metadata['locale'] = 'fr' metadata['slug'] = 'Racine' @@ -439,9 +278,9 @@ def test_gather_localized_doc_invalid_english(): path = '/fr/docs/Racine' source = DocumentSource(path, force=True) storage = mock_storage(spec=[ - 'get_document_rendered', 'get_document_metadata', + 'get_document_redirect', 'get_document_metadata', 'get_document_history']) - storage.get_document_rendered.return_value = {} + storage.get_document_redirect.return_value = {} metadata = doc_metadata.copy() metadata['locale'] = 'fr' metadata['slug'] = 'Racine' @@ -462,9 +301,9 @@ def test_gather_localized_doc_sets_parent(): path = '/fr/docs/Racine' source = DocumentSource(path, force=True) storage = mock_storage(spec=[ - 'get_document', 'get_document_rendered', 'get_document_metadata', + 'get_document', 'get_document_redirect', 'get_document_metadata', 'get_document_history', 'save_document']) - storage.get_document_rendered.return_value = {} + storage.get_document_redirect.return_value = {} metadata = doc_metadata.copy() metadata['locale'] = 'fr' metadata['slug'] = 'Racine' @@ -493,9 +332,9 @@ def test_gather_document_children(): doc_path = '/en-US/docs/RootDoc' source = DocumentSource(doc_path, depth=1, force=True) storage = mock_storage(spec=[ - 'get_document_rendered', 'get_document_metadata', + 'get_document_redirect', 'get_document_metadata', 'get_document_history', 'get_document_children', 'save_document']) - storage.get_document_rendered.return_value = {} # Standard doc + storage.get_document_redirect.return_value = {} # Standard doc storage.get_document_metadata.return_value = doc_metadata storage.get_document_history.return_value = [ ('revisions', doc_path + '$revision/2016', {})] @@ -511,9 +350,9 @@ def test_gather_document_children_loaded(): doc_path = '/en-US/docs/RootDoc' source = DocumentSource(doc_path, depth='all', force=True) storage = mock_storage(spec=[ - 'get_document_rendered', 'get_document_metadata', + 'get_document_redirect', 'get_document_metadata', 'get_document_history', 'get_document_children', 'save_document']) - storage.get_document_rendered.return_value = {} # Standard doc + storage.get_document_redirect.return_value = {} # Standard doc storage.get_document_metadata.return_value = doc_metadata storage.get_document_history.return_value = [ ('revisions', doc_path + '$revision/2016', {})] diff --git a/kuma/scrape/tests/test_source_document_base.py b/kuma/scrape/tests/test_source_document_base.py index 9d77b359705..86e0c3dcdd6 100644 --- a/kuma/scrape/tests/test_source_document_base.py +++ b/kuma/scrape/tests/test_source_document_base.py @@ -10,7 +10,6 @@ def test_top_level_doc(): assert source.path == '/locale/docs/slug' assert source.locale == 'locale' assert source.slug == 'slug' - assert source.normalized_path == '/locale/docs/slug' assert source.parent_slug is None assert source.parent_path is None @@ -21,22 +20,10 @@ def test_child_doc(): assert source.path == '/locale/docs/parent/child' assert source.locale == 'locale' assert source.slug == 'parent/child' - assert source.normalized_path == '/locale/docs/parent/child' assert source.parent_slug == 'parent' assert source.parent_path == '/locale/docs/parent' -def test_zone_doc(): - """A DocumentBaseSource with a zone slug starts un-normalized.""" - source = DocumentBaseSource('/locale/zone') - assert source.path == '/locale/zone' - assert source.locale is None - assert source.slug is None - assert source.normalized_path is None - assert source.parent_slug is None - assert source.parent_path is None - - def test_url_escaped_raises(): """Initializing with a URL-encoded path raises an exception.""" with pytest.raises(ValueError): diff --git a/kuma/scrape/tests/test_source_document_redirect.py b/kuma/scrape/tests/test_source_document_redirect.py new file mode 100644 index 00000000000..2552032c964 --- /dev/null +++ b/kuma/scrape/tests/test_source_document_redirect.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +"""Tests for the DocumentRedirectSource class (HEAD document).""" +from __future__ import unicode_literals + +from . import mock_requester, mock_storage +from ..sources import DocumentRedirectSource + + +def test_root_doc(root_doc, client): + """Test a page without redirects.""" + url = root_doc.get_absolute_url() + source = DocumentRedirectSource(url) + requester = mock_requester() + storage = mock_storage(spec=['save_document_redirect']) + resources = source.gather(requester, storage) + assert resources == [] + assert source.state == source.STATE_DONE + storage.save_document_redirect.assert_called_once_with( + 'en-US', 'Root', {}) + + +def test_redirect_no_path_change(root_doc, client): + """ + Test a page with a redirect that doesn't change the path. + + For example, a page might redirect from http:// to https://. + """ + url = root_doc.get_absolute_url() + source = DocumentRedirectSource(url) + requester = mock_requester( + response_spec=['content', 'history', 'status_code', 'url'], + history=[(301, url)], + final_path=url) + storage = mock_storage(spec=['save_document_redirect']) + resources = source.gather(requester, storage) + assert resources == [] + assert source.state == source.STATE_DONE + storage.save_document_redirect.assert_called_once_with( + 'en-US', 'Root', {}) + + +def test_redirect(root_doc, client): + """Test a page with a redirect.""" + final_path = root_doc.get_absolute_url() + url = final_path.replace(root_doc.slug, 'Redirect') + source = DocumentRedirectSource(url) + requester = mock_requester( + response_spec=['content', 'history', 'status_code', 'url'], + history=[(301, url)], + final_path=final_path) + storage = mock_storage(spec=['save_document_redirect']) + resources = source.gather(requester, storage) + assert resources == [] + assert source.state == source.STATE_DONE + storage.save_document_redirect.assert_called_once_with( + 'en-US', 'Redirect', {'redirect_to': final_path}) + + +def test_missing_doc(client): + """ + A missing document results in an error. + + One cause: translations are requested, and a recently deleted + translation is in the metadata. + """ + source = DocumentRedirectSource('/en-US/docs/missing') + requester = mock_requester(status_code=404) + storage = mock_storage() + resources = source.gather(requester, storage) + assert resources == [] + assert source.state == source.STATE_ERROR diff --git a/kuma/scrape/tests/test_source_document_rendered.py b/kuma/scrape/tests/test_source_document_rendered.py deleted file mode 100644 index 94518ad3984..00000000000 --- a/kuma/scrape/tests/test_source_document_rendered.py +++ /dev/null @@ -1,141 +0,0 @@ -# -*- coding: utf-8 -*- -"""Tests for the DocumentRenderedSource class (GET document).""" -from __future__ import unicode_literals - -from datetime import datetime - -import pytest - -from kuma.wiki.models import Document, DocumentZone, Revision - -from . import mock_requester, mock_storage -from ..sources import DocumentRenderedSource - - -@pytest.fixture -def zone_root_doc(root_doc, settings): - """A Document record with a DocumentZone with style and a redirect.""" - settings.PIPELINE_CSS['zone-special'] = { - 'output_filename': 'build/styles/zone-special.css'} - doc = Document.objects.create( - locale='en-US', - slug=root_doc.slug + '/Zone', - parent_topic=root_doc) - DocumentZone.objects.create( - document=doc, - url_root='Zone', - css_slug='special') - revision = Revision.objects.create( - document=doc, - creator=root_doc.current_revision.creator, - content='

This is the Zone.

', - created=datetime(2016, 12, 14)) - assert doc.current_revision == revision - doc.rendered_html = doc.current_revision.content - doc.save() - return doc - - -@pytest.fixture -def zone_child_doc(zone_root_doc): - """A Document record that is below the zone root.""" - doc = Document.objects.create( - locale='en-US', - slug=zone_root_doc.slug + '/Child', - parent_topic=zone_root_doc) - creator = zone_root_doc.current_revision.creator - Revision.objects.create( - content='

A zone subpage.

', - creator=creator, - document=doc) - return doc - - -def test_root_doc(root_doc, client): - """Test a page without redirects.""" - url = root_doc.get_absolute_url() - html = client.get(url).content - source = DocumentRenderedSource(url) - requester = mock_requester(content=html) - storage = mock_storage(spec=['save_document_rendered']) - resources = source.gather(requester, storage) - assert resources == [] - assert source.state == source.STATE_DONE - storage.save_document_rendered.assert_called_once_with( - 'en-US', 'Root', {}) - - -def test_non_zone_redirect(root_doc, client): - """ - Test a page with non-zone redirects. - - For example, a page might redirect from http:// to https:// without - changing the path. - """ - url = root_doc.get_absolute_url() - html = client.get(url).content - source = DocumentRenderedSource(url) - requester = mock_requester( - response_spec=['content', 'history', 'status_code', 'url'], - history=[(301, url)], - final_path=url, - content=html) - storage = mock_storage(spec=['save_document_rendered']) - resources = source.gather(requester, storage) - assert resources == [] - assert source.state == source.STATE_DONE - storage.save_document_rendered.assert_called_once_with( - 'en-US', 'Root', {}) - - -def test_zone_root_doc(zone_root_doc, client): - """The zone_css_slug is extracted from zone roots.""" - url = zone_root_doc.get_absolute_url() - html = client.get(url, follow=True).content - source = DocumentRenderedSource(url) - requester = mock_requester( - response_spec=['content', 'history', 'status_code', 'url'], - history=[(302, url)], - final_path=zone_root_doc.zone.url_root, - content=html) - storage = mock_storage(spec=['save_document_rendered']) - resources = source.gather(requester, storage) - assert resources == [] - assert source.state == source.STATE_DONE - context = {'redirect_to': 'Zone'} - storage.save_document_rendered.assert_called_once_with( - 'en-US', 'Root/Zone', context) - - -def test_zone_child_doc(zone_root_doc, zone_child_doc, client): - """The zone_css_slug is not extracted from zone children.""" - url = zone_child_doc.get_absolute_url() - html = client.get(url, follow=True).content - source = DocumentRenderedSource(url) - requester = mock_requester( - response_spec=['content', 'history', 'status_code', 'url'], - history=[(302, url)], - final_path=zone_root_doc.zone.url_root, - content=html) - storage = mock_storage(spec=['save_document_rendered']) - resources = source.gather(requester, storage) - assert resources == [] - assert source.state == source.STATE_DONE - context = {'redirect_to': 'Zone'} - storage.save_document_rendered.assert_called_once_with( - 'en-US', 'Root/Zone/Child', context) - - -def test_missing_doc(client): - """ - A missing document results in an error. - - One cause: translations are requested, and a recently deleted - translation is in the metadata. - """ - source = DocumentRenderedSource('/en-US/docs/missing') - requester = mock_requester(status_code=404) - storage = mock_storage() - resources = source.gather(requester, storage) - assert resources == [] - assert source.state == source.STATE_ERROR diff --git a/kuma/scrape/tests/test_source_zone_root.py b/kuma/scrape/tests/test_source_zone_root.py deleted file mode 100644 index ad9248243eb..00000000000 --- a/kuma/scrape/tests/test_source_zone_root.py +++ /dev/null @@ -1,105 +0,0 @@ -# -*- coding: utf-8 -*- -"""Tests for the ZoneRootSource class (zone redirect URL).""" -from __future__ import unicode_literals - -import pytest - -from . import mock_requester, mock_storage -from ..sources import ZoneRootSource - - -def test_escaped_url(): - """Detect URL-encoded paths and fail at init.""" - with pytest.raises(ValueError) as error: - ZoneRootSource('/tr/docs/%C3%96%C4%9Fren/CSS') - expected_message = 'URL-encoded path "/tr/docs/%C3%96%C4%9Fren/CSS"' - assert str(error.value) == expected_message - - -def test_invalid_url(): - """Detect invalid paths and fail at init.""" - source = ZoneRootSource('/en-US') - assert source.state == source.STATE_ERROR - - -def test_gather(): - """Zone root data can be gathered from metadata on the first pass.""" - metadata = { - 'url': '/en-US/docs/Root/Zone', - 'locale': 'en-US' - } - source = ZoneRootSource('/en-US/Zone') - requester = mock_requester(response_spec=['json'], json=metadata) - storage = mock_storage(spec=['get_zone_root', 'save_zone_root']) - resources = source.gather(requester, storage) - assert resources == [('document', '/en-US/docs/Root/Zone', {})] - assert source.state == source.STATE_DONE - assert source.freshness == source.FRESH_YES - data = { - 'doc_path': '/en-US/docs/Root/Zone', - 'zone_path': '/en-US/Zone', - } - storage.save_zone_root.assert_called_once_with('/en-US/Zone', data) - - -def test_gather_when_stored(): - """Previously stored zone root data prevents scraping.""" - data = { - 'doc_path': '/en-US/docs/Root/Zone', - 'zone_path': '/en-US/Zone', - } - source = ZoneRootSource('/en-US/Zone') - requester = mock_requester(requester_spec=[]) - storage = mock_storage(spec=['get_zone_root']) - storage.get_zone_root.return_value = data - resources = source.gather(requester, storage) - assert resources == [('document', '/en-US/docs/Root/Zone', {})] - assert source.state == source.STATE_DONE - assert source.freshness == source.FRESH_NO - - -def test_gather_notzone_is_error(): - """Passing a non-zone URL is detected when processing metadata.""" - metadata = { - 'url': '/en-US/docs/Root/Zone', - 'locale': 'en-US' - } - source = ZoneRootSource('/en-US/docs/Root/Zone') - requester = mock_requester(response_spec=['json'], json=metadata) - storage = mock_storage(spec=['get_zone_root', 'save_zone_root']) - resources = source.gather(requester, storage) - assert resources == [] - assert source.state == source.STATE_ERROR - assert source.freshness == source.FRESH_YES - expected = { - 'errors': ['url "/en-US/docs/Root/Zone" should be the non-zone path'], - 'doc_locale': 'en-US', - 'metadata_locale': 'en-US', - 'metadata_url': '/en-US/docs/Root/Zone', - 'zone_path': '/en-US/docs/Root/Zone', - } - storage.save_zone_root.assert_called_once_with( - '/en-US/docs/Root/Zone', expected) - - -def test_extract_locale_mismatch_is_error(): - """ - If the metadata locale doesn't match the URL, it is an error. - - This appears to be common on zoned URLs with only one translation, - and requires reseting the stored JSON data. - """ - source = ZoneRootSource('/en-US/Zone') - metadata = { - 'url': '/es/docs/Root/Zone', - 'locale': 'es' - } - data = source.extract_data(metadata) - expected = { - 'errors': ['locale "es" should be the same as the path locale'], - 'doc_locale': 'en-US', - 'metadata_locale': 'es', - 'metadata_url': '/es/docs/Root/Zone', - 'zone_path': '/en-US/Zone', - } - assert data == expected diff --git a/kuma/scrape/tests/test_storage.py b/kuma/scrape/tests/test_storage.py index 5dda219c1ba..6b646b09c57 100644 --- a/kuma/scrape/tests/test_storage.py +++ b/kuma/scrape/tests/test_storage.py @@ -8,7 +8,7 @@ from taggit.models import Tag from kuma.wiki.constants import REDIRECT_CONTENT -from kuma.wiki.models import Document, DocumentTag, DocumentZone, Revision +from kuma.wiki.models import Document, DocumentTag, Revision from ..storage import Storage @@ -18,11 +18,10 @@ ('document_children', ('locale', 'slug')), ('document_metadata', ('locale', 'slug')), ('document_history', ('locale', 'slug')), - ('document_rendered', ('locale', 'slug')), + ('document_redirect', ('locale', 'slug')), ('revision_html', ('path',)), - ('zone_root', ('path',)), ), ids=['document_children', 'document_metadata', 'document_history', - 'document_rendered', 'revision_html', 'zone_root']) + 'document_redirect', 'revision_html']) def test_local_storage(data_name, param_list): """Local storage objects are None when unset, return the saved value.""" storage = Storage() @@ -200,54 +199,6 @@ def ca_weirdness(**data): assert ca_doc.parent == en_doc -def test_save_document_create_zone_with_redirect(simple_doc): - """A document with a vanity URL creates the associated DocumentZone.""" - data = { - 'parent_topic': simple_doc, - 'locale': 'en-US', - 'slug': 'Root/Zone', - 'zone_redirect_path': '/en-US/Zone', - 'is_zone_root': True, - 'zone_css_slug': 'other-slug', - } - Storage().save_document(data) - doc = Document.objects.get(locale='en-US', slug='Root/Zone') - assert doc.zone - assert doc.zone.css_slug == data['zone_css_slug'] - assert doc.zone.url_root == 'Zone' - - -def test_save_document_create_simple_zone(simple_doc): - """A document with a plain zone creates the associated DocumentZone.""" - data = { - 'parent_topic': simple_doc, - 'locale': 'en-US', - 'slug': 'Root/Zone', - 'is_zone_root': True, - } - Storage().save_document(data) - doc = Document.objects.get(locale='en-US', slug='Root/Zone') - assert doc.zone - assert doc.zone.css_slug == '' - assert doc.zone.url_root is None - - -def test_save_document_zone_child(simple_doc): - """A zone child document does not create a DocumentZone.""" - data = { - 'parent_topic': simple_doc, - 'locale': 'en-US', - 'slug': 'Root/ZoneChild', - 'is_zone_root': False, - 'zone_redirect_path': '/en-US/Root/ZoneChild', - 'zone_css_slug': 'other-slug', - } - Storage().save_document(data) - doc = Document.objects.get(locale='en-US', slug='Root/ZoneChild') - with pytest.raises(DocumentZone.DoesNotExist): - doc.zone - - def test_get_revision_existing(root_doc): stored = Storage().get_revision(root_doc.current_revision_id) assert stored == root_doc.current_revision