Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes on 404, robots, and sitemap #6798

Merged
merged 5 commits into from Mar 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
45 changes: 44 additions & 1 deletion readthedocs/proxito/tests/test_full.py
@@ -1,9 +1,9 @@
# Copied from .org

import os
from unittest import mock

import django_dynamic_fixture as fixture
from unittest import mock
from django.conf import settings
from django.http import HttpResponse
from django.test.utils import override_settings
Expand Down Expand Up @@ -232,6 +232,16 @@ def test_default_robots_txt(self, storage_mock):
b'User-agent: *\nAllow: /\nSitemap: https://project.readthedocs.io/sitemap.xml\n'
)

@mock.patch('readthedocs.proxito.views.serve.get_storage_class')
def test_default_robots_txt_private_version(self, storage_mock):
storage_mock()().exists.return_value = False
self.project.versions.update(active=True, built=True, privacy_level=constants.PRIVATE)
response = self.client.get(
reverse('robots_txt'),
HTTP_HOST='project.readthedocs.io',
)
self.assertEqual(response.status_code, 404)

def test_custom_robots_txt(self):
self.project.versions.update(active=True, built=True)
response = self.client.get(
Expand All @@ -242,6 +252,14 @@ def test_custom_robots_txt(self):
response['x-accel-redirect'], '/proxito/media/html/project/latest/robots.txt',
)

def test_custom_robots_txt_private_version(self):
self.project.versions.update(active=True, built=True, privacy_level=constants.PRIVATE)
response = self.client.get(
reverse('robots_txt'),
HTTP_HOST='project.readthedocs.io',
)
self.assertEqual(response.status_code, 404)

def test_directory_indexes(self):
self.project.versions.update(active=True, built=True)
# Confirm we've serving from storage for the `index-exists/index.html` file
Expand Down Expand Up @@ -303,6 +321,14 @@ def test_directory_indexes_get_args(self):
@mock.patch('readthedocs.proxito.views.serve.get_storage_class')
def test_404_storage_serves_404(self, storage_mock):
self.project.versions.update(active=True, built=True)
fancy_version = fixture.get(
Version,
slug='fancy-version',
privacy_level=constants.PUBLIC,
active=True,
built=True,
project=self.project,
)

storage_mock()().exists.side_effect = [False, False, True]
response = self.client.get(
Expand All @@ -323,6 +349,15 @@ def test_404_storage_serves_404(self, storage_mock):
@mock.patch('readthedocs.proxito.views.serve.get_storage_class')
def test_404_storage_paths_checked(self, storage_mock):
self.project.versions.update(active=True, built=True)
fancy_version = fixture.get(
Version,
slug='fancy-version',
privacy_level=constants.PUBLIC,
active=True,
built=True,
project=self.project,
)

storage_mock()().exists.return_value = False
self.client.get(
reverse('proxito_404_handler', kwargs={'proxito_path': '/en/fancy-version/not-found'}),
Expand Down Expand Up @@ -466,3 +501,11 @@ def test_sitemap_xml(self):
),)
self.assertEqual(response.context['versions'][1]['priority'], 0.9)
self.assertEqual(response.context['versions'][1]['changefreq'], 'daily')

def test_sitemap_all_private_versions(self):
self.project.versions.update(active=True, built=True, privacy_level=constants.PRIVATE)
response = self.client.get(
reverse('sitemap_xml'),
HTTP_HOST='project.readthedocs.io',
)
self.assertEqual(response.status_code, 404)
46 changes: 21 additions & 25 deletions readthedocs/proxito/views/serve.py
Expand Up @@ -14,19 +14,17 @@
from django.views import View
from django.views.decorators.cache import cache_page

from readthedocs.builds.constants import LATEST, STABLE, EXTERNAL
from readthedocs.builds.constants import EXTERNAL, LATEST, STABLE
from readthedocs.builds.models import Version
from readthedocs.core.utils.extend import SettingsOverrideObject
from readthedocs.projects import constants
from readthedocs.projects.templatetags.projects_tags import sort_version_aware
from readthedocs.redirects.exceptions import InfiniteRedirectException

from .mixins import ServeDocsMixin, ServeRedirectMixin

from .decorators import map_project_slug
from .mixins import ServeDocsMixin, ServeRedirectMixin
from .utils import _get_project_data_from_request


log = logging.getLogger(__name__) # noqa


Expand Down Expand Up @@ -266,14 +264,17 @@ def get(self, request, proxito_path, template_name='404.html'):
if default_version_slug != version_slug:
versions.append(default_version_slug)
for version_slug_404 in versions:
if not self.allowed_user(request, final_project, version_slug_404):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. This is going to add even more queries to our slow 404's. I'm a bit worried about that. I guess it won't cause a query on the .org, which is where i'm most worried about it, so it should be fine.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, on .org that function just returns True, it doesn't query the db.

continue

storage_root_path = final_project.get_storage_path(
type_='html',
version_slug=version_slug_404,
include_file=False,
version_type=self.version_type,
)
for tryfile in ('404.html', '404/index.html'):
storage_root_path = final_project.get_storage_path(
type_='html',
version_slug=version_slug_404,
include_file=False,
version_type=self.version_type,
)
storage_filename_path = os.path.join(storage_root_path, tryfile)
storage_filename_path = f'{storage_root_path}/{tryfile}'
if storage.exists(storage_filename_path):
log.info(
'Serving custom 404.html page: [project: %s] [version: %s]',
Expand Down Expand Up @@ -307,9 +308,7 @@ def get(self, request, project):
version = project.versions.get(slug=version_slug)

no_serve_robots_txt = any([
# If project is private or,
project.privacy_level == constants.PRIVATE,
# default version is private or,
# If the default version is private or,
version.privacy_level == constants.PRIVATE,
# default version is not active or,
not version.active,
Expand All @@ -327,7 +326,7 @@ def get(self, request, project):
include_file=False,
version_type=self.version_type,
)
path = os.path.join(storage_path, 'robots.txt')
path = f'{storage_path}/robots.txt'
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We shouldn't use path.join for storage paths, there are more in this file


storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
if storage.exists(path):
Expand Down Expand Up @@ -369,9 +368,7 @@ def get(self, request, project):
frequency. Starting from 1 and decreasing by 0.1 for priorities and starting
from daily, weekly to monthly for change frequency.

If the project is private, the view raises ``Http404``. On the other hand,
if the project is public but a version is private, this one is not included
in the sitemap.
If the project doesn't have any public version, the view raises ``Http404``.

:param request: Django request object
:param project: Project instance to generate the sitemap
Expand Down Expand Up @@ -417,15 +414,14 @@ def changefreqs_generator():
changefreqs = ['weekly', 'daily']
yield from itertools.chain(changefreqs, itertools.repeat('monthly'))

if project.privacy_level == constants.PRIVATE:
public_versions = Version.internal.public(
project=project,
only_active=True,
)
if not public_versions.exists():
raise Http404

sorted_versions = sort_version_aware(
Version.internal.public(
project=project,
only_active=True,
),
)
sorted_versions = sort_version_aware(public_versions)

# This is a hack to swap the latest version with
# stable version to get the stable version first in the sitemap.
Expand Down