Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port additional features to proxito #6286

Merged
merged 24 commits into from
Nov 18, 2019
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
64e5e69
Clean up proxito logging
ericholscher Oct 14, 2019
d39b461
Additional log cleanup
ericholscher Oct 14, 2019
9febb48
More explicit redirects
ericholscher Oct 14, 2019
7deae5c
Use Django default error handlers to make 404's less costly
ericholscher Oct 14, 2019
2f68347
Add a faster 404 handler that doesn't render templates
ericholscher Oct 15, 2019
b3185be
Nicer 400 page for invalid domains
ericholscher Oct 17, 2019
0130351
Fix 404 handler
ericholscher Oct 17, 2019
2a4fec1
Initial port of the 404 & sitemap code
ericholscher Oct 17, 2019
97e40c5
Update sitemap tests to fix them
ericholscher Oct 17, 2019
4fc7611
Fix up tests
ericholscher Oct 17, 2019
9db5605
Update readthedocs/proxito/views.py
ericholscher Oct 28, 2019
34b9296
Handle directory indexing and 404’s properly in proxito
ericholscher Nov 7, 2019
812de7a
Merge branch 'proxito-cleanup' of github.com:readthedocs/readthedocs.…
ericholscher Nov 7, 2019
fb98bbf
Lint cleanup
ericholscher Nov 7, 2019
fac0172
finally fix lint
ericholscher Nov 7, 2019
70659a2
Fix up tests for 404 handling
ericholscher Nov 7, 2019
f9322ce
Remove duplicated comments
ericholscher Nov 7, 2019
7dfb09a
A bit more docs
ericholscher Nov 7, 2019
72c2ce1
Add another test
ericholscher Nov 7, 2019
e503db9
Fix test
ericholscher Nov 7, 2019
1c98250
Fix silly linting
ericholscher Nov 7, 2019
101a7d4
Update readthedocs/proxito/views.py
ericholscher Nov 13, 2019
f1315fd
Address review feedback
ericholscher Nov 13, 2019
32e6882
Merge remote-tracking branch 'origin/proxito-cleanup' into proxito-cl…
ericholscher Nov 18, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions readthedocs/proxito/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,11 @@
import logging

from django.conf import settings
from django.http import HttpResponseBadRequest
from django.shortcuts import render
from django.utils.deprecation import MiddlewareMixin
from django.utils.translation import ugettext_lazy as _

from readthedocs.projects.models import Domain


log = logging.getLogger(__name__) # noqa


Expand Down
57 changes: 56 additions & 1 deletion readthedocs/proxito/tests/test_full.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def test_default_robots_txt(self):
@mock.patch('readthedocs.proxito.views.get_storage_class')
def test_custom_robots_txt(self, storage_mock):
self.project.versions.update(active=True, built=True)
storage_mock().exists.return_value = True
storage_mock()().exists.return_value = True
response = self.client.get(
reverse('robots_txt'),
HTTP_HOST='project.readthedocs.io',
Expand All @@ -185,6 +185,61 @@ def test_custom_robots_txt(self, storage_mock):
response['x-accel-redirect'], '/proxito/html/project/latest/robots.txt',
)

@mock.patch('readthedocs.proxito.views.get_storage_class')
def test_directory_indexes(self, storage_mock):
self.project.versions.update(active=True, built=True)
storage_mock()().exists.return_value = True
storage_mock()().open().read.return_value = 'foo'
response = self.client.get(
reverse('serve_error_404', kwargs={'proxito_path': '/en/latest/index-exists'}),
humitos marked this conversation as resolved.
Show resolved Hide resolved
HTTP_HOST='project.readthedocs.io',
)
self.assertEqual(
response.content, b'foo'
)
self.assertEqual(
response.status_code, 200
)

@mock.patch('readthedocs.proxito.views.get_storage_class')
def test_404_storage_serves_404(self, storage_mock):
self.project.versions.update(active=True, built=True)

storage_mock()().exists.side_effect = [False, False, True]
response = self.client.get(
reverse('serve_error_404', kwargs={'proxito_path': '/en/fancy-version/index-exists'}),
humitos marked this conversation as resolved.
Show resolved Hide resolved
HTTP_HOST='project.readthedocs.io',
)
storage_mock()().exists.assert_has_calls(
[
mock.call('html/project/fancy-version/index-exists/index.html'),
mock.call('html/project/fancy-version/index-exists/README.html'),
mock.call('html/project/fancy-version/404.html'),
]
)
self.assertEqual(
response.status_code, 404
)
Comment on lines +221 to +223
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may want to assert that we are receiving the content of the custom page here as well and not the default maze.


@mock.patch('readthedocs.proxito.views.get_storage_class')
def test_404_storage_paths_checked(self, storage_mock):
self.project.versions.update(active=True, built=True)
storage_mock()().exists.return_value = False
self.client.get(
reverse('serve_error_404', kwargs={'proxito_path': '/en/fancy-version/index-exists'}),
humitos marked this conversation as resolved.
Show resolved Hide resolved
HTTP_HOST='project.readthedocs.io',
)
storage_mock()().exists.assert_has_calls(
[
mock.call('html/project/fancy-version/index-exists/index.html'),
mock.call('html/project/fancy-version/index-exists/README.html'),
mock.call('html/project/fancy-version/404.html'),
mock.call('html/project/fancy-version/404/index.html'),
mock.call('html/project/latest/404.html'),
mock.call('html/project/latest/404/index.html')
]
)

def test_sitemap_xml(self):
self.project.versions.update(active=True)
private_version = fixture.get(
Expand Down
6 changes: 6 additions & 0 deletions readthedocs/proxito/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@


urlpatterns = [
# Serve custom 404 pages
url(
r'^_proxito_404_(?P<proxito_path>.*)$',
views.serve_error_404,
name='serve_error_404',
),
url(r'robots\.txt$', views.robots_txt, name='robots_txt'),
url(r'sitemap\.xml$', views.sitemap_xml, name='sitemap_xml'),

Expand Down
198 changes: 154 additions & 44 deletions readthedocs/proxito/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from django.core.files.storage import get_storage_class
from django.http import Http404, HttpResponse, HttpResponseRedirect
from django.shortcuts import get_object_or_404, render
from django.urls import resolve as url_resolve
from django.utils.encoding import iri_to_uri
from django.views.decorators.cache import cache_page
from django.views.static import serve
Expand Down Expand Up @@ -125,7 +126,7 @@ def inner_view( # noqa
@map_subproject_slug
def redirect_page_with_filename(request, project, subproject, filename): # pylint: disable=unused-argument # noqa
"""
Redirect /page/file.html ->
Redirect /page/file.html to.

/<default-lang>/<default-version>/file.html.
"""
Expand Down Expand Up @@ -156,45 +157,22 @@ def redirect_project_slug(request, project, subproject): # pylint: disable=unus

@map_project_slug
@map_subproject_slug
def serve_docs(
def _get_project_data_from_request(
request,
project,
subproject,
lang_slug=None,
version_slug=None,
filename='',
):
"""Take the incoming parsed URL's and figure out what file to serve."""

log.debug(
'project=%s, subproject=%s, lang_slug=%s, version_slug=%s, filename=%s',
project, subproject, lang_slug, version_slug, filename
)
"""
Get the proper project based on the request and URL.

This is used in a few places and so we break out into a utility function.
"""
# Take the most relevant project so far
current_project = subproject or project

# Handle a / redirect when we aren't a single version
if all([lang_slug is None, version_slug is None, filename == '',
not current_project.single_version]):
redirect_to = redirect_project_slug(
request,
project=current_project,
subproject=None,
)
log.info(
'Proxito redirect: from=%s, to=%s, project=%s', filename,
redirect_to, current_project.slug
)
return redirect_to

if (lang_slug is None or version_slug is None) and not current_project.single_version:
log.info(
'Invalid URL for project with versions. url=%s, project=%s',
filename, current_project.slug
)
raise Http404('Invalid URL for project with versions')

# Handle single-version projects that have URLs like a real project
if current_project.single_version:
if lang_slug and version_slug:
Expand All @@ -209,22 +187,70 @@ def serve_docs(
current_project.translations.all(), language=lang_slug
)

# Handle single version by grabbing the default version
if final_project.single_version:
version_slug = final_project.get_default_version()

# ``final_project`` is now the actual project we want to serve docs on,
# accounting for:
# * Project
# * Subproject
# * Translations

return final_project, lang_slug, version_slug, filename


def serve_docs(
request,
project_slug=None,
subproject_slug=None,
lang_slug=None,
version_slug=None,
filename='',
):
"""Take the incoming parsed URL's and figure out what file to serve."""

log.debug(
'project=%s, subproject=%s, lang_slug=%s, version_slug=%s, filename=%s',
project_slug, subproject_slug, lang_slug, version_slug, filename
)
humitos marked this conversation as resolved.
Show resolved Hide resolved

final_project, lang_slug, version_slug, filename = _get_project_data_from_request( # noqa
request,
project_slug=project_slug,
subproject_slug=subproject_slug,
lang_slug=lang_slug,
version_slug=version_slug,
filename=filename,
)

# Handle a / redirect when we aren't a single version
if all([lang_slug is None, version_slug is None, filename == '',
not final_project.single_version]):
redirect_to = redirect_project_slug(
request,
project=final_project,
subproject=None,
)
log.info(
'Proxito redirect: from=%s, to=%s, project=%s', filename,
redirect_to, final_project.slug
)
return redirect_to

if (lang_slug is None or version_slug is None) and not final_project.single_version:
log.info(
'Invalid URL for project with versions. url=%s, project=%s',
filename, final_project.slug
)
raise Http404('Invalid URL for project with versions')

# TODO: Redirects need to be refactored before we can turn them on
# They currently do 1 request per redirect that exists for the project
# path, http_status = final_project.redirects.get_redirect_path_with_status(
# language=lang_slug, version_slug=version_slug, path=filename
# )

# Handle single version by grabbing the default version
if final_project.single_version:
version_slug = final_project.get_default_version()

# Don't do auth checks
# try:
# Version.objects.public(user=request.user, project=final_project).get(slug=version_slug)
Expand All @@ -240,11 +266,8 @@ def serve_docs(
)
path = os.path.join(storage_path, filename)

# Handle out backend storage not supporting directory indexes,
# Handle our backend storage not supporting directory indexes,
# so we need to append index.html when appropriate.
# TODO: We don't currently support `docs.example.com/en/latest/install`
# where the user actually wants `docs.example.com/en/latest/install/index.html`
# We would need to emulate nginx's try_files in order to handle this.
if path[-1] == '/':
path += 'index.html'

Expand All @@ -259,13 +282,8 @@ def _serve_docs(request, final_project, path):
this in production, but I don't want to force a check for DEBUG.
"""

if not path.startswith('/proxito/'):
if path[0] == '/':
path = path[1:]
path = f'/proxito/{path}'

if settings.PYTHON_MEDIA:
return _serve_docs_nginx(
return _serve_docs_python(
request, final_project=final_project, path=path
)
return _serve_docs_nginx(request, final_project=final_project, path=path)
Expand All @@ -279,6 +297,7 @@ def _serve_docs_python(request, final_project, path):
"""
log.info('[Django serve] path=%s, project=%s', path, final_project.slug)

storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
root_path = storage.path('')
# Serve from Python
return serve(request, path, root_path)
Expand All @@ -292,6 +311,12 @@ def _serve_docs_nginx(request, final_project, path):
serve it directly as an internal redirect.
"""
log.info('[Nginx serve] path=%s, project=%s', path, final_project.slug)

if not path.startswith('/proxito/'):
if path[0] == '/':
path = path[1:]
path = f'/proxito/{path}'

content_type, encoding = mimetypes.guess_type(path)
content_type = content_type or 'application/octet-stream'
response = HttpResponse(
Expand All @@ -311,6 +336,90 @@ def _serve_docs_nginx(request, final_project, path):
return response


def serve_error_404(request, proxito_path, template_name='404.html'):
"""
Handler for 404 pages on subdomains.

This does a couple things:

* Handles directory indexing for URLs that don't end in a slash
* Handles directory indexing for README.html (for now)
* Handles custom 404 serving

For 404's, first search for a 404 page in the current version, then continues
with the default version and finally, if none of them are found, the Read
the Docs default page (Maze Found) is rendered by Django and served.
"""
# pylint: disable=too-many-locals

# Parse the URL using the normal urlconf, so we get proper subdomain/translation data
_, __, kwargs = url_resolve(
proxito_path, urlconf='readthedocs.proxito.urls'
)
final_project, lang_slug, version_slug, filename = _get_project_data_from_request( # noqa
request,
project_slug=kwargs.get('project_slug'),
subproject_slug=kwargs.get('subproject_slug'),
lang_slug=kwargs.get('lang_slug'),
version_slug=kwargs.get('version_slug'),
filename=kwargs.get('filename', ''),
)

storage_root_path = final_project.get_storage_path(
type_='html',
version_slug=version_slug,
include_file=False,
)
storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()

# First, check for dirhtml with slash
for tryfile in ('index.html', 'README.html'):
storage_filename_path = os.path.join(
storage_root_path, filename, tryfile
)
log.debug(
'Trying index filename: project=%s version=%s, file=%s',
final_project.slug,
version_slug,
storage_filename_path,
)
if storage.exists(storage_filename_path):
log.info(
'Serving index file: project=%s version=%s, url=%s',
final_project.slug,
version_slug,
storage_filename_path,
)
resp = HttpResponse(storage.open(storage_filename_path).read())
return resp

# If that doesn't work, attempt to serve the 404 of the current version (version_slug)
# Secondly, try to serve the 404 page for the default version
# (project.get_default_version())
for version_slug_404 in [version_slug, final_project.get_default_version()]:
for tryfile in ('404.html', '404/index.html'):
storage_root_path = final_project.get_storage_path(
type_='html',
version_slug=version_slug_404,
include_file=False,
)
storage_filename_path = os.path.join(storage_root_path, tryfile)
if storage.exists(storage_filename_path):
log.debug(
'serving 404.html page current version: [project: %s] [version: %s]',
ericholscher marked this conversation as resolved.
Show resolved Hide resolved
final_project.slug,
version_slug_404,
)
resp = HttpResponse(storage.open(storage_filename_path).read())
resp.status_code = 404
return resp

# Finally, return the default 404 page generated by Read the Docs
resp = render(request, template_name)
resp.status_code = 404
return resp


@map_project_slug
def robots_txt(request, project):
"""
Expand All @@ -333,7 +442,7 @@ def robots_txt(request, project):
# default version is not built
not version.built,
])
import ipdb; ipdb.set_trace()

if no_serve_robots_txt:
# ... we do return a 404
raise Http404()
Expand Down Expand Up @@ -382,6 +491,7 @@ def sitemap_xml(request, project):

:rtype: django.http.HttpResponse
"""
# pylint: disable=too-many-locals

def priorities_generator():
"""
Expand Down