-
Notifications
You must be signed in to change notification settings - Fork 912
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Switch Bedrock to using Django's own i18n mechanism #14327
Changes from all commits
590bb46
1c2d4f5
b768cb3
34289ba
5b8eda3
4438da2
404fa05
bf3985a
e79df9c
1e33859
837df57
3fd9d94
0ed076d
d50def3
de5b8ae
2a24e7d
07803dd
c7d7b7f
07952b0
c541093
59fe9a7
919fd6f
5fdde80
f9e93df
62dfd25
74d3617
4fcc522
201a98f
e972c9c
8736ecc
02f17e6
5765ec8
c3c4ed0
3baf110
8f19837
e7773e6
8b3771d
3be47ce
e26dc61
fa03b0b
f057e62
5c2b4b5
0fe343d
429a9fa
609eae3
1c2f119
646bfbf
8ea97e3
85c2694
3aef3c3
53f94e2
19b5aad
f968cb9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at https://mozilla.org/MPL/2.0/. | ||
|
||
import django.urls | ||
from django.conf import settings | ||
from django.utils.text import capfirst | ||
from django.utils.translation.trans_real import parse_accept_lang_header | ||
|
||
from lib.l10n_utils import translation | ||
|
||
|
||
class LocalePrefixPattern(django.urls.LocalePrefixPattern): | ||
""" | ||
A sub-class of Django's "LocalePrefixPattern" that simply normalizes the language | ||
prefix for Bedrock (e.g., upper-case country codes). | ||
|
||
This is an essential piece, since it controls the resolution of the | ||
language-code prefix of incoming URLs as well as the language code prefix | ||
of the URLs generated by Django's "reverse". | ||
""" | ||
|
||
@property | ||
def language_prefix(self): | ||
language_code = normalize_language(translation.get_language()) or settings.LANGUAGE_CODE | ||
if language_code == settings.LANGUAGE_CODE and not self.prefix_default_language: | ||
return "" | ||
return f"{language_code}/" | ||
|
||
|
||
def path_needs_lang_code(path): | ||
"""Convenient way to spot if a path needs a prefix or not""" | ||
if path in settings.SUPPORTED_LOCALE_IGNORE: | ||
return False | ||
|
||
if path.lstrip("/").split("/")[0] in settings.SUPPORTED_NONLOCALES: | ||
return False | ||
|
||
return True | ||
|
||
|
||
def bedrock_i18n_patterns(*urls, prefix_default_language=True): | ||
""" | ||
Similar to Django's i18n_patterns but uses our "LocalePrefixPattern", defined above | ||
""" | ||
if not settings.USE_I18N: | ||
return list(urls) | ||
return [ | ||
django.urls.URLResolver( | ||
LocalePrefixPattern(prefix_default_language=prefix_default_language), | ||
list(urls), | ||
) | ||
] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note: This is also how SUMO does it |
||
|
||
|
||
def normalize_language(language): | ||
""" | ||
Given a language code, returns the language code supported by Bedrock | ||
in the proper case, for example "eN-us" --> "en-US" - or None if | ||
Bedrock doesn't support the language code. | ||
|
||
We also convert it to a specified fallback language if the actual | ||
presented code is not available _and_ we have a fallback specced in | ||
settings. | ||
|
||
""" | ||
|
||
if not language: | ||
return None | ||
|
||
if settings.IS_POCKET_MODE: | ||
lang_code = language.lower() | ||
else: | ||
lang_code = language | ||
|
||
if lang_code in settings.LANGUAGE_URL_MAP_WITH_FALLBACKS: | ||
return settings.LANGUAGE_URL_MAP_WITH_FALLBACKS[lang_code] | ||
|
||
# Reformat the lang code to be mixed-case, as we expect | ||
# them to be for our lookup | ||
lang, _partition, classifier = language.partition("-") | ||
lang = lang.lower() # this part is _always_ lowercase | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note: What follows could be refactored into something more easily readable, but waiting on feedback. Supporting the special cases could at least be moved into a small helper func |
||
_special_cases = ["hant"] | ||
|
||
if classifier: | ||
if "-" in classifier or classifier in _special_cases: | ||
_classifier, _partition, rest = classifier.partition("-") | ||
if _classifier.lower() in _special_cases: | ||
# support zh-hant-tw -> zh-Hant-TW | ||
_classifier = capfirst(_classifier) | ||
if rest: | ||
classifier = f"{_classifier}-{rest.upper()}" | ||
else: | ||
classifier = _classifier | ||
else: | ||
# Support patterns like ja-JP-mac | ||
classifier = f"{_classifier.upper()}-{rest}" | ||
else: | ||
classifier = classifier.upper() | ||
|
||
lang_code = f"{lang}-{classifier}" | ||
else: | ||
lang_code = lang | ||
|
||
try: | ||
return settings.LANGUAGE_URL_MAP_WITH_FALLBACKS[lang_code] | ||
except KeyError: | ||
pre = lang_code.split("-")[0] | ||
return settings.LANGUAGE_URL_MAP_WITH_FALLBACKS.get(pre) | ||
|
||
|
||
def split_path_and_normalize_language(path_): | ||
""" | ||
Split the requested path into (lang, path) and | ||
switches to a supported lang if available | ||
|
||
locale will be empty if it isn't found. | ||
|
||
Returns: | ||
- lang code (str, may be empty) | ||
- remaining URL path (str) | ||
- whether or not the lang code changed (bool) | ||
|
||
""" | ||
path = path_.lstrip("/") | ||
|
||
# Use partition instead of split since it always returns 3 parts | ||
extracted_lang, _, rest = path.partition("/") | ||
|
||
supported_lang = normalize_language(extracted_lang) | ||
different = extracted_lang != supported_lang | ||
|
||
if extracted_lang and supported_lang: | ||
return supported_lang, rest, different | ||
else: | ||
return "", path, False | ||
|
||
|
||
def get_language_from_headers(request): | ||
""" | ||
Return a locale code we support on the site using the | ||
user's Accept-Language header to determine which is best. This | ||
mostly follows the RFCs but read bug 439568 for details. | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note: (to self): reading this now, maybe we can just use Django's own helpers for this |
||
accept_lang = request.headers.get("Accept-Language") | ||
if accept_lang: | ||
best = get_best_language(accept_lang) | ||
if best: | ||
return best | ||
|
||
return settings.LANGUAGE_CODE | ||
|
||
|
||
def get_best_language(accept_lang): | ||
"""Given an Accept-Language header, return the best-matching language.""" | ||
ranked = parse_accept_lang_header(accept_lang) | ||
for lang, _ in ranked: | ||
supported = normalize_language(lang) | ||
if supported: | ||
return supported | ||
|
||
|
||
def check_for_bedrock_language(lang_code): | ||
return lang_code in [x[0] for x in settings.LANGUAGES] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,48 +10,188 @@ | |
""" | ||
|
||
import base64 | ||
import contextlib | ||
import inspect | ||
import time | ||
from functools import wraps | ||
|
||
from django.conf import settings | ||
from django.core.exceptions import MiddlewareNotUsed | ||
from django.http import Http404, HttpResponse | ||
from django.http import Http404, HttpResponse, HttpResponseRedirect | ||
from django.middleware.locale import LocaleMiddleware as DjangoLocaleMiddleware | ||
from django.utils import translation | ||
from django.utils.deprecation import MiddlewareMixin | ||
from django.utils.translation import trans_real | ||
|
||
from commonware.middleware import FrameOptionsHeader as OldFrameOptionsHeader | ||
|
||
from bedrock.base import metrics | ||
from lib.l10n_utils import translation | ||
from bedrock.base.i18n import ( | ||
check_for_bedrock_language, | ||
get_language_from_headers, | ||
normalize_language, | ||
path_needs_lang_code, | ||
split_path_and_normalize_language, | ||
) | ||
from lib.l10n_utils import is_root_path_with_no_language_clues | ||
|
||
from . import urlresolvers | ||
|
||
class BedrockLangCodeFixupMiddleware(MiddlewareMixin): | ||
"""Middleware focused on prepping a viable, Bedrock-compatible language code | ||
in the URL, ready for the rest of the i18n logic. | ||
|
||
It: | ||
|
||
1) Redirects to a new language-coded path if we detect the lang=XX querystring. | ||
This is basically a no-JS way for us to handle language switching. | ||
|
||
2) Normalises language codes that are in the path - eg en-us -> en-US and also | ||
goes to a prefix code if we don't have support (eg de-AT -> de) | ||
|
||
3) If no redirect is needed, sets request.locale to be the normalized | ||
lang code we've got from the URL | ||
|
||
Querystrings are preserved in GET redirects. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nitpick (non-blocking): This docstring should probably also mention the support for querystring-based language switching |
||
|
||
class LocaleURLMiddleware: | ||
""" | ||
This middleware adjusts the `path_info` for reverse URL resolving. | ||
|
||
We split `request.path_info` into `locale` and `path`. The `path` portion | ||
is saved back to `request.path_info` for reverse URL resolving, while the | ||
`locale` will either be one we support or empty string. | ||
def _redirect(self, request, lang_code, subpath): | ||
dest = f"/{lang_code}/{subpath}" | ||
|
||
# log the redirect without querystrings, in case there's a token or similar we don't want to end up in Markus | ||
metrics.incr("bedrock.langfixup.redirect", tags=[f"from:{request.path}", f"to:{dest}"]) | ||
|
||
if request.GET: | ||
dest += f"?{request.GET.urlencode()}" | ||
|
||
return HttpResponseRedirect(dest) | ||
|
||
def process_request(self, request): | ||
# Initially see if we can separate a lang code from the rest of the URL | ||
# path, along with a hint about whether the lang code changed, which | ||
# suggests we will need to redirect using `lang_code` as a new prefix. | ||
|
||
lang_code, subpath, lang_code_changed = split_path_and_normalize_language(request.path) | ||
|
||
# Check for any no-JS language - doing this eagerly is good, as any further | ||
# fixups due to the actual lang code in the lang=XX querystring will be | ||
# handled by the rest of this function when we come back to it. | ||
# Also fixes https://github.com/mozilla/bedrock/issues/5931 | ||
lang_via_querystring = request.GET.get("lang", None) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Interestingly, it doesn't quite quack like that. |
||
if lang_via_querystring is not None: | ||
cleaned_lang_via_querystring = normalize_language(lang_via_querystring) | ||
# Drop the lang querystring to avoid a redirect loop; | ||
# request.GET is immutable so we have to edit a copy | ||
if not cleaned_lang_via_querystring: | ||
cleaned_lang_via_querystring = settings.LANGUAGE_CODE | ||
qs = request.GET.copy() | ||
del qs["lang"] | ||
request.GET = qs | ||
return self._redirect(request, cleaned_lang_via_querystring, subpath) | ||
|
||
stevejalim marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# If we're not redirecting based on a querystring, do we need to find | ||
# a language code? If so, find from the headers and redirect. | ||
if not lang_code and path_needs_lang_code(request.path) and not is_root_path_with_no_language_clues(request): | ||
lang_code = get_language_from_headers(request) | ||
return self._redirect(request, lang_code, subpath) | ||
|
||
# Or if we have a lang code, but it's not the one that was in the | ||
# original URL path – e.g. fixing es-mx to es-MX - we also need to redirect | ||
if lang_code and lang_code_changed: | ||
return self._redirect(request, lang_code, subpath) | ||
|
||
# If we get this far, the path contains a validly formatted lang code | ||
# OR the path does not need a lang code. We annotate the request appropriately | ||
request.locale = lang_code if lang_code else "" | ||
|
||
|
||
class BedrockLocaleMiddleware(DjangoLocaleMiddleware): | ||
"""Middleware that usually* wraps Django's own i18n middleware in order to | ||
ensure we normalize language codes - i..e. we ensure they are in | ||
mixed case we use, rather than Django's internal all-lowercase codes. | ||
|
||
*for one specific situation, though, we skip Django's LocaleMiddleware entirely: | ||
we have a special SEO-helping page that gets returned to robots/spiders that | ||
don't declare an accept-language header, showing a list of locales to pick. | ||
|
||
It needs to be kept super-light so that it doesn't diverge too far from | ||
the stock LocaleMiddleware, lest we wake up dragons when we use | ||
wagtail-localize, which squarely depends on django's LocaleMiddleware. | ||
|
||
Note: this is not SUMO's LocaleMiddleware, this just a tribute. | ||
(https://github.com/escattone/kitsune/blob/main/kitsune/sumo/middleware.py#L128) | ||
stevejalim marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
|
||
def __init__(self, get_response): | ||
self.get_response = get_response | ||
def process_request(self, request): | ||
if is_root_path_with_no_language_clues(request): | ||
# Skip using Django's LocaleMiddleware | ||
metrics.incr( | ||
"bedrock.localemiddleware.skipdjangolocalemiddleware", | ||
tags=[f"path:{request.path}"], | ||
) | ||
else: | ||
with normalized_get_language(): | ||
with simplified_check_for_language(): | ||
return super().process_request(request) | ||
|
||
def __call__(self, request): | ||
response = self.process_request(request) | ||
if response: | ||
def process_response(self, request, response): | ||
if is_root_path_with_no_language_clues(request): | ||
# Skip using Django's LocaleMiddleware on the response cycle too | ||
return response | ||
return self.get_response(request) | ||
|
||
def process_request(self, request): | ||
prefixer = urlresolvers.Prefixer(request) | ||
urlresolvers.set_url_prefix(prefixer) | ||
with normalized_get_language(): | ||
with simplified_check_for_language(): | ||
return super().process_response(request, response) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. praise: Credit for the patcing approach goes to @escattone, who worked out how to do it for SUMO |
||
|
||
@contextlib.contextmanager | ||
def normalized_get_language(): | ||
""" | ||
Ensures that any use of django.utils.translation.get_language() | ||
within its context will return a normalized language code. This | ||
context manager only works when the "get_language" function is | ||
acquired from the "django.utils.translation" module at call time, | ||
so for example, if it's called like "translation.get_language()". | ||
|
||
Note that this does not cover every call to translation.get_language() | ||
as there are calls to it on Django startup and more, but this gives us an | ||
extra layer of (idempotent) fixing up within the request/response cycle. | ||
""" | ||
get_language = translation.get_language | ||
|
||
@wraps(get_language) | ||
def get_normalized_language(): | ||
return normalize_language(get_language()) | ||
|
||
translation.get_language = get_normalized_language | ||
|
||
try: | ||
yield | ||
finally: | ||
translation.get_language = get_language | ||
|
||
|
||
@contextlib.contextmanager | ||
def simplified_check_for_language(): | ||
"""Ensures that calls to trans_real.check_for_language within | ||
its context will not check for the existence of files in | ||
`appname/LANG_CODE/locale` and therefore avoid a false negative about | ||
the langs we support (our lang files are in ./data/l10n-team/LANG_CODE) | ||
but check_for_language is opinionated and expects only the Django directory | ||
pattern for lang files.""" | ||
stevejalim marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
check_for_language = trans_real.check_for_language | ||
|
||
@wraps(check_for_language) | ||
def simpler_check_for_language(lang_code): | ||
return check_for_bedrock_language(lang_code) | ||
|
||
trans_real.check_for_language = simpler_check_for_language | ||
|
||
request.path_info = f"/{prefixer.shortened_path}" | ||
request.locale = prefixer.locale | ||
translation.activate(prefixer.locale or settings.LANGUAGE_CODE) | ||
try: | ||
yield | ||
finally: | ||
trans_real.check_for_language = check_for_language | ||
|
||
|
||
class BasicAuthMiddleware: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
note: It would be nice to make this dynamically check the mozorg nonlocale_urls.py (and to move any other similar routes into a relevant nonlocale_urls.py file.
Note: this comment will make more sense after you've seen the changes to
urls/mozorg_mode.py