diff --git a/bedrock/base/i18n.py b/bedrock/base/i18n.py new file mode 100644 index 00000000000..100f7c0ce4d --- /dev/null +++ b/bedrock/base/i18n.py @@ -0,0 +1,165 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +import django.urls +from django.conf import settings +from django.utils.text import capfirst +from django.utils.translation.trans_real import parse_accept_lang_header + +from lib.l10n_utils import translation + + +class LocalePrefixPattern(django.urls.LocalePrefixPattern): + """ + A sub-class of Django's "LocalePrefixPattern" that simply normalizes the language + prefix for Bedrock (e.g., upper-case country codes). + + This is an essential piece, since it controls the resolution of the + language-code prefix of incoming URLs as well as the language code prefix + of the URLs generated by Django's "reverse". + """ + + @property + def language_prefix(self): + language_code = normalize_language(translation.get_language()) or settings.LANGUAGE_CODE + if language_code == settings.LANGUAGE_CODE and not self.prefix_default_language: + return "" + return f"{language_code}/" + + +def path_needs_lang_code(path): + """Convenient way to spot if a path needs a prefix or not""" + if path in settings.SUPPORTED_LOCALE_IGNORE: + return False + + if path.lstrip("/").split("/")[0] in settings.SUPPORTED_NONLOCALES: + return False + + return True + + +def bedrock_i18n_patterns(*urls, prefix_default_language=True): + """ + Similar to Django's i18n_patterns but uses our "LocalePrefixPattern", defined above + """ + if not settings.USE_I18N: + return list(urls) + return [ + django.urls.URLResolver( + LocalePrefixPattern(prefix_default_language=prefix_default_language), + list(urls), + ) + ] + + +def normalize_language(language): + """ + Given a language code, returns the language code supported by Bedrock + in the proper case, for example "eN-us" --> "en-US" - or None if + Bedrock doesn't support the language code. + + We also convert it to a specified fallback language if the actual + presented code is not available _and_ we have a fallback specced in + settings. + + """ + + if not language: + return None + + if settings.IS_POCKET_MODE: + lang_code = language.lower() + else: + lang_code = language + + if lang_code in settings.LANGUAGE_URL_MAP_WITH_FALLBACKS: + return settings.LANGUAGE_URL_MAP_WITH_FALLBACKS[lang_code] + + # Reformat the lang code to be mixed-case, as we expect + # them to be for our lookup + lang, _partition, classifier = language.partition("-") + lang = lang.lower() # this part is _always_ lowercase + + _special_cases = ["hant"] + + if classifier: + if "-" in classifier or classifier in _special_cases: + _classifier, _partition, rest = classifier.partition("-") + if _classifier.lower() in _special_cases: + # support zh-hant-tw -> zh-Hant-TW + _classifier = capfirst(_classifier) + if rest: + classifier = f"{_classifier}-{rest.upper()}" + else: + classifier = _classifier + else: + # Support patterns like ja-JP-mac + classifier = f"{_classifier.upper()}-{rest}" + else: + classifier = classifier.upper() + + lang_code = f"{lang}-{classifier}" + else: + lang_code = lang + + try: + return settings.LANGUAGE_URL_MAP_WITH_FALLBACKS[lang_code] + except KeyError: + pre = lang_code.split("-")[0] + return settings.LANGUAGE_URL_MAP_WITH_FALLBACKS.get(pre) + + +def split_path_and_normalize_language(path_): + """ + Split the requested path into (lang, path) and + switches to a supported lang if available + + locale will be empty if it isn't found. + + Returns: + - lang code (str, may be empty) + - remaining URL path (str) + - whether or not the lang code changed (bool) + + """ + path = path_.lstrip("/") + + # Use partition instead of split since it always returns 3 parts + extracted_lang, _, rest = path.partition("/") + + supported_lang = normalize_language(extracted_lang) + different = extracted_lang != supported_lang + + if extracted_lang and supported_lang: + return supported_lang, rest, different + else: + return "", path, False + + +def get_language_from_headers(request): + """ + Return a locale code we support on the site using the + user's Accept-Language header to determine which is best. This + mostly follows the RFCs but read bug 439568 for details. + """ + accept_lang = request.headers.get("Accept-Language") + if accept_lang: + best = get_best_language(accept_lang) + if best: + return best + + return settings.LANGUAGE_CODE + + +def get_best_language(accept_lang): + """Given an Accept-Language header, return the best-matching language.""" + ranked = parse_accept_lang_header(accept_lang) + for lang, _ in ranked: + supported = normalize_language(lang) + if supported: + return supported + + +def check_for_bedrock_language(lang_code): + return lang_code in [x[0] for x in settings.LANGUAGES] diff --git a/bedrock/base/middleware.py b/bedrock/base/middleware.py index 980cdd2ca48..af69012b769 100644 --- a/bedrock/base/middleware.py +++ b/bedrock/base/middleware.py @@ -10,48 +10,188 @@ """ import base64 +import contextlib import inspect import time +from functools import wraps from django.conf import settings from django.core.exceptions import MiddlewareNotUsed -from django.http import Http404, HttpResponse +from django.http import Http404, HttpResponse, HttpResponseRedirect +from django.middleware.locale import LocaleMiddleware as DjangoLocaleMiddleware +from django.utils import translation from django.utils.deprecation import MiddlewareMixin +from django.utils.translation import trans_real from commonware.middleware import FrameOptionsHeader as OldFrameOptionsHeader from bedrock.base import metrics -from lib.l10n_utils import translation +from bedrock.base.i18n import ( + check_for_bedrock_language, + get_language_from_headers, + normalize_language, + path_needs_lang_code, + split_path_and_normalize_language, +) +from lib.l10n_utils import is_root_path_with_no_language_clues -from . import urlresolvers +class BedrockLangCodeFixupMiddleware(MiddlewareMixin): + """Middleware focused on prepping a viable, Bedrock-compatible language code + in the URL, ready for the rest of the i18n logic. + + It: + + 1) Redirects to a new language-coded path if we detect the lang=XX querystring. + This is basically a no-JS way for us to handle language switching. + + 2) Normalises language codes that are in the path - eg en-us -> en-US and also + goes to a prefix code if we don't have support (eg de-AT -> de) + + 3) If no redirect is needed, sets request.locale to be the normalized + lang code we've got from the URL + + Querystrings are preserved in GET redirects. -class LocaleURLMiddleware: """ - This middleware adjusts the `path_info` for reverse URL resolving. - We split `request.path_info` into `locale` and `path`. The `path` portion - is saved back to `request.path_info` for reverse URL resolving, while the - `locale` will either be one we support or empty string. + def _redirect(self, request, lang_code, subpath): + dest = f"/{lang_code}/{subpath}" + + # log the redirect without querystrings, in case there's a token or similar we don't want to end up in Markus + metrics.incr("bedrock.langfixup.redirect", tags=[f"from:{request.path}", f"to:{dest}"]) + + if request.GET: + dest += f"?{request.GET.urlencode()}" + + return HttpResponseRedirect(dest) + def process_request(self, request): + # Initially see if we can separate a lang code from the rest of the URL + # path, along with a hint about whether the lang code changed, which + # suggests we will need to redirect using `lang_code` as a new prefix. + + lang_code, subpath, lang_code_changed = split_path_and_normalize_language(request.path) + + # Check for any no-JS language - doing this eagerly is good, as any further + # fixups due to the actual lang code in the lang=XX querystring will be + # handled by the rest of this function when we come back to it. + # Also fixes https://github.com/mozilla/bedrock/issues/5931 + lang_via_querystring = request.GET.get("lang", None) + if lang_via_querystring is not None: + cleaned_lang_via_querystring = normalize_language(lang_via_querystring) + # Drop the lang querystring to avoid a redirect loop; + # request.GET is immutable so we have to edit a copy + if not cleaned_lang_via_querystring: + cleaned_lang_via_querystring = settings.LANGUAGE_CODE + qs = request.GET.copy() + del qs["lang"] + request.GET = qs + return self._redirect(request, cleaned_lang_via_querystring, subpath) + + # If we're not redirecting based on a querystring, do we need to find + # a language code? If so, find from the headers and redirect. + if not lang_code and path_needs_lang_code(request.path) and not is_root_path_with_no_language_clues(request): + lang_code = get_language_from_headers(request) + return self._redirect(request, lang_code, subpath) + + # Or if we have a lang code, but it's not the one that was in the + # original URL path – e.g. fixing es-mx to es-MX - we also need to redirect + if lang_code and lang_code_changed: + return self._redirect(request, lang_code, subpath) + + # If we get this far, the path contains a validly formatted lang code + # OR the path does not need a lang code. We annotate the request appropriately + request.locale = lang_code if lang_code else "" + + +class BedrockLocaleMiddleware(DjangoLocaleMiddleware): + """Middleware that usually* wraps Django's own i18n middleware in order to + ensure we normalize language codes - i..e. we ensure they are in + mixed case we use, rather than Django's internal all-lowercase codes. + + *for one specific situation, though, we skip Django's LocaleMiddleware entirely: + we have a special SEO-helping page that gets returned to robots/spiders that + don't declare an accept-language header, showing a list of locales to pick. + + It needs to be kept super-light so that it doesn't diverge too far from + the stock LocaleMiddleware, lest we wake up dragons when we use + wagtail-localize, which squarely depends on django's LocaleMiddleware. + + Note: this is not SUMO's LocaleMiddleware, this just a tribute. + (https://github.com/escattone/kitsune/blob/main/kitsune/sumo/middleware.py#L128) """ - def __init__(self, get_response): - self.get_response = get_response + def process_request(self, request): + if is_root_path_with_no_language_clues(request): + # Skip using Django's LocaleMiddleware + metrics.incr( + "bedrock.localemiddleware.skipdjangolocalemiddleware", + tags=[f"path:{request.path}"], + ) + else: + with normalized_get_language(): + with simplified_check_for_language(): + return super().process_request(request) - def __call__(self, request): - response = self.process_request(request) - if response: + def process_response(self, request, response): + if is_root_path_with_no_language_clues(request): + # Skip using Django's LocaleMiddleware on the response cycle too return response - return self.get_response(request) - def process_request(self, request): - prefixer = urlresolvers.Prefixer(request) - urlresolvers.set_url_prefix(prefixer) + with normalized_get_language(): + with simplified_check_for_language(): + return super().process_response(request, response) + + +@contextlib.contextmanager +def normalized_get_language(): + """ + Ensures that any use of django.utils.translation.get_language() + within its context will return a normalized language code. This + context manager only works when the "get_language" function is + acquired from the "django.utils.translation" module at call time, + so for example, if it's called like "translation.get_language()". + + Note that this does not cover every call to translation.get_language() + as there are calls to it on Django startup and more, but this gives us an + extra layer of (idempotent) fixing up within the request/response cycle. + """ + get_language = translation.get_language + + @wraps(get_language) + def get_normalized_language(): + return normalize_language(get_language()) + + translation.get_language = get_normalized_language + + try: + yield + finally: + translation.get_language = get_language + + +@contextlib.contextmanager +def simplified_check_for_language(): + """Ensures that calls to trans_real.check_for_language within + its context will not check for the existence of files in + `appname/LANG_CODE/locale` and therefore avoid a false negative about + the langs we support (our lang files are in ./data/l10n-team/LANG_CODE) + but check_for_language is opinionated and expects only the Django directory + pattern for lang files.""" + + check_for_language = trans_real.check_for_language + + @wraps(check_for_language) + def simpler_check_for_language(lang_code): + return check_for_bedrock_language(lang_code) + + trans_real.check_for_language = simpler_check_for_language - request.path_info = f"/{prefixer.shortened_path}" - request.locale = prefixer.locale - translation.activate(prefixer.locale or settings.LANGUAGE_CODE) + try: + yield + finally: + trans_real.check_for_language = check_for_language class BasicAuthMiddleware: diff --git a/bedrock/base/tests/test_accepted_locales.py b/bedrock/base/tests/test_accepted_locales.py index 7f36171029c..230399dfd8e 100644 --- a/bedrock/base/tests/test_accepted_locales.py +++ b/bedrock/base/tests/test_accepted_locales.py @@ -87,10 +87,17 @@ def test_dev_languages(self): settings.DEV = True # simulate the successful result of the DEV_LANGUAGES list # comprehension defined in settings. - settings.DEV_LANGUAGES = ["en-US", "fr"] + settings.DEV_LANGUAGES = [ + "en-US", + "fr", + "de", + "es-MX", + ] assert settings.LANGUAGE_URL_MAP == { - "en-us": "en-US", + "en-US": "en-US", "fr": "fr", + "de": "de", + "es-MX": "es-MX", }, "DEV is True, but DEV_LANGUAGES are not used to define the allowed locales." def test_prod_languages(self): @@ -100,4 +107,4 @@ def test_prod_languages(self): """ settings.DEV = False - assert settings.LANGUAGE_URL_MAP == {"en-us": "en-US"}, "DEV is False, but PROD_LANGUAGES are not used to define the allowed locales." + assert settings.LANGUAGE_URL_MAP == {"en-US": "en-US"}, "DEV is False, but PROD_LANGUAGES are not used to define the allowed locales." diff --git a/bedrock/base/tests/test_i18n.py b/bedrock/base/tests/test_i18n.py new file mode 100644 index 00000000000..d0c835a33dd --- /dev/null +++ b/bedrock/base/tests/test_i18n.py @@ -0,0 +1,304 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +from django.test import override_settings +from django.urls import URLResolver + +import pytest + +from bedrock.base.i18n import ( + LocalePrefixPattern, + bedrock_i18n_patterns, + check_for_bedrock_language, + get_best_language, + get_language_from_headers, + normalize_language, + path_needs_lang_code, + split_path_and_normalize_language, +) +from lib.l10n_utils import translation + + +@override_settings(IS_MOZORG_MODE=True, IS_POCKET_MODE=False) +@pytest.mark.parametrize( + "lang_code, expected", + ( + (None, None), + ("", None), + # General normalization + ("EN-US", "en-US"), + ("en-US", "en-US"), + ("eN-gB", "en-GB"), + ("ja-jp-mac", "ja"), + ("JA-JP-MAC", "ja"), + # from CANONICAL_LOCALES + ("en", "en-US"), + ("es", "es-ES"), + ("ja-JP-mac", "ja"), + ("no", "nb-NO"), + ("pt", "pt-BR"), + ("sv", "sv-SE"), + # mixed/ideal case + ("zh-Hant", "zh-TW"), + ("zh-Hant-TW", "zh-TW"), + ("zh-HK", "zh-TW"), + ("zh-Hant-HK", "zh-TW"), + # all lowercase + ("zh-hant", "zh-TW"), + ("zh-hant-tw", "zh-TW"), + ("zh-hk", "zh-TW"), + ("zh-hant-hk", "zh-TW"), + # lowercase country + ("zh-Hant", "zh-TW"), + ("zh-Hant-tw", "zh-TW"), + ("zh-hk", "zh-TW"), + ("zh-Hant-hk", "zh-TW"), + # lowercase middle part + ("zh-Hant-TW", "zh-TW"), + ("zh-hant-HK", "zh-TW"), + # misses/no matches + ("dude", None), + ("the-DUDE", None), + ("xx", None), + ("dq", None), + ), +) +def test_normalize_language_mozorg_mode(lang_code, expected): + assert normalize_language(lang_code) == expected + + +@override_settings( + IS_MOZORG_MODE=False, + IS_POCKET_MODE=True, + LANGUAGE_URL_MAP_WITH_FALLBACKS={ + # patch in what we set in settings.__init__ + "de": "de", + "en": "en", + "es": "es", + "es-la": "es-la", + "fr-ca": "fr-ca", + "fr": "fr", + "it": "it", + "ja": "ja", + "ko": "ko", + "nl": "nl", + "pl": "pl", + "pt-br": "pt-br", + "pt": "pt", + "ru": "ru", + "zh-cn": "zh-cn", + "zh-tw": "zh-tw", + }, +) +@pytest.mark.parametrize( + "lang_code, expected", + ( + (None, None), + ("", None), # General normalization + ("de", "de"), + ("de-DE", "de"), + ("DE", "de"), + ("en", "en"), + ("en-US", "en"), + ("EN", "en"), + ("es", "es"), + ("ES", "es"), + ("es-la", "es-la"), + ("es-LA", "es-la"), + ("ES-LA", "es-la"), + ("fr-ca", "fr-ca"), + ("fr-CA", "fr-ca"), + ("FR-CA", "fr-ca"), + ("fr", "fr"), + ("FR", "fr"), + ("it", "it"), + ("IT", "it"), + ("ja", "ja"), + ("JA", "ja"), + ("ko", "ko"), + ("KO", "ko"), + ("nl", "nl"), + ("NL", "nl"), + ("pl", "pl"), + ("PL", "pl"), + ("pt-br", "pt-br"), + ("pt-BR", "pt-br"), + ("PT-BR", "pt-br"), + ("pt", "pt"), + ("PT", "pt"), + ("pt-PT", "pt"), + ("PT-PT", "pt"), + ("ru", "ru"), + ("RU", "ru"), + ("zh-cn", "zh-cn"), + ("zh-CN", "zh-cn"), + ("ZH-CN", "zh-cn"), + ("zh-tw", "zh-tw"), + ("zh-TW", "zh-tw"), + ("ZH-TW", "zh-tw"), + ), +) +def test_normalize_language_pocket_mode(lang_code, expected): + assert normalize_language(lang_code) == expected + + +@pytest.mark.parametrize( + "path, expected", + ( + ("media", False), + ("static", False), + ("certs", False), + ("images", False), + ("contribute.json", False), + ("credits", False), + ("gameon", False), + ("robots.txt", False), + (".well-known", False), + ("telemetry", False), + ("webmaker", False), + ("contributor-data", False), + ("healthz", False), + ("readiness", False), + ("healthz-cron", False), + ("2004", False), + ("2005", False), + ("2006", False), + ("keymaster", False), + ("microsummaries", False), + ("xbl", False), + ("revision.txt", False), + ("locales", False), + ("/sitemap_none.xml", False), + ("/sitemap.xml", False), + # Some example paths that do need them + ("/about/", True), + ("about", True), + ("/products/", True), + ("products/", True), + ("security/some/path/here", True), + # Note that NOT needing a lang string is based on affirmation, so anything + # that isn't affirmed (e.g. cos it already has a lang code) is a default True + ("/en-US/products/", True), + ), +) +def test_path_needs_lang_code(path, expected): + assert path_needs_lang_code(path) == expected + + +@pytest.mark.parametrize( + "path, result", + [ + # Basic + ( + "en-US/some/action", + ("en-US", "some/action", False), + ), + # First slash doesn't matter + ("/en-US/some/action", ("en-US", "some/action", False)), + # Nor does capitalization, but it counts as a 'changed' code + ("En-uS/some/action", ("en-US", "some/action", True)), + # Unsupported languages return a blank language + ("unsupported/sOmE/action", ("", "unsupported/sOmE/action", False)), + # Fallback to a lang-only lang code, not a territory + ("de-AT/test", ("de", "test", True)), + # Unicode handling + polishing of locale + ("fR-Ca/François/test", ("fr", "François/test", True)), + ], +) +def test_split_path_and_normalize_language(path, result): + res = split_path_and_normalize_language(path) + assert res == result + + +@pytest.mark.parametrize( + "language_to_activate, expected_prefix", + ( + ("en-US", "en-US/"), # perfect fit + ("en-us", "en-US/"), # case fixup + ("de", "de/"), # perfect fit + ("de-AT", "de/"), # lang-code-only fallback fit + ("", "en-US/"), # default/fallback + (None, "en-US/"), # default/fallback + ("abc", "en-US/"), # default/fallback + ), +) +def test_locale_prefix_pattern_works(language_to_activate, expected_prefix): + pattern = LocalePrefixPattern() + + if language_to_activate: + translation.activate(language_to_activate) + + assert pattern.language_prefix == expected_prefix + translation.deactivate() + + +@override_settings(LANGUAGE_CODE="en-GB") +def test_local_prefix_pattern_fallback_mode(): + pattern = LocalePrefixPattern(prefix_default_language=False) + assert pattern.language_prefix == "" + + +@pytest.mark.parametrize( + "lang_code, expected_result", + ( + ("en-US", True), + ("fr", True), + ("sco", True), + ("hsb", True), + ("ach", False), + ("de", False), + ), +) +@override_settings( + LANGUAGES=[ + ("en-US", "English"), + ("fr", "French"), + ("sco", "Scots"), + ("hsb", "Hornjoserbsce"), + ] +) +def test_check_for_bedrock_language(lang_code, expected_result): + assert check_for_bedrock_language(lang_code) == expected_result + + +@pytest.mark.parametrize("use_i18n", (True, False)) +def test_bedrock_i18n_patterns(use_i18n): + from bedrock.careers import urls as career_urls + + with override_settings(USE_I18N=use_i18n): + patterns = bedrock_i18n_patterns(career_urls) + if use_i18n: + assert isinstance(patterns[0], URLResolver) + else: + assert patterns[0] == career_urls + + +@pytest.mark.parametrize( + "headers, expected", + ( + ({"HTTP_ACCEPT_LANGUAGE": "sco,de-DE;q=0.8,fr;q=0.6,en-GB;q=0.4,en-US;q=0.2"}, "sco"), + ({"HTTP_ACCEPT_LANGUAGE": "fr,de-DE;q=0.8,sco;q=0.6,en-GB;q=0.4,en-US;q=0.2"}, "fr"), + ({"HTTP_ACCEPT_LANGUAGE": "es-mx,es-es;q=0.8"}, "es-MX"), + ({"HTTP_ACCEPT_LANGUAGE": "de-AT,de;q=0.8,sco;q=0.6,en-GB;q=0.4,en-US;q=0.2"}, "de"), + ({}, "en-US"), + ), +) +def test_get_language_from_headers(rf, headers, expected): + request = rf.get("/", **headers) + assert get_language_from_headers(request) == expected + + +@pytest.mark.parametrize( + "header, expected", + ( + ("sco,de-DE;q=0.8,fr;q=0.6,en-GB;q=0.4,en-US;q=0.2", "sco"), + ("fr,de-DE;q=0.8,sco;q=0.6,en-GB;q=0.4,en-US;q=0.2", "fr"), + ("es-at,es-es;q=0.8", "es-ES"), + ("de-AT,de;q=0.8,sco;q=0.6,en-GB;q=0.4,en-US;q=0.2", "de"), + ("am", None), + ("", None), + ), +) +def test_get_best_language(header, expected): + assert get_best_language(header) == expected diff --git a/bedrock/base/tests/test_middleware.py b/bedrock/base/tests/test_middleware.py index 5653f2741b5..a92acfc4316 100644 --- a/bedrock/base/tests/test_middleware.py +++ b/bedrock/base/tests/test_middleware.py @@ -3,63 +3,22 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. from contextlib import suppress +from unittest import mock from django.http import HttpResponse -from django.test import Client, RequestFactory, TestCase +from django.test import Client, TestCase from django.test.utils import override_settings from django.urls import reverse +import pytest from jinja2.exceptions import UndefinedError from markus.testing import MetricsMock +from pytest_django.asserts import assertTemplateUsed -from bedrock.base.middleware import LocaleURLMiddleware - - -@override_settings(DEV=True) -class TestLocaleURLMiddleware(TestCase): - def setUp(self): - self.rf = RequestFactory() - self.middleware = LocaleURLMiddleware(get_response=HttpResponse) - - @override_settings(DEV_LANGUAGES=("de", "fr")) - def test_matching_locale(self): - locale = "fr" - path = "/the/dude/" - full_path = f"/{locale}{path}" - req = self.rf.get(full_path) - self.middleware.process_request(req) - self.assertEqual(req.path_info, path) - self.assertEqual(req.locale, "fr") - - @override_settings(DEV_LANGUAGES=("de", "fr")) - def test_non_matching_locale(self): - locale = "zh" - path = "/the/dude/" - full_path = f"/{locale}{path}" - req = self.rf.get(full_path) - self.middleware.process_request(req) - self.assertEqual(req.path_info, full_path) - self.assertEqual(req.locale, "") - - @override_settings(DEV_LANGUAGES=("zh-CN", "zh-TW")) - def test_matching_main_language_to_sub_language(self): - locale = "zh" - path = "/the/dude/" - full_path = f"/{locale}{path}" - req = self.rf.get(full_path) - self.middleware.process_request(req) - self.assertEqual(req.path_info, path) - self.assertEqual(req.locale, "zh-CN") - - @override_settings(DEV_LANGUAGES=("es-ES", "fr")) - def test_matching_canonical(self): - locale = "es" - path = "/the/dude/" - full_path = f"/{locale}{path}" - req = self.rf.get(full_path) - self.middleware.process_request(req) - self.assertEqual(req.path_info, path) - self.assertEqual(req.locale, "es-ES") +from bedrock.base.middleware import ( + BedrockLangCodeFixupMiddleware, + BedrockLocaleMiddleware, +) @override_settings( @@ -209,3 +168,101 @@ def test_returns_500(self): "view.timings", tags=["view_path:bedrock.base.tests.urls.returns_500.GET", "module:bedrock.base.tests.urls.GET", "method:GET", "status_code:500"], ) + + +@pytest.mark.parametrize( + "request_path, expected_status_code, expected_dest, expected_request_locale", + ( + ( + "/", + 302, + "/de/", # because accept-language header has de as default lang, + None, + ), + ("/en-us/", 302, "/en-US/", None), + ("/en-US/", 200, None, "en-US"), + ("/de/", 200, None, "de"), + ("/en-US/i/am/a/path/", 200, None, "en-US"), + ("/de/i/am/a/path/", 200, None, "de"), + ("/en-us/path/to/thing/", 302, "/en-US/path/to/thing/", None), + ("/de-AT/path/to/thing/", 302, "/de/path/to/thing/", None), + ("/es-mx/path/here?to=thing&test=true", 302, "/es-MX/path/here?to=thing&test=true", None), + ("/en-us/path/to/an/éclair/", 302, "/en-US/path/to/an/%C3%A9clair/", None), + ("/it/path/to/an/éclair/", 200, None, "it"), + ("/sco/", 200, None, "sco"), + ("/de/path/to/thing/?lang=fr", 302, "/fr/path/to/thing/", None), + ("/de/path/to/thing/?lang=vv", 302, "/en-US/path/to/thing/", None), + ("/de/path/to/thing/?lang", 302, "/en-US/path/to/thing/", None), + ("/de/path/to/thing/?lang=fr&test=true&foo=bar", 302, "/fr/path/to/thing/?test=true&foo=bar", None), + ), + ids=[ + "Bare root path", + "Lowercase lang code for root path", + "No change needed for root path with good locale 1", + "No change needed for root path with good locale 2", + "No change needed for deep path with good locale 1", + "No change needed for deep path with good locale 2", + "Lowercase lang code for deeper path", + "Unsuported lang goes to root supported lang code", + "Querystrings are preserved during fixup", + "Unicode escaped during fixup", + "Unicode accepted during pass-through", + "Three-letter locale acceptable", + "?lang querystring for valid locale", + "?lang querystring for invalid locale", + "?lang querystring with no value", + "?lang querystring for valid locale and further querystrings", + ], +) +def test_BedrockLangCodeFixupMiddleware( + request_path, + expected_status_code, + expected_dest, + expected_request_locale, + rf, +): + request = rf.get( + request_path, + HTTP_ACCEPT_LANGUAGE="de-DE,en-GB;q=0.4,en-US;q=0.2", + ) + + middleware = BedrockLangCodeFixupMiddleware(get_response=HttpResponse) + + resp = middleware.process_request(request) + + if resp: + assert resp.status_code == 302 + assert resp.headers["location"] == expected_dest + else: + # the request will have been annotated by the middleware + assert request.locale == expected_request_locale + + +@pytest.mark.django_db +def test_BedrockLangCodeFixupMiddleware__no_lang_info_gets_locale_page__end_to_end(client): + """Quick end-to-end test confirming the custom 404-locale template is rendered + at the / path when there is no accept-language header""" + + resp = client.get("/", follow=False) + assert "HTTP_ACCEPT_LANGUAGE" not in resp.request + assert resp.status_code == 200 + # this template use actually happens in lib.l10n_utils.render + assertTemplateUsed(resp, "404-locale.html") + + +@mock.patch("django.middleware.locale.LocaleMiddleware.process_request") +@mock.patch("django.middleware.locale.LocaleMiddleware.process_response") +def test_BedrockLocaleMiddleware_skips_super_call_if_path_is_for_root_and_has_no_lang_clues( + mock_django_localemiddleware_process_response, + mock_django_localemiddleware_process_request, + rf, +): + fake_request = rf.get("/") + assert "HTTP_ACCEPT_LANGUAGE" not in fake_request + middleware = BedrockLocaleMiddleware(fake_request) + middleware.process_request(fake_request) + assert not mock_django_localemiddleware_process_request.called + + fake_response = mock.Mock(name="fake response") + middleware.process_response(fake_request, fake_response) + assert not mock_django_localemiddleware_process_response.called diff --git a/bedrock/base/tests/test_urlresolvers.py b/bedrock/base/tests/test_urlresolvers.py deleted file mode 100644 index abdb49d3e3a..00000000000 --- a/bedrock/base/tests/test_urlresolvers.py +++ /dev/null @@ -1,164 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. - -from unittest.mock import Mock, patch - -from django.test import TestCase -from django.test.client import RequestFactory -from django.test.utils import override_settings -from django.urls import path - -import pytest - -from bedrock.base.urlresolvers import Prefixer, find_supported, reverse, split_path - - -@pytest.mark.parametrize( - "path, result", - [ - # Basic - ("en-US/some/action", ("en-US", "some/action")), - # First slash doesn't matter - ("/en-US/some/action", ("en-US", "some/action")), - # Nor does capitalization - ("En-uS/some/action", ("en-US", "some/action")), - # Unsupported languages return a blank language - ("unsupported/some/action", ("", "unsupported/some/action")), - ], -) -def test_split_path(path, result): - res = split_path(path) - assert res == result - - -# Test urlpatterns -urlpatterns = [path("test/", lambda r: None, name="test.view")] - - -class FakePrefixer: - def __init__(self, fix): - self.fix = fix - - -@patch("bedrock.base.urlresolvers.get_url_prefix") -@override_settings(ROOT_URLCONF="bedrock.base.tests.test_urlresolvers") -class TestReverse(TestCase): - def test_unicode_url(self, get_url_prefix): - # If the prefixer returns a unicode URL it should be escaped and cast - # as a str object. - get_url_prefix.return_value = FakePrefixer(lambda p: f"/Françoi{p}") - result = reverse("test.view") - - # Ensure that UTF-8 characters are escaped properly. - self.assertEqual(result, "/Fran%C3%A7oi/test/") - self.assertEqual(type(result), str) - - -class TestPrefixer(TestCase): - def setUp(self): - self.factory = RequestFactory() - - @override_settings(LANGUAGE_CODE="en-US") - def test_get_language_default_language_code(self): - """ - Should return default set by settings.LANGUAGE_CODE if no 'lang' - url parameter and no Accept-Language header - """ - request = self.factory.get("/") - self.assertFalse("lang" in request.GET) - self.assertFalse(request.headers.get("Accept-Language")) - prefixer = Prefixer(request) - assert prefixer.get_language() == "en-US" - - def test_get_language_returns_best(self): - """ - Should pass Accept-Language header value to get_best_language - and return result - """ - request = self.factory.get("/") - request.META["HTTP_ACCEPT_LANGUAGE"] = "de, es" - prefixer = Prefixer(request) - prefixer.get_best_language = Mock(return_value="de") - assert prefixer.get_language() == "de" - prefixer.get_best_language.assert_called_once_with("de, es") - - @override_settings(LANGUAGE_CODE="en-US") - def test_get_language_no_best(self): - """ - Should return default set by settings.LANGUAGE_CODE if - get_best_language return value is None - """ - request = self.factory.get("/") - request.META["HTTP_ACCEPT_LANGUAGE"] = "de, es" - prefixer = Prefixer(request) - prefixer.get_best_language = Mock(return_value=None) - assert prefixer.get_language() == "en-US" - prefixer.get_best_language.assert_called_once_with("de, es") - - @override_settings(LANGUAGE_URL_MAP={"en-us": "en-US", "de": "de"}) - def test_get_best_language_exact_match(self): - """ - Should return exact match if it is in settings.LANGUAGE_URL_MAP - """ - request = self.factory.get("/") - prefixer = Prefixer(request) - assert prefixer.get_best_language("de, es") == "de" - - @override_settings(LANGUAGE_URL_MAP={"en-gb": "en-GB", "en-us": "en-US", "es-ar": "es-AR"}, CANONICAL_LOCALES={"es": "es-ES", "en": "en-US"}) - def test_get_best_language_prefix_match(self): - """ - Should return a language with a matching prefix from - settings.LANGUAGE_URL_MAP + settings.CANONICAL_LOCALES if it exists but - no exact match does - """ - request = self.factory.get("/") - prefixer = Prefixer(request) - assert prefixer.get_best_language("en") == "en-US" - assert prefixer.get_best_language("en-CA") == "en-US" - assert prefixer.get_best_language("en-GB") == "en-GB" - assert prefixer.get_best_language("en-US") == "en-US" - assert prefixer.get_best_language("es") == "es-ES" - assert prefixer.get_best_language("es-AR") == "es-AR" - assert prefixer.get_best_language("es-CL") == "es-ES" - assert prefixer.get_best_language("es-MX") == "es-ES" - - @override_settings(LANGUAGE_URL_MAP={"en-us": "en-US"}) - def test_get_best_language_no_match(self): - """ - Should return None if there is no exact match or matching - prefix - """ - request = self.factory.get("/") - prefixer = Prefixer(request) - assert prefixer.get_best_language("de") is None - - @override_settings(LANGUAGE_URL_MAP={"en-ar": "en-AR", "en-gb": "en-GB", "en-us": "en-US"}, CANONICAL_LOCALES={"en": "en-US"}) - def test_prefixer_with_non_supported_locale(self): - """ - Should set prefixer.locale to a supported locale that repects CANONICAL_LOCALES - when given a URL with a non-supported locale. - """ - request = self.factory.get("/en-CA/") - prefixer = Prefixer(request) - assert prefixer.locale == "en-US" - - -@override_settings(LANGUAGE_URL_MAP={"es-ar": "es-AR", "en-gb": "en-GB", "es-us": "es-US"}, CANONICAL_LOCALES={"es": "es-ES", "en": "en-US"}) -class TestFindSupported(TestCase): - def test_find_supported(self): - assert find_supported("en-CA") == "en-US" - assert find_supported("en-US") == "en-US" - assert find_supported("en-GB") == "en-GB" - assert find_supported("en") == "en-US" - assert find_supported("es-MX") == "es-ES" - assert find_supported("es-AR") == "es-AR" - assert find_supported("es") == "es-ES" - - def test_find_supported_none(self): - """ - Should return None if it can't find any supported locale. - """ - assert find_supported("de") is None - assert find_supported("fr") is None - assert find_supported("dude") is None diff --git a/bedrock/base/tests/test_urls.py b/bedrock/base/tests/test_urls.py new file mode 100644 index 00000000000..d70e86e8b61 --- /dev/null +++ b/bedrock/base/tests/test_urls.py @@ -0,0 +1,44 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +import pytest + +pytestmark = pytest.mark.django_db + + +def _test( + url, + client, + follow=False, + expected_status=200, + expected_content=None, +): + resp = client.get(url, follow=follow) + assert resp.status_code == expected_status + if expected_content is not None: + assert expected_content in str(resp.content) + + +def test_healthz(client): + _test( + url="/healthz/", + client=client, + expected_content="pong", + ) + + +def test_readiness(client): + _test( + url="/readiness/", + client=client, + ) + + +def test_healthz_cron(client): + _test( + url="/healthz-cron/", + client=client, + expected_content="Time Since Last Cron Task Runs", + expected_status=500, # because an unsynced DB returns a 500, but with the correct HTML + ) diff --git a/bedrock/base/urlresolvers.py b/bedrock/base/urlresolvers.py index e31fb2a5a39..2652ebc4b96 100644 --- a/bedrock/base/urlresolvers.py +++ b/bedrock/base/urlresolvers.py @@ -2,132 +2,30 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. -from threading import local +import contextlib -from django.conf import settings from django.urls import reverse as django_reverse -from django.utils.encoding import iri_to_uri -from django.utils.functional import lazy -from django.utils.translation.trans_real import parse_accept_lang_header +from django.utils import translation -# Thread-local storage for URL prefixes. Access with (get|set)_url_prefix. -_local = local() - -def set_url_prefix(prefix): - """Set the ``prefix`` for the current thread.""" - _local.prefix = prefix - - -def get_url_prefix(): - """Get the prefix for the current thread, or None.""" - return getattr(_local, "prefix", None) - - -def reverse(viewname, urlconf=None, args=None, kwargs=None): - """ - Wraps Django's reverse to prepend the correct locale. - - Note: This currently doesn't support passing `current_app`, but we're not - using that in bedrock. If we need it in the future this is the place to add - it. - - """ - prefixer = get_url_prefix() - - url = django_reverse(viewname, urlconf, args, kwargs) - if prefixer: - url = prefixer.fix(url) - - # Ensure any unicode characters in the URL are escaped. - return iri_to_uri(url) - - -reverse_lazy = lazy(reverse, str) - - -def _get_language_map(): +def reverse( + viewname, + urlconf=None, + args=None, + kwargs=None, + current_app=None, + locale=None, +): """ - Return a complete dict of language -> URL mappings, including the canonical - short locale maps (e.g. es -> es-ES and en -> en-US). - :return: dict + Thin wrapper around Django's reverse that allows you to force the locale to something + other than the currently activated locale. Does NOT do prefixing - we use i18n_patterns + for that """ - LUM = settings.LANGUAGE_URL_MAP - langs = dict(list(LUM.items()) + list(settings.CANONICAL_LOCALES.items())) - # Add missing short locales to the list. This will automatically map - # en to en-GB (not en-US), es to es-AR (not es-ES), etc. in alphabetical - # order. To override this behavior, explicitly define a preferred locale - # map with the CANONICAL_LOCALES setting. - langs.update((k.split("-")[0], v) for k, v in LUM.items() if k.split("-")[0] not in langs) - return langs - - -# lazy for easier testing mostly -FULL_LANGUAGE_MAP = lazy(_get_language_map, dict)() - - -def find_supported(lang): - lang = lang.lower() - if lang in FULL_LANGUAGE_MAP: - return FULL_LANGUAGE_MAP[lang] - pre = lang.split("-")[0] - if pre in FULL_LANGUAGE_MAP: - return FULL_LANGUAGE_MAP[pre] - - -def split_path(path_): - """ - Split the requested path into (locale, path). - - locale will be empty if it isn't found. - """ - path = path_.lstrip("/") - - # Use partition instead of split since it always returns 3 parts - first, _, rest = path.partition("/") - - supported = find_supported(first) - if supported: - return supported, rest - else: - return "", path - - -class Prefixer: - def __init__(self, request): - self.request = request - self.locale, self.shortened_path = split_path(request.path_info) - - def get_language(self): - """ - Return a locale code we support on the site using the - user's Accept-Language header to determine which is best. This - mostly follows the RFCs but read bug 439568 for details. - """ - accept_lang = self.request.headers.get("Accept-Language") - if accept_lang: - best = self.get_best_language(accept_lang) - if best: - return best - - return settings.LANGUAGE_CODE - - def get_best_language(self, accept_lang): - """Given an Accept-Language header, return the best-matching language.""" - ranked = parse_accept_lang_header(accept_lang) - for lang, _ in ranked: - supported = find_supported(lang) - if supported: - return supported - - def fix(self, path): - url_parts = [self.request.META["SCRIPT_NAME"]] - - prefix = path.lstrip("/").partition("/")[0] - if prefix not in settings.SUPPORTED_NONLOCALES or path not in settings.SUPPORTED_LOCALE_IGNORE: - locale = self.locale if self.locale else self.get_language() - url_parts.append(locale) - - url_parts.append(path.lstrip("/")) - - return "/".join(url_parts) + with translation.override(locale) if locale else contextlib.nullcontext(): + return django_reverse( + viewname, + urlconf=urlconf, + args=args, + kwargs=kwargs, + current_app=current_app, + ) diff --git a/bedrock/careers/redirects.py b/bedrock/careers/redirects.py new file mode 100644 index 00000000000..a0f31fc8866 --- /dev/null +++ b/bedrock/careers/redirects.py @@ -0,0 +1,10 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +from bedrock.redirects.util import redirect + +redirectpatterns = ( + # kept seprarate from urls.py + redirect(r"^careers/internships/$", "careers.home", name="careers.internships", permanent=False, locale_prefix=False), +) diff --git a/bedrock/careers/urls.py b/bedrock/careers/urls.py index ca93f5742aa..b32730630fa 100644 --- a/bedrock/careers/urls.py +++ b/bedrock/careers/urls.py @@ -6,7 +6,6 @@ from django.urls import path, re_path from bedrock.mozorg.util import page -from bedrock.redirects.util import redirect from . import views from .feeds import LatestPositionsFeed @@ -21,7 +20,6 @@ path("diversity/", views.DiversityView.as_view(), name="careers.diversity"), path("teams/", views.TeamsView.as_view(), name="careers.teams"), path("locations/", views.LocationsView.as_view(), name="careers.locations"), - redirect(r"^internships/$", "careers.home", name="careers.internships", permanent=False, locale_prefix=False), ] diff --git a/bedrock/contentcards/tests/test_models.py b/bedrock/contentcards/tests/test_models.py index 5348acd2a86..0f2a5debc4e 100644 --- a/bedrock/contentcards/tests/test_models.py +++ b/bedrock/contentcards/tests/test_models.py @@ -56,7 +56,7 @@ def test_card_data(self): self.assertEqual(card1.page_name, "home") self.assertEqual(card1.card_name, "card_1") self.assertEqual(card1.locale, "en-US") - with self.activate("de"): + with self.activate_locale("de"): card_data = card1.card_data self.assertDictEqual( diff --git a/bedrock/firefox/tests/test_base.py b/bedrock/firefox/tests/test_base.py index 2c5dfb9f1bb..0bb454fdf4e 100644 --- a/bedrock/firefox/tests/test_base.py +++ b/bedrock/firefox/tests/test_base.py @@ -30,7 +30,7 @@ def setUp(self): self.patcher = patch.dict(jinja_env.globals, download_firefox=self.button_mock) self.patcher.start() self.view_name = "firefox.installer-help" - with self.activate("en-US"): + with self.activate_locale("en-US"): self.url = reverse(self.view_name) def tearDown(self): diff --git a/bedrock/foundation/redirects.py b/bedrock/foundation/redirects.py index b33cbedba42..00a6f6ddb9c 100644 --- a/bedrock/foundation/redirects.py +++ b/bedrock/foundation/redirects.py @@ -16,4 +16,6 @@ redirect(r"^foundation/documents/domain-name-license\.pdf$", "/foundation/trademarks/policy/"), redirect(r"^foundation/trademarks/poweredby/faq/?$", "/foundation/trademarks/policy/"), redirect(r"^foundation/trademarks/l10n-website-policy/?$", "/foundation/trademarks/policy/"), + # Issue 9727 + redirect(r"^foundation/annualreport(/2022)?/?$", "https://stateof.mozilla.org/", name="foundation.annualreport", locale_prefix=False), ) diff --git a/bedrock/foundation/urls.py b/bedrock/foundation/urls.py index 594f4dcb669..810f1e0d19a 100644 --- a/bedrock/foundation/urls.py +++ b/bedrock/foundation/urls.py @@ -3,11 +3,8 @@ # file, You can obtain one at https://mozilla.org/MPL/2.0/. from bedrock.mozorg.util import page -from bedrock.redirects.util import redirect urlpatterns = ( - # Issue 9727 - redirect(r"^annualreport(/2022)?/?$", "https://stateof.mozilla.org/", name="foundation.annualreport", locale_prefix=False), # Older annual report financial faqs - these are linked from blog posts # was e.g.: http://www.mozilla.org/foundation/documents/mozilla-2008-financial-faq.html page("documents/mozilla-2006-financial-faq/", "foundation/documents/mozilla-2006-financial-faq.html"), diff --git a/bedrock/legal/tests/test_forms.py b/bedrock/legal/tests/test_forms.py index 1222da6a653..69420e460b3 100644 --- a/bedrock/legal/tests/test_forms.py +++ b/bedrock/legal/tests/test_forms.py @@ -20,7 +20,7 @@ class TestFraudReport(TestCase): def setUp(self): self.factory = RequestFactory() - with self.activate("en-US"): + with self.activate_locale("en-US"): self.url = reverse("legal.fraud-report") self.data = { diff --git a/bedrock/mozorg/nonlocale_urls.py b/bedrock/mozorg/nonlocale_urls.py new file mode 100644 index 00000000000..c5feee47898 --- /dev/null +++ b/bedrock/mozorg/nonlocale_urls.py @@ -0,0 +1,39 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +"""URL paths which must be prefixed with a language code. + +These are included in the main URLConf via the regular `patterns` function, +which will mean they are NOT prefixed with a language code. + +Note that these are derived from the list of SUPPORTED_NONLOCALES and +SUPPORTED_LOCALE_IGNORE that were used in our former i18n machinery, and +which (currently) remain in use, so these two sources need to be kept in +sync with the urlpatterns below + +Also, redirects from mozorg.urls were moved into here, so that they +don't get miss a lookup if they lack a locale code at the start of their path +""" + +from django.urls import path + +from . import views +from .util import page + +urlpatterns = ( + path("credits/", views.credits_view, name="mozorg.credits"), + page("credits/faq/", "mozorg/credits-faq.html"), + path("robots.txt", views.Robots.as_view(), name="robots.txt"), + path(".well-known/security.txt", views.SecurityDotTxt.as_view(), name="security.txt"), + # namespaces + path("2004/em-rdf", views.namespaces, {"namespace": "em-rdf"}), + path("2005/app-update", views.namespaces, {"namespace": "update"}), + path("2006/addons-blocklist", views.namespaces, {"namespace": "addons-bl"}), + path("2006/browser/search/", views.namespaces, {"namespace": "mozsearch"}), + path("keymaster/gatekeeper/there.is.only.xul", views.namespaces, {"namespace": "xul"}), + path("microsummaries/0.1", views.namespaces, {"namespace": "microsummaries"}), + path("projects/xforms/2005/type", views.namespaces, {"namespace": "xforms-type"}), + path("xbl", views.namespaces, {"namespace": "xbl"}), + path("locales/", views.locales, name="mozorg.locales"), +) diff --git a/bedrock/mozorg/redirects.py b/bedrock/mozorg/redirects.py index 40e99fcc337..5144d92d9c1 100644 --- a/bedrock/mozorg/redirects.py +++ b/bedrock/mozorg/redirects.py @@ -573,4 +573,8 @@ def decider(request, **kwargs): # Issue 14351 redirect(r"^research/?$", "https://foundation.mozilla.org/research/"), redirect(r"^research/cc/?$", "https://foundation.mozilla.org/research/library/?topics=187"), + # redirects that don't need a lang code prefix + redirect(r"^contact/spaces/paris/$", "mozorg.contact.spaces.spaces-landing", locale_prefix=False), + redirect(r"^diversity/$", "mozorg.diversity.2022.index", name="diversity", locale_prefix=False), + redirect(r"^webvision/?$", "mozorg.about.webvision.summary", name="webvision", locale_prefix=True, prepend_locale=False), ) diff --git a/bedrock/mozorg/tests/__init__.py b/bedrock/mozorg/tests/__init__.py index de4cade95dd..9c77586ca84 100644 --- a/bedrock/mozorg/tests/__init__.py +++ b/bedrock/mozorg/tests/__init__.py @@ -4,9 +4,8 @@ from contextlib import contextmanager -from django.test import RequestFactory, TestCase as DjTestCase +from django.test import TestCase as DjTestCase -from bedrock.base.urlresolvers import Prefixer, get_url_prefix, set_url_prefix from lib.l10n_utils import translation @@ -14,13 +13,8 @@ class TestCase(DjTestCase): """Base class for Bedrock test cases.""" @contextmanager - def activate(self, locale): + def activate_locale(self, locale): """Context manager that temporarily activates a locale.""" - old_prefix = get_url_prefix() - old_locale = translation.get_language() - rf = RequestFactory() - set_url_prefix(Prefixer(rf.get(f"/{locale}/"))) translation.activate(locale) yield - set_url_prefix(old_prefix) - translation.activate(old_locale) + translation.deactivate() diff --git a/bedrock/mozorg/tests/contentful_test_urlconf.py b/bedrock/mozorg/tests/contentful_test_urlconf.py new file mode 100644 index 00000000000..1981995062d --- /dev/null +++ b/bedrock/mozorg/tests/contentful_test_urlconf.py @@ -0,0 +1,20 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +# A test-friendly version of dev_urls that includes them as i18n_pattern-ed URLs +# like they would be in production + +# URLS only available with settings.DEV is enabled +from django.urls import path + +from bedrock.base.i18n import bedrock_i18n_patterns +from bedrock.mozorg import views + +urlpatterns = bedrock_i18n_patterns( + path( + "contentful-preview//", + views.ContentfulPreviewView.as_view(), + name="contentful.preview", + ), +) diff --git a/bedrock/mozorg/tests/test_views.py b/bedrock/mozorg/tests/test_views.py index 0c14aa146cb..b8d52353b26 100644 --- a/bedrock/mozorg/tests/test_views.py +++ b/bedrock/mozorg/tests/test_views.py @@ -23,13 +23,13 @@ class TestViews(TestCase): @patch.dict(os.environ, FUNNELCAKE_5_LOCALES="en-US", FUNNELCAKE_5_PLATFORMS="win") def test_download_button_funnelcake(self): """The download button should have the funnelcake ID.""" - with self.activate("en-US"): + with self.activate_locale("en-US"): resp = self.client.get(reverse("firefox.download.thanks"), {"f": "5"}) assert b"product=firefox-stub-f5&" in resp.content def test_download_button_bad_funnelcake(self): """The download button should not have a bad funnelcake ID.""" - with self.activate("en-US"): + with self.activate_locale("en-US"): resp = self.client.get(reverse("firefox.download.thanks"), {"f": "5dude"}) assert b"product=firefox-stub&" in resp.content assert b"product=firefox-stub-f5dude&" not in resp.content @@ -157,7 +157,7 @@ def test_no_post(self, render_mock): # viable when the tests were being run in CI or via Makefile, so # instead we're explicitly including the urlconf that is loaded # when settings.DEV is True -@pytest.mark.urls("bedrock.mozorg.dev_urls") +@pytest.mark.urls("bedrock.mozorg.tests.contentful_test_urlconf") def test_contentful_preview_view( contentfulpage_mock, render_mock, @@ -191,8 +191,27 @@ def test_missing_doc_is_404(self): self.assertEqual(resp.status_code, 404) -class TestWebvisionRedirect(TestCase): - def test_redirect(self): +class TestMozorgRedirects(TestCase): + """Test redirects that are in bedrock.mozorg.nonlocale_urls""" + + def test_projects_calendar_redirect(self): + resp = self.client.get("/projects/calendar/", follow=True) + # Note that we now 301 straight to the lang-prefixed version of the destination of the redirect + self.assertEqual(resp.redirect_chain[0], ("https://www.thunderbird.net/calendar/", 301)) + + def test_paris_office_redirect(self): + resp = self.client.get("/contact/spaces/paris/", follow=True, headers={"accept-language": "en"}) + # Note that we now 301 straight to the lang-prefixed version of the destination of the redirect + self.assertEqual(resp.redirect_chain[0], ("/contact/spaces/", 301)) + self.assertEqual(resp.redirect_chain[1], ("/en-US/contact/spaces/", 302)) + + def test_diversity_redirect(self): + resp = self.client.get("/diversity/", follow=True, headers={"accept-language": "en"}) + # Note that we now 301 straight to the lang-prefixed version of the destination of the redirect + self.assertEqual(resp.redirect_chain[0], ("/diversity/2022/", 301)) + self.assertEqual(resp.redirect_chain[1], ("/en-US/diversity/2022/", 302)) + + def test_webvision_redirect(self): # Since the webvision URL requires a WebvisionDoc to exist, we test this # here instead of in the redirects tests. WebvisionDoc.objects.create(name="summary", content="") @@ -205,7 +224,7 @@ class TestMiecoEmail(TestCase): def setUp(self): self.factory = RequestFactory() self.view = views.mieco_email_form - with self.activate("en-US"): + with self.activate_locale("en-US"): self.url = reverse("mozorg.email_mieco") self.data = { diff --git a/bedrock/mozorg/urls.py b/bedrock/mozorg/urls.py index 2679305de2d..e3b4b9e9a41 100644 --- a/bedrock/mozorg/urls.py +++ b/bedrock/mozorg/urls.py @@ -2,16 +2,24 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. +"""URL paths which must be prefixed with a language code. + +These are included in the main URLConf via i18n_patterns, +which will take care of the prefixing an appropriate language code + +IMPORTANT: if a redirect is needed for a non-localed URL (eg /webvision/) +it must go in mozorg.nonlocale_urls, not this file + +""" + from django.conf import settings from django.urls import path -from bedrock.redirects.util import redirect - from . import views from .dev_urls import urlpatterns as dev_only_urlpatterns from .util import page -urlpatterns = ( +urlpatterns = [ path("", views.HomeView.as_view(), name="mozorg.home"), page("about/", "mozorg/about/index.html", ftl_files=["mozorg/about"]), page("about/manifesto/", "mozorg/about/manifesto.html", ftl_files=["mozorg/about/manifesto"]), @@ -28,12 +36,7 @@ page("about/policy/patents/guide/", "mozorg/about/policy/patents/guide.html"), page("about/this-site/", "mozorg/about/this-site.html", ftl_files=["mozorg/about/this-site.ftl"]), page("book/", "mozorg/book.html"), - path("credits/", views.credits_view, name="mozorg.credits"), - page("credits/faq/", "mozorg/credits-faq.html"), page("about/history/", "mozorg/about/history.html", ftl_files=["mozorg/about/history"]), - # Bug 981063, catch all for old calendar urls. - # must be here to avoid overriding the above - redirect(r"^projects/calendar/", "https://www.thunderbird.net/calendar/", locale_prefix=False), page("mission/", "mozorg/mission.html", ftl_files=["mozorg/mission"]), path("about/forums/", views.forums_view, name="mozorg.about.forums.forums"), page("about/forums/etiquette/", "mozorg/about/forums/etiquette.html"), @@ -103,20 +106,7 @@ page("moss/foundational-technology/", "mozorg/moss/foundational-technology.html"), page("moss/mission-partners/", "mozorg/moss/mission-partners.html"), page("moss/secure-open-source/", "mozorg/moss/secure-open-source.html"), - path("robots.txt", views.Robots.as_view(), name="robots.txt"), - path(".well-known/security.txt", views.SecurityDotTxt.as_view(), name="security.txt"), - # namespaces - path("2004/em-rdf", views.namespaces, {"namespace": "em-rdf"}), - path("2005/app-update", views.namespaces, {"namespace": "update"}), - path("2006/addons-blocklist", views.namespaces, {"namespace": "addons-bl"}), - path("2006/browser/search/", views.namespaces, {"namespace": "mozsearch"}), - path("keymaster/gatekeeper/there.is.only.xul", views.namespaces, {"namespace": "xul"}), - path("microsummaries/0.1", views.namespaces, {"namespace": "microsummaries"}), - path("projects/xforms/2005/type", views.namespaces, {"namespace": "xforms-type"}), - path("xbl", views.namespaces, {"namespace": "xbl"}), - path("locales/", views.locales, name="mozorg.locales"), - # Diversity and inclusion redirect - redirect(r"^diversity/$", "mozorg.diversity.2022.index", name="diversity", locale_prefix=False), + # Diversity and inclusion redirect has moved to mozorg.nonlocale_urls # Main paths page("diversity/2021/", "mozorg/diversity/2021/index.html"), page("diversity/2021/mozilla-foundation-data/", "mozorg/diversity/2021/mofo-data.html"), @@ -133,7 +123,7 @@ page("sustainability/carbon-neutral/", "mozorg/sustainability/carbon-neutral.html"), page("sustainability/emissions-data/", "mozorg/sustainability/emissions-data.html"), # Webvision - redirect(r"^webvision/?$", "mozorg.about.webvision.summary", name="webvision", locale_prefix=False), + # there's also a redirect in mozorg.nonlocale_urls path( "about/webvision/", views.WebvisionDocView.as_view(template_name="mozorg/about/webvision/summary.html", doc_name="summary"), @@ -156,7 +146,7 @@ path("antiharassment-tool/", views.anti_harassment_tool_view, name="mozorg.antiharassment-tool"), page("rise25/nominate/", "mozorg/rise25/landing.html"), page("rise25/thanks/", "mozorg/rise25/thanks.html"), -) +] if settings.DEV: urlpatterns += dev_only_urlpatterns diff --git a/bedrock/newsletter/tests/test_footer_form.py b/bedrock/newsletter/tests/test_footer_form.py index c0f6c2dc416..9406077b460 100644 --- a/bedrock/newsletter/tests/test_footer_form.py +++ b/bedrock/newsletter/tests/test_footer_form.py @@ -22,18 +22,18 @@ def test_country_selected(self): """ The correct country for the locale should be initially selected. """ - with self.activate("en-US"): + with self.activate_locale("en-US"): resp = self.client.get(reverse(self.view_name)) doc = pq(resp.content) assert doc('#id_country option[selected="selected"]').val() == "us" # no country in locale, no country selected - with self.activate("fr"): + with self.activate_locale("fr"): resp = self.client.get(reverse(self.view_name)) doc = pq(resp.content) assert doc('#id_country option[selected="selected"]').val() == "" - with self.activate("pt-BR"): + with self.activate_locale("pt-BR"): resp = self.client.get(reverse(self.view_name)) doc = pq(resp.content) assert doc('#id_country option[selected="selected"]').val() == "br" @@ -44,19 +44,19 @@ def test_language_selected(self): The correct language for the locale should be initially selected or 'en' if it's not an option. """ - with self.activate("fr"): + with self.activate_locale("fr"): resp = self.client.get(reverse(self.view_name)) doc = pq(resp.content) assert doc('#id_lang option[selected="selected"]').val() == "fr" # with hyphenated regional locale, should have only lang - with self.activate("pt-BR"): + with self.activate_locale("pt-BR"): resp = self.client.get(reverse(self.view_name)) doc = pq(resp.content) assert doc('#id_lang option[selected="selected"]').val() == "pt" # not supported. should default to '' - with self.activate("af"): + with self.activate_locale("af"): resp = self.client.get(reverse(self.view_name)) doc = pq(resp.content) assert doc('#id_lang option[selected="selected"]').val() == "" diff --git a/bedrock/press/tests/test_base.py b/bedrock/press/tests/test_base.py index e13c891279b..aec2ff320b3 100644 --- a/bedrock/press/tests/test_base.py +++ b/bedrock/press/tests/test_base.py @@ -18,7 +18,7 @@ class TestPressInquiry(TestCase): def setUp(self): self.factory = RequestFactory() self.view = press_views.PressInquiryView.as_view() - with self.activate("en-US"): + with self.activate_locale("en-US"): self.url = reverse("press.press-inquiry") self.data = { @@ -147,7 +147,7 @@ class TestSpeakerRequest(TestCase): def setUp(self): self.factory = RequestFactory() self.view = press_views.SpeakerRequestView.as_view() - with self.activate("en-US"): + with self.activate_locale("en-US"): self.url = reverse("press.speaker-request") self.data = { diff --git a/bedrock/products/tests/test_helper_misc.py b/bedrock/products/tests/test_helper_misc.py index 01cae383681..ff957779811 100644 --- a/bedrock/products/tests/test_helper_misc.py +++ b/bedrock/products/tests/test_helper_misc.py @@ -1644,7 +1644,7 @@ def _render( optional_attributes=None, optional_parameters=None, ): - with self.activate("en-US"): + with self.activate_locale("en-US"): req = self.rf.get("/") req.locale = "en-US" diff --git a/bedrock/products/tests/test_views.py b/bedrock/products/tests/test_views.py index 271b8b181bc..feafa401ded 100644 --- a/bedrock/products/tests/test_views.py +++ b/bedrock/products/tests/test_views.py @@ -381,7 +381,9 @@ def _request( expected_status=200, ): querystring = querystring if querystring else {} - resp = self.client.get(reverse("products.vpn.resource-center.landing"), querystring, headers={"accept-language": locale}, follow=True) + with self.activate_locale(locale): + dest = reverse("products.vpn.resource-center.landing") + resp = self.client.get(dest, querystring, headers={"accept-language": locale}, follow=True) assert resp.status_code == expected_status return resp @@ -392,7 +394,7 @@ def test_simple_get__for_valid_locale_with_enough_content(self, render_mock): self.assertEqual(passed_context["active_locales"], ["en-US", "fr", "ja"]) self.assertEqual( passed_context["category_list"], - [{"name": "Test Category", "url": "/products/vpn/resource-center/?category=Test+Category"}], + [{"name": "Test Category", "url": "/fr/products/vpn/resource-center/?category=Test+Category"}], ) self.assertEqual(passed_context["selected_category"], "") self.assertEqual( @@ -418,10 +420,6 @@ def test_simple_get__for_unavailable_locale(self, render_mock): resp = self._request(locale="sk") self.assertEqual(resp.redirect_chain, [("/en-US/products/vpn/resource-center/", 302)]) - def test_simple_get__for_invalid_locale(self, render_mock): - resp = self._request(locale="xx") - self.assertEqual(resp.redirect_chain, [("/en-US/products/vpn/resource-center/", 302)]) - @override_settings(CONTENTFUL_LOCALE_SUFFICIENT_CONTENT_PERCENTAGE=95) def test_simple_get__for_valid_locale_WITHOUT_enough_content(self, render_mock): # ie, if you go to the VRC for a language we're working on but which @@ -449,8 +447,8 @@ def test_category_filtering(self, render_mock): self.assertEqual( passed_context["category_list"], [ - {"name": "Test Category", "url": "/products/vpn/resource-center/?category=Test+Category"}, - {"name": "other category", "url": "/products/vpn/resource-center/?category=other+category"}, + {"name": "Test Category", "url": "/en-US/products/vpn/resource-center/?category=Test+Category"}, + {"name": "other category", "url": "/en-US/products/vpn/resource-center/?category=other+category"}, ], ) self.assertEqual(passed_context["selected_category"], "other category") @@ -522,14 +520,7 @@ def test_simple_get(self, render_mock): @patch("bedrock.products.views.l10n_utils.render", return_value=HttpResponse()) def test_simple_get__for_unavailable_locale(self, render_mock): - resp = self.client.get("/products/vpn/resource-center/slug-2/", headers={"accept-language": "de"}) - # Which will 302 as expected - self.assertEqual(resp.headers["location"], "/en-US/products/vpn/resource-center/slug-2/") - render_mock.assert_not_called() - - @patch("bedrock.products.views.l10n_utils.render", return_value=HttpResponse()) - def test_simple_get__for_invalid_locale(self, render_mock): - resp = self.client.get("/products/vpn/resource-center/slug-2/", headers={"accept-language": "xx"}) + resp = self.client.get("/de/products/vpn/resource-center/slug-2/") # Which will 302 as expected self.assertEqual(resp.headers["location"], "/en-US/products/vpn/resource-center/slug-2/") render_mock.assert_not_called() diff --git a/bedrock/redirects/redirects.py b/bedrock/redirects/redirects.py index 3b9aac60d48..156857e77c3 100644 --- a/bedrock/redirects/redirects.py +++ b/bedrock/redirects/redirects.py @@ -910,6 +910,7 @@ redirect(r"^projects/calendar/holidays/$", "https://www.thunderbird.net/calendar/holidays/"), # bug 885799, 952429 redirect(r"^projects/calendar/holidays\.html$", "https://www.thunderbird.net/calendar/holidays/"), + redirect(r"^projects/calendar/", "https://www.thunderbird.net/calendar/", locale_prefix=False), # Bug 981063, catch all for old calendar urls. redirect(r"^products/camino/badges/$", "http://caminobrowser.org/community/promotion/"), redirect(r"^products/camino/features/searchCustomization\.html$", "http://caminobrowser.org/help/"), redirect(r"^products/camino/features/tipsTricks\.html$", "http://caminobrowser.org/help/"), diff --git a/bedrock/redirects/util.py b/bedrock/redirects/util.py index 0553050989a..aa3f1157348 100644 --- a/bedrock/redirects/util.py +++ b/bedrock/redirects/util.py @@ -25,6 +25,7 @@ LOCALE_RE = r"^(?P\w{2,3}(?:-\w{2})?/)?" HTTP_RE = re.compile(r"^https?://", re.IGNORECASE) PROTOCOL_RELATIVE_RE = re.compile(r"^//+") + # redirects registry redirectpatterns = [] @@ -128,6 +129,13 @@ def _view(request, *args, **kwargs): return re_path(pattern, _view) +def _drop_lang_code(path): + first, slash, rest = path.lstrip("/").partition("/") + if first in [x[0] for x in settings.LANGUAGES]: + return slash + rest + return path + + def redirect( pattern, to, @@ -230,6 +238,10 @@ def _view(request, *args, **kwargs): else: try: redirect_url = reverse(to_value, args=to_args, kwargs=to_kwargs) + # reverse() will give us, by default, a redirection + # with settings.LANGAUGE_CODE as the prefixed language. + # We don't want this by default, only if explicitly requested + redirect_url = _drop_lang_code(redirect_url) except NoReverseMatch: # Assume it's a URL redirect_url = to_value diff --git a/bedrock/releasenotes/tests/test_base.py b/bedrock/releasenotes/tests/test_base.py index 94776580a57..5eba282f72d 100644 --- a/bedrock/releasenotes/tests/test_base.py +++ b/bedrock/releasenotes/tests/test_base.py @@ -32,7 +32,7 @@ class TestReleaseViews(TestCase): def setUp(self): ProductRelease.objects.refresh() caches["release-notes"].clear() - self.activate("en-US") + self.activate_locale("en-US") self.factory = RequestFactory() self.request = self.factory.get("/") @@ -370,7 +370,7 @@ def test_get_download_url_android(self): assert link.startswith(store_url % "org.mozilla.fenix") def test_check_url(self): - with self.activate("en-US"): + with self.activate_locale("en-US"): assert views.check_url("Firefox for Android", "45.0") == "https://support.mozilla.org/kb/will-firefox-work-my-mobile-device" assert views.check_url("Firefox for Android", "46.0") == "/en-US/firefox/android/46.0/system-requirements/" assert views.check_url("Firefox for iOS", "1.4") == "/en-US/firefox/ios/1.4/system-requirements/" @@ -405,7 +405,7 @@ def test_relnotes_index_firefox(self, render_mock): firefox_desktop = FirefoxDesktop(json_dir=DATA_PATH) with patch("bedrock.releasenotes.views.firefox_desktop", firefox_desktop): render_mock().render.return_value = HttpResponse("") - with self.activate("en-US"): + with self.activate_locale("en-US"): self.client.get(reverse("firefox.releases.index")) releases = render_mock.call_args[0][2]["releases"] assert len(releases) == len(firefox_desktop.firefox_history_major_releases) @@ -445,7 +445,7 @@ def test_relnotes_index_firefox(self, render_mock): class TestNotesRedirects(TestCase): def _test(self, url_from, url_to): - with self.activate("en-US"): + with self.activate_locale("en-US"): url = "/en-US" + url_from response = self.client.get(url) assert response.status_code == 302 @@ -510,7 +510,7 @@ def test_ios_release_version(self): class TestSysreqRedirect(TestCase): def _test(self, url_from, url_to): - with self.activate("en-US"): + with self.activate_locale("en-US"): url = "/en-US" + url_from response = self.client.get(url) assert response.status_code == 302 diff --git a/bedrock/releasenotes/tests/test_models.py b/bedrock/releasenotes/tests/test_models.py index deaef815245..f1d21f3b082 100644 --- a/bedrock/releasenotes/tests/test_models.py +++ b/bedrock/releasenotes/tests/test_models.py @@ -93,7 +93,7 @@ def test_equivalent_release_for_product_none_match(self): def test_note_fixed_in_release(self): rel = models.get_release("firefox", "55.0a1") note = rel.notes[11] - with self.activate("en-US"): + with self.activate_locale("en-US"): assert note.fixed_in_release.get_absolute_url() == "/en-US/firefox/55.0a1/releasenotes/" def test_field_processors(self): diff --git a/bedrock/settings/__init__.py b/bedrock/settings/__init__.py index 0d6ddc21683..ebcadd0ff04 100644 --- a/bedrock/settings/__init__.py +++ b/bedrock/settings/__init__.py @@ -74,7 +74,7 @@ } FALLBACK_LOCALES = { - # TODO: Drop after confirming no longer used _anywhere_ + # Not needed in Bedrock } PROD_LANGUAGES = [ @@ -102,6 +102,14 @@ DEV_LANGUAGES = PROD_LANGUAGES LANGUAGE_CODE = "en" # Pocket uses `en` not `en-US` + # Pocket mode doesn't need language names available for a lang picker, so we can just do this + # rather than fish around in product_details.languages. This is extra-handy because Pocket Mode + # uses language codes that don't all appear in product_details.languages + LANGUAGES = [(lang, lang) for lang in PROD_LANGUAGES] + + # We don't want any fallback lang support for Pocket mode, so let's override the Bedrock base default + LANGUAGE_URL_MAP_WITH_FALLBACKS = LANGUAGE_URL_MAP + COOKIE_CONSENT_SCRIPT_SRC = "https://cdn.cookielaw.org/scripttemplates/otSDKStub.js" COOKIE_CONSENT_DATA_DOMAIN = "a7ff9c31-9f59-421f-9a8e-49b11a3eb24e-test" if DEV else "a7ff9c31-9f59-421f-9a8e-49b11a3eb24e" diff --git a/bedrock/settings/base.py b/bedrock/settings/base.py index 6a41dcf03e8..3b6714a5c25 100644 --- a/bedrock/settings/base.py +++ b/bedrock/settings/base.py @@ -14,6 +14,7 @@ from os.path import abspath from pathlib import Path +from django.conf.locale import LANG_INFO # we patch this in bedrock.base.apps.BaseAppConfig # noqa: F401 from django.utils.functional import lazy import markus @@ -104,7 +105,7 @@ def data_path(*args): # If you set this to False, Django will make some optimizations so as not # to load the internationalization machinery. -USE_I18N = False +USE_I18N = True USE_TZ = True @@ -218,8 +219,16 @@ def data_path(*args): "Middle East and Africa": ["ach", "af", "ar", "az", "fa", "ff", "gu-IN", "he", "kab", "kn", "skr", "son", "xh"], } + +def _put_default_lang_first(langs, default_lang=LANGUAGE_CODE): + if langs.index(default_lang): + langs.pop(langs.index(default_lang)) + langs.insert(0, default_lang) + return langs + + # Our accepted production locales are the values from the above, plus an exception. -PROD_LANGUAGES = sorted(sum(LOCALES_BY_REGION.values(), []) + ["ja-JP-mac"]) +PROD_LANGUAGES = _put_default_lang_first(sorted(sum(LOCALES_BY_REGION.values(), [])) + ["ja-JP-mac"]) GITHUB_REPO = "https://github.com/mozilla/bedrock" @@ -309,8 +318,8 @@ def get_dev_languages(): return list(PROD_LANGUAGES) -DEV_LANGUAGES = get_dev_languages() -DEV_LANGUAGES.append("en-US") +DEV_LANGUAGES = _put_default_lang_first(get_dev_languages()) + # Map short locale names to long, preferred locale names. This # will be used in urlresolvers to determine the @@ -318,14 +327,14 @@ def get_dev_languages(): CANONICAL_LOCALES = { "en": "en-US", "es": "es-ES", - "ja-jp-mac": "ja", + "ja-JP-mac": "ja", "no": "nb-NO", "pt": "pt-BR", "sv": "sv-SE", - "zh-hant": "zh-TW", # Bug 1263193 - "zh-hant-tw": "zh-TW", # Bug 1263193 - "zh-hk": "zh-TW", # Bug 1338072 - "zh-hant-hk": "zh-TW", # Bug 1338072 + "zh-Hant": "zh-TW", # Bug 1263193 + "zh-Hant-TW": "zh-TW", # Bug 1263193 + "zh-HK": "zh-TW", # Bug 1338072 + "zh-Hant-HK": "zh-TW", # Bug 1338072 } # Unlocalized pages are usually redirected to the English (en-US) equivalent, @@ -362,7 +371,7 @@ def lazy_lang_url_map(): from django.conf import settings langs = settings.DEV_LANGUAGES if settings.DEV else settings.PROD_LANGUAGES - return {i.lower(): i for i in langs} + return {i: i for i in langs} def lazy_langs(): @@ -370,7 +379,12 @@ def lazy_langs(): Override Django's built-in with our native names Note: Unlike the above lazy methods, this one returns a list of tuples to - match Django's expectations. + match Django's expectations, BUT it has mixed-case lang codes, rather + than core Django's all-lowercase codes. This is because we work with + mixed-case codes and we'll need them in LANGUAGES when we use + Wagtail-Localize, as that has to be configured with a subset of LANGUAGES + + :return: list of tuples """ from django.conf import settings @@ -378,11 +392,32 @@ def lazy_langs(): from product_details import product_details langs = DEV_LANGUAGES if settings.DEV else settings.PROD_LANGUAGES - return [(lang.lower(), product_details.languages[lang]["native"]) for lang in langs if lang in product_details.languages] + return [(lang, product_details.languages[lang]["native"]) for lang in langs if lang in product_details.languages] + + +def language_url_map_with_fallbacks(): + """ + Return a complete dict of language -> URL mappings, including the canonical + short locale maps (e.g. es -> es-ES and en -> en-US), as well as fallback + mappings for language variations we don't support directly but via a nearest + match + + :return: dict + """ + lum = lazy_lang_url_map() + langs = dict(list(lum.items()) + list(CANONICAL_LOCALES.items())) + # Add missing short locales to the list. By default, this will automatically + # map en to en-GB (not en-US), etc. in alphabetical order. + # To override this behavior, explicitly define a preferred locale + # map with the CANONICAL_LOCALES setting. + langs.update((k.split("-")[0], v) for k, v in lum.items() if k.split("-")[0] not in langs) + + return langs LANG_GROUPS = lazy(lazy_lang_group, dict)() LANGUAGE_URL_MAP = lazy(lazy_lang_url_map, dict)() +LANGUAGE_URL_MAP_WITH_FALLBACKS = lazy(language_url_map_with_fallbacks, dict)() # used in normalize_language LANGUAGES = lazy(lazy_langs, list)() FEED_CACHE = 3900 @@ -398,31 +433,34 @@ def lazy_langs(): # from redirects.urls "media", "static", - "certs", - "images", - "credits", - "robots.txt", - ".well-known", - "telemetry", - "webmaker", - "contributor-data", - "healthz", - "readiness", - "healthz-cron", - "2004", - "2005", - "2006", - "keymaster", - "microsummaries", - "xbl", - "revision.txt", - "locales", - "sitemap_none.xml", + "certs", # Is this still used? + "images", # In redirects only + "contribute.json", # served from root_files I think + "credits", # in mozorg urls + "gameon", # redirect only + "robots.txt", # in mozorg urls + ".well-known", # in mozorg urls + "telemetry", # redirect only + "webmaker", # redirect only + "contributor-data", # Is this still used? + "healthz", # Needed for k8s + "readiness", # Needed for k8s + "healthz-cron", # status dash, in urls/mozorg_mode.py + "2004", # in mozorg urls + "2005", # in mozorg urls + "2006", # in mozorg urls + "keymaster", # in mozorg urls + "microsummaries", # in mozorg urls + "xbl", # in mozorg urls + "revision.txt", # from root_files + "locales", # in mozorg urls ] # Paths that can exist either with or without a locale code in the URL. # Matches the whole URL path -SUPPORTED_LOCALE_IGNORE = ["/sitemap.xml"] - +SUPPORTED_LOCALE_IGNORE = [ + "/sitemap_none.xml", # in sitemap urls + "/sitemap.xml", # in sitemap urls +] # Pages that we don't want to be indexed by search engines. # Only impacts sitemap generator. If you need to disallow indexing of # specific URLs, add them to mozorg/templates/mozorg/robots.txt. @@ -562,11 +600,13 @@ def get_app_name(hostname): "bedrock.mozorg.middleware.HostnameMiddleware", "django.middleware.http.ConditionalGetMiddleware", "corsheaders.middleware.CorsMiddleware", + # VaryNoCacheMiddleware must be above LocaleMiddleware" + # so that it can see the response has a vary on accept-language. "bedrock.mozorg.middleware.VaryNoCacheMiddleware", "bedrock.base.middleware.BasicAuthMiddleware", - # must come before LocaleURLMiddleware - "bedrock.redirects.middleware.RedirectsMiddleware", - "bedrock.base.middleware.LocaleURLMiddleware", + "bedrock.redirects.middleware.RedirectsMiddleware", # must come before BedrockLocaleMiddleware + "bedrock.base.middleware.BedrockLangCodeFixupMiddleware", # must come after RedirectsMiddleware + "bedrock.base.middleware.BedrockLocaleMiddleware", # wraps django.middleware.locale.LocaleMiddleware "bedrock.mozorg.middleware.ClacksOverheadMiddleware", "bedrock.base.middleware.MetricsStatusMiddleware", "bedrock.base.middleware.MetricsViewTimingMiddleware", diff --git a/bedrock/urls/mozorg_mode.py b/bedrock/urls/mozorg_mode.py index 5fd693c16f8..71ffcd07672 100644 --- a/bedrock/urls/mozorg_mode.py +++ b/bedrock/urls/mozorg_mode.py @@ -9,6 +9,7 @@ from watchman import views as watchman_views from bedrock.base import views as base_views +from bedrock.base.i18n import bedrock_i18n_patterns # The default django 404 and 500 handler doesn't run the ContextProcessors, # which breaks the base template page. So we replace them with views that do! @@ -16,8 +17,8 @@ handler404 = "bedrock.base.views.page_not_found_view" locale404 = "lib.l10n_utils.locale_selection" - -urlpatterns = ( +# Paths that should have a locale prefix +urlpatterns = bedrock_i18n_patterns( # Main pages path("foundation/", include("bedrock.foundation.urls")), path("about/legal/", include("bedrock.legal.urls")), @@ -26,23 +27,28 @@ path("products/", include("bedrock.products.urls")), path("security/", include("bedrock.security.urls")), path("", include("bedrock.firefox.urls")), - path("", include("bedrock.mozorg.urls")), + path("", include("bedrock.mozorg.urls")), # these are locale-needing URLs, vs mozorg.nonlocale_urls path("", include("bedrock.newsletter.urls")), - path("", include("bedrock.sitemaps.urls")), path("careers/", include("bedrock.careers.urls")), +) + +# Paths that must not have a locale prefix +urlpatterns += ( + path("", include("bedrock.mozorg.nonlocale_urls")), + path("", include("bedrock.sitemaps.urls")), path("healthz/", watchman_views.ping, name="watchman.ping"), path("readiness/", watchman_views.status, name="watchman.status"), path("healthz-cron/", base_views.cron_health_check), ) if settings.DEV: - urlpatterns += ( + urlpatterns += bedrock_i18n_patterns( # Add /404-locale/ for localizers. path("404-locale/", import_string(locale404)), ) if settings.DEBUG: - urlpatterns += ( + urlpatterns += bedrock_i18n_patterns( path("404/", import_string(handler404)), path("500/", import_string(handler500)), ) diff --git a/bedrock/urls/pocket_mode.py b/bedrock/urls/pocket_mode.py index b930b227d5e..5beefaf3c16 100644 --- a/bedrock/urls/pocket_mode.py +++ b/bedrock/urls/pocket_mode.py @@ -9,22 +9,25 @@ from watchman import views as watchman_views from bedrock.base import views as base_views +from bedrock.base.i18n import bedrock_i18n_patterns # The default django 404 and 500 handler doesn't run the ContextProcessors, # which breaks the base template page. So we replace them with views that do! handler500 = "bedrock.pocket.views.server_error_view" handler404 = "bedrock.pocket.views.page_not_found_view" - -urlpatterns = ( +urlpatterns = bedrock_i18n_patterns( path("", include("bedrock.pocket.urls")), +) + +urlpatterns += ( path("healthz/", watchman_views.ping, name="watchman.ping"), path("readiness/", watchman_views.status, name="watchman.status"), path("healthz-cron/", base_views.cron_health_check), ) if settings.DEBUG: - urlpatterns += ( + urlpatterns += bedrock_i18n_patterns( path("404/", import_string(handler404)), path("500/", import_string(handler500)), ) diff --git a/bin/run-tests.sh b/bin/run-tests.sh index f14671258d1..fa48cc815c0 100755 --- a/bin/run-tests.sh +++ b/bin/run-tests.sh @@ -12,10 +12,10 @@ moz-l10n-lint l10n-pocket/l10n-vendor.toml python manage.py lint_ftl -q python manage.py version python manage.py migrate --noinput -py.test lib bedrock \ +pytest lib bedrock \ --cov-config=.coveragerc \ --cov-report=html \ --cov-report=term-missing \ --cov-report=xml:python_coverage/coverage.xml \ --cov=. -py.test -r a tests/redirects +pytest -r a tests/redirects diff --git a/lib/l10n_utils/__init__.py b/lib/l10n_utils/__init__.py index d5410a74fbb..97715931c01 100644 --- a/lib/l10n_utils/__init__.py +++ b/lib/l10n_utils/__init__.py @@ -14,7 +14,8 @@ from product_details import product_details from bedrock.base import metrics -from bedrock.base.urlresolvers import _get_language_map, split_path +from bedrock.base.i18n import normalize_language, split_path_and_normalize_language +from bedrock.settings.base import language_url_map_with_fallbacks from .fluent import fluent_l10n, ftl_file_is_active, get_active_locales as ftl_active_locales @@ -52,11 +53,16 @@ def render_to_string(template_name, context=None, request=None, using=None, ftl_ return loader.render_to_string(template_name, context, request, using) +def is_root_path_with_no_language_clues(request): + return request.path_info == "/" and not request.headers.get("Accept-Language") + + def redirect_to_best_locale(request, translations): - # Strict only for the root URL. - strict = request.path_info == "/" and request.headers.get("Accept-Language") is None + # Strict only for the root URL when we have no language clues + strict = is_root_path_with_no_language_clues(request) # Note that translations is list of locale strings (eg ["en-GB", "ru", "fr"]) locale = get_best_translation(translations, get_accept_languages(request), strict) + if locale: return redirect_to_locale(request, locale) return locale_selection(request, translations) @@ -64,9 +70,10 @@ def redirect_to_best_locale(request, translations): def redirect_to_locale(request, locale, permanent=False): redirect_class = HttpResponsePermanentRedirect if permanent else HttpResponseRedirect - response = redirect_class("/" + "/".join([locale, split_path(request.get_full_path())[1]])) + original_prefix, subpath, _ = split_path_and_normalize_language(request.get_full_path()) + response = redirect_class("/" + "/".join([locale, subpath])) # Record count of redirects to this locale. - metrics.incr("locale.redirect", tags=[f"from_locale:{get_locale(request) or 'none'}", f"to_locale:{locale}"]) + metrics.incr("locale.redirect", tags=[f"from_locale:{original_prefix or 'none'}", f"to_locale:{locale}"]) # Add the Vary header to avoid wrong redirects due to a cache response["Vary"] = "Accept-Language" return response @@ -172,10 +179,11 @@ def render(request, template, context=None, ftl_files=None, activation_files=Non # matching locale. # Does that path's locale match the request's locale? - if locale in translations: + # AND is it NOT for the root path with no discernable lang? + if locale in translations and not is_root_path_with_no_language_clues(request): # Redirect to the locale if: # - The URL is the root path but is missing the trailing slash OR - # - The locale isn't in current prefix in the URL. + # - The locale isn't the current prefix in the URL if request.path == f"/{locale}" or locale != request.path.lstrip("/").partition("/")[0]: return redirect_to_locale(request, locale) else: @@ -193,7 +201,11 @@ def render(request, template, context=None, ftl_files=None, activation_files=Non def get_locale(request): - return getattr(request, "locale", settings.LANGUAGE_CODE) + # request.locale is added in bedrock.base.middleware.BedrockLangCodeFixupMiddleware + lang = getattr(request, "locale", None) + if not lang: + lang = settings.LANGUAGE_CODE + return normalize_language(lang) def get_accept_languages(request): @@ -215,14 +227,11 @@ def get_best_translation(translations, accept_languages, strict=False): If none found, it returns the first language code for the first available translation. """ - lang_map = _get_language_map() - # translations contains mixed-case items e.g. "en-US" while the keys - # of `lang_map` are all lowercase. But this works because the values - # of the `lang_map` dict are mixed-case like translations and the - # comparison below is with the values. + lang_map = language_url_map_with_fallbacks() + # translations contains mixed-case items e.g. "en-US" and the keys + # of `lang_map` are (now) also mixed case. valid_lang_map = {k: v for k, v in lang_map.items() if v in translations} for lang in accept_languages: - lang.lower() if lang in valid_lang_map: return valid_lang_map[lang] pre = lang.split("-")[0] diff --git a/lib/l10n_utils/fluent.py b/lib/l10n_utils/fluent.py index d238a29e61c..4e728e5ad03 100644 --- a/lib/l10n_utils/fluent.py +++ b/lib/l10n_utils/fluent.py @@ -144,7 +144,7 @@ def inner(*args, **kwargs): # can not use += here because that mutates the original list ftl_files = ftl_files + settings.FLUENT_DEFAULT_FILES - locale = kwargs.get("locale") or translation.get_language(True) + locale = kwargs.get("locale") or translation.get_language() locales = [locale] if locale != "en": locales.append("en") @@ -234,7 +234,7 @@ def get_active_locales(ftl_files, force=False): def ftl_file_is_active(ftl_file, locale=None): """Return True if the given FTL file is active in the given locale.""" - locale = locale or translation.get_language(True) + locale = locale or translation.get_language() return locale in get_active_locales(ftl_file) diff --git a/lib/l10n_utils/tests/test_base.py b/lib/l10n_utils/tests/test_base.py index dee57071bf1..5247415643e 100644 --- a/lib/l10n_utils/tests/test_base.py +++ b/lib/l10n_utils/tests/test_base.py @@ -13,7 +13,7 @@ from django_jinja.backend import Jinja2 from markus.testing import MetricsMock -from bedrock.base.urlresolvers import Prefixer +from bedrock.base.i18n import get_best_language, split_path_and_normalize_language from lib import l10n_utils ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_files") @@ -32,8 +32,11 @@ def _test(self, path, template, locale, accept_lang, status, destination=None, a request = RequestFactory().get(path) if accept_lang: request.META["HTTP_ACCEPT_LANGUAGE"] = accept_lang - prefixer = Prefixer(request) - request.locale = prefixer.locale + + locale_from_path, _subpath, _lang_code_changed = split_path_and_normalize_language(request.path) + assert not _lang_code_changed, "Did not expect lang code to change" + + request.locale = locale_from_path or get_best_language(accept_lang) ctx = {} if active_locales: @@ -49,7 +52,7 @@ def _test(self, path, template, locale, accept_lang, status, destination=None, a self.assertEqual(response.status_code, 302) self.assertEqual(response["Location"], destination) self.assertEqual(response["Vary"], "Accept-Language") - mm.assert_incr_once("locale.redirect", tags=[f"from_locale:{prefixer.locale or 'none'}", f"to_locale:{destination.split('/')[1]}"]) + mm.assert_incr_once("locale.redirect", tags=[f"from_locale:{locale_from_path or 'none'}", f"to_locale:{destination.split('/')[1]}"]) elif status == 404: self.assertEqual(response.status_code, 404) @@ -65,6 +68,7 @@ def test_no_accept_language_header(self): locales = ["en-US", "en-GB", "fr", "es-ES"] # Test no accept language header and locale-less root path returns 200. + # This is a special case where we render the 404-locale.html template at / self._test("/", template, "", "", 200, active_locales=locales) # Test no accept language header and locale-less path returns 302. @@ -246,7 +250,6 @@ def test_ftl_activations(self, render_mock): render_mock.assert_called_with(self.req, ["dude.html"], ANY, ftl_files="dude", activation_files=["dude", "donny"]) -@patch.object(l10n_utils, "_get_language_map", Mock(return_value={"an": "an", "de": "de", "en": "en-US", "en-us": "en-US", "fr": "fr"})) @pytest.mark.parametrize( "translations, accept_languages, expected", ( @@ -269,11 +272,15 @@ def test_ftl_activations(self, render_mock): (["am", "an"], ["mk", "gu-IN"], "an"), ), ) +@patch.object( + l10n_utils, + "language_url_map_with_fallbacks", + Mock(return_value={"an": "an", "de": "de", "en": "en-US", "en-us": "en-US", "fr": "fr"}), +) def test_get_best_translation(translations, accept_languages, expected): assert l10n_utils.get_best_translation(translations, accept_languages) == expected -@patch.object(l10n_utils, "_get_language_map", Mock(return_value={"an": "an", "de": "de", "en": "en-US", "en-us": "en-US", "fr": "fr"})) @pytest.mark.parametrize( "translations, accept_languages, expected", ( @@ -296,6 +303,11 @@ def test_get_best_translation(translations, accept_languages, expected): (["am", "an"], ["mk", "gu-IN"], None), ), ) +@patch.object( + l10n_utils, + "language_url_map_with_fallbacks", + Mock(return_value={"an": "an", "de": "de", "en": "en-US", "en-us": "en-US", "fr": "fr"}), +) def test_get_best_translation__strict(translations, accept_languages, expected): # Strict is used for the root path, to return the list of localized home pages for bots. assert l10n_utils.get_best_translation(translations, accept_languages, strict=True) == expected diff --git a/lib/l10n_utils/tests/test_translation.py b/lib/l10n_utils/tests/test_translation.py index e22891e1294..3386640e0e6 100644 --- a/lib/l10n_utils/tests/test_translation.py +++ b/lib/l10n_utils/tests/test_translation.py @@ -16,8 +16,10 @@ def test_get_language(self): # nothing set, should return default lang self.assertEqual(translation.get_language(), settings.LANGUAGE_CODE) - translation.activate("non-default") - self.assertEqual(translation.get_language(), "non-default") + translation.activate("lang-xx") + # 2024 Update: we're making Django work with uppercased territories in lang codes + # but see test__fix_case below for more explanation + self.assertEqual(translation.get_language(), "lang-XX") def test_activate_deactivate(self): """Should activate and deactivate languages""" @@ -47,3 +49,32 @@ def test_get_language_bidi(self): translation.activate("skr") self.assertTrue(translation.get_language_bidi()) + + def test__fix_case(self): + cases = ( + ("en-us", "en-US"), + ("EN-gB", "en-GB"), + ("en", "en"), + ("zh-hk", "zh-HK"), + # more complex codes that should not be changed - see function docstring + ("zh-Hant", "zh-Hant"), + ("zh-Hant-TW", "zh-Hant-TW"), + ("zh-HK", "zh-HK"), + ("zh-Hant-HK", "zh-Hant-HK"), + ("zh-hant", "zh-hant"), + ("zh-hant-tw", "zh-hant-tw"), + ("zh-Hant", "zh-Hant"), + ("zh-Hant-tw", "zh-Hant-tw"), + ("zh-Hant-TW", "zh-Hant-TW"), + ("ja-JP-mac", "ja-JP-mac"), + # no-ops + ("dude", "dude"), + ("the-DUDE", "the-DUDE"), + ("the-DUDE-abIDeS", "the-DUDE-abIDeS"), + ("xx", "xx"), + ("dq", "dq"), + # very special case + ) + for val, expected in cases: + with self.subTest(val=val, expected=expected): + self.assertEqual(translation._fix_case(val), expected) diff --git a/lib/l10n_utils/translation.py b/lib/l10n_utils/translation.py index 0edeaeedca1..21c650e974f 100644 --- a/lib/l10n_utils/translation.py +++ b/lib/l10n_utils/translation.py @@ -6,17 +6,11 @@ # and we don't need anything nearly as complex. from django.conf import settings - -from _threading_local import local - -_active = local() +from django.utils import translation def activate(language): - """ - Installs the given language as the language for the current thread. - """ - _active.value = language + return translation.activate(language) def deactivate(): @@ -24,37 +18,39 @@ def deactivate(): Uninstalls the currently active language so that further _ calls will resolve against the default language, again. """ - if hasattr(_active, "value"): - del _active.value + return translation.deactivate() -def _fix_case(locale): - """Convert lowercase locales to uppercase: en-us -> en-US""" - parts = locale.split("-") - if len(parts) == 1: - return locale - else: - return f"{parts[0]}-{parts[1].upper()}" +def _fix_case(lang_code): + """Convert lowercase language code to uppercase: en-us -> en-US + + Note: this is less exhaustive than bedrock.base.i18n.normalize_language + because we don't want it to potentially fall back to to a two-char lang code. + Indeed, because we have normalize_language running earlier in the + request-response cycle the odds of getting a bad lang code are likely + zero, so our default for more complex locale codes (e.g. zh-Hant-TW) is + to leave them untouched. If this proves unreliable, we can merge this with + normalize_locales and add a do-not-fall-back option to that function + """ -def get_language(fix_case=False): - """Returns the currently selected language.""" - lang = getattr(_active, "value", None) - if lang is None: - return settings.LANGUAGE_CODE + if not lang_code: + return None + parts = lang_code.split("-") - if fix_case: - lang = _fix_case(lang) + if len(parts) == 1: + return lang_code + if len(parts) > 2 or len(parts[1]) != 2: + # Don't touch complex codes + return lang_code + else: + return f"{parts[0].lower()}-{parts[1].upper()}" - return lang +def get_language(): + lang = _fix_case(translation.get_language()) + return lang or settings.LANGUAGE_CODE -def get_language_bidi(): - """ - Returns selected language's BiDi layout. - * False = left-to-right layout - * True = right-to-left layout - """ - base_lang = get_language().split("-")[0] - return base_lang in settings.LANGUAGES_BIDI +def get_language_bidi(): + return translation.get_language_bidi() diff --git a/tests/redirects/map_globalconf.py b/tests/redirects/map_globalconf.py index 91f580154fc..ed06b510517 100644 --- a/tests/redirects/map_globalconf.py +++ b/tests/redirects/map_globalconf.py @@ -187,7 +187,7 @@ url_test("/{m,{firefox/,}mobile}/faq/", "https://support.mozilla.org/products/mobile"), # bug 885799, 952429 url_test("/projects/calendar/holidays.html", "https://www.thunderbird.net/calendar/holidays/"), - url_test("/en-US/projects/calendar/random/stuff/", "https://www.thunderbird.net/calendar/"), + url_test("/projects/calendar/random/stuff/", "https://www.thunderbird.net/calendar/"), # bug 1388914 url_test("/thunderbird{,/}", "https://www.thunderbird.net/"), url_test("/thunderbird/channel/", "https://www.thunderbird.net/channel/"), @@ -784,7 +784,7 @@ # bug 1034859 url_test("/en-US/about/buttons/dude.jpg", "/media/img/careers/buttons/dude.jpg"), # bug 1003737 - url_test("/de/impressum/", "/de/about/legal/impressum/"), + url_test("/de/impressum/", "/de/about/legal/impressum/", follow_redirects=True), # bug 1248393 url_test("/de/about/legal/impressum/", status_code=requests.codes.ok), url_test("/{en-US,fr,ja}/about/legal/impressum/", "/de/about/legal/impressum/", status_code=requests.codes.found),