From 8ca459d9acc9d72986ee8497dd81e14b69bff1c0 Mon Sep 17 00:00:00 2001 From: Will Kahn-Greene Date: Wed, 28 Oct 2015 14:46:32 -0400 Subject: [PATCH] [bug 1216998] Switch to Puente for strings extraction --- .gitignore | 1 + .gitmodules | 3 + fjord/base/l10n.py | 95 ------------- fjord/base/management/commands/extract.py | 155 ---------------------- fjord/base/management/commands/merge.py | 119 ----------------- fjord/settings/base.py | 33 +++-- vendor/src/puente | 1 + vendor/vendor.pth | 2 +- 8 files changed, 27 insertions(+), 382 deletions(-) delete mode 100644 fjord/base/l10n.py delete mode 100644 fjord/base/management/commands/extract.py delete mode 100644 fjord/base/management/commands/merge.py create mode 160000 vendor/src/puente diff --git a/.gitignore b/.gitignore index 507350f3..26436f35 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ settings/local.py */settings/local.py *.py[co] *.sw[po] +.cache .coverage .cache pip-log.txt diff --git a/.gitmodules b/.gitmodules index 99807e9f..2067aaf3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -121,3 +121,6 @@ [submodule "vendor/src/django-waffle"] path = vendor/src/django-waffle url = https://github.com/jsocol/django-waffle.git +[submodule "vendor/src/puente"] + path = vendor/src/puente + url = https://github.com/mozilla/puente.git diff --git a/fjord/base/l10n.py b/fjord/base/l10n.py deleted file mode 100644 index 664c64de..00000000 --- a/fjord/base/l10n.py +++ /dev/null @@ -1,95 +0,0 @@ -# NOTE: The code in this file was mostly copied from tower. -import re - -import jinja2 -from babel.messages.extract import extract_python as babel_extract_python -from jinja2 import ext -from jinja2.ext import InternationalizationExtension - - -def add_context(context, message): - # \x04 is a magic gettext number. - return u'%s\x04%s' % (context, message) - - -def split_context(message): - # \x04 is a magic gettext number. - ret = message.split(u'\x04') - if len(ret) == 1: - ret.insert(0, '') - return ret - - -def collapse_whitespace(message): - return re.compile(r'\s+', re.UNICODE).sub(' ', message).strip() - - -@jinja2.contextfunction -def _gettext_alias(context, text, *args, **kwargs): - """Takes the result of gettext and marks it safe""" - return jinja2.Markup( - context.resolve('gettext')(context, text, *args, **kwargs) - ) - - -class MozInternationalizationExtension(InternationalizationExtension): - """ - We override jinja2's _parse_block() to collapse whitespace so we can have - linebreaks wherever we want, and hijack _() to mark the result as safe. - """ - - def __init__(self, environment): - super(MozInternationalizationExtension, self).__init__(environment) - environment.globals['_'] = _gettext_alias - - def _parse_block(self, parser, allow_pluralize): - parse_block = InternationalizationExtension._parse_block - ref, buffer = parse_block(self, parser, allow_pluralize) - return ref, collapse_whitespace(buffer) - - -def tweak_message(message): - """We piggyback on jinja2's babel_extract() (really, Babel's extract_* - functions) but they don't support some things we need so this function will - tweak the message. Specifically: - - 1. We collapse whitespace in the msgid. Jinja2 will only strip - whitespace from the ends of a string so linebreaks show up in - your .po files still. - - 2. Babel doesn't support context (msgctxt). We hack that in ourselves - here. - - """ - if isinstance(message, basestring): - message = collapse_whitespace(message) - elif isinstance(message, tuple): - # A tuple of 2 has context, 3 is plural, 4 is plural with context - if len(message) == 2: - message = add_context(message[1], message[0]) - elif len(message) == 3: - if all(isinstance(x, basestring) for x in message[:2]): - singular, plural, num = message - message = (collapse_whitespace(singular), - collapse_whitespace(plural), - num) - elif len(message) == 4: - singular, plural, num, ctxt = message - message = (add_context(ctxt, collapse_whitespace(singular)), - add_context(ctxt, collapse_whitespace(plural)), - num) - return message - - -def extract_python(fileobj, keywords, comment_tags, options): - msgs = list(babel_extract_python(fileobj, keywords, comment_tags, options)) - for lineno, funcname, message, comments in msgs: - message = tweak_message(message) - yield lineno, funcname, message, comments - - -def extract_template(fileobj, keywords, comment_tags, options): - msgs = list(ext.babel_extract(fileobj, keywords, comment_tags, options)) - for lineno, funcname, message, comments in msgs: - message = tweak_message(message) - yield lineno, funcname, message, comments diff --git a/fjord/base/management/commands/extract.py b/fjord/base/management/commands/extract.py deleted file mode 100644 index 64e40f7b..00000000 --- a/fjord/base/management/commands/extract.py +++ /dev/null @@ -1,155 +0,0 @@ -# NOTE: This code was copied from tower. -# TODO: Look into switching to makemessages provided by django-jinja. -import os -import tempfile -from subprocess import Popen - -from django.core.management.base import BaseCommand -from django.conf import settings - -from babel.messages.extract import DEFAULT_KEYWORDS, extract_from_dir -from translate.storage import po - -from fjord.base.l10n import split_context - - -DEFAULT_DOMAIN = 'all' -TEXT_DOMAIN = 'django' -KEYWORDS = dict(DEFAULT_KEYWORDS) -KEYWORDS['_lazy'] = None - -# List of domains that should be separate from the django.pot file. -STANDALONE_DOMAINS = [TEXT_DOMAIN] - -OPTIONS_MAP = { - '**.*': { - # Get list of extensions for django-jinja template backend - 'extensions': ','.join(settings.TEMPLATES[0]['OPTIONS']['extensions']) - } -} - -COMMENT_TAGS = ['L10n:', 'L10N:', 'l10n:', 'l10N:'] - - -def create_pounit(filename, lineno, message, comments): - unit = po.pounit(encoding='UTF-8') - context, msgid = split_context(message) - unit.setsource(msgid) - if context: - unit.msgctxt = ['"%s"' % context] - for comment in comments: - unit.addnote(comment, 'developer') - - unit.addlocation('%s:%s' % (filename, lineno)) - return unit - - -def create_pofile_from_babel(extracted): - catalog = po.pofile() - - for extracted_unit in extracted: - filename, lineno, message, comments, context = extracted_unit - unit = create_pounit(filename, lineno, message, comments) - catalog.addunit(unit) - - catalog.removeduplicates() - return catalog - - -class Command(BaseCommand): - def add_arguments(self, parser): - parser.add_argument( - '--domain', '-d', default=DEFAULT_DOMAIN, dest='domain', - help=( - 'The domain of the message files. If "all" ' - 'everything will be extracted and combined into ' - '%s.pot. (default: %%default).' % TEXT_DOMAIN - ) - ) - parser.add_argument( - '--output-dir', '-o', - default=os.path.join(settings.ROOT, 'locale', 'templates', - 'LC_MESSAGES'), - dest='outputdir', - help=( - 'The directory where extracted files will be placed. ' - '(Default: %default)' - ) - ) - parser.add_argument( - '-c', '--create', - action='store_true', dest='create', default=False, - help='Create output-dir if missing' - ) - - def handle(self, *args, **options): - domains = options.get('domain') - outputdir = os.path.abspath(options.get('outputdir')) - - if not os.path.isdir(outputdir): - if not options.get('create'): - print ('Output directory must exist (%s) unless -c option is ' - 'given. Specify one with --output-dir' % outputdir) - return 'FAILURE\n' - - os.makedirs(outputdir) - - if domains == 'all': - domains = settings.DOMAIN_METHODS.keys() - else: - domains = [domains] - - root = settings.ROOT - - def callback(filename, method, options): - if method != 'ignore': - print ' %s' % filename - - for domain in domains: - print 'Extracting all strings in domain %s...' % (domain) - - methods = settings.DOMAIN_METHODS[domain] - extracted = extract_from_dir( - root, - method_map=methods, - keywords=KEYWORDS, - comment_tags=COMMENT_TAGS, - callback=callback, - options_map=OPTIONS_MAP, - ) - catalog = create_pofile_from_babel(extracted) - if not os.path.exists(outputdir): - raise Exception('Expected %s to exist... BAILING' % outputdir) - - catalog.savefile(os.path.join(outputdir, '%s.pot' % domain)) - - pot_files = [] - for i in [x for x in domains if x not in STANDALONE_DOMAINS]: - pot_files.append(os.path.join(outputdir, '%s.pot' % i)) - - if len(pot_files) > 1: - print ('Concatenating the non-standalone domains into %s.pot' % - TEXT_DOMAIN) - - final_out = os.path.join(outputdir, '%s.pot' % TEXT_DOMAIN) - - # We add final_out back on because msgcat will combine all - # specified files. We'll redirect everything back in to - # final_out in a minute. - pot_files.append(final_out) - - meltingpot = tempfile.TemporaryFile() - p1 = Popen(['msgcat'] + pot_files, stdout=meltingpot) - p1.communicate() - meltingpot.seek(0) - - # w+ truncates the file first - with open(final_out, 'w+') as final: - final.write(meltingpot.read()) - - meltingpot.close() - - for i in [x for x in domains if x not in STANDALONE_DOMAINS]: - os.remove(os.path.join(outputdir, '%s.pot' % i)) - - print 'Done' diff --git a/fjord/base/management/commands/merge.py b/fjord/base/management/commands/merge.py deleted file mode 100644 index cf157f7e..00000000 --- a/fjord/base/management/commands/merge.py +++ /dev/null @@ -1,119 +0,0 @@ -# NOTE: This code was copied from tower. -# TODO: Look into switching to makemessages provided by django-jinja. -import os -import sys -from subprocess import Popen, call, PIPE -from tempfile import TemporaryFile - -from django.core.management.base import BaseCommand -from django.conf import settings - - -TEXT_DOMAIN = 'django' - -# List of domains that should be separate from the django.pot file. -STANDALONE_DOMAINS = [TEXT_DOMAIN] - - -class Command(BaseCommand): - """Updates all locales' PO files by merging them with the POT files. - - The command looks for POT files in locale/templates/LC_MESSAGES, - which is where software like Verbatim looks for them as well. - - For a given POT file, if a corresponding PO file doesn't exist for - a locale, the command will initialize it with `msginit`. This - guarantees that the newly created PO file has proper gettext - metadata headers. - - During merging (or initializing), the command will also look in - `locale/compendia` for a locale-specific compendium of - translations (serving as a translation memory of sorts). The - compendium file must be called `${locale}.compendium`, - e.g. `es_ES.compendium` for Spanish. The translations in the - compendium will be used by gettext for fuzzy matching. - - """ - def add_argument(self, parser): - parser.add_argument( - '-c', '--create', - action='store_true', dest='create', default=False, - help='Create locale subdirectories' - ) - - def handle(self, *args, **options): - locale_dir = os.path.join(settings.ROOT, 'locale') - - if options.get('create'): - for lang in getattr(settings, 'LANGUAGES', []): - d = os.path.join(locale_dir, lang.replace('-', '_'), - 'LC_MESSAGES') - if not os.path.exists(d): - os.makedirs(d) - - for domain in STANDALONE_DOMAINS: - print 'Merging %s strings to each locale...' % domain - domain_pot = os.path.join(locale_dir, 'templates', 'LC_MESSAGES', - '%s.pot' % domain) - if not os.path.isfile(domain_pot): - sys.exit('Can not find %s.pot' % domain) - - for locale in os.listdir(locale_dir): - if ((not os.path.isdir(os.path.join(locale_dir, locale)) or - locale.startswith('.') or - locale == 'templates' or - locale == 'compendia')): - continue - - compendium = os.path.join(locale_dir, 'compendia', - '%s.compendium' % locale) - domain_po = os.path.join(locale_dir, locale, 'LC_MESSAGES', - '%s.po' % domain) - - if not os.path.isfile(domain_po): - print ' Can not find (%s). Creating...' % (domain_po) - if not call(['which', 'msginit'], stdout=PIPE) == 0: - raise Exception('You do not have gettext installed.') - - p1 = Popen([ - 'msginit', - '--no-translator', - '--locale=%s' % locale, - '--input=%s' % domain_pot, - '--output-file=%s' % domain_po, - '--width=200' - ]) - p1.communicate() - - print 'Merging %s.po for %s' % (domain, locale) - - domain_pot_file = open(domain_pot) - - if locale == 'en_US': - enmerged = TemporaryFile('w+t') - p2 = Popen(['msgen', '-'], stdin=domain_pot_file, - stdout=enmerged) - p2.communicate() - mergeme = enmerged - else: - mergeme = domain_pot_file - - mergeme.seek(0) - command = [ - 'msgmerge', - '--update', - '--width=200', - domain_po, - '-' - ] - if os.path.isfile(compendium): - print '(using a compendium)' - command.insert(1, '--compendium=%s' % compendium) - p3 = Popen(command, stdin=mergeme) - p3.communicate() - mergeme.close() - print 'Domain %s finished' % domain - - print 'All finished' - -Command.help = Command.__doc__ diff --git a/fjord/settings/base.py b/fjord/settings/base.py index c1368e6b..72182d4c 100644 --- a/fjord/settings/base.py +++ b/fjord/settings/base.py @@ -225,6 +225,24 @@ def lazy_langs(): LANGUAGES = lazy(lazy_langs, dict)() +# L10n extraction configuration +PUENTE = { + 'BASE_DIR': BASE_DIR, + 'DOMAIN_METHODS': { + 'django': [ + ('fjord/**.py', 'python'), + ('fjord/**/jinja2/**.html', 'jinja2'), + # FIXME: This is wrong--should be a django-specific extractor + ('fjord/**/templates/**.html', 'jinja2'), + ('templates/**.html', 'jinja2'), + ] + }, + 'PROJECT': 'Mozilla Input', + 'MSGID_BUGS_ADDRESS': ( + 'https://bugzilla.mozilla.org/enter_bug.cgi?product=Input' + ) +} + INSTALLED_APPS = ( # Local apps 'django_browserid', @@ -254,9 +272,10 @@ def lazy_langs(): 'django_extensions', 'django_jinja', 'django_jinja.contrib._humanize', # Adds django humanize filters + 'dennis.django_dennis', 'eadred', 'pipeline', - 'dennis.django_dennis', + 'puente', 'fjord.alerts', 'fjord.analytics', @@ -376,7 +395,7 @@ def lazy_langs(): 'django_jinja.builtins.extensions.CsrfExtension', 'django_jinja.builtins.extensions.StaticFilesExtension', 'django_jinja.builtins.extensions.DjangoFiltersExtension', - 'fjord.base.l10n.MozInternationalizationExtension', + 'puente.ext.i18n', 'pipeline.templatetags.ext.PipelineExtension', 'waffle.jinja.WaffleExtension', ], @@ -577,16 +596,6 @@ def lazy_langs(): # CSRF error page CSRF_FAILURE_VIEW = 'fjord.base.views.csrf_failure' -# Tells the extract script what files to look for L10n in and what -# function handles the extraction. -DOMAIN_METHODS = { - 'django': [ - ('fjord/**.py', 'fjord.base.l10n.extract_python'), - ('fjord/**/templates/**.html', 'fjord.base.l10n.extract_template'), - ('fjord/**/jinja2/**.html', 'fjord.base.l10n.extract_template'), - ('templates/**.html', 'fjord.base.l10n.extract_template'), - ] -} WSGI_APPLICATION = 'fjord.wsgi.application' diff --git a/vendor/src/puente b/vendor/src/puente new file mode 160000 index 00000000..5bb48f83 --- /dev/null +++ b/vendor/src/puente @@ -0,0 +1 @@ +Subproject commit 5bb48f8399b89f29cc9e0fa1ff182d217f8b8b60 diff --git a/vendor/vendor.pth b/vendor/vendor.pth index 1f5b317e..60672d73 100644 --- a/vendor/vendor.pth +++ b/vendor/vendor.pth @@ -26,7 +26,6 @@ src/dennis src/django src/django-adminplus src/django-arecibo -src/babel src/dateutil src/django-browserid src/django_compressor @@ -53,6 +52,7 @@ src/gengo-python src/html5lib-python src/kombu src/nuggets +src/puente src/py-amqp src/pyquery src/pystatsd