From 6a4994dd79fbe93ddbd155792669c72a88d20ff2 Mon Sep 17 00:00:00 2001 From: Francesco Lodolo Date: Tue, 29 Nov 2016 07:39:28 +0100 Subject: [PATCH] Use compare-locales for parsing product repositories (#829) --- app/scripts/glossaire.sh | 4 +- app/scripts/setup.sh | 27 +-- app/scripts/tmx_products.py | 336 +++++++++++++++++++++--------------- 3 files changed, 211 insertions(+), 156 deletions(-) diff --git a/app/scripts/glossaire.sh b/app/scripts/glossaire.sh index 0cbf8e93..09d8f230 100755 --- a/app/scripts/glossaire.sh +++ b/app/scripts/glossaire.sh @@ -154,9 +154,9 @@ function updateStandardRepo() { echogreen "Create ${repo_name^^} cache for $repo_name/$1" if [ "$1" = "en-US" ] then - nice -20 $install/app/scripts/tmx_products.py ${!repo_source}/COMMUN/ ${!repo_source}/COMMUN/ en-US en-US $repo_name + nice -20 $install/app/scripts/tmx_products.py ${!repo_source}/COMMUN/ en-US en-US $repo_name else - nice -20 $install/app/scripts/tmx_products.py ${!repo_l10n}/$1/ ${!repo_source}/COMMUN/ $1 en-US $repo_name + nice -20 $install/app/scripts/tmx_products.py ${!repo_l10n}/$1/ $1 en-US $repo_name fi } diff --git a/app/scripts/setup.sh b/app/scripts/setup.sh index 6d9c100c..b65dc890 100755 --- a/app/scripts/setup.sh +++ b/app/scripts/setup.sh @@ -74,26 +74,32 @@ function createSymlinks() { esac } -function checkoutSilme() { - # Check out SILME library to a specific version (0.8.0) - if [ ! -d $libraries/silme/.hg ] +function setupExternalLibraries() { + # Check out or update compare-locales library + version="RELEASE_1_2_1" + if [ ! -d $libraries/compare-locales/.hg ] then - echogreen "Checking out the SILME library into $libraries" + echogreen "Checking out compare-locales in $libraries" cd $libraries - hg clone https://hg.mozilla.org/l10n/silme -u silme-0.8.0 + hg clone https://hg.mozilla.org/l10n/compare-locales -u $version + cd $install + else + echogreen "Updating compare-locales in $libraries" + cd $libraries/compare-locales + hg pull -r default --update + hg update $version cd $install fi -} -function setupP12nExtract() { + # Check out or update external p12n-extract library if [ ! -d $libraries/p12n/.git ] then - echogreen "Checking out the p12n-extract library into $libraries" + echogreen "Checking out the p12n-extract library in $libraries" cd $libraries git clone https://github.com/flodolo/p12n-extract/ p12n cd $install else - echogreen "Updating the p12n-extract library into $libraries" + echogreen "Updating the p12n-extract library in $libraries" cd $libraries/p12n git pull cd $install @@ -246,8 +252,7 @@ else fi echo "${CURRENT_TIP:0:7}${DEV_VERSION}" > "${install}/cache/version.txt" -checkoutSilme -setupP12nExtract +setupExternalLibraries initDesktopSourceRepo "central" initDesktopSourceRepo "release" diff --git a/app/scripts/tmx_products.py b/app/scripts/tmx_products.py index 8758b95c..15546db7 100755 --- a/app/scripts/tmx_products.py +++ b/app/scripts/tmx_products.py @@ -1,189 +1,239 @@ #!/usr/bin/python import argparse -import datetime +import json +import logging import os import subprocess import sys from ConfigParser import SafeConfigParser +logging.basicConfig() # Get absolute path of ../config from the current script location (not the # current folder) config_folder = os.path.abspath( os.path.join(os.path.dirname(__file__), os.pardir, 'config')) # Read Transvision's configuration file from ../config/config.ini -# If not available use default a /storage folder to store data +# If not available use a default storage folder to store data config_file = os.path.join(config_folder, 'config.ini') if not os.path.isfile(config_file): - print 'Configuration file /app/config/config.ini is missing. Default folders will be used.' - storage_path = os.path.abspath( + print('Configuration file /app/config/config.ini is missing. ' + 'Default settings will be used.') + root_folder = os.path.abspath( os.path.join(os.path.dirname(__file__), os.pardir)) - library_path = os.path.join(storage_path, 'libraries') - storage_path = os.path.join(storage_path, 'tests', 'testfiles', 'output') + library_path = os.path.join(root_folder, 'libraries') else: config_parser = SafeConfigParser() config_parser.read(config_file) library_path = config_parser.get('config', 'libraries') storage_path = os.path.join(config_parser.get('config', 'root'), 'TMX') -# Import Silme library (http://hg.mozilla.org/l10n/silme/) -silme_path = os.path.join(library_path, 'silme') - -if not os.path.isdir(silme_path): +# Import compare-locales (http://hg.mozilla.org/l10n/compare-locales/) +# and add it to the system's path +compare_locales_path = os.path.join(library_path, 'compare-locales') +if not os.path.isdir(compare_locales_path): try: - print 'Cloning silme...' + print('Cloning compare-locales...') cmd_status = subprocess.check_output( - ['hg', 'clone', 'https://hg.mozilla.org/l10n/silme', - silme_path, '-u', 'silme-0.8.0'], + ['hg', 'clone', 'https://hg.mozilla.org/l10n/compare-locales', + compare_locales_path, '-u', 'RELEASE_1_2_1'], stderr=subprocess.STDOUT, shell=False) - print cmd_status + print(cmd_status) except Exception as e: - print e + print(e) +sys.path.insert(0, compare_locales_path) -sys.path.append(os.path.join(silme_path, 'lib')) try: - import silme.core - import silme.io - import silme.format - silme.format.Manager.register('dtd', 'properties', 'ini', 'inc') + from compare_locales import parser except ImportError: - print 'Error importing Silme library' + print('Error importing compare-locales library') sys.exit(1) -def escape(translation): - ''' - Escape quotes and backslahes in translation. There are two issues: - * Internal Python escaping: the string "this is a \", has an internal - representation as "this is a \\". - Also, "\\ test" is equivalent to r"\ test" (raw string). - * We need to print these strings into a file, with the format of a - PHP array delimited by single quotes ('id' => 'translation'). Hence - we need to escape single quotes, but also escape backslashes. - "this is a 'test'" => "this is a \'test\'" - "this is a \'test\'" => "this is a \\\'test\\\'" - ''' - - # Escape slashes - escaped_translation = translation.replace('\\', '\\\\') - # Escape single quotes - escaped_translation = escaped_translation.replace('\'', '\\\'') - - return escaped_translation - - -def get_strings(package, local_directory, strings_array): - '''Store recursively translations from files in local_directory in a list of string''' - for item in package: - if (type(item[1]) is not silme.core.structure.Blob) and not(isinstance(item[1], silme.core.Package)): - for entity in item[1]: - # String ID is the format folder/filename:entity. Make - # sure to remove a starting '/' from the folder's name - string_id = u'{0}/{1}:{2}'.format( - local_directory.lstrip('/'), item[0], entity) - strings_array[string_id] = item[1][entity].get_value() - elif (isinstance(item[1], silme.core.Package)): - if (item[0] != 'en-US') and (item[0] != 'locales'): - get_strings(item[1], local_directory + '/' + item[0], - strings_array) - else: - get_strings(item[1], local_directory, strings_array) - - -def create_directories_list(locale_repo, reference_repo, repository): - ''' Create a list of folders to analyze ''' - dirs_locale = os.listdir(locale_repo) - dirs_reference = [ - 'browser', 'calendar', 'chat', 'devtools', 'dom', 'editor', - 'extensions', 'mail', 'mobile', 'netwerk', 'other-licenses', - 'security', 'services', 'suite', 'toolkit' - ] - dirs = filter(lambda x: x in dirs_locale, dirs_reference) - - return dirs - - -def create_tmx_content(reference_repo, locale_repo, dirs): - ''' Extract strings from repository, return them as a list of PHP array - elements. ''' - tmx_content = [] - for directory in dirs: - path_reference = os.path.join(reference_repo, directory) - path_locale = os.path.join(locale_repo, directory) - - rcsClient = silme.io.Manager.get('file') - try: - l10nPackage_reference = rcsClient.get_package( - path_reference, object_type='entitylist') - except Exception as e: - print 'Silme couldn\'t extract data for', path_reference - print e - continue - - if not os.path.isdir(path_locale): - # Folder doesn't exist for this locale, don't log a warning, - # just continue to the next folder. - continue - - try: - l10nPackage_locale = rcsClient.get_package( - path_locale, object_type='entitylist') - except Exception as e: - print 'Silme couldn\'t extract data for', path_locale - print e - continue - - strings_reference = {} - strings_locale = {} - get_strings(l10nPackage_reference, directory, strings_reference) - get_strings(l10nPackage_locale, directory, strings_locale) - for entity in strings_reference: - # Append string to tmx_content, using the format of a PHP array - # element, but only if there's a translation available - translation = escape( - strings_locale.get(entity, '@@missing@@')).encode('utf-8') - if translation != '@@missing@@': - tmx_content.append("'{0}' => '{1}'".format( - entity.encode('utf-8'), translation)) - tmx_content.sort() - - return tmx_content - - -def write_php_file(filename, tmx_content): - ''' Write TMX content as a PHP array on file ''' - target_locale_file = open(filename, 'w') - target_locale_file.write(' '{1}',\n".format(string_id, translation) + f.write(line) + f.write('];\n') + f.close() + + def escape(self, translation): + ''' + Escape quotes and backslahes in translation. There are two issues: + * Internal Python escaping: the string "this is a \", has an internal + representation as "this is a \\". + Also, "\\ test" is equivalent to r"\ test" (raw string). + * We need to print these strings into a file, with the format of a + PHP array delimited by single quotes ('id' => 'translation'). Hence + we need to escape single quotes, but also escape backslashes. + "this is a 'test'" => "this is a \'test\'" + "this is a \'test\'" => "this is a \\\'test\\\'" + ''' + + # Escape slashes + escaped_translation = translation.replace('\\', '\\\\') + # Escape single quotes + escaped_translation = escaped_translation.replace('\'', '\\\'') + + return escaped_translation def main(): # Read command line input parameters parser = argparse.ArgumentParser() - parser.add_argument('locale_repo', help='Path to locale files') - parser.add_argument('reference_repo', help='Path to reference files') + parser.add_argument('repo_path', help='Path to locale files') parser.add_argument('locale_code', help='Locale language code') parser.add_argument('reference_code', help='Reference language code') - parser.add_argument('repository', help='Repository name') + parser.add_argument('repository_name', help='Repository name') + parser.add_argument('--output', nargs='?', type=str, choices=['json', 'php'], + help='Store only one type of output.', default='') + parser.add_argument('storage_mode', nargs='?', + help='If set to \'append\', translations will be added to an existing cache file', default='') + parser.add_argument('storage_prefix', nargs='?', + help='This prefix will be prependended to the identified path in string IDs (e.g. extensions/irc for Chatzilla)', default='') args = parser.parse_args() - dirs = create_directories_list( - args.reference_repo, args.locale_repo, args.repository - ) - tmx_content = create_tmx_content( - args.reference_repo, args.locale_repo, dirs) - - # Store the actual file on disk - filename_locale = os.path.join( - os.path.join(storage_path, args.locale_code), - 'cache_{0}_{1}.php'.format(args.locale_code, args.repository) - ) - write_php_file(filename_locale, tmx_content) + extracted_strings = StringExtraction( + storage_path, args.locale_code, args.reference_code, args.repository_name) + + extracted_strings.setRepositoryPath(args.repo_path.rstrip('/')) + if args.storage_mode == 'append': + extracted_strings.setStorageMode('append', args.storage_prefix) + + extracted_strings.extractStrings() + extracted_strings.storeTranslations(args.output) if __name__ == '__main__':