Skip to content
Permalink
Browse files

Add script to list/delete obsolete files

Fix #636 and also #636 (ignore README.md for iOS)
Fix tmx_mozillaorg to ignore 'hi' instead of excluding it from clean up script
  • Loading branch information
flodolo committed Feb 23, 2016
1 parent 9af9cf5 commit 405250548bf4f363cfe317703f95ad216b3e8a00
Showing with 163 additions and 2 deletions.
  1. +155 −0 app/scripts/clean_data.py
  2. +7 −1 app/scripts/tmx_mozillaorg
  3. +1 −1 app/scripts/tmx_xliff
@@ -0,0 +1,155 @@
#!/usr/bin/env python

import argparse
import json
import glob
import os
import shutil
import sys
from ConfigParser import SafeConfigParser


def _read_lines(path):
    """Return the lines of the text file at *path*, without line endings."""
    # The context manager closes the handle even if reading fails.
    with open(path, 'r') as handle:
        return handle.read().splitlines()


def _list_subfolders(path):
    """Return the sorted names of the folders directly inside *path*.

    An empty list is returned when *path* can't be listed (e.g. it doesn't
    exist), since os.walk() yields nothing in that case.
    """
    # The first tuple yielded by os.walk() describes *path* itself; its
    # second item is the list of immediate subfolders.
    top_level = next(os.walk(path), (path, [], []))
    return sorted(top_level[1])


def main():
    """List obsolete files and folders, removing them if --delete is passed.

    Three areas are analyzed:
    - config/sources: .txt files not matching a supported repository.
    - Local repositories: folders not matching a supported locale.
    - TMX storage: .php cache files not expected for any supported
      locale/repository pair.

    Exits with status 1 if the configuration file is missing.

    Note: every message is printed through a single-argument print(...), so
    the output is the same when this runs with Python 2's print statement.
    """
    # Parse command line options
    cl_parser = argparse.ArgumentParser()
    cl_parser.add_argument('--delete',
                           help='Delete files', action='store_true')
    args = cl_parser.parse_args()

    # Get absolute path of ../config from the current script location (not
    # the current folder)
    config_folder = os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir, 'config'))

    # Read Transvision's configuration file from ../config/config.ini
    config_file = os.path.join(config_folder, 'config.ini')
    if not os.path.isfile(config_file):
        print('Configuration file /app/config/config.ini is missing.')
        sys.exit(1)

    config_parser = SafeConfigParser()
    config_parser.read(config_file)
    storage_path = os.path.join(config_parser.get('config', 'root'), 'TMX')

    # Load supported repositories and store information like folder name,
    # supported locales.
    sources_path = os.path.join(config_folder, 'sources')
    repositories_file = os.path.join(
        sources_path, 'supported_repositories.json')
    with open(repositories_file) as sources_file:
        json_repositories = json.load(sources_file)

    folder_mapping = {
        'central': 'TRUNK_L10N',
        'firefox_ios': 'firefox_ios',
        'mozilla_org': 'mozilla_org',
    }

    supported_repositories = {}
    # Sets: both collections are only used for membership tests.
    known_cache_files = set()
    for repository in json_repositories.values():
        repository_id = repository['id']

        if repository_id.startswith('gaia'):
            # Gaia repositories don't use 'L10N' in the folder name, just
            # transform the repository ID to uppercase.
            folder_name = repository_id.upper()
        else:
            # Check if this repository is mapped to a special folder name
            # (e.g. central -> trunk), otherwise use the repository ID
            # (transformed to uppercase) with '_L10N' as folder name.
            folder_name = folder_mapping.get(
                repository_id, repository_id.upper() + '_L10N')

        # Store supported locales for this repository
        supported_locales = _read_lines(
            os.path.join(sources_path, repository_id + '.txt'))

        supported_repositories[repository_id] = {
            'folder_name': folder_name,
            'locales': set(supported_locales),
        }

        # Store the acceptable cache file names. They are built with
        # os.path.join() so they compare equal to the paths that glob
        # returns when the TMX folder is scanned below.
        for locale in supported_locales:
            cache_name = 'cache_{0}_{1}.php'.format(locale, repository_id)
            known_cache_files.add(
                os.path.join(storage_path, locale, cache_name))

    # List all .txt files in /sources
    print('--\nAnalyzing sources in config/sources')
    # Files living in /sources that don't describe a repository
    excluded_files = ['gaia_versions']
    known_sources = set(supported_repositories).union(excluded_files)

    need_cleanup = False
    for txtfile in glob.glob(os.path.join(sources_path, '*.txt')):
        filename = os.path.splitext(os.path.basename(txtfile))[0]
        if filename in known_sources:
            continue
        print('{0}.txt is not a supported repository.'.format(filename))
        need_cleanup = True
        if args.delete:
            print('Removing file: {0}'.format(txtfile))
            os.remove(txtfile)
    if not need_cleanup:
        print('Nothing to remove.')

    # Check all repositories for extra folders
    print('--\nAnalyzing folders in supported repositories')
    # Besides standard VCS folders or templates, we need to exclude some
    # locales on mozilla.org:
    # hi: is a fake locale used to activate legal-docs, but not actually
    #     supported by mozilla.org
    exclusions = {
        'firefox_ios': ['.git', 'templates'],
        'mozilla_org': ['.git', 'en-US', 'hi'],
    }
    hg_path = config_parser.get('config', 'local_hg')
    git_path = config_parser.get('config', 'local_git')

    need_cleanup = False
    # Sorted so the report is printed in a stable order.
    for repository_id, repository in sorted(supported_repositories.items()):
        print('--\nAnalyze {0}'.format(repository_id))
        # Check if the folder exists as a Mercurial repository. If it
        # doesn't, assume it's a Git repository.
        folder_path = os.path.join(hg_path, repository['folder_name'])
        if not os.path.isdir(folder_path):
            folder_path = os.path.join(git_path, repository['folder_name'])

        ignored_folders = exclusions.get(repository_id, [])
        # A repository that is missing on disk simply has no folders to
        # report, instead of aborting the whole run.
        for folder in _list_subfolders(folder_path):
            if folder in ignored_folders or folder in repository['locales']:
                continue
            # This folder is inside the repository but doesn't match
            # any supported locale.
            print('{0} is not a supported locale'.format(folder))
            need_cleanup = True
            if args.delete:
                full_path = os.path.join(folder_path, folder)
                print('Removing folder: {0}'.format(full_path))
                shutil.rmtree(full_path)
    if not need_cleanup:
        print('Nothing to remove.')

    # Check cache files
    print('--\nAnalyze cache files in TMX')

    need_cleanup = False
    for folder in _list_subfolders(storage_path):
        # Ignore reference locale
        if folder == 'en-US':
            continue
        cache_pattern = os.path.join(storage_path, folder, '*.php')
        for filename in glob.glob(cache_pattern):
            if filename in known_cache_files:
                continue
            print('{0} is not a known cache file'.format(filename))
            need_cleanup = True
            if args.delete:
                print('Removing file: {0}'.format(filename))
                os.remove(filename)
    if not need_cleanup:
        print('Nothing to remove.')


if __name__ == '__main__':
    main()
@@ -14,7 +14,13 @@ error_log('Mozilla.org: extraction of strings');
foreach (Files::getFilenamesInFolder(GIT . 'mozilla_org/') as $locale) {
$path = GIT . "mozilla_org/{$locale}/";

if (! is_dir($path)) {
/*
'hi' is a fake locale on mozilla.org: it's not actually supported,
but we have a folder to enable legal documents in Bedrock. That
doesn't mean that we need to create a TMX for it.
*/
$excluded_locales = ['hi'];
if (! is_dir($path) || in_array($locale, $excluded_locales)) {
continue;
}

@@ -36,7 +36,7 @@ if (! isset($supported_projects[$project_name])) {
error_log('Extraction of strings from XLIFF file');

$base_path = $supported_projects[$project_name]['base_path'];
foreach (Files::getFilenamesInFolder($base_path, ['templates', 'README']) as $locale) {
foreach (Files::getFilenamesInFolder($base_path, ['templates', 'README.md']) as $locale) {
$out_translation = '';
$total_strings = 0;

0 comments on commit 4052505

Please sign in to comment.
You can’t perform that action at this time.