Skip to content

Commit 4052505

Browse files
committed
Add script to list/delete obsolete files
Fix #636 and also #636 (ignore README.md for iOS) Fix tmx_mozillaorg to ignore 'hi' instead of excluding it from clean up script
1 parent 9af9cf5 commit 4052505

File tree

3 files changed

+163
-2
lines changed

3 files changed

+163
-2
lines changed

app/scripts/clean_data.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
#!/usr/bin/env python
2+
3+
import argparse
4+
import json
5+
import glob
6+
import os
7+
import shutil
8+
import sys
9+
from ConfigParser import SafeConfigParser
10+
11+
12+
def _list_subfolders(path):
    """Return the names of the folders located directly inside *path*.

    os.walk() yields nothing when *path* is missing or unreadable, so an
    empty list is returned in that case instead of letting StopIteration
    escape to the caller.
    """
    return next(os.walk(path), (path, [], []))[1]


def main():
    """List obsolete files and folders, deleting them if --delete is used.

    Three areas are analyzed, using ../config/config.ini and the files in
    ../config/sources (relative to this script) as reference:

    * .txt files in config/sources that don't match a supported repository;
    * folders inside each supported repository that don't match a supported
      locale for that repository;
    * .php files in the TMX storage folders that are not an expected cache
      file for a supported locale/repository pair.

    Without --delete the script only reports what it finds. It exits with
    status 1 if the configuration file is missing.
    """
    # Parse command line options
    cl_parser = argparse.ArgumentParser()
    cl_parser.add_argument('--delete',
                           help='Delete files', action='store_true')
    args = cl_parser.parse_args()

    # Get absolute path of ../config from the current script location (not
    # the current folder)
    config_folder = os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir, 'config'))

    # Read Transvision's configuration file from ../config/config.ini
    config_file = os.path.join(config_folder, 'config.ini')
    if not os.path.isfile(config_file):
        print('Configuration file /app/config/config.ini is missing.')
        sys.exit(1)

    config_parser = SafeConfigParser()
    config_parser.read(config_file)
    storage_path = os.path.join(config_parser.get('config', 'root'), 'TMX')

    # Load supported repositories and store information like folder name,
    # supported locales.
    sources_path = os.path.join(config_folder, 'sources')
    repositories_file = os.path.join(
        sources_path, 'supported_repositories.json')
    with open(repositories_file) as sources_file:
        json_repositories = json.load(sources_file)

    folder_mapping = {
        'central': 'TRUNK_L10N',
        'firefox_ios': 'firefox_ios',
        'mozilla_org': 'mozilla_org'
    }

    supported_repositories = {}
    # A set keeps the membership test cheap when checking every cache file
    known_cache_files = set()
    for repository in json_repositories.values():
        repository_id = repository['id']

        if repository_id.startswith('gaia'):
            # Gaia repositories don't use 'L10N' in the folder name, just
            # transform the repository ID to uppercase.
            folder_name = repository_id.upper()
        else:
            # Check if this repository is mapped to a special folder name
            # (e.g. central -> trunk), otherwise use the repository ID
            # (transformed to uppercase) with '_L10N' as folder name.
            folder_name = folder_mapping.get(
                repository_id, repository_id.upper() + '_L10N')

        # Store supported locales for this repository
        locales_file = os.path.join(sources_path, repository_id + '.txt')
        with open(locales_file, 'r') as locales:
            supported_locales = locales.read().splitlines()

        supported_repositories[repository_id] = {
            'folder_name': folder_name,
            'locales': supported_locales
        }

        # Store the acceptable cache file names. Paths are built with
        # os.path.join, like the glob pattern used later to list the
        # existing files, so both sides always use the same separator.
        for locale in supported_locales:
            cache_name = 'cache_{0}_{1}.php'.format(locale, repository_id)
            known_cache_files.add(
                os.path.join(storage_path, locale, cache_name))

    # List all .txt files in /sources
    print('--\nAnalyzing sources in config/sources')
    excluded_files = ['gaia_versions']

    need_cleanup = False
    for txtfile in glob.glob(os.path.join(sources_path, '*.txt')):
        filename = os.path.splitext(os.path.basename(txtfile))[0]
        if filename in supported_repositories or filename in excluded_files:
            continue
        print('{0}.txt is not a supported repository.'.format(filename))
        need_cleanup = True
        if args.delete:
            print('Removing file: ' + txtfile)
            os.remove(txtfile)
    if not need_cleanup:
        print('Nothing to remove.')

    # Check all repositories for extra folders
    print('--\nAnalyzing folders in supported repositories')
    # Besides standard VCS folders or templates, we need to exclude some
    # locales on mozilla.org:
    # hi: is a fake locale used to activate legal-docs, but not actually
    #     supported by mozilla.org
    exclusions = {
        'firefox_ios': ['.git', 'templates'],
        'mozilla_org': ['.git', 'en-US', 'hi']
    }
    hg_path = config_parser.get('config', 'local_hg')
    git_path = config_parser.get('config', 'local_git')

    need_cleanup = False
    for repository_id, repository in supported_repositories.items():
        # Check if the folder exists as a Mercurial repository. If it doesn't
        # assume it's a Git repository.
        print('--\nAnalyze ' + repository_id)
        folder_path = os.path.join(hg_path, repository['folder_name'])
        if not os.path.isdir(folder_path):
            folder_path = os.path.join(git_path, repository['folder_name'])
        if not os.path.isdir(folder_path):
            # Repository not cloned locally: there is nothing to clean up
            print('Folder not found, skipping: ' + folder_path)
            continue

        ignored_folders = exclusions.get(repository_id, [])
        for folder in sorted(_list_subfolders(folder_path)):
            if folder in ignored_folders or folder in repository['locales']:
                continue
            # This folder is inside the repository but doesn't match
            # any supported locale.
            print('{0} is not a supported locale'.format(folder))
            need_cleanup = True
            if args.delete:
                full_path = os.path.join(folder_path, folder)
                print('Removing folder: ' + full_path)
                shutil.rmtree(full_path)
    if not need_cleanup:
        print('Nothing to remove.')

    # Check cache files
    print('--\nAnalyze cache files in TMX')

    need_cleanup = False
    for folder in _list_subfolders(storage_path):
        # Ignore reference locale
        if folder == 'en-US':
            continue
        for filename in glob.glob(os.path.join(storage_path, folder, '*.php')):
            if filename in known_cache_files:
                continue
            print('{0} is not a known cache file'.format(filename))
            need_cleanup = True
            if args.delete:
                print('Removing file: ' + filename)
                os.remove(filename)
    if not need_cleanup:
        print('Nothing to remove.')
153+
154+
# Run the clean-up only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

app/scripts/tmx_mozillaorg

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,13 @@ error_log('Mozilla.org: extraction of strings');
1414
foreach (Files::getFilenamesInFolder(GIT . 'mozilla_org/') as $locale) {
1515
$path = GIT . "mozilla_org/{$locale}/";
1616

17-
if (! is_dir($path)) {
17+
/*
18+
'hi' is a fake locale on mozilla.org: it's not actually supported,
19+
but we have a folder to enable legal documents in Bedrock. That
20+
doesn't mean that we need to create a TMX for it.
21+
*/
22+
$excluded_locales = ['hi'];
23+
if (! is_dir($path) || in_array($locale, $excluded_locales)) {
1824
continue;
1925
}
2026

app/scripts/tmx_xliff

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ if (! isset($supported_projects[$project_name])) {
3636
error_log('Extraction of strings from XLIFF file');
3737

3838
$base_path = $supported_projects[$project_name]['base_path'];
39-
foreach (Files::getFilenamesInFolder($base_path, ['templates', 'README']) as $locale) {
39+
foreach (Files::getFilenamesInFolder($base_path, ['templates', 'README.md']) as $locale) {
4040
$out_translation = '';
4141
$total_strings = 0;
4242

0 commit comments

Comments
 (0)