/
generate_data_file.py
159 lines (129 loc) · 7.34 KB
/
generate_data_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import gettext
import json
import logging
import os
import sys
from collections import OrderedDict
from ocds_babel import TRANSLATABLE_EXTENSION_METADATA_KEYWORDS
from ocds_babel.translate import (translate_codelist_data, translate_schema_data, translate_extension_metadata_data,
translate_markdown_data)
from .base import BaseCommand
from ocdsextensionregistry import EXTENSIONS_DATA, EXTENSION_VERSIONS_DATA
from ocdsextensionregistry.exceptions import CommandError
logger = logging.getLogger('ocdsextensionregistry')
def _translator(version, domain, localedir, language):
domain = '{}/{}/{}'.format(version.id, version.version, domain)
return gettext.translation(domain, localedir, languages=[language], fallback=language == 'en')
class Command(BaseCommand):
name = 'generate-data-file'
help = 'generates a data file in JSON format with all the information about versions of extensions'
def add_arguments(self):
self.add_argument('versions', nargs='*',
help="the versions of extensions to process (e.g. 'bids' or 'lots==master')")
self.add_argument('-d', '--locale-dir',
help='a directory containing MO files'),
self.add_argument('-l', '--languages',
help='a comma-separated list of translations to include (default all)'),
self.add_argument('--extensions-url', help="the URL of the registry's extensions.csv",
default=EXTENSIONS_DATA)
self.add_argument('--extension-versions-url', help="the URL of the registry's extension_versions.csv",
default=EXTENSION_VERSIONS_DATA)
def handle(self):
if self.args.languages and not self.args.locale_dir:
self.subparser.error('--locale-dir is required if --languages is set.')
data = OrderedDict()
languages = {'en'}
localedir = self.args.locale_dir
if localedir:
available_translations = [n for n in os.listdir(localedir) if os.path.isdir(os.path.join(localedir, n))]
if self.args.languages:
for language in self.args.languages.split(','):
if language in available_translations:
languages.add(language)
else:
self.subparser.error('translations to {} are not available'.format(language))
else:
languages.update(available_translations)
for version in self.versions():
# Add the extension's data.
if version.id not in data:
data[version.id] = OrderedDict([
('id', version.id),
('category', version.category),
('core', version.core),
('name', OrderedDict()),
('description', OrderedDict()),
('latest_version', None),
('versions', OrderedDict()),
])
# Add the version's metadata.
version_data = OrderedDict([
('id', version.id),
('date', version.date),
('version', version.version),
('base_url', version.base_url),
('download_url', version.download_url),
('metadata', version.metadata),
('schemas', OrderedDict()),
('codelists', OrderedDict()),
('docs', OrderedDict()),
('readme', OrderedDict()),
])
for language in languages:
# Update the version's metadata and add the version's schema.
translator = _translator(version, 'schema', localedir, language)
translation = translate_extension_metadata_data(version.metadata, translator, lang=language)
for key in TRANSLATABLE_EXTENSION_METADATA_KEYWORDS:
version_data['metadata'][key][language] = translation[key][language]
for name in ('record-package-schema.json', 'release-package-schema.json', 'release-schema.json'):
if name not in version_data['schemas']:
version_data['schemas'][name] = OrderedDict()
if name in version.schemas:
translation = translate_schema_data(version.schemas[name], translator)
version_data['schemas'][name][language] = translation
# Add the version's codelists.
if version.codelists:
translator = _translator(version, 'codelists', localedir, language)
for name in sorted(version.codelists):
if name not in version_data['codelists']:
version_data['codelists'][name] = OrderedDict()
codelist = version.codelists[name]
version_data['codelists'][name][language] = OrderedDict()
translation = [translator.gettext(fieldname) for fieldname in codelist.fieldnames]
version_data['codelists'][name][language]['fieldnames'] = translation
translation = translate_codelist_data(codelist, translator)
version_data['codelists'][name][language]['rows'] = translation
# Add the version's readme and documentation.
translator = _translator(version, 'docs', localedir, language)
translation = translate_markdown_data('README.md', version.remote('README.md'), translator)
version_data['readme'][language] = translation
for name in sorted(version.docs):
# We currently only handle Markdown files.
# find . -type f -not -path '*/.git/*' -not -name '*.csv' -not -name '*.json' -not -name '*.md'
# -not -name '.travis.yml' -not -name 'LICENSE'
if not name.endswith('.md'):
logger.warning('Not translating {} (no .md extension)'.format(name))
continue
if name not in version_data['docs']:
version_data['docs'][name] = OrderedDict()
translation = translate_markdown_data(name, version.docs[name], translator)
version_data['docs'][name][language] = translation
data[version.id]['versions'][version.version] = version_data
for _id in data:
# Determine the latest version.
versions = data[_id]['versions']
if len(versions) == 1:
latest_version = list(versions)[0]
elif 'master' in versions:
latest_version = 'master'
else:
dated = list(filter(lambda item: item[1]['date'], versions.items()))
if dated:
latest_version = sorted(dated, key=lambda item: item[1]['date'])[-1][0]
else:
raise CommandError("Couldn't determine latest version of {}".format(_id))
# Apply the latest version.
data[_id]['latest_version'] = latest_version
for field in ('name', 'description'):
data[_id][field] = data[_id]['versions'][latest_version]['metadata'][field]
json.dump(data, sys.stdout, ensure_ascii=False, indent=2, separators=(',', ': '))