Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

storage: Preserve structured IDs on round trip #4091

Merged
merged 2 commits into from Sep 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
115 changes: 89 additions & 26 deletions translate/storage/base.py
Expand Up @@ -18,6 +18,7 @@

"""Base classes for storage interfaces."""

from collections import OrderedDict
import codecs
import logging
import pickle
Expand Down Expand Up @@ -889,38 +890,100 @@ def merge_on(self):
return "id"


class UnitId:
    """Structured unit identifier.

    An identifier is a list of ``(element, key)`` tuples where ``element`` is
    either ``'key'`` (a mapping key) or ``'index'`` (a list position).
    """

    KEY_SEPARATOR = "."
    INDEX_SEPARATOR = ""

    def __init__(self, parts):
        """Store the list of ``(element, key)`` tuples making up the id."""
        self.parts = parts

    def __str__(self):
        """Render the id, e.g. ``.key[0].other`` with the default separators.

        :raises ValueError: if a part has an element other than ``'key'``
            or ``'index'``.
        """
        def fmt(element, key):
            if element == 'key':
                return '{}{}'.format(self.KEY_SEPARATOR, key)
            if element == 'index':
                return '{}[{}]'.format(self.INDEX_SEPARATOR, key)
            raise ValueError('Unsupported element: {}'.format(element))

        return ''.join(fmt(*part) for part in self.parts)

    def __add__(self, other):
        """Return a new id of the same class with ``other`` parts appended.

        :param other: list of ``(element, key)`` tuples.
        :raises ValueError: if ``other`` is not a list.
        """
        if not isinstance(other, list):
            raise ValueError('Not supported type for add: {}'.format(type(other)))
        return self.__class__(self.parts + other)

    @classmethod
    def from_string(cls, text):
        """Parse the string form produced by ``__str__`` back into an id.

        :param text: string identifier, with or without leading separator.
        :raises ValueError: if an index between brackets is not an integer.
        """
        result = []
        # Strip possible leading separator
        if text.startswith(cls.KEY_SEPARATOR):
            text = text[len(cls.KEY_SEPARATOR):]
        for item in text.split(cls.KEY_SEPARATOR):
            name, bracket, rest = item.partition('[')
            if bracket and item.endswith(']'):
                # The key shares the item with its index when the index
                # separator is empty ("key[0]"); it must not be dropped or
                # the id would not survive a str()/from_string() round trip.
                if name:
                    result.append(("key", name))
                # Chained indexes ("key[0][1]") are emitted by __str__ for
                # nested lists, so accept any number of bracket groups.
                for pos in rest[:-1].split(']['):
                    result.append(("index", int(pos)))
            else:
                result.append(("key", item))
        return cls(result)


class DictUnit(TranslationUnit):
    """Translation unit that stores its value into a nested dict/list tree."""

    # Class used to parse and build structured identifiers.
    IdClass = UnitId
    # Factory for mappings created while walking towards the final key.
    DefaultDict = OrderedDict

    def __init__(self, source=None):
        super().__init__(source)
        # Parsed structured id; filled lazily from the string id when unset.
        self._unitid = None

    def storevalue(self, output, value, override_key=None):
        """Store ``value`` in ``output`` at the place described by the unit id.

        Containers missing on the way are created: a list when the following
        part is an index, ``DefaultDict()`` otherwise. A string found at an
        intermediate key is replaced by such a container.

        :param output: root dict (or list) that receives the value.
        :param value: value to store.
        :param override_key: when given, used as the final mapping key instead
            of the last part of the id.
        :raises ValueError: if a part has an unsupported element type.
        """
        target = output
        if self._unitid is None:
            self._unitid = self.IdClass.from_string(self._id)
        parts = self._unitid.parts
        for pos, part in enumerate(parts[:-1]):
            element, key = part
            default = [] if parts[pos + 1][0] == 'index' else self.DefaultDict()
            if element == 'index':
                # NOTE(review): only appending is handled; a gap in indexes
                # would make the lookup below fail - presumably units arrive
                # in list order.
                if len(target) <= key:
                    target.append(default)
            elif element == 'key':
                if key not in target or isinstance(target[key], str):
                    target[key] = default
            else:
                raise ValueError('Unsupported element: {}'.format(element))
            target = target[key]
        # Compare against None so that an empty-string key is honoured.
        if override_key is not None:
            element, key = 'key', override_key
        else:
            element, key = parts[-1]
        if element == 'key':
            target[key] = value
        elif element == 'index':
            if len(target) <= key:
                target.append(value)
            else:
                target[key] = value
        else:
            raise ValueError('Unsupported element: {}'.format(element))

    def storevalues(self, output):
        """Store every value of this unit into ``output``."""
        self.storevalue(output, self.value)

    def getvalue(self):
        """Returns dictionary for serialization."""
        result = {}
        self.storevalues(result)
        return result

    def set_unitid(self, unitid):
        """Set both the string id and the structured id from ``unitid``."""
        self.setid(str(unitid))
        self._unitid = unitid


class DictStore(TranslationStore):
    """Store whose units serialize themselves into a shared dict tree."""

    def serialize_merge(self, d1, d2):
        """Merge ``d2`` into ``d1`` in place.

        Kept for callers that still build per-unit dictionaries. A tuple
        value in ``d2`` is read as ``(index, item)`` addressing a list.

        :param d1: destination mapping, modified in place.
        :param d2: mapping with the values to merge.
        """
        for k in d2:
            incoming = d2[k]
            if k not in d1:
                # A positional item for a list that does not exist yet
                # starts a new list.
                d1[k] = [incoming[1]] if isinstance(incoming, tuple) else incoming
                continue
            current = d1[k]
            if isinstance(current, dict) and isinstance(incoming, dict):
                self.serialize_merge(current, incoming)
            elif isinstance(current, list) and isinstance(incoming, tuple):
                index, item = incoming[0], incoming[1]
                if len(current) <= index:
                    current.append(item)
                elif isinstance(item, dict):
                    current[index].update(item)
                else:
                    current[index] = item
            elif isinstance(current, list) and isinstance(incoming, list):
                current.extend(incoming)
            else:
                d1[k] = incoming

    def serialize_units(self, output):
        """Let every unit store its values into ``output``.

        Units write straight into ``output``; merging ``unit.getvalue()``
        as well would store each unit a second time.
        """
        for unit in self.unit_iter():
            unit.storevalues(output)
78 changes: 32 additions & 46 deletions translate/storage/jsonl10n.py
Expand Up @@ -119,18 +119,14 @@ def __str__(self):
"""Converts to a string representation."""
return json.dumps(self.getvalue(), separators=(',', ': '), indent=4, ensure_ascii=False)

def getkey(self):
    """Return the unit id with all leading dots removed."""
    identifier = self.getid()
    return identifier.lstrip('.')

def converttarget(self):
    """Return the target, converted with ``self._type`` unless that is a ``str`` type."""
    kind = self._type
    if not issubclass(kind, str):
        return kind(self.target)
    return self.target

def getvalue(self):
    """Return value to be stored in JSON file."""
    key = self.getkey()
    converted = self.converttarget()
    return {key: converted}
def storevalues(self, output):
    """Store the converted target into ``output`` via ``storevalue``."""
    converted = self.converttarget()
    self.storevalue(output, converted)


class JsonFile(base.DictStore):
Expand Down Expand Up @@ -158,7 +154,7 @@ def serialize(self, out):
out.write(json.dumps(units, **self.dump_args).encode(self.encoding))
out.write(b'\n')

def _extract_units(self, data, stop=None, prev="", name_node=None, name_last_node=None, last_node=None):
def _extract_units(self, data, stop=None, prev=None, name_node=None, name_last_node=None, last_node=None):
"""Recursive function to extract items from the data files

:param data: the current branch to walk down
Expand All @@ -168,21 +164,23 @@ def _extract_units(self, data, stop=None, prev="", name_node=None, name_last_nod
:param name_last_node: the name of the last node
:param last_node: the last list or dict
"""
if prev is None:
prev = self.UnitClass.IdClass([])
if isinstance(data, dict):
for k, v in data.items():
for x in self._extract_units(v, stop, "%s.%s" % (prev, k), k, None, data):
for x in self._extract_units(v, stop, prev + [('key', k)], k, None, data):
yield x
elif isinstance(data, list):
for i, item in enumerate(data):
for x in self._extract_units(item, stop, "%s[%s]" % (prev, i), i, name_node, data):
for x in self._extract_units(item, stop, prev + [('index', i)], i, name_node, data):
yield x
# apply filter
elif (stop is None or
(isinstance(last_node, dict) and name_node in stop) or
(isinstance(last_node, list) and name_last_node in stop)):

unit = self.UnitClass(data, name_node)
unit.setid(prev)
unit.set_unitid(prev)
yield unit

def parse(self, input):
Expand Down Expand Up @@ -211,17 +209,8 @@ def parse(self, input):

class JsonNestedUnit(JsonUnit):
    """JSON unit whose dotted id addresses a value inside nested containers."""

    def getkey(self):
        """Return the id, without leading dots, split on ``.``."""
        dotted = self.getid().lstrip('.')
        return dotted.split('.')

    def getvalue(self):
        """Wrap the converted target in one mapping per id segment.

        A segment of the form ``name[pos]`` wraps the value in a
        ``(pos, value)`` tuple before it is stored under ``name``.
        """
        nested = self.converttarget()
        # Build from the innermost segment outwards.
        for segment in self.getkey()[::-1]:
            if '[' in segment and segment.endswith(']'):
                segment, position = segment[:-1].split('[')
                nested = (int(position), nested)
            nested = OrderedDict({segment: nested})
        return nested

    def storevalues(self, output):
        """Store the converted target into ``output`` via ``storevalue``."""
        converted = self.converttarget()
        self.storevalue(output, converted)


class JsonNestedFile(JsonFile):
Expand All @@ -231,15 +220,15 @@ class JsonNestedFile(JsonFile):


class WebExtensionJsonUnit(JsonUnit):
def getvalue(self):
def storevalues(self, output):
value = OrderedDict((
('message', self.target),
))
if self.notes:
value['description'] = self.notes
if self.placeholders:
value['placeholders'] = self.placeholders
return {self.getid(): value}
self.storevalue(output, value)


class WebExtensionJsonFile(JsonFile):
Expand All @@ -253,7 +242,7 @@ class WebExtensionJsonFile(JsonFile):

UnitClass = WebExtensionJsonUnit

def _extract_units(self, data, stop=None, prev="", name_node=None, name_last_node=None, last_node=None):
def _extract_units(self, data, stop=None, prev=None, name_node=None, name_last_node=None, last_node=None):
for item, value in data.items():
unit = self.UnitClass(
value.get('message', ''),
Expand Down Expand Up @@ -303,18 +292,12 @@ def get_plurals(count, base):
self._rich_target = None
self._target = target

def getvalue(self):
def storevalues(self, output):
if not isinstance(self.target, multistring):
return super().getvalue()

ret = OrderedDict()
for i, value in enumerate(self.target.strings):
ret[self._item[i]] = value

path = self.getid().lstrip('.').split('.')[:-1]
for k in reversed(path):
ret = {k: ret}
return ret
super().storevalues(output)
else:
for i, value in enumerate(self.target.strings):
self.storevalue(output, value, override_key=self._item[i])


class I18NextFile(JsonNestedFile):
Expand All @@ -325,7 +308,9 @@ class I18NextFile(JsonNestedFile):

UnitClass = I18NextUnit

def _extract_units(self, data, stop=None, prev="", name_node=None, name_last_node=None, last_node=None):
def _extract_units(self, data, stop=None, prev=None, name_node=None, name_last_node=None, last_node=None):
if prev is None:
prev = self.UnitClass.IdClass([])
if isinstance(data, dict):
plurals_multiple = [key.rsplit('_', 1)[0] for key in data if key.endswith('_0')]
plurals_simple = [key.rsplit('_', 1)[0] for key in data if key.endswith('_plural')]
Expand Down Expand Up @@ -359,11 +344,12 @@ def _extract_units(self, data, stop=None, prev="", name_node=None, name_last_nod
sources.append(data[key])
items.append(key)
unit = self.UnitClass(multistring(sources), items)
unit.setid("%s.%s" % (prev, plural_base))
newid = prev + [('key', plural_base)]
unit.set_unitid(newid)
yield unit
continue

for x in self._extract_units(v, stop, "%s.%s" % (prev, k), k, None, data):
for x in self._extract_units(v, stop, prev + [('key', k)], k, None, data):
yield x
else:
parent = super()._extract_units(
Expand Down Expand Up @@ -414,7 +400,7 @@ def plural_tags(self):
locale = "en"
return plural_tags.get(locale, plural_tags['en'])

def _extract_units(self, data, stop=None, prev="", name_node=None, name_last_node=None, last_node=None):
def _extract_units(self, data, stop=None, prev=None, name_node=None, name_last_node=None, last_node=None):
for value in data:
translation = value.get('translation', '')
if isinstance(translation, dict):
Expand Down Expand Up @@ -443,16 +429,16 @@ def __init__(self, source=None, item=None, notes=None, placeholders=None, metada
super().__init__(source, item, notes, placeholders, **kwargs)
self.metadata = metadata or {}

def getvalue(self):
def storevalues(self, output):
if self.notes:
self.metadata['description'] = self.notes
identifier = self.getid()
if identifier == "@":
return self.metadata
return OrderedDict((
(identifier, self.target),
('@{}'.format(identifier), self.metadata),
))
for key, value in self.metadata.items():
self.storevalue(output, value, override_key=key)
else:
self.storevalue(output, self.target, override_key=identifier)
self.storevalue(output, self.metadata, override_key='@{}'.format(identifier))

def isheader(self):
return self._id == "@"
Expand All @@ -477,7 +463,7 @@ def __init__(self, inputfile=None, filter=None, **kwargs):
'ensure_ascii': False,
}

def _extract_units(self, data, stop=None, prev="", name_node=None, name_last_node=None, last_node=None):
def _extract_units(self, data, stop=None, prev=None, name_node=None, name_last_node=None, last_node=None):
# Extract metadata as header
metadata = OrderedDict([(key, value) for key, value in data.items() if key.startswith("@@")])
if metadata:
Expand Down
28 changes: 28 additions & 0 deletions translate/storage/test_jsonl10n.py
Expand Up @@ -52,6 +52,25 @@
]
}
"""
# JSON fixture with flat keys that literally contain "." and "[0]", plus list
# values, one of which nests a dict and another list.
# NOTE(review): the lines inside this literal carry no indentation here -
# confirm it matches the serializer's output format byte for byte.
JSON_COMPLEX = b"""{
"key": "value",
"key.key": "value",
"key[0]": "value2",
"key3": [
"one",
"two"
],
"key4": [
{
"nested": "one"
},
[
"one",
"two"
]
]
}
"""
JSON_GOI18N = b"""[
{
"id": "tag",
Expand Down Expand Up @@ -155,6 +174,15 @@ def test_bom(self):
store.serialize(out)
assert out.getvalue() == content

def test_complex(self):
    """Parsing and then serializing JSON_COMPLEX must reproduce it exactly."""
    parsed = self.StoreClass()
    parsed.parse(JSON_COMPLEX)

    buffer = BytesIO()
    parsed.serialize(buffer)
    serialized = buffer.getvalue()

    assert serialized == JSON_COMPLEX


class TestJSONNestedResourceStore(test_monolingual.TestMonolingualUnit):
StoreClass = jsonl10n.JsonNestedFile
Expand Down
17 changes: 17 additions & 0 deletions translate/storage/test_yaml.py
Expand Up @@ -417,3 +417,20 @@ def test_anchors(self):
store.parse(data)
assert len(store.units) == 5
assert bytes(store).decode('ascii') == data

def test_type_change(self):
    """Adding units parsed from a list value to a store that held a string.

    The store is parsed from ``original``, where ``days_on`` is a quoted
    string; every unit parsed from ``changed``, where ``days_on`` is a list,
    is then added to it. The serialized store must equal ``changed``.
    """
    # NOTE(review): the nested lines of both YAML literals sit at column 0
    # here - confirm the intended indentation under "en:".
    original = '''en:
days_on: '["Sunday", "Monday"]'
'''
    changed = '''en:
days_on:
- Sunday
- Monday
'''
    store = self.StoreClass()
    store.parse(original)
    update = self.StoreClass()
    update.parse(changed)
    # Feed the list-typed units into the store that was parsed from the string form.
    for unit in update.units:
        store.addunit(unit)
    assert bytes(store).decode('ascii') == changed