Skip to content

Commit

Permalink
Minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
kojiishi committed Aug 30, 2021
1 parent 604552d commit 003f234
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 33 deletions.
6 changes: 6 additions & 0 deletions README.md
Expand Up @@ -65,6 +65,12 @@ of the [Unicode Character Database] in browsers.
Please see [LineBreak.js] for an example of the generated functions
and [LineBreak.html] for an example usage.

The following command generates a JavaScript file for the [Line_Break property]
using `js/template.js` as the template file:
```sh
unicodedata-reader lb -t js/template.js
```

[`UnicodeDataCompressor` class]: https://github.com/kojiishi/unicodedata-reader/blob/main/unicodedata_reader/compressor.py
[LineBreak.html]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.html
[LineBreak.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.js
5 changes: 2 additions & 3 deletions js/LineBreak.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 5 additions & 6 deletions js/template.js
@@ -1,14 +1,12 @@
const u${PROP_NAME}Values = [$VALUE_LIST];
const u${PROP_NAME}AsInt = (function () {
const bytes = atob("$BASE64");
const u${NAME}AsInt = (function () {
const bytes = atob("$BASE64BYTES");
const len = bytes.length;
const entries = []
let value = 0;
for (let i = 0; i < len; ++i) {
const byte = bytes.charCodeAt(i);
if (byte & 0x80) {
value |= byte & 0x7F;
value <<= 7;
value = (value | (byte & 0x7F)) << 7;
continue;
}
value |= byte;
Expand All @@ -24,4 +22,5 @@ const u${PROP_NAME}AsInt = (function () {
}
}
})();
function u${PROP_NAME}(c) { return u${PROP_NAME}Values[u${PROP_NAME}AsInt(c)]; }
const u${NAME}Values = [$VALUE_LIST];
function u${NAME}(c) { return u${NAME}Values[u${NAME}AsInt(c)]; }
2 changes: 2 additions & 0 deletions precommit.sh
@@ -1,6 +1,8 @@
#!/bin/bash
set -e

unicodedata-reader lb -vt js/template.js

yapf -ir -vv .
tox -p
pytype unicodedata_reader
3 changes: 0 additions & 3 deletions tests/entry_test.py
Expand Up @@ -49,7 +49,6 @@ def test_missing_directive():
'3000 ; U\n',
]
entries = UnicodeDataEntries(lines=lines)
entries.ensure_multi_iterable()
assert entries.value(0x001F) == 'R'
assert entries.value(0x2FFF) == 'R'
assert entries.value(0x3000) == 'U'
Expand All @@ -67,7 +66,6 @@ def test_missing_directive_lb():
'# @missing: 0000..10FFFF; XX\n',
]
entries = UnicodeLineBreakDataEntries(lines=lines)
entries.ensure_multi_iterable()
assert entries.value(0x33FF) == 'XX'
for code in range(0x3400, 0x4DC0):
assert entries.value(code) == 'ID'
Expand All @@ -85,7 +83,6 @@ def test_missing_directive_vo():
'# @missing: 0000..10FFFF; R\n',
]
entries = UnicodeVerticalOrientationDataEntries(lines=lines)
entries.ensure_multi_iterable()
assert entries.value(0x23FF) == 'R'
for code in range(0x2400, 0x2460):
assert entries.value(code) == 'U'
Expand Down
23 changes: 14 additions & 9 deletions unicodedata_reader/cli.py
Expand Up @@ -74,7 +74,7 @@ def _init_logging(verbose):

class UnicodeDataCli(object):
def __init__(self):
self.parse_args()
self._parse_args()

def _columns(self) -> Dict[str, Callable[[int, str], Any]]:
columns = self._core_columns()
Expand Down Expand Up @@ -119,17 +119,22 @@ def substitute_template(self, template: pathlib.Path,
compressor = UnicodeDataCompressor(entries)
compressor.substitute_template(template, name=self.name, output=output)

def parse_args(self):
def _parse_args(self):
parser = argparse.ArgumentParser()
parser.add_argument('text', nargs='*')
parser.add_argument('text',
nargs='*',
help='show properties for the text')
parser.add_argument('-f', '--no-cache', action='store_true')
parser.add_argument('-n', '--name')
parser.add_argument('-t', '--template', type=pathlib.Path)
parser.add_argument('--name', help='$NAME in the template')
parser.add_argument('-t',
'--template',
type=pathlib.Path,
help='generate a file from the template')
parser.add_argument('-o', '--output', type=pathlib.Path)
parser.add_argument("-v",
"--verbose",
help="increase output verbosity",
action="count",
parser.add_argument('-v',
'--verbose',
help='increase output verbosity',
action='count',
default=0)
parser.parse_args(namespace=self)
_init_logging(self.verbose) # pytype: disable=attribute-error
Expand Down
4 changes: 2 additions & 2 deletions unicodedata_reader/compressor.py
Expand Up @@ -82,8 +82,8 @@ def substitute_template(self,
len(bytes), len(base64bytes), len(values_for_int),
value_bits)
mapping = {
'PROP_NAME': name,
'BASE64': base64bytes.decode('ascii'),
'NAME': name,
'BASE64BYTES': base64bytes.decode('ascii'),
'VALUE_BITS': str(value_bits),
'VALUE_MASK': str((1 << value_bits) - 1),
'VALUE_LIST': ','.join(f'"{v}"' for v in values_for_int),
Expand Down
21 changes: 11 additions & 10 deletions unicodedata_reader/entry.py
Expand Up @@ -171,12 +171,13 @@ def __init__(self,
converter=None):
self._missing_entries = self._default_missing_entries()
self.name = name
self._values_for_int = None # type: list

if entries is not None:
self._entries = entries
else:
assert lines is not None
self._load_lines(lines, converter=converter)
self._values_for_int = None # type: list

def _default_missing_entries(self) -> List[UnicodeDataEntry]:
return []
Expand All @@ -193,16 +194,16 @@ def _load_comment(self, comment: str, start_index: int):
self._missing_entries.extend(entries)
assert self._missing_entries

def ensure_multi_iterable(self):
def _ensure_multi_iterable(self):
if isinstance(self._entries, types.GeneratorType):
self._entries = tuple(self._entries)

def __iter__(self):
self.ensure_multi_iterable()
self._ensure_multi_iterable()
return self._entries.__iter__()

def __len__(self):
self.ensure_multi_iterable()
self._ensure_multi_iterable()
return len(self._entries)

def missing_value(self, code: int):
Expand Down Expand Up @@ -235,12 +236,12 @@ def fill_missing_values(self):

def unicodes(self) -> Iterable[int]:
"""Returns an iterable of the Unicode code points defined in these entries."""
self.ensure_multi_iterable()
self._ensure_multi_iterable()
return itertools.chain(*(e.range() for e in self._entries))

def value(self, code: int):
"""Returns the value for the given code point."""
self.ensure_multi_iterable()
self._ensure_multi_iterable()
for entry in self._entries:
if code < entry.min:
return self.missing_value(code)
Expand All @@ -254,7 +255,7 @@ def values_for_code(self) -> Iterable[Any]:
The list includes missing values,
so that `tuple(values_for_code())[code]` is equal to `value(code)`.
"""
self.ensure_multi_iterable()
self._ensure_multi_iterable()
return UnicodeDataEntry.values_for_code(self._entries,
self.missing_value)

Expand All @@ -278,8 +279,8 @@ def map_values_to_int(self):
On return, the original values are stored in `self.value_list`.
"""
assert self.values_for_int() is None
self.ensure_multi_iterable()
assert self._values_for_int is None
self._ensure_multi_iterable()
value_map = {}
for entry in self._entries:
assert not isinstance(entry.value, int)
Expand All @@ -295,7 +296,7 @@ def map_values_to_int(self):

def to_dict(self) -> Dict[int, Any]:
"""Returns a `dict` of values with a Unicode code point as the key."""
self.ensure_multi_iterable()
self._ensure_multi_iterable()
dict = {}
for entry in self._entries:
for code in entry.range():
Expand Down

0 comments on commit 003f234

Please sign in to comment.