/
entry_test.py
114 lines (95 loc) · 3.6 KB
/
entry_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from unicodedata_reader import *
def test_entry_eq():
    """Check `UnicodeDataEntry` equality against each constructor argument."""
    reference = UnicodeDataEntry(1, 3, 'A')
    # A separately constructed entry with identical arguments compares equal.
    assert reference == UnicodeDataEntry(1, 3, 'A')
    # Changing any single argument (third, first, then second) must break
    # equality.
    for args in ((1, 3, 'B'), (2, 3, 'A'), (1, 2, 'A')):
        assert reference != UnicodeDataEntry(*args)
def test_value():
    """Check per-code lookups on entries built from two explicit ranges.

    Codes outside both ranges (0, 4 and the first code past the end) are
    expected to yield ``None``; `values_for_code()` is expected to produce
    the same per-code sequence starting from code 0.
    """
    data = UnicodeDataEntries(entries=(
        UnicodeDataEntry(1, 3, 'A'),
        UnicodeDataEntry(5, 6, 'B'),
    ))
    # The tuple index is the code; the element is the expected value.
    expected = (None, 'A', 'A', 'A', None, 'B', 'B')
    for index, wanted in enumerate(expected):
        assert data.value(index) == wanted
    # One past the last listed code has no value.
    assert data.value(len(expected)) is None
    assert tuple(data.values_for_code()) == expected
def test_from_pairs():
    """Check that `UnicodeDataEntry.from_pairs` groups pairs into ranges.

    Consecutive codes sharing a value are expected to merge into one entry;
    a gap in the codes starts a new entry even when the value is unchanged
    (see the 'C' pairs at 6, 8-9 and 11).
    """
    pairs = (
        (1, 'A'),
        (2, 'A'),
        (3, 'B'),
        (4, 'B'),
        (6, 'C'),
        (8, 'C'),
        (9, 'C'),
        (11, 'C'),
    )
    actual = tuple(UnicodeDataEntry.from_pairs(pairs))
    expected = (
        UnicodeDataEntry(1, 2, 'A'),
        UnicodeDataEntry(3, 4, 'B'),
        UnicodeDataEntry(6, 6, 'C'),
        UnicodeDataEntry(8, 9, 'C'),
        UnicodeDataEntry(11, 11, 'C'),
    )
    assert actual == expected
def test_missing_directive():
    """Check that a ``# @missing:`` line supplies the value for unlisted codes."""
    source_lines = [
        '# test\n',
        '# @missing: 0000..10FFFF; R\n',
        '0000..001F ; R\n',
        '3000 ; U\n',
    ]
    parsed = UnicodeDataEntries(lines=source_lines)
    # (code, expected value): 0x2FFF and 0x3001 are not listed explicitly and
    # are expected to fall back to the directive's value.
    lookups = (
        (0x001F, 'R'),
        (0x2FFF, 'R'),
        (0x3000, 'U'),
        (0x3001, 'R'),
    )
    for code, wanted in lookups:
        assert parsed.value(code) == wanted
    # The directive itself is expected to be recorded as a full-range entry.
    whole_range = UnicodeDataEntry(0, 0x10FFFF, 'R')
    assert parsed._missing_entries[0] == whole_range
def test_missing_directive_lb():
    """Check block-specific defaults in `UnicodeLineBreakDataEntries`.

    The input carries only comment lines. U+3400..U+4DBF is expected to
    resolve to 'ID' and U+20A0..U+20CF to 'PR', while the codes immediately
    before and after each block get the ``@missing`` value 'XX'.
    """
    source_lines = [
        '# test\n',
        '# - The unassigned code points in the following blocks default to "ID":\n',
        '# CJK Unified Ideographs Extension A: U+3400..U+4DBF\n',
        '# - The unassigned code points in the following block default to "PR":\n',
        '# Currency Symbols: U+20A0..U+20CF\n',
        '# @missing: 0000..10FFFF; XX\n',
    ]
    parsed = UnicodeLineBreakDataEntries(lines=source_lines)
    # (first code, last code, expected value) for each block, both inclusive.
    blocks = (
        (0x3400, 0x4DBF, 'ID'),
        (0x20A0, 0x20CF, 'PR'),
    )
    for first, last, wanted in blocks:
        assert parsed.value(first - 1) == 'XX'
        for code in range(first, last + 1):
            assert parsed.value(code) == wanted
        assert parsed.value(last + 1) == 'XX'
def test_missing_directive_vo():
    """Check block-specific defaults in `UnicodeVerticalOrientationDataEntries`.

    The input carries only comment lines. U+2400..U+245F is expected to
    resolve to 'U', while the neighbouring codes get the ``@missing``
    value 'R'.
    """
    source_lines = [
        '# test\n',
        '# Control Pictures & OCR U+2400..U+245F\n',
        '# @missing: 0000..10FFFF; R\n',
    ]
    parsed = UnicodeVerticalOrientationDataEntries(lines=source_lines)
    first, last = 0x2400, 0x245F  # inclusive bounds of the block
    assert parsed.value(first - 1) == 'R'
    for code in range(first, last + 1):
        assert parsed.value(code) == 'U'
    assert parsed.value(last + 1) == 'R'
def test_normalie_no_changes():
    """Check that `fill_missing_values()` leaves these entries unchanged.

    A second `UnicodeDataEntries` is built from the first, has its missing
    values filled, and must still iterate to the same entries.
    """
    original = UnicodeDataEntries(entries=(
        UnicodeDataEntry(1, 3, 'A'),
        UnicodeDataEntry(5, 6, 'B'),
    ))
    normalized = UnicodeDataEntries(entries=original)
    normalized.fill_missing_values()
    before = tuple(original)
    after = tuple(normalized)
    assert before == after
def test_fill_missing_values():
    """Check that `fill_missing_values()` fills a gap using `missing_value`.

    Code 11 is uncovered between the two ranges. With `missing_value`
    overridden to return 'B', the second range is expected to grow to
    11..20 and the entry count to stay at two.
    """

    class _AlwaysB(UnicodeDataEntries):
        # Override so every uncovered code reports 'B'.
        def missing_value(self, code: int):
            return 'B'

    filled = _AlwaysB(entries=(
        UnicodeDataEntry(0, 10, 'A'),
        UnicodeDataEntry(12, 20, 'B'),
    ))
    filled.fill_missing_values()
    assert len(filled) == 2
    expected = (
        UnicodeDataEntry(0, 10, 'A'),
        UnicodeDataEntry(11, 20, 'B'),
    )
    assert filled._entries == expected