-
Notifications
You must be signed in to change notification settings - Fork 3
/
test_compression.py
71 lines (66 loc) · 2.1 KB
/
test_compression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# -*- coding: utf-8 -*-
"""Tests for compression/decompression."""
import graphtransliterator
import graphtransliterator.compression as compression
from graphtransliterator import GraphTransliterator
import json
import pytest
# YAML "easy reading" configuration used as the fixture for the compression
# tests. YAML block structure is indentation-sensitive, so the entries of
# ``tokens``, ``rules`` and ``whitespace`` are nested under their section key,
# and each ``onmatch_rules`` item is a one-entry mapping nested under its "-".
# NOTE: the token class "contrained_rule" (sic) is not referenced by any rule
# below; its spelling is intentionally left untouched.
test_config = """
tokens:
  a: [class_a]
  b: [class_b]
  c: [class_c]
  " ": [wb]
  d: []
  Aa: [contrained_rule]
rules:
  a: A
  b: B
  <class_c> <class_c> a: A(AFTER_CLASS_C_AND_CLASS_C)
  (<class_c> b) a: A(AFTER_B_AND_CLASS_C)
  (<class_c> b b) a a: AA(AFTER_BB_AND_CLASS_C)
  a <class_c>: A(BEFORE_CLASS_C)
  a b (c <class_b>): AB(BEFORE_C_AND_CLASS_B)
  c: C
  c c: C*2
  a (b b b): A(BEFORE_B_B_B)
  d (c <class_a>): D(BEFORE_C_AND_CLASS_A)
  (b b) a: A(AFTER_B_B)
  <wb> Aa: A(ONLY_A_CONSTRAINED_RULE)
  d d: "<DD>"
  d: "<D>"
  " ": " "
onmatch_rules:
  -
    <class_a> <class_b> + <class_a> <class_b>: "!"
  -
    <class_a> + <class_b>: ","
whitespace:
  default: ' '
  consolidate: True
  token_class: wb
"""
def test_compression():
    """Check that a config survives compression and loads at every level."""
    transliterator = GraphTransliterator.from_yaml(test_config)

    # Round trip: compress the plain dump, expand it again, then reload it.
    packed = compression.compress_config(transliterator.dump())
    unpacked = compression.decompress_config(packed)
    reloaded = GraphTransliterator.load(unpacked)

    # Serialising with sorted keys makes the comparison order-independent.
    expected_json = json.dumps(transliterator.dump(), sort_keys=True)
    actual_json = json.dumps(reloaded.dump(), sort_keys=True)
    assert expected_json == actual_json

    # One past the highest supported level is expected to raise ValueError.
    with pytest.raises(ValueError):
        transliterator.dump(
            compression_level=graphtransliterator.HIGHEST_COMPRESSION_LEVEL + 1
        )

    # Level 0 (should likely not be called) must not emit compressed settings.
    level_zero = compression.compress_config(
        transliterator.dump(), compression_level=0
    )
    assert "compressed_settings" not in level_zero

    # dumps(): level 0 keeps the plain "tokens" key; levels 1 and 2 emit
    # "compressed_settings".
    assert '"tokens": ' in transliterator.dumps(compression_level=0)
    for level in (1, 2):
        assert '"compressed_settings"' in transliterator.dumps(
            compression_level=level
        )

    # Every supported level must load back into a working transliterator.
    for level in range(graphtransliterator.HIGHEST_COMPRESSION_LEVEL + 1):
        serialized = transliterator.dumps(compression_level=level)
        restored = transliterator.loads(serialized)
        assert restored.transliterate("a") == "A"