-
Notifications
You must be signed in to change notification settings - Fork 3
/
test_ambiguity.py
82 lines (73 loc) · 2.35 KB
/
test_ambiguity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# -*- coding: utf-8 -*-
"""
Tests for ambiguity checking and reporting.
"""
from graphtransliterator import (
AmbiguousTransliterationRulesException,
GraphTransliterator,
TransliterationRule,
)
from graphtransliterator.ambiguity import _easyreading_rule
import pytest
def test_GraphParser_check_ambiguity():
    """Test for rules that can both match the same thing.

    Two YAML documents are loaded through ``GraphTransliterator.from_yaml``
    with ``check_for_ambiguity=True``; each load is expected to raise
    ``AmbiguousTransliterationRulesException``.
    """
    # The YAML bodies deliberately start at column 0: indentation is
    # significant in YAML, and the entries must be nested under their
    # ``tokens`` / ``rules`` / ``whitespace`` sections to be read as such.

    # Case 1: pairs of rules with the same shape whose class constraints
    # overlap, because tokens ``a`` and ``b`` both belong to class1 and class2.
    overlapping_class_rules = r"""
tokens:
  a: [token, class1, class2]
  b: [token, class1, class2]
  ' ': [wb]
rules:
  a <class1>: A<class1> # these should be ambiguous
  a <class2>: A<class2>
  <class1> a: <class1>A # these should be ambiguous
  <class2> a: <class2>A # these should be ambiguous
  (<class1> b) a (b <class2>): A # ambigous
  (<class2> b) a (b <class1>): A # ambiguous
  a: A # not ambiguous
whitespace:
  default: ' '
  token_class: 'wb'
  consolidate: true
"""
    with pytest.raises(AmbiguousTransliterationRulesException):
        GraphTransliterator.from_yaml(
            overlapping_class_rules, check_for_ambiguity=True
        )

    # Case 2: check that ambiguity matches if rules are of different shape
    # (one rule constrains what precedes ``a``, the other what follows it).
    different_shape_rules = """
tokens:
  a: []
  ' ': [wb]
rules:
  <wb> a: _A
  a <wb>: A_
  a: a
  ' ': ' '
whitespace:
  default: " " # default whitespace token
  consolidate: true # whitespace should be consolidated
  token_class: wb # whitespace token class
"""
    with pytest.raises(AmbiguousTransliterationRulesException):
        GraphTransliterator.from_yaml(
            different_shape_rules, check_for_ambiguity=True
        )
def test_GraphTransliterator_easy_reading():
    """Check the easy-reading rendering produced by ``_easyreading_rule``.

    Each case builds a ``TransliterationRule`` from positional arguments and
    compares the rendered string with the expected notation: classes appear
    as ``<name>`` and, when neighbouring tokens are present, each side's
    context is wrapped in parentheses.
    """
    # Judging by the expected strings, the five list arguments are, in order:
    # previous classes, previous tokens, matched tokens, next tokens, next
    # classes. NOTE(review): the leading "" and trailing 0 are presumably the
    # production and the cost -- confirm against TransliterationRule.
    cases = [
        # Class constraints on both sides, no neighbouring tokens.
        (("", ["class_a"], [], ["a"], [], ["class_a"], 0), "<class_a> a <class_a>"),
        # Neighbouring tokens on both sides, no class constraints.
        (("", [], ["b"], ["a"], ["b"], [], 0), "(b) a (b)"),
        # Both class constraints and neighbouring tokens on each side.
        (
            ("", ["class_a"], ["b"], ["a"], ["b"], ["class_a"], 0),
            "(<class_a> b) a (b <class_a>)",
        ),
    ]
    for rule_args, expected in cases:
        rendered = _easyreading_rule(TransliterationRule(*rule_args))
        # Include the inputs so a failure identifies which case broke.
        assert rendered == expected, (rule_args, rendered)