-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_automata.py
109 lines (88 loc) · 2.91 KB
/
test_automata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import pytest
import random
import re
from automata.impl import *
from automata.parser import Parser
from automata.tree import *
from collections.abc import Iterator
ENGINES = [PositionAutomata, FollowAutomata]
REGEX_NON_MATCHES = {
"a": ["b"],
"ab": ["a"],
"a*": ["b", "bbb"],
"a|b": ["c", "ab"],
"(a*)*b": ["a", "aa"],
}
def generate_non_matches() -> Iterator[tuple[str, str, type[Automata]]]:
for pattern, matches in REGEX_NON_MATCHES.items():
for match in matches:
for engine in ENGINES:
yield pattern, match, engine
@pytest.mark.parametrize("pattern, non_match, engine", generate_non_matches())
def test_non_matches(pattern: str, non_match: str, engine: type[Automata]):
assert not automata_match(pattern, non_match, engine)
PATTERNS = {
"a",
"ab",
"a*",
"a|b",
"(a*)*b",
"a*b",
"ba|b",
"b|ab",
"b|a|b",
"b|ab|b",
"b|a*|b",
"((ab)c)",
"(a(bc))",
"a|(b*c)|a",
"a(ba)*b|a",
"a(ba*b)*",
"a|b*a",
"a*b*",
"(ac*)*|b*ac",
"ae|bf|cg|dh",
"(a|b)(a*|ba*|b*)*",
}
def generate_matches(amount: int) -> Iterator[tuple[str, str, type[Automata]]]:
for pattern in PATTERNS:
node = Parser(pattern).parse()
for match in {make_match(node, 5) for _ in range(amount)}:
for engine in ENGINES:
yield pattern, match, engine
@pytest.mark.parametrize("pattern, match, engine", generate_matches(10))
def test_generated_matches(pattern: str, match: str, engine: type[Automata]):
# Always check conformance to Python's stdlib regex matching
assert re.match(pattern, match) is not None
assert automata_match(pattern, match, engine=engine)
@pytest.mark.parametrize("engine", ENGINES)
def test_empty_pattern_match(engine: type[Automata]):
assert automata_match("", "", engine=engine)
# Utils and tests for them
def make_match(node: Node, loops: int = 2) -> str:
match node:
case Symbol(sym, _):
return sym
case Star(node):
iters = random.randrange(loops)
return "".join(make_match(node, loops) for _ in range(iters))
case Concat(left, right):
return f"{make_match(left, loops)}{make_match(right, loops)}"
case Alt(left, right):
if random.randint(1, 2) == 1:
return make_match(left, loops)
return make_match(right, loops)
MADE_MATCHES = {
"a": {"a"},
"ab": {"ab"},
"a|b": {"a", "b"},
"a*": {"a" * i for i in range(3)},
"a(b|c)d": {"abd", "acd"},
"a(ba)*b|a": {"a", "ab", "aa", "abab", "abaa", "ababab", "ababaa"},
}
@pytest.mark.parametrize("regex, matches", MADE_MATCHES.items())
def test_make_match(regex: str, matches: set[str]):
node = Parser(regex).parse()
# We've probably made enough examples in 100 iterations
made_matches = {make_match(node, 3) for _ in range(100)}
assert made_matches.issubset(matches)