In [42]:
# Small sample input: numbered rule definitions ("id: ..."), a blank line,
# then the candidate messages, one per line.
example = """
0: 4 1 5
1: 2 3 | 3 2
2: 4 4 | 5 5
3: 4 5 | 5 4
4: "a"
5: "b"

ababbb
bababa
abbbab
aaabbb
aaaabbb
"""

# Read the whole input file into memory. The encoding is pinned so that
# decoding does not depend on the platform's locale default.
with open("day19.txt", "r", encoding="utf-8") as f:
    data = f.read()

In [67]:
from typing import Dict, List
from re import fullmatch

class Rule:
    """Common base for all rule kinds; carries only the numeric rule id."""

    def __init__(self, id: int):
        self.id = id

    def to_regex(self, rules: Dict[int, "Rule"]) -> str:
        """Return a regex for this rule; subclasses must override.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError

class PatternRule(Rule):
    """Rule that stands for a fixed piece of text, such as the "a" in `4: "a"`."""

    def __init__(self, id: int, pattern: str):
        super().__init__(id)
        self.pattern = pattern

    def to_regex(self, rules: Dict[int, "Rule"]) -> str:
        """Return the stored pattern as-is (it is not escaped); `rules` is unused."""
        return self.pattern

    def __eq__(self, other):
        """Equal only to another PatternRule with the same id and pattern."""
        if not isinstance(other, PatternRule):
            return False
        return self.id == other.id and self.pattern == other.pattern

    def __repr__(self):
        return f"PatternRule({self.id}, '{self.pattern}')"

class SequencesRule(Rule):
    """Rule defined as one or more alternative sequences of other rule ids."""

    def __init__(self, id: int, sequences: List[List[int]]):
        super().__init__(id)
        self.sequences = sequences

    def to_regex(self, rules: Dict[int, "Rule"]) -> str:
        """Expand this rule into a non-capturing group of alternatives.

        Each id in a sequence is looked up in `rules` and expanded by calling
        its own `to_regex`; the pieces of one sequence are concatenated and
        the sequences are joined with `|`. A missing id raises KeyError.
        """
        alternatives = []
        for sequence in self.sequences:
            pieces = [rules[seqid].to_regex(rules) for seqid in sequence]
            alternatives.append("".join(pieces))
        return '(?:' + "|".join(alternatives) + ')'

    def __eq__(self, other):
        """Equal only to another SequencesRule with the same id and sequences."""
        if not isinstance(other, SequencesRule):
            return False
        return self.id == other.id and self.sequences == other.sequences

    def __repr__(self):
        return f"SequencesRule({self.id}, {repr(self.sequences)})"

class RegexRule(Rule):
    """Rule whose regular expression is supplied directly by the caller.

    Used to substitute a hand-written regex for a rule instead of deriving
    it from sub-rules.
    """

    def __init__(self, id: int, pattern: str):
        # Delegate id handling to the base class, as the sibling rule
        # classes do, instead of assigning self.id by hand.
        super().__init__(id)
        self.pattern = pattern

    def to_regex(self, rules: Dict[int, "Rule"]) -> str:
        """Return the stored regex unchanged; `rules` is not consulted."""
        return self.pattern

    def __eq__(self, other):
        """Equal only to another RegexRule with the same id and pattern."""
        return isinstance(other, RegexRule) and self.id == other.id and self.pattern == other.pattern

    def __repr__(self):
        return f"RegexRule({self.id}, '{self.pattern}')"

# Hand-built rule table used by the checks below, keyed by rule id.
test_rules = dict(
    (rule.id, rule)
    for rule in (
        SequencesRule(0, [[4, 1, 5]]),
        SequencesRule(1, [[2, 3], [3, 2]]),
        SequencesRule(2, [[4, 4], [5, 5]]),
        SequencesRule(3, [[4, 5], [5, 4]]),
        PatternRule(4, "a"),
        PatternRule(5, "b"),
    )
)

# Expected regex for each rule id, listed leaves first so that a failure
# points at the simplest rule that is wrong.
test_cases = {
    4: r'a',
    5: r'b',
    2: r'(?:aa|bb)',
    3: r'(?:ab|ba)',
    1: r'(?:(?:aa|bb)(?:ab|ba)|(?:ab|ba)(?:aa|bb))',
    0: r'(?:a(?:(?:aa|bb)(?:ab|ba)|(?:ab|ba)(?:aa|bb))b)'
}

for test_id, test_regex in test_cases.items():
    # Generate once and reuse for both the report and the check.
    generated_regex = test_rules[test_id].to_regex(test_rules)
    print(f" #{test_id}: {generated_regex} (GOT)")
    print(f" vs: {test_regex} (EXPECTED)\n")

    assert generated_regex == test_regex

# Strings that the regex generated for rule 0 is expected to match in full.
test_cases = [
    "aaaabb",
    "aaabab",
    "abbabb",
    "abbbab",
    "aabaab",
    "aabbbb",
    "abaaab",
    "ababbb"
]

for test_case in test_cases:
    print(f"Testing {test_case} (should match)")
    full_pattern = test_rules[0].to_regex(test_rules)
    # fullmatch returns None on failure and an always-truthy match otherwise.
    assert fullmatch(full_pattern, test_case)
    print("PASS\n")

 #4: a (GOT)
 vs: a (EXPECTED)

 #5: b (GOT)
 vs: b (EXPECTED)

 #2: (?:aa|bb) (GOT)
 vs: (?:aa|bb) (EXPECTED)

 #3: (?:ab|ba) (GOT)
 vs: (?:ab|ba) (EXPECTED)

 #1: (?:(?:aa|bb)(?:ab|ba)|(?:ab|ba)(?:aa|bb)) (GOT)
 vs: (?:(?:aa|bb)(?:ab|ba)|(?:ab|ba)(?:aa|bb)) (EXPECTED)

 #0: (?:a(?:(?:aa|bb)(?:ab|ba)|(?:ab|ba)(?:aa|bb))b) (GOT)
 vs: (?:a(?:(?:aa|bb)(?:ab|ba)|(?:ab|ba)(?:aa|bb))b) (EXPECTED)

Testing aaaabb (should match)
PASS

Testing aaabab (should match)
PASS

Testing abbabb (should match)
PASS

Testing abbbab (should match)
PASS

Testing aabaab (should match)
PASS

Testing aabbbb (should match)
PASS

Testing abaaab (should match)
PASS

Testing ababbb (should match)
PASS



In [68]:
from re import findall, MULTILINE

class Parser:
    """Static helpers that extract rules and messages from the input text."""

    @staticmethod
    def parse_rules(text: str) -> Dict[int, Rule]:
        """Return every rule found in `text`, keyed by rule id.

        Pattern rules are inserted first and sequence rules second, so a
        sequence rule wins if both share an id.
        """
        pattern_rules = Parser.parse_pattern_rules(text)
        sequences_rules = Parser.parse_sequences_rules(text)

        rules_by_id = {}
        for rule in pattern_rules:
            rules_by_id[rule.id] = rule
        for rule in sequences_rules:
            rules_by_id[rule.id] = rule
        return rules_by_id

    @staticmethod
    def parse_messages(text: str) -> List[str]:
        """Return each non-empty line of `text` that contains no colon."""
        return findall(r'^[^\:\n]+$', text, MULTILINE)

    @staticmethod
    def parse_pattern_rules(text: str) -> List[PatternRule]:
        """Return a PatternRule for each line shaped like `<id>: "<word>"`."""
        matches = findall(r'^(\d+): "(\w+)"$', text, MULTILINE)
        return [PatternRule(int(rule_id), literal) for rule_id, literal in matches]

    @staticmethod
    def parse_sequences_rules(text: str) -> List[SequencesRule]:
        """Return a SequencesRule for each line shaped like `<id>: 1 2 | 3 4`.

        The part after the colon is split on `|` into alternatives, and each
        alternative is split on single spaces into integer rule ids.
        """
        parsed = []
        for rule_id, body in findall(r'^(\d+): (\d+(?: \d+)*(?: \| \d+(?: \d+)*)*)$', text, MULTILINE):
            sequences = []
            for group in body.split('|'):
                sequences.append([int(entry.strip()) for entry in group.strip().split(' ')])
            parsed.append(SequencesRule(int(rule_id), sequences))
        return parsed

# The parser must recover exactly the hand-written rules and messages
# from the sample text, in file order.
expected_pattern_rules = [
    PatternRule(4, "a"),
    PatternRule(5, "b")
]
assert Parser.parse_pattern_rules(example) == expected_pattern_rules

expected_sequences_rules = [
    SequencesRule(0, [[4, 1, 5]]),
    SequencesRule(1, [[2, 3], [3, 2]]),
    SequencesRule(2, [[4, 4], [5, 5]]),
    SequencesRule(3, [[4, 5], [5, 4]]),
]
assert Parser.parse_sequences_rules(example) == expected_sequences_rules

expected_messages = [
    "ababbb",
    "bababa",
    "abbbab",
    "aaabbb",
    "aaaabbb",
]
assert Parser.parse_messages(example) == expected_messages


In [78]:
# Larger sample input in the same format (rules, blank line, messages).
# This rebinds `example`, replacing the small sample defined earlier.
example = """
42: 9 14 | 10 1
9: 14 27 | 1 26
10: 23 14 | 28 1
1: "a"
11: 42 31
5: 1 14 | 15 1
19: 14 1 | 14 14
12: 24 14 | 19 1
16: 15 1 | 14 14
31: 14 17 | 1 13
6: 14 14 | 1 14
2: 1 24 | 14 4
0: 8 11
13: 14 3 | 1 12
15: 1 | 14
17: 14 2 | 1 7
23: 25 1 | 22 14
28: 16 1
4: 1 1
20: 14 14 | 1 15
3: 5 14 | 16 1
27: 1 6 | 14 18
14: "b"
21: 14 1 | 1 14
25: 1 1 | 1 14
22: 14 14
8: 42
26: 14 22 | 1 20
18: 15 15
7: 14 5 | 1 21
24: 14 1

abbbbbabbbaaaababbaabbbbabababbbabbbbbbabaaaa
bbabbbbaabaabba
babbbbaabbbbbabbbbbbaabaaabaaa
aaabbbbbbaaaabaababaabababbabaaabbababababaaa
bbbbbbbaaaabbbbaaabbabaaa
bbbababbbbaaaaaaaabbababaaababaabab
ababaaaaaabaaab
ababaaaaabbbaba
baabbaaaabbaaaababbaababb
abbbbabbbbaaaababbbbbbaaaababb
aaaaabbaabaaaaababaa
aaaabbaaaabbaaa
aaaabbaabbaaaaaaabbbabbbaaabbaabaaa
babaaabbbaaabaababbaabababaaab
aabbbbbaabbbaaaaaabbbbbababaaaaabbaaabba
"""

from re import fullmatch
rules = Parser.parse_rules(example)
messages = Parser.parse_messages(example)

print(f"Parsed {len(rules)} rules and {len(messages)} messages")

# First pass: rules exactly as parsed.
rule0_regex = rules[0].to_regex(rules)
matching_messages = sum(1 for message in messages if fullmatch(rule0_regex, message) is not None)
print(f"Matching Messages (Example 1): {matching_messages}")

# Second pass: rules 8 and 11 are swapped for hand-written regexes so the
# regex engine handles the repetition:
#   rule 8  -> one or more copies of rule 42
#   rule 11 -> n copies of rule 42 followed by n copies of rule 31
# Python's `re` cannot express "the same count on both sides", so rule 11
# enumerates n = 1..MAX_REPEATS explicitly. A message that needs a deeper
# nesting than MAX_REPEATS will not match.
MAX_REPEATS = 9

# Expanding a rule walks its whole sub-tree, so expand 42 and 31 once each
# rather than once per alternative.
rule42_regex = rules[42].to_regex(rules)
rules[8] = RegexRule(8, f"(?:{rule42_regex})+")
rule31_regex = rules[31].to_regex(rules)
rules[11] = RegexRule(11, '(?:' + "|".join(rule42_regex * i + rule31_regex * i for i in range(1, MAX_REPEATS + 1)) + ')')
rule0_regex = rules[0].to_regex(rules)
matching_messages = sum(1 for message in messages if fullmatch(rule0_regex, message) is not None)
print(f"Matching Messages (Example 2): {matching_messages}")

Parsed 31 rules and 15 messages
Matching Messages (Example 1): 3
Matching Messages (Example 2): 12


In [79]:
from re import fullmatch
rules = Parser.parse_rules(data)
messages = Parser.parse_messages(data)

print(f"Parsed {len(rules)} rules and {len(messages)} messages")

# Part 1: rules exactly as parsed.
rule0_regex = rules[0].to_regex(rules)
matching_messages = sum(1 for message in messages if fullmatch(rule0_regex, message) is not None)
print(f"Matching Messages (Part 1): {matching_messages}")

# Part 2: rule 8 becomes "one or more 42" and rule 11 becomes "n copies of
# 42 then n copies of 31". The balanced count cannot be written as a single
# `re` pattern, so n = 1..MAX_REPEATS is enumerated; deeper nesting than
# MAX_REPEATS would go unmatched.
MAX_REPEATS = 9

# Expand the two sub-rules once each instead of once per alternative.
rule42_regex = rules[42].to_regex(rules)
rules[8] = RegexRule(8, f"(?:{rule42_regex})+")
rule31_regex = rules[31].to_regex(rules)
rules[11] = RegexRule(11, '(?:' + "|".join(rule42_regex * i + rule31_regex * i for i in range(1, MAX_REPEATS + 1)) + ')')
rule0_regex = rules[0].to_regex(rules)
matching_messages = sum(1 for message in messages if fullmatch(rule0_regex, message) is not None)
print(f"Matching Messages (Part 2): {matching_messages}")

Parsed 132 rules and 332 messages
Matching Messages (Part 1): 147
Matching Messages (Part 2): 263
