# Day 19
## Part 1

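Build a single regular expression by recursively expanding rule 0 into its sub-rules, then count the messages that match that expression in full.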

In [45]:
import re
from functools import lru_cache
# Import hashable dictionary for caching
from pyrsistent import pmap


def parse_data(data):
    """Split raw puzzle text into its rules and its messages.

    The text is expected to hold the rule lines, a blank line, then one
    message per line.

    Args:
        data: Full puzzle input as a single string.

    Returns:
        A ``(rules, messages)`` tuple: ``rules`` is whatever ``parse_rules``
        returns for the rule lines, ``messages`` is a list of message strings.

    Raises:
        ValueError: if ``data`` contains no blank line separating the sections.
    """
    # Strip first and split only once: surrounding blank lines (for example a
    # file that ends with an empty line) would otherwise produce extra
    # sections and break the two-way unpacking.
    rules, messages = data.strip().split('\n\n', 1)
    return (parse_rules(rules.splitlines()), messages.strip().splitlines())


def parse_rules(lines):
    """Convert ``"<id>: <body>"`` lines into a hashable id -> body mapping.

    Ids and bodies are both kept as strings. The result is wrapped in a
    pyrsistent ``pmap`` so that it can be passed to the ``lru_cache``-decorated
    regex builders.
    """
    split_lines = (line.split(': ') for line in lines)
    return pmap({rule_id: body for rule_id, body in split_lines})


@lru_cache(maxsize=None)
def rule_regex(rules, n):
    """Return the regular-expression source that is equivalent to rule ``n``.

    Args:
        rules: Hashable mapping of rule id -> rule body (it must be hashable
            because the function is memoised with ``lru_cache``). A body is
            either a double-quoted literal such as ``"a"`` or a list of
            space-separated rule ids, with alternatives separated by ``" | "``.
        n: Id of the rule to expand (a key of ``rules``).

    Returns:
        The pattern as a string. Alternatives are wrapped in a parenthesised
        group; a rule with a single alternative is returned without a group.

    Note:
        Rules that refer back to themselves are not handled here and would
        recurse without bound.
    """
    body = rules[n]
    if body[0] == '"':
        # Terminal rule: use the whole quoted text (not just its first
        # character) and escape it so regex metacharacters match literally.
        closing = body.rfind('"')
        literal = body[1:closing] if closing > 0 else body[1:]
        return re.escape(literal)

    alt_regexes = [
        # A sequence of sub-rules is simply the concatenation of their patterns.
        ''.join(rule_regex(rules, x) for x in alt.split())
        for alt in body.split(' | ')
    ]
    if len(alt_regexes) == 1:
        return alt_regexes[0]
    return f"({'|'.join(alt_regexes)})"
    
    
def part_1(data):
    """Count the messages in ``data`` that match rule 0 in full.

    The generated pattern is printed before it is compiled and applied.
    """
    rules, messages = parse_data(data)
    pattern_source = rule_regex(rules, "0")
    print(pattern_source)
    compiled = re.compile(pattern_source)
    # Only whole-message matches count, hence fullmatch.
    matching = [message for message in messages if compiled.fullmatch(message)]
    return len(matching)

In [46]:
# Small worked example: six rules, a blank line, then five candidate messages.
test_data = '''0: 4 1 5
1: 2 3 | 3 2
2: 4 4 | 5 5
3: 4 5 | 5 4
4: "a"
5: "b"

ababbb
bababa
abbbab
aaabbb
aaaabbb
'''

# Last expression of the cell: the number of messages that fully match rule 0.
part_1(test_data)

a((aa|bb)(ab|ba)|(ab|ba)(aa|bb))b


2

In [47]:
# Read the puzzle input inside a context manager so the file handle is closed
# as soon as its contents have been read.
with open('input') as input_file:
    data = input_file.read()
part_1(data)

((b((a(a((b|a)(b|a)a|abb)|b(b(bb|ab)|abb))|b((aaa|(ba|bb)b)a|(aab|b(a(b|a)|ba))b))a|(a(b((a(b|a)|ba)b|(bb|ab)a)|a((a(b|a)|bb)a|(ab|aa)b))|b(a(b(ab|aa)|aba)|b(b(a(b|a)|bb)|a(ba|bb))))b)|a(b(((a(b|a)|ba)aa|(b(bb|aa)|aab)b)a|(((bb|aa)a|aab)a|((a(b|a)|ba)b|(ba|ab)a)b)b)|a((abaa|b((bb|aa)a|aab))b|(a(a(bb|ab)|bab)|b((a(b|a)|ba)b|aba))a)))b|((b(a((b(ba|bb)|a(ba|(b|a)b))b|((ba|(b|a)b)a|(ba|bb)b)a)|b(a((bb|(b|a)a)a|(a(b|a)|ba)b)|b((a(b|a)|ba)b|(ba|bb)a)))|a(a((a(ba|bb)|b(ba|ab))b|((bb|aa)a|bbb)a)|b(((bb|ab)b|(bb|aa)a)b|((bb|(b|a)a)a|(bb|aa)b)a)))b|((b(a(bbb|a(ba|(b|a)b))|b((bb|aa)b|bba))|a((a(bb|ab)|b(ba|(b|a)b))b|((ab|aa)b|(a(b|a)|ba)a)a))a|(a(((ab|aa)b|(bb|ab)a)b|aaaa)|b(((ba|(b|a)b)a|(ba|bb)b)a|((ab|aa)b|(b|a)(b|a)a)b))b)a)a)((b((a(a((b|a)(b|a)a|abb)|b(b(bb|ab)|abb))|b((aaa|(ba|bb)b)a|(aab|b(a(b|a)|ba))b))a|(a(b((a(b|a)|ba)b|(bb|ab)a)|a((a(b|a)|bb)a|(ab|aa)b))|b(a(b(ab|aa)|aba)|b(b(a(b|a)|bb)|a(ba|bb))))b)|a(b(((a(b|a)|ba)aa|(b(bb|aa)|aab)b)a|(((bb|aa)a|aab)a|((a(b|a)|ba)b|(ba|ab)a)b)b)|a((a

210

## Part 2

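The loops mean that the elements are repeated any number of times: rule 8 becomes one or more copies of rule 42, and rule 11 becomes some number of copies of rule 42 followed by the same number of copies of rule 31. I'm afraid I'm going to be lazy and hardcode this, listing the rule 11 repetition counts explicitly up to a fixed limit.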

In [72]:
@lru_cache(maxsize=None)
def rule_regex_2(rules, n, max_depth=10):
    """Return the regex source for rule ``n``, with looping rules 8 and 11.

    Rule ids ``'8'`` and ``'11'`` are special-cased instead of being read from
    ``rules``:

    * ``'8'`` becomes one or more repetitions of rule 42.
    * ``'11'`` becomes ``k`` copies of rule 42 followed by ``k`` copies of
      rule 31. Matching counts like this cannot be written as one repetition
      operator, so every ``k`` from 1 to ``max_depth`` is listed as its own
      alternative.

    Args:
        rules: Hashable mapping of rule id -> rule body (hashable because the
            function is memoised with ``lru_cache``).
        n: Id of the rule to expand (a key of ``rules``).
        max_depth: Largest repetition count ``k`` generated for rule 11.
            Messages needing deeper nesting will not match. Defaults to 10.

    Returns:
        The pattern as a string.

    Raises:
        ValueError: if ``max_depth`` is smaller than 1.
    """
    if max_depth < 1:
        raise ValueError("max_depth must be at least 1")

    body = rules[n]
    if body[0] == '"':
        # Terminal rule: use the whole quoted text (not just its first
        # character) and escape it so regex metacharacters match literally.
        closing = body.rfind('"')
        literal = body[1:closing] if closing > 0 else body[1:]
        return re.escape(literal)

    if n == '8':
        return f'({rule_regex_2(rules, "42", max_depth)})+'

    if n == '11':
        # Need to match 42 31, 42 42 31 31, 42 42 42 31 31 31 etc.
        # The two sub-patterns do not depend on the count, so build them once.
        head = rule_regex_2(rules, "42", max_depth)
        tail = rule_regex_2(rules, "31", max_depth)
        alt_regexes = [
            f'(({head}){{{i}}}({tail}){{{i}}})'
            for i in range(1, max_depth + 1)
        ]
        return f"({'|'.join(alt_regexes)})"

    alt_regexes = [
        # A sequence of sub-rules is simply the concatenation of their patterns.
        ''.join(rule_regex_2(rules, x, max_depth) for x in alt.split())
        for alt in body.split(' | ')
    ]
    if len(alt_regexes) == 1:
        return alt_regexes[0]
    return f"({'|'.join(alt_regexes)})"
    
    
def part_2(data):
    """Count the messages in ``data`` that fully match the looping rule 0.

    Identical in shape to part 1, but the pattern comes from ``rule_regex_2``.
    The generated pattern is printed before it is compiled and applied.
    """
    rules, messages = parse_data(data)
    pattern_source = rule_regex_2(rules, "0")
    print(pattern_source)
    compiled = re.compile(pattern_source)
    hits = 0
    for message in messages:
        # Only whole-message matches count, hence fullmatch.
        if compiled.fullmatch(message):
            hits += 1
    return hits

In [73]:
# Larger example in which rules 8 and 11 are still in their non-looping form
# ("8: 42" and "11: 42 31"), so it is evaluated with the part 1 solver.
test_data_2 = '''42: 9 14 | 10 1
9: 14 27 | 1 26
10: 23 14 | 28 1
1: "a"
11: 42 31
5: 1 14 | 15 1
19: 14 1 | 14 14
12: 24 14 | 19 1
16: 15 1 | 14 14
31: 14 17 | 1 13
6: 14 14 | 1 14
2: 1 24 | 14 4
0: 8 11
13: 14 3 | 1 12
15: 1 | 14
17: 14 2 | 1 7
23: 25 1 | 22 14
28: 16 1
4: 1 1
20: 14 14 | 1 15
3: 5 14 | 16 1
27: 1 6 | 14 18
14: "b"
21: 14 1 | 1 14
25: 1 1 | 1 14
22: 14 14
8: 42
26: 14 22 | 1 20
18: 15 15
7: 14 5 | 1 21
24: 14 1

abbbbbabbbaaaababbaabbbbabababbbabbbbbbabaaaa
bbabbbbaabaabba
babbbbaabbbbbabbbbbbaabaaabaaa
aaabbbbbbaaaabaababaabababbabaaabbababababaaa
bbbbbbbaaaabbbbaaabbabaaa
bbbababbbbaaaaaaaabbababaaababaabab
ababaaaaaabaaab
ababaaaaabbbaba
baabbaaaabbaaaababbaababb
abbbbabbbbaaaababbbbbbaaaababb
aaaaabbaabaaaaababaa
aaaabbaaaabbaaa
aaaabbaabbaaaaaaabbbabbbaaabbaabaaa
babaaabbbaaabaababbaabababaaab
aabbbbbaabbbaaaaaabbbbbababaaaaabbaaabba'''

# Baseline count for the non-looping rules, using the part 1 solver.
part_1(test_data_2)

((b(a(bb|ab)|b(a|b)(a|b))|a(bbb|a(bb|a(a|b))))b|(((aa|ab)a|bbb)b|((a|b)a|bb)aa)a)((b(a(bb|ab)|b(a|b)(a|b))|a(bbb|a(bb|a(a|b))))b|(((aa|ab)a|bbb)b|((a|b)a|bb)aa)a)(b(b(aba|baa)|a(b(ab|(a|b)a)|a(ba|ab)))|a(b((ab|(a|b)a)b|((a|b)a|bb)a)|a(bab|(ba|bb)a)))


3

In [74]:
# Same example with rules 8 and 11 written in their looping form
# ("8: 42 | 42 8" and "11: 42 31 | 42 11 31").
# Note: rule_regex_2 hard-codes the expansion of ids '8' and '11' rather than
# parsing these two rule bodies.
test_data_3 = '''42: 9 14 | 10 1
9: 14 27 | 1 26
10: 23 14 | 28 1
1: "a"
11: 42 31 | 42 11 31
5: 1 14 | 15 1
19: 14 1 | 14 14
12: 24 14 | 19 1
16: 15 1 | 14 14
31: 14 17 | 1 13
6: 14 14 | 1 14
2: 1 24 | 14 4
0: 8 11
13: 14 3 | 1 12
15: 1 | 14
17: 14 2 | 1 7
23: 25 1 | 22 14
28: 16 1
4: 1 1
20: 14 14 | 1 15
3: 5 14 | 16 1
27: 1 6 | 14 18
14: "b"
21: 14 1 | 1 14
25: 1 1 | 1 14
22: 14 14
8: 42 | 42 8
26: 14 22 | 1 20
18: 15 15
7: 14 5 | 1 21
24: 14 1

abbbbbabbbaaaababbaabbbbabababbbabbbbbbabaaaa
bbabbbbaabaabba
babbbbaabbbbbabbbbbbaabaaabaaa
aaabbbbbbaaaabaababaabababbabaaabbababababaaa
bbbbbbbaaaabbbbaaabbabaaa
bbbababbbbaaaaaaaabbababaaababaabab
ababaaaaaabaaab
ababaaaaabbbaba
baabbaaaabbaaaababbaababb
abbbbabbbbaaaababbbbbbaaaababb
aaaaabbaabaaaaababaa
aaaabbaaaabbaaa
aaaabbaabbaaaaaaabbbabbbaaabbaabaaa
babaaabbbaaabaababbaabababaaab
aabbbbbaabbbaaaaaabbbbbababaaaaabbaaabba'''

# Count for the looping rules, using the part 2 solver.
part_2(test_data_3)

(((b(a(bb|ab)|b(a|b)(a|b))|a(bbb|a(bb|a(a|b))))b|(((aa|ab)a|bbb)b|((a|b)a|bb)aa)a))+(((((b(a(bb|ab)|b(a|b)(a|b))|a(bbb|a(bb|a(a|b))))b|(((aa|ab)a|bbb)b|((a|b)a|bb)aa)a)){1}((b(b(aba|baa)|a(b(ab|(a|b)a)|a(ba|ab)))|a(b((ab|(a|b)a)b|((a|b)a|bb)a)|a(bab|(ba|bb)a)))){1})|((((b(a(bb|ab)|b(a|b)(a|b))|a(bbb|a(bb|a(a|b))))b|(((aa|ab)a|bbb)b|((a|b)a|bb)aa)a)){2}((b(b(aba|baa)|a(b(ab|(a|b)a)|a(ba|ab)))|a(b((ab|(a|b)a)b|((a|b)a|bb)a)|a(bab|(ba|bb)a)))){2})|((((b(a(bb|ab)|b(a|b)(a|b))|a(bbb|a(bb|a(a|b))))b|(((aa|ab)a|bbb)b|((a|b)a|bb)aa)a)){3}((b(b(aba|baa)|a(b(ab|(a|b)a)|a(ba|ab)))|a(b((ab|(a|b)a)b|((a|b)a|bb)a)|a(bab|(ba|bb)a)))){3})|((((b(a(bb|ab)|b(a|b)(a|b))|a(bbb|a(bb|a(a|b))))b|(((aa|ab)a|bbb)b|((a|b)a|bb)aa)a)){4}((b(b(aba|baa)|a(b(ab|(a|b)a)|a(ba|ab)))|a(b((ab|(a|b)a)b|((a|b)a|bb)a)|a(bab|(ba|bb)a)))){4})|((((b(a(bb|ab)|b(a|b)(a|b))|a(bbb|a(bb|a(a|b))))b|(((aa|ab)a|bbb)b|((a|b)a|bb)aa)a)){5}((b(b(aba|baa)|a(b(ab|(a|b)a)|a(ba|ab)))|a(b((ab|(a|b)a)b|((a|b)a|bb)a)|a(bab|(ba|bb)a)))){5})|((((b

12

In [75]:
# Read the part 2 input inside a context manager so the file handle is closed
# as soon as its contents have been read.
with open('input2') as input_file:
    data_2 = input_file.read()
part_2(data_2)

(((b((a(a((b|a)(b|a)a|abb)|b(b(bb|ab)|abb))|b((aaa|(ba|bb)b)a|(aab|b(a(b|a)|ba))b))a|(a(b((a(b|a)|ba)b|(bb|ab)a)|a((a(b|a)|bb)a|(ab|aa)b))|b(a(b(ab|aa)|aba)|b(b(a(b|a)|bb)|a(ba|bb))))b)|a(b(((a(b|a)|ba)aa|(b(bb|aa)|aab)b)a|(((bb|aa)a|aab)a|((a(b|a)|ba)b|(ba|ab)a)b)b)|a((abaa|b((bb|aa)a|aab))b|(a(a(bb|ab)|bab)|b((a(b|a)|ba)b|aba))a)))b|((b(a((b(ba|bb)|a(ba|(b|a)b))b|((ba|(b|a)b)a|(ba|bb)b)a)|b(a((bb|(b|a)a)a|(a(b|a)|ba)b)|b((a(b|a)|ba)b|(ba|bb)a)))|a(a((a(ba|bb)|b(ba|ab))b|((bb|aa)a|bbb)a)|b(((bb|ab)b|(bb|aa)a)b|((bb|(b|a)a)a|(bb|aa)b)a)))b|((b(a(bbb|a(ba|(b|a)b))|b((bb|aa)b|bba))|a((a(bb|ab)|b(ba|(b|a)b))b|((ab|aa)b|(a(b|a)|ba)a)a))a|(a(((ab|aa)b|(bb|ab)a)b|aaaa)|b(((ba|(b|a)b)a|(ba|bb)b)a|((ab|aa)b|(b|a)(b|a)a)b))b)a)a))+(((((b((a(a((b|a)(b|a)a|abb)|b(b(bb|ab)|abb))|b((aaa|(ba|bb)b)a|(aab|b(a(b|a)|ba))b))a|(a(b((a(b|a)|ba)b|(bb|ab)a)|a((a(b|a)|bb)a|(ab|aa)b))|b(a(b(ab|aa)|aba)|b(b(a(b|a)|bb)|a(ba|bb))))b)|a(b(((a(b|a)|ba)aa|(b(bb|aa)|aab)b)a|(((bb|aa)a|aab)a|((a(b|a)|ba)b|(ba|ab)a)b)b

422

That was hard work. I'm debugging regular expressions on the first day of a fortnight off work; I need to rethink my life.