In [131]:
# Functions to load and transform data
import regex


def load_data(filename = "../data/day19test.txt"):
    """Load a puzzle input file and split it into parsed rules and messages.

    The file is expected to contain a block of rules (one ``N: ...`` rule per
    line), a blank line, and then one message per line.

    Args:
        filename: Path of the input file to read.

    Returns:
        A tuple ``(rule_dict, msgs)``.

        ``rule_dict`` maps each rule number to a ``(rule_type, value)`` pair:

        * ``"letter"``: ``value`` is the quoted text with the quotes removed.
        * ``"sequence"``: ``value`` is a list of rule numbers.
        * ``"two_sequences"``: ``value`` is a list of such lists, one per
          ``|``-separated alternative.

        ``msgs`` is the list of message lines. Empty lines (for example the
        one produced by a trailing newline at the end of the file) are not
        messages and are dropped, so they do not inflate the message count.
        If the file has no message section, ``msgs`` is empty.

    Raises:
        ValueError: If a rule number or a referenced rule number is not an
            integer.
    """
    with open(filename, "r") as f:
        content = f.read()

    # Only the first two blank-line separated sections are used.
    sections = content.split("\n\n")
    rules_block = sections[0]
    msgs_block = sections[1] if len(sections) > 1 else ""

    msgs = [line for line in msgs_block.split("\n") if line]

    rule_dict = {}
    for line in rules_block.split("\n"):
        if not line.strip():
            continue  # tolerate stray blank lines inside the rules block

        number, _, body = line.partition(":")
        body = body.strip()

        if body.startswith('"'):
            # Detect a literal by its quote instead of hard-coding "a"/"b".
            rule_type = "letter"
            value = body.strip('"')
        elif "|" in body:
            rule_type = "two_sequences"
            # split() without arguments tolerates repeated spaces.
            value = [[int(ref) for ref in alternative.split()]
                     for alternative in body.split("|")]
        else:
            rule_type = "sequence"
            value = [int(ref) for ref in body.split()]

        rule_dict[int(number)] = (rule_type, value)

    return rule_dict, msgs


def solve_with_regex(parser, filename = "../data/day19.txt"):
    """Count the messages in `filename` that match the regex built by `parser`.

    Args:
        parser: Callable that takes the rule dictionary returned by
            `load_data` and returns a regular-expression string.
        filename: Path of the puzzle input file.

    Returns:
        The number of messages matched by the anchored pattern. A summary
        line is also printed.
    """
    rules, messages = load_data(filename)

    # Anchor on both ends so a message has to match as a whole.
    anchored = regex.compile("^" + parser(rules) + "$")

    n_valid = 0
    for message in messages:
        if anchored.match(message):
            n_valid += 1

    print(str(n_valid) + " out of " + str(len(messages)) + " messages are allowed")
    return n_valid


In [132]:
# Puzzle 1 with regex

def parse_rule(rule_dict_parsed, n=0):
    """Recursively expand rule `n` into a regular-expression string.

    Args:
        rule_dict_parsed: Mapping of rule number to ``(rule_type, value)``
            where ``rule_type`` is ``"letter"``, ``"sequence"`` or
            ``"two_sequences"`` (the shape produced by `load_data`).
        n: Number of the rule to expand.

    Returns:
        The regex source for rule `n`. Alternatives are wrapped in a
        parenthesised group and joined with ``|``; every alternative present
        in the rule is included, not only the first two.

    Raises:
        KeyError: If `n` or a referenced rule is missing from the mapping.
        ValueError: If a rule has an unknown rule type.
    """
    rule_to_parse = rule_dict_parsed[n]
    rule_type = rule_to_parse[0]
    rule_value = rule_to_parse[1]

    if rule_type == "letter":
        return rule_value

    if rule_type == "sequence":
        return "".join(parse_rule(rule_dict_parsed, ref) for ref in rule_value)

    if rule_type == "two_sequences":
        alternatives = [
            "".join(parse_rule(rule_dict_parsed, ref) for ref in alternative)
            for alternative in rule_value
        ]
        return "(" + "|".join(alternatives) + ")"

    # Fail loudly instead of hitting an unbound local further down.
    raise ValueError("Unknown rule type " + repr(rule_type) + " for rule " + str(n))


# Run the puzzle 1 parser on the test input first, then on the full input.
# The full-input call stays last so its return value is the cell's result.
solve_with_regex(parser=parse_rule, filename="../data/day19test.txt")
solve_with_regex(parser=parse_rule, filename="../data/day19.txt")

2 out of 5 messages are allowed
124 out of 368 messages are allowed


124

In [133]:
# Puzzle 2 with regex

# Parts of it can be repeated
# 8: 42 | 42 8 > first part repeated n times
# 11: 42 31 | 42 11 31 > nested parts

# 42 31
# 42 42 31 31
# 42 42 42 31 31 31
# 42 42 42 42 31 31 31 31
# ...
# so only an equal number!
# ab
# aabb
# aaabbb
# https://stackoverflow.com/questions/17053438/use-regex-to-match-axbx-where-x-is-the-number-of-times-a-b-appear
# Works in regex101 but needs a numbered reference to the group, which gets difficult when there are more groups (maybe use a named reference?)
# ^(a(?-1)?b)$ with the relative reference -1 matches ab, aabb, aaabbb in regex101 but does not work in Python re or regex.
# In the end: use named groups, with the Python regex module for that.

# New parser function
def parse_rule_2(rule_dict_parsed, n=0):
    """Recursively expand rule `n` into a regex string, overriding rules 8 and 11.

    Rules 8 and 11 are not expanded from their stored definition:

    * rule 8 becomes "rule 42 repeated one or more times";
    * rule 11 becomes a named group that recurses into itself, so that it
      matches rule 42 repeated k times followed by rule 31 repeated the same
      k times (k >= 1). This relies on the ``(?P>name)`` recursion syntax of
      the `regex` module.

    Args:
        rule_dict_parsed: Mapping of rule number to ``(rule_type, value)``
            where ``rule_type`` is ``"letter"``, ``"sequence"`` or
            ``"two_sequences"`` (the shape produced by `load_data`).
        n: Number of the rule to expand.

    Returns:
        The regex source for rule `n`. For ordinary alternative rules, every
        ``|`` alternative present in the rule is included, not only the
        first two.

    Raises:
        KeyError: If `n` or a referenced rule is missing from the mapping.
        ValueError: If a rule has an unknown rule type.
    """
    rule_to_parse = rule_dict_parsed[n]
    rule_type = rule_to_parse[0]
    rule_value = rule_to_parse[1]

    if n == 8:
        result_42 = parse_rule_2(rule_dict_parsed, 42)
        return "(" + result_42 + ")+"  # repeat rule 42 >0 times

    if n == 11:
        result_42 = parse_rule_2(rule_dict_parsed, 42)
        result_31 = parse_rule_2(rule_dict_parsed, 31)
        # 42 31 or 42 42 31 31 or 42 42 42 31 31 31, etc.
        return "(?P<group>" + result_42 + "(?P>group)?" + result_31 + ")"

    if rule_type == "letter":
        return rule_value

    if rule_type == "sequence":
        return "".join(parse_rule_2(rule_dict_parsed, ref) for ref in rule_value)

    if rule_type == "two_sequences":
        alternatives = [
            "".join(parse_rule_2(rule_dict_parsed, ref) for ref in alternative)
            for alternative in rule_value
        ]
        return "(" + "|".join(alternatives) + ")"

    # Fail loudly instead of hitting an unbound local further down.
    raise ValueError("Unknown rule type " + repr(rule_type) + " for rule " + str(n))


# Baseline: the puzzle 1 parser on the second test input.
solve_with_regex(parser=parse_rule, filename="../data/day19test2.txt")
# Puzzle 2 parser on the second test input, then on the full input.
# The full-input call stays last so its return value is the cell's result.
solve_with_regex(parser=parse_rule_2, filename="../data/day19test2.txt")
solve_with_regex(parser=parse_rule_2, filename="../data/day19.txt")


3 out of 15 messages are allowed
12 out of 15 messages are allowed
228 out of 368 messages are allowed


228

In [137]:
# Some regex tests

# Each case pairs a pattern with a sample text; the match result is printed
# in the order listed.
# Left out on purpose: "^(a(?-1)?b)$" against "aabb" - does not work,
# ?-1 not supported by Python regex.
recursion_cases = [
    # Works
    ("^(a(?1)?b)$", "aabb"),
    # Does not work because of incorrect group reference
    ("^(x)(a(?1)?b)$", "xaabb"),
    # Works
    ("^(x)(a(?2)?b)$", "xaabb"),
    # Works
    ("^(?P<bla>a(?P>bla)?b)$", "aaabbb"),
    # Works
    ("^((x)(?P<group>a(?P>group)?b))(y)$", "xaaabbby"),
]

for pattern_source, sample_text in recursion_cases:
    regex_str = regex.compile(pattern_source)
    print(regex_str.match(sample_text))


<regex.Match object; span=(0, 4), match='aabb'>
None
<regex.Match object; span=(0, 5), match='xaabb'>
<regex.Match object; span=(0, 6), match='aaabbb'>
<regex.Match object; span=(0, 8), match='xaaabbby'>
