In [356]:
# Puzzle input consumed by Solver: lines containing ': ' are numbered rules
# (either a quoted literal or space-separated rule references, with '|'
# separating alternatives); every other non-blank line is a message to validate.
DATA = """
42: 9 14 | 10 1
9: 14 27 | 1 26
10: 23 14 | 28 1
1: "a"
11: 42 31
5: 1 14 | 15 1
19: 14 1 | 14 14
12: 24 14 | 19 1
16: 15 1 | 14 14
31: 14 17 | 1 13
6: 14 14 | 1 14
2: 1 24 | 14 4
0: 8 11
13: 14 3 | 1 12
15: 1 | 14
17: 14 2 | 1 7
23: 25 1 | 22 14
28: 16 1
4: 1 1
20: 14 14 | 1 15
3: 5 14 | 16 1
27: 1 6 | 14 18
14: "b"
21: 14 1 | 1 14
25: 1 1 | 1 14
22: 14 14
8: 42
26: 14 22 | 1 20
18: 15 15
7: 14 5 | 1 21
24: 14 1

abbbbbabbbaaaababbaabbbbabababbbabbbbbbabaaaa
bbabbbbaabaabba
babbbbaabbbbbabbbbbbaabaaabaaa
aaabbbbbbaaaabaababaabababbabaaabbababababaaa
bbbbbbbaaaabbbbaaabbabaaa
bbbababbbbaaaaaaaabbababaaababaabab
ababaaaaaabaaab
ababaaaaabbbaba
baabbaaaabbaaaababbaababb
abbbbabbbbaaaababbbbbbaaaababb
aaaaabbaabaaaaababaa
aaaabbaaaabbaaa
aaaabbaabbaaaaaaabbbabbbaaabbaabaaa
babaaabbbaaabaababbaabababaaab
aabbbbbaabbbaaaaaabbbbbababaaaaabbaaabba
""".strip()

In [324]:
import re, six

In [372]:
class Solver:

    """
    Utility to solve AoC's 2020 19th task.

    The input holds numbered grammar rules followed by messages. Every rule
    is translated into a regular expression by textual substitution, and
    the messages are validated against the expression built for rule 0.
    """

    # Matches the '{r<number>}' placeholders that stand in for rule
    # references until the referenced rule has been turned into a regex.
    _PLACEHOLDER = re.compile(r'\{r([0-9]+)\}')

    def __init__(self, data, applyTransforms=False, maxDepth=8):

        """
        Receives the task's input textual data and whether or not
        to apply the transformations detailed in the second part.

        :param data: str, the rules and the messages, one per line
        :param applyTransforms: bool, replace rules 8 and 11 (when present
            in the data) with their second-part definitions
        :param maxDepth: int, how many nesting levels of rule 11 are
            spelled out when applyTransforms is set; must be >= 1
        :raises ValueError: if maxDepth is below 1 while transforms are
            applied, if a rule number is not an integer, or if the rules
            cannot be resolved (see makePatterns)
        """

        if applyTransforms and maxDepth < 1:
            raise ValueError('maxDepth must be at least 1')

        self.rules = {}
        # Stored as a set: a message repeated in the input is kept once.
        self.messages = set()

        # The second part makes rules 8 and 11 recursive. Rule 8 is simply
        # "one or more 42" and has a closed regex form. Rule 11 is "42
        # repeated N times followed by 31 repeated N times", which no plain
        # regex can express for unbounded N, so the alternatives for
        # N = 1 .. maxDepth are generated explicitly instead.

        transforms = {
            8: '(42)+',
            11: ' | '.join(
                ' '.join(['42'] * depth + ['31'] * depth)
                for depth in range(1, maxDepth + 1)
            ),
        } if applyTransforms else {}

        # Rules are stored by creating textual placeholders for
        # the rules they reference.

        for line in data.splitlines():
            if ': ' in line:
                # maxsplit=1 keeps any later ': ' inside the rule body.
                i, content = line.split(': ', 1)
                i = int(i)
                content = transforms.get(i, content).replace('"', '')
                if content.isalpha():
                    self.rules[i] = content
                else:
                    self.rules[i] = re.sub(r'([0-9]+)', r'{r\1}', content).replace(' ', '')
            elif line.strip():
                self.messages.add(line.strip())

        self.patterns, self.pattern = self.makePatterns()

    def _resolve(self, i, resolved):

        """
        Fills the placeholders of rule i using already resolved rules.

        :param i: int, number of the rule to resolve
        :param resolved: dict(int, str), regex text of the resolved rules
        :return: str, regex text for rule i
        """

        def fill(match):
            d = int(match.group(1))
            if d == i:
                # A rule referencing itself has no finite regex form; that
                # reference is turned into a branch that can never match.
                return '(?!)'
            # Always group, so alternations and quantifiers inside the
            # dependency keep their scope once embedded in this rule.
            return '(?:' + resolved[d] + ')'

        return self._PLACEHOLDER.sub(fill, self.rules[i])

    def makePatterns(self):

        """
        Using the task's input, it constructs the regular expressions
        to eventually find the 0th expression which is used to validate
        the input messages.

        :return: dict(int, re.Pattern), re.Pattern
        :raises ValueError: if some rules reference undefined rules or
            depend on each other in a cycle
        :raises KeyError: if the input defines no rule 0
        """

        # The rules form a dependency graph. Each pass resolves every rule
        # whose dependencies are all resolved already: rules without any
        # reference (the literals) come first, then the rules built only
        # from those, and so on until nothing is left pending.

        resolved = {}
        pending = set(self.rules)

        while pending:
            progressed = False
            for i in sorted(pending):
                # Self-references are not awaited; _resolve neutralises them.
                dependencies = set(map(int, self._PLACEHOLDER.findall(self.rules[i]))) - {i}
                if any(d not in resolved for d in dependencies):
                    continue
                resolved[i] = self._resolve(i, resolved)
                pending.remove(i)
                progressed = True
            if not progressed:
                # Without this check an undefined or cyclic reference would
                # keep the loop spinning forever.
                raise ValueError(
                    'cannot resolve rules {}: they reference undefined rules '
                    'or form a cycle'.format(sorted(pending))
                )

        # The body is grouped so that the anchors apply to the whole rule
        # and not only to its first and last alternatives.
        patterns = {i: re.compile('^(?:{})$'.format(v)) for i, v in resolved.items()}
        return patterns, patterns[0]

    def count(self):

        """
        Counts the stored messages that match rule 0 in their entirety.

        :return: int
        """

        return sum(1 for message in self.messages if self.pattern.match(message))

In [373]:
# Part one: count the messages in DATA that match rule 0, using the rules as written.
Solver(DATA, applyTransforms=False).count()

3

In [374]:
# Part two: same count, but with rules 8 and 11 swapped for the transformed
# definitions that Solver.__init__ applies when applyTransforms is set.
Solver(DATA, applyTransforms=True).count()

12