In [1]:
import os
from pathlib import Path
from collections import Counter

# Input files live in a `data` folder under the notebook's working directory.
# `__file__` is not defined inside a notebook, so resolve the working directory
# explicitly rather than calling realpath on the literal string "__file__".
FOLDER = Path.cwd().resolve() / 'data'
in_file = 'day14.txt'

with open(FOLDER / in_file) as f:
    # The first line holds the starting template string.
    template = f.readline().strip()
    # Every remaining non-blank line is a rule of the form "AB -> C".
    # Filtering blank lines handles the separator after the template as well
    # as any trailing empty line, which would otherwise make dict() fail on a
    # one-element split result.
    subs = dict(
        stripped.split(' -> ')
        for stripped in (line.strip() for line in f)
        if stripped
    )

# Quick sanity check of what was parsed
template, subs['CO']

('VPPHOPVVSFSVFOCOSBKF', 'B')

#### Keeping track of count of each pair

The input string is divided into pairs: `VP`, `PP`, `PH`, etc.

On each iteration, each of those pairs contributes two new pairs. The goal is to track only the counts of the pairs. For example, if our current data has 2 `VP` pairs and the rule `VP -> B`, the next iteration will have 2 `VB` and 2 `BP` pairs. The counts will go from `{VP: 2}` to `{VB: 2, BP: 2}`. 

At the end, be mindful that the second letter of each pair is also the first letter of the next pair, so it appears twice. So `{VB: 2, BP: 2}` represents only 2 `B` characters, not 4, because each `VP` was transformed into `VBP`. When counting letters, just count the first letter of each pair. This leaves the final letter of the string uncounted, so add it back.

In [24]:
# Substitutions never change the final character of the template, so it is
# also the final character of the result. The pair-based letter count further
# down misses that character once, so remember it here.
LAST_CHAR = template[-1]

# How often each adjacent two-letter pair occurs in the starting template
initial_counts = Counter(left + right for left, right in zip(template, template[1:]))

# For every rule, the two pairs that replace the original pair,
# e.g. {'CO': ('CB', 'BO'), ...}
pair_lookup = {
    pair: (pair[0] + inserted, inserted + pair[1])
    for pair, inserted in subs.items()
}

def increase_counts(current_counts, lookup):
    '''
    Apply one round of substitutions to a table of pair counts.

    Every pair in current_counts is replaced by the pairs that lookup maps
    it to; each of those receives the full count of the original pair.
    A pair that has no entry in lookup is carried over unchanged instead
    of raising KeyError.

    current_counts: mapping of pair -> number of occurrences
    lookup: mapping of pair -> iterable of the pairs it turns into

    Returns a new Counter; current_counts is not modified.
    '''
    counter = Counter()

    for pair, current_count in current_counts.items():
        try:
            new_pairs = lookup[pair]
        except KeyError:
            # No rule for this pair: nothing is inserted, so it survives as is
            new_pairs = (pair,)

        for new_pair in new_pairs:
            counter[new_pair] += current_count

    return counter


def run_substitutions(n, counts, lookup):
    '''
    Run n rounds of substitutions, feeding each round's pair counts
    into the next, and return the counts after the last round.
    '''
    latest = counts
    for _round in range(n):
        latest = increase_counts(latest, lookup)
    return latest


def count_leters_from_pairs(counts, LAST_CHAR):
    '''
    The count of the letters will be the first of each pair 
    because the second in each pair is repeated. This leaves
    the final letter uncounted, so add it back
    '''
    total_counts = Counter()

    for (letter1, letter2), total in counts.items():
        total_counts[letter1] += total
    total_counts[LAST_CHAR] += 1
    
    return total_counts.most_common(1)[0][1], total_counts.most_common()[-1][1]



### Problem One
Run 10 times

In [25]:
pair_counts = run_substitutions(10, initial_counts, pair_lookup)
most, least = count_leters_from_pairs(pair_counts, LAST_CHAR)
most - least

2233

### Problem Two
Run 40 times

In [36]:
pair_counts = run_substitutions(40, initial_counts, pair_lookup)
most, least = count_leters_from_pairs(pair_counts, LAST_CHAR)
most - least

2884513602164