# --- Day 14: Extended Polymerization --- 

https://adventofcode.com/2021/day/14

## Get Input Data

In [1]:
def parse_data(filename):
    """Read in polymer template and pair insertion data.

    The file is opened at ``../inputs/<filename>``, relative to the current
    working directory. Its first line is the polymer template, its second line
    is ignored, and every following non-blank line is treated as one insertion
    rule. Only the first two characters (the pair) and the last character (the
    inserted element) of a rule line are used.

    Args:
        filename: Name of the file inside ``../inputs/``.

    Returns:
        A ``(polymer_template, pair_insertion_rules)`` tuple, where
        ``pair_insertion_rules`` maps each two-character pair to a dict with:

        * ``'2_chars'``   - first element of the pair + inserted element
        * ``'3_chars'``   - first element + inserted element + second element
        * ``'new_pairs'`` - list of the two pairs that the insertion creates

    Raises:
        IndexError: If the file is empty (there is no template line).
    """

    with open(f'../inputs/{filename}') as file:
        lines = [line.strip() for line in file]

    polymer_template = lines[0]

    pair_insertion_rules = {}
    for line in lines[2:]:
        # Blank lines (for example extra newlines at the end of the file) hold
        # no rule; indexing into them would raise IndexError, so skip them.
        if not line:
            continue

        first, second, inserted = line[0], line[1], line[-1]

        # '2_chars' is what most pairs contribute when a polymer string is
        # rebuilt; '3_chars' additionally keeps the pair's second element and
        # is meant for the final pair of the polymer.
        pair_insertion_rules[line[:2]] = {
            '2_chars': first + inserted,
            '3_chars': first + inserted + second,
            'new_pairs': [first + inserted, inserted + second],
        }

    return polymer_template, pair_insertion_rules

In [2]:
# Parse 'test_polymer_data.txt' into a template string and a rules dict.
test_polymer_template, test_pair_insertion_rules = parse_data('test_polymer_data.txt')
# Echo both values as the cell output so the parsed structure can be inspected.
test_polymer_template, test_pair_insertion_rules

('NNCB',
 {'CH': {'2_chars': 'CB', '3_chars': 'CBH', 'new_pairs': ['CB', 'BH']},
  'HH': {'2_chars': 'HN', '3_chars': 'HNH', 'new_pairs': ['HN', 'NH']},
  'CB': {'2_chars': 'CH', '3_chars': 'CHB', 'new_pairs': ['CH', 'HB']},
  'NH': {'2_chars': 'NC', '3_chars': 'NCH', 'new_pairs': ['NC', 'CH']},
  'HB': {'2_chars': 'HC', '3_chars': 'HCB', 'new_pairs': ['HC', 'CB']},
  'HC': {'2_chars': 'HB', '3_chars': 'HBC', 'new_pairs': ['HB', 'BC']},
  'HN': {'2_chars': 'HC', '3_chars': 'HCN', 'new_pairs': ['HC', 'CN']},
  'NN': {'2_chars': 'NC', '3_chars': 'NCN', 'new_pairs': ['NC', 'CN']},
  'BH': {'2_chars': 'BH', '3_chars': 'BHH', 'new_pairs': ['BH', 'HH']},
  'NC': {'2_chars': 'NB', '3_chars': 'NBC', 'new_pairs': ['NB', 'BC']},
  'NB': {'2_chars': 'NB', '3_chars': 'NBB', 'new_pairs': ['NB', 'BB']},
  'BN': {'2_chars': 'BB', '3_chars': 'BBN', 'new_pairs': ['BB', 'BN']},
  'BB': {'2_chars': 'BN', '3_chars': 'BNB', 'new_pairs': ['BN', 'NB']},
  'BC': {'2_chars': 'BB', '3_chars': 'BBC', 'new_pairs'

In [3]:
# Parse 'polymer_data.txt'; no echo here, the values are only stored for later cells.
polymer_template, pair_insertion_rules = parse_data('polymer_data.txt')

## Part 1
---

In [4]:
from more_itertools import pairwise
from collections import Counter

In [5]:
def calc_part_1(template, rules, num_steps):
    """Return the difference of the most and least common counts of a polymer template,
    after pair insertion rules have been applied for num_steps.

    The polymer string is rebuilt in full on every step, so its length grows
    from n to 2n - 1 each time; this is only practical for a small num_steps.

    Args:
        template: The starting polymer as a string of single-character elements.
        rules: Mapping from each two-character pair to a dict that provides
            '2_chars' (first element + inserted element) and '3_chars'
            (first element + inserted element + second element).
        num_steps: Number of insertion steps to apply.

    Returns:
        Count of the most common element minus count of the least common
        element in the final polymer; 0 for an empty template.

    Raises:
        KeyError: If an adjacent pair in the polymer has no entry in rules.
    """

    for _ in range(num_steps):
        # With fewer than two elements there are no pairs, so nothing can be
        # inserted now or on any later step. Stop here rather than replacing
        # the template with an empty string.
        if len(template) < 2:
            break

        pairs = [left + right for left, right in zip(template, template[1:])]

        # Adjacent pairs overlap by one element, so every pair except the last
        # contributes only its first element plus the inserted one...
        pieces = [rules[pair]['2_chars'] for pair in pairs[:-1]]
        # ...and the last pair also contributes its second element, which
        # closes the polymer.
        pieces.append(rules[pairs[-1]]['3_chars'])

        template = ''.join(pieces)

    counts = Counter(template)
    if not counts:
        return 0

    return max(counts.values()) - min(counts.values())

### Run on Test Data

In [6]:
part_1_test_diff = calc_part_1(test_polymer_template, test_pair_insertion_rules, 10)
# Fail loudly on a re-run if the example no longer produces the known answer.
assert part_1_test_diff == 1588, f'expected 1588 for the example data, got {part_1_test_diff}'
part_1_test_diff

1588

### Run on Input Data

In [7]:
# Part 1 answer: 10 insertion steps on the template and rules parsed from 'polymer_data.txt'.
calc_part_1(polymer_template, pair_insertion_rules, 10)

3118

## Part 2
---

Part 2 is another "Your answer to Part 1 was probably too naive/inefficient." type of problem.

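This one has to run for 40 steps (instead of 10). Each step nearly doubles the polymer's length (n elements become 2n − 1), so after 40 steps the string would be trillions of characters long.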

The plan: keep track of only how many times each adjacent pair occurs, and do **NOT** try to build a new template string at each step.

In [8]:
from copy import copy 

def calc_part_2(template, rules, num_steps):
    """Return the difference of the most and least common counts of a polymer template,
    after pair insertion rules have been applied for num_steps.

    The polymer string is never materialised. Only the number of occurrences of
    each adjacent pair is tracked, so the work per step depends on the number
    of distinct pairs rather than on the polymer's length.

    Args:
        template: The starting polymer as a string of single-character elements.
        rules: Mapping from each two-character pair to a dict whose 'new_pairs'
            entry lists the two pairs that replace it after insertion.
        num_steps: Number of insertion steps to apply.

    Returns:
        Count of the most common element minus count of the least common
        element in the final polymer; 0 for an empty template.

    Raises:
        KeyError: If a pair present in the polymer has no entry in rules.
    """

    # An empty polymer has no elements to count (and no first character).
    if not template:
        return 0

    first_char = template[0]

    pairs_counts = Counter(left + right for left, right in zip(template, template[1:]))

    for _ in range(num_steps):
        # Every existing pair is split by its insertion, so the next generation
        # consists solely of the pairs the rules produce: each occurrence of a
        # pair yields one occurrence of each of its two replacement pairs.
        new_pairs = Counter()
        for pair, count in pairs_counts.items():
            for produced in rules[pair]['new_pairs']:
                new_pairs[produced] += count

        pairs_counts = new_pairs

    # Adjacent pairs overlap by one element, so counting only the second
    # element of each pair counts every element once, except the very first
    # element of the polymer. Insertions never change that first element, so
    # it is added back explicitly.
    single_char_counts = Counter({first_char: 1})
    for pair, count in pairs_counts.items():
        single_char_counts[pair[1]] += count

    return max(single_char_counts.values()) - min(single_char_counts.values())

### Run on Test Data

In [9]:
part_2_test_diff_10 = calc_part_2(test_polymer_template, test_pair_insertion_rules, 10)
# Same example and step count as part 1, so the answer must match part 1's 1588.
assert part_2_test_diff_10 == 1588, f'expected 1588 for the example data, got {part_2_test_diff_10}'
part_2_test_diff_10

1588

In [10]:
part_2_test_diff_40 = calc_part_2(test_polymer_template, test_pair_insertion_rules, 40)
# Fail loudly on a re-run if the example no longer produces the known 40-step answer.
assert part_2_test_diff_40 == 2188189693529, (
    f'expected 2188189693529 for the example data, got {part_2_test_diff_40}'
)
part_2_test_diff_40

2188189693529

### Run on Input Data

In [11]:
# Part 2 answer: 40 insertion steps on the template and rules parsed from 'polymer_data.txt'.
calc_part_2(polymer_template, pair_insertion_rules, 40)

4332887448171