In [46]:
import re
from copy import deepcopy

def get_data(set = 0):
    """Read ``data<set>.txt`` and return its lines with surrounding whitespace removed.

    Parameters
    ----------
    set : int or str, default 0
        Suffix of the input file; the file opened is ``"data" + str(set) + ".txt"``,
        resolved relative to the current working directory.  (The name shadows
        the ``set`` builtin but is kept so existing keyword callers still work.)

    Returns
    -------
    list[str]
        One entry per line of the file, each passed through ``str.strip``.
        Blank lines are kept as empty strings.

    Raises
    ------
    OSError
        If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    # Use a context manager so the file handle is closed deterministically;
    # previously the handle was left for the garbage collector to clean up.
    with open(r"data" + str(set) + ".txt") as handle:
        return [line.strip() for line in handle]


PART 1 - Apply 10 steps of pair insertion to the polymer template and find the most and least common elements in the result. What do you get if you take the quantity of the most common element and subtract the quantity of the least common element?

PART 2 - Apply 40 steps of pair insertion to the polymer template and find the most and least common elements in the result. What do you get if you take the quantity of the most common element and subtract the quantity of the least common element?

In [49]:
### Part 1 and Part 2 ###

data_set = 1

starting_sequence = []
rules = {}

# Parse the puzzle input. A row made only of capital letters is the polymer
# template (stored as a list of single-character elements); a row of the form
# "XY -> Z" is a pair-insertion rule mapping the pair "XY" to the element "Z".
# Rows matching neither pattern (e.g. the blank separator line) are ignored.
for row in get_data(data_set):
    if m := re.match('^[A-Z]+$', row):
        starting_sequence = list(m[0])
    elif m := re.match('^([A-Z]{2}) -> ([A-Z])$', row):
        rules[m[1]] = m[2]


# rules_map stores the result of one substitution in gene (pair) terms:
# e.g. with the rule CH -> B, one step turns one CH gene into one CB gene and
# one BH gene, so rules_map["CH"] == {"CB": 1, "BH": 1}.
# The values are multiplicities: a rule such as NN -> N produces the child
# pair NN twice, which is recorded as {"NN": 2} rather than being collapsed
# into a single entry with count 1.
rules_map = {}
for pair, inserted in rules.items():
    children = {}
    for child in (pair[0] + inserted, inserted + pair[1]):
        children[child] = children.get(child, 0) + 1
    rules_map[pair] = children

# core routine: simulate pair insertion by tracking pair counts rather than the full polymer
def polymer_steps(max_steps, rules, starting_sequence):

    # zero out our results dictionaries
    calculated_element_count = {v:0 for v in rules.values()}
    gene_counts = next_gene_counts = {k:0 for k in rules.keys()}

    # starting conditions

    # genes in the starting sequence
    for i, g in enumerate(starting_sequence):
        # stop just before the last element
        if i == len(starting_sequence) - 1: break
        gene_counts[starting_sequence[i] + starting_sequence[i+1]] += 1

    # elements in the starting sequence
    for e in starting_sequence:
        calculated_element_count[e] += 1

    # polymerize for max_steps
    for step in range(max_steps):

        # clear next gene counts
        next_gene_counts = {k:0 for k in rules.keys()}

        # for each gene
        for g in gene_counts.keys():

            # for each rule map result for each gene
            for h in rules_map[g].keys():

                # count of each parent gene add to child genes
                next_gene_counts[h] += gene_counts[g]

            # each parent gene adds one element count to the total
            calculated_element_count[rules[g]] += gene_counts[g]

        # reset gene counts for the next step
        gene_counts = deepcopy(next_gene_counts)

    # return the results dictionaries
    return gene_counts, calculated_element_count

print(f"Starting Sequence: {''.join(starting_sequence)}")

# --- Part 1: ten rounds of pair insertion ---
part_1_steps = 10

gene_counts, calculated_element_count = polymer_steps(part_1_steps, rules, starting_sequence)

# The answer is the spread between the most and least frequent element counts.
part_1_totals = calculated_element_count.values()
print(
    "PART 1 - The count of the most common element minus the count "
    "of the least common element is "
    f"{max(part_1_totals) - min(part_1_totals)}."
)


# --- Part 2: forty rounds, same computation ---
part_2_steps = 40

gene_counts, calculated_element_count = polymer_steps(part_2_steps, rules, starting_sequence)

part_2_totals = calculated_element_count.values()
print(
    "PART 2 - The count of the most common element minus the count "
    "of the least common element is "
    f"{max(part_2_totals) - min(part_2_totals)}."
)



Starting Sequence: HHKONSOSONSVOFCSCNBC
PART 1 - The count of the most common element minus the count of the least common element is 2657.
PART 2 - The count of the most common element minus the count of the least common element is 2911561572630.
