# **Day 8: Haunted Wasteland**

# Setup
The cells below will set up the rest of the notebook. 

I'll start by configuring my kernel:

In [1]:
# Changing the current working directory to the parent folder
# (presumably the repository root, so the relative "data/..." paths used later resolve).
# NOTE: `%cd ..` is relative to wherever the kernel currently is, so re-running
# this cell without restarting the kernel moves up one more level each time.
%cd ..

# Enabling the autoreload extension; mode 2 reloads all imported modules
# before each cell executes
%load_ext autoreload
%autoreload 2

d:\data\programming\advent-of-code-2023


Now, I'm going to import some libraries:

In [14]:
# Import statements
import pandas as pd
from tqdm import tqdm
import re

Finally, I'll load in the data for this puzzle. 

In [3]:
# Build the paths to this day's puzzle files (the day number is zero-padded
# to two digits) and read the real puzzle input as a list of raw lines.
day = 8
example_data_path = f"data/example-input/day-{day:02d}-example.txt"
input_data_path = f"data/input-files/day-{day:02d}-input.txt"

# Each list element keeps its trailing newline; downstream cells strip it.
with open(input_data_path, "r") as txt_file:
    input_data = list(txt_file)

# Parsing the Puzzle Input
This one seems deceptively easy... I'm a little suspicious about Part 2. 

In [4]:
# Parse the puzzle input.
# Line 0 holds the direction sequence, line 1 is skipped, and every line
# after that describes one node in the form "AAA = (BBB, CCC)".
direction_sequence = input_data[0].strip()

# Skip blank lines (for example a stray empty line at the end of the file),
# which would otherwise crash the split below.
node_info_str_list = [x.strip() for x in input_data[2:] if x.strip()]

# Split every node description exactly once instead of re-splitting the same
# string for each field.
node_records = []
for node_info in node_info_str_list:
    node_name, neighbours = node_info.split(" = ", 1)
    left_node, right_node = [
        part.strip() for part in neighbours.strip("()").split(",")
    ]
    node_records.append(
        {
            "node_name": node_name,
            "left_node": left_node,
            "right_node": right_node,
        }
    )

# Passing `columns` keeps the expected columns even if there are no records
node_info_df = pd.DataFrame.from_records(
    node_records, columns=["node_name", "left_node", "right_node"]
)

# Make a dictionary version of the data from the node_info_df:
# node name -> {"L": left neighbour, "R": right neighbour}
node_dict = {
    row.node_name: {
        "L": row.left_node,
        "R": row.right_node,
    }
    for row in node_info_df.itertuples()
}

# Traversal Method
Now, I'm going to write a method that'll traverse the nodes from AAA to ZZZ. 

In [5]:
def traverse_nodes(directions, nodes, start_node="AAA", end_node="ZZZ"):
    """
    Follow a repeating set of directions through a set of nodes until the
    end node is reached.

    Parameters
    ----------
    directions : str
        Sequence of direction keys (e.g. "L" / "R"). The sequence wraps
        around to its beginning once it has been exhausted.
    nodes : dict
        Maps each node name to a dict of {direction key: next node name}.
    start_node : str, optional
        Node to start from. Defaults to "AAA".
    end_node : str, optional
        Node at which the walk stops. Defaults to "ZZZ".

    Returns
    -------
    tuple
        (number of steps taken, list of the nodes that were stepped from, in
        order). The end node itself is not included in the list.

    Raises
    ------
    ValueError
        If the walk returns to a (node, direction index) combination it has
        already seen, which means the end node can never be reached.
    ZeroDivisionError
        If `directions` is empty and the start node is not the end node.
    """

    visited_nodes = 0
    visited_nodes_path = []
    cur_node = start_node

    # The walk is fully determined by the current node plus the position in
    # the direction sequence, so seeing the same pair twice means we are
    # stuck in a loop that never touches the end node.
    seen_states = set()

    while cur_node != end_node:
        cur_direction_idx = visited_nodes % len(directions)
        cur_direction = directions[cur_direction_idx]

        state = (cur_node, cur_direction_idx)
        if state in seen_states:
            raise ValueError(
                f"Node '{end_node}' is unreachable from '{start_node}' "
                "with the given directions."
            )
        seen_states.add(state)

        visited_nodes_path.append(cur_node)
        cur_node = nodes.get(cur_node).get(cur_direction)
        visited_nodes += 1

    # Return the number of nodes traveled, as well as the path
    return visited_nodes, visited_nodes_path

# Walk the real puzzle input from the start node to the end node
n_visited_nodes, visited_nodes_path = traverse_nodes(
    directions=direction_sequence,
    nodes=node_dict,
)

# Report how many steps the walk took
print(f"You encounter '{n_visited_nodes}' nodes during your travels.")

You encounter '14429' nodes during your travels.


# Part 2: Simultaneous Node Travel
In the second part, it seems like you need to start at every node whose name ends with "A", step through all of those paths simultaneously, and figure out how many steps it takes until every path is on a node ending in "Z" at the same time. 

I feel like there may be a mathy way to do this, but I'd first need to understand the patterns behind each of the nodes. I'll start by modifying my traversal method from Part 1 a bit. 

In [11]:
def traverse_nodes_controlled(directions, nodes, start_node="AAA", n_steps=100000):
    """
    Walk through the nodes for a fixed number of steps.

    Starting at `start_node`, the direction sequence is followed (wrapping
    around when it runs out) until `n_steps` steps have been taken. There is
    no end-node check: the walk simply stops after the requested step count.

    Returns a tuple of (steps taken, list of the nodes that were stepped
    from, in order).
    """

    trail = []
    position = start_node
    steps_taken = 0

    while steps_taken < n_steps:
        # Pick the direction for this step, wrapping around the sequence
        turn = directions[steps_taken % len(directions)]

        # Record where we are, then move to the neighbour in that direction
        trail.append(position)
        position = nodes.get(position).get(turn)
        steps_taken += 1

    return steps_taken, trail

With this method in hand, I'll try and determine how often each node touches an "end" node: 

In [19]:
# Every node whose name ends in "A" is a starting point
start_nodes = [name for name in node_dict if name.endswith("A")]

# Walk a fixed number of steps from each starting node and keep the full path
n_iterations = 10_000_000
start_node_to_path_dict = {}
for node in tqdm(start_nodes):
    _, path = traverse_nodes_controlled(
        direction_sequence,
        node_dict,
        start_node=node,
        n_steps=n_iterations,
    )
    start_node_to_path_dict[node] = path

# One row per starting node:
#   n_steps_for_end_nodes - step counts at which the path sits on a node ending in "Z"
#   period                - gaps between consecutive entries of that list
start_node_path_period_df = pd.DataFrame(
    [
        {
            "start_node": start_node,
            "n_steps_for_end_nodes": [
                step for step, visited in enumerate(path) if visited.endswith("Z")
            ],
        }
        for start_node, path in start_node_to_path_dict.items()
    ]
).assign(
    period=lambda df: df["n_steps_for_end_nodes"].apply(
        lambda hits: [later - earlier for earlier, later in zip(hits, hits[1:])]
    )
)

start_node_path_period_df

100%|██████████| 6/6 [00:10<00:00,  1.75s/it]


Unnamed: 0,start_node,n_steps_for_end_nodes,period
0,DNA,"[20569, 41138, 61707, 82276, 102845, 123414, 1...","[20569, 20569, 20569, 20569, 20569, 20569, 205..."
1,HNA,"[18727, 37454, 56181, 74908, 93635, 112362, 13...","[18727, 18727, 18727, 18727, 18727, 18727, 187..."
2,AAA,"[14429, 28858, 43287, 57716, 72145, 86574, 101...","[14429, 14429, 14429, 14429, 14429, 14429, 144..."
3,LMA,"[13201, 26402, 39603, 52804, 66005, 79206, 924...","[13201, 13201, 13201, 13201, 13201, 13201, 132..."
4,VGA,"[18113, 36226, 54339, 72452, 90565, 108678, 12...","[18113, 18113, 18113, 18113, 18113, 18113, 181..."
5,LLA,"[22411, 44822, 67233, 89644, 112055, 134466, 1...","[22411, 22411, 22411, 22411, 22411, 22411, 224..."


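Each path first lands on a "Z" node after exactly one period and then again after every further period (the first hit equals the period, so there is no offset). That makes this a least common multiple problem. Shouldn't be too hard - I just need to find the prime factorization of each of the numbers, and then I should be good! 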

In [28]:
def prime_factorization(number):
    """
    Return the prime factors of `number` in ascending order, with repeats.

    Uses trial division: candidates are tried from 2 upwards, and whatever is
    left once the candidate squared exceeds the remainder is itself a factor.
    Numbers below 2 yield an empty list.
    Adapted from https://stackoverflow.com/a/22808285.
    """
    found = []
    candidate = 2
    while candidate * candidate <= number:
        quotient, remainder = divmod(number, candidate)
        if remainder == 0:
            # Candidate divides evenly: record it and retry the same
            # candidate, since it may divide the quotient again.
            found.append(candidate)
            number = quotient
        else:
            candidate += 1

    # Anything greater than 1 that survives trial division is a factor too
    if number > 1:
        found.append(number)
    return found

# Factorize the first gap between end-node hits for every starting node
start_node_path_period_df["period_prime_factorization"] = [
    prime_factorization(periods[0])
    for periods in start_node_path_period_df["period"]
]

start_node_path_period_df

Unnamed: 0,start_node,n_steps_for_end_nodes,period,period_prime_factorization
0,DNA,"[20569, 41138, 61707, 82276, 102845, 123414, 1...","[20569, 20569, 20569, 20569, 20569, 20569, 205...","[67, 307]"
1,HNA,"[18727, 37454, 56181, 74908, 93635, 112362, 13...","[18727, 18727, 18727, 18727, 18727, 18727, 187...","[61, 307]"
2,AAA,"[14429, 28858, 43287, 57716, 72145, 86574, 101...","[14429, 14429, 14429, 14429, 14429, 14429, 144...","[47, 307]"
3,LMA,"[13201, 26402, 39603, 52804, 66005, 79206, 924...","[13201, 13201, 13201, 13201, 13201, 13201, 132...","[43, 307]"
4,VGA,"[18113, 36226, 54339, 72452, 90565, 108678, 12...","[18113, 18113, 18113, 18113, 18113, 18113, 181...","[59, 307]"
5,LLA,"[22411, 44822, 67233, 89644, 112055, 134466, 1...","[22411, 22411, 22411, 22411, 22411, 22411, 224...","[73, 307]"


In [31]:
def lcm_from_prime_factorizations(list_of_prime_factorizations):
    """
    Calculate the LCM of a set of numbers, each given by its prime factorization.

    Parameters
    ----------
    list_of_prime_factorizations : list of list of int
        One list of prime factors (with repeats) per number, e.g.
        [[2, 2, 3], [2, 5]] for the numbers 12 and 10.

    Returns
    -------
    int
        The least common multiple: every prime raised to the highest power
        with which it appears in any single factorization. Returns 1 when no
        factors are given.
    """

    # Highest exponent seen for each prime across all of the factorizations.
    # Each factorization is scanned once per distinct prime it contains.
    max_exponent_per_prime = {}
    for factors in list_of_prime_factorizations:
        for prime in set(factors):
            exponent = factors.count(prime)
            if exponent > max_exponent_per_prime.get(prime, 0):
                max_exponent_per_prime[prime] = exponent

    # Now, return the product of all of these repeated factors
    final_product = 1
    for prime, exponent in max_exponent_per_prime.items():
        final_product *= prime**exponent
    return final_product

In [32]:
# Combine the per-start-node factorizations into the Part 2 answer
lcm_from_prime_factorizations(
    start_node_path_period_df["period_prime_factorization"].tolist()
)

10921547990923