## Setup

In [None]:
# Get raw advent-of-code data
from aocd.models import Puzzle

# Puzzle handle for Advent of Code 2025, day 4
puzzle = Puzzle(year=2025, day=4)
# Full puzzle input, used for the performance checks and the submitted answers
input_data = puzzle.input_data
# First parsed example, used for the correctness checks
example = puzzle.examples[0]

In [None]:
import sys
from pathlib import Path

# Put the parent of the current working directory on the import path so the `common`
# package imported below can be resolved. This depends on where the kernel was started.
sys.path.append(str(Path.cwd().parent))

from common.utils.perf_check import time_solution

## Part a

### Iterative approach
As always, let's start with a simple iterative approach. I'll use some basic grid utilities for parsing the input data and generating 2D directions.

In [None]:
# Imports
from common.utils.dict_grid import OCTAGONAL_OFFSETS_COMPLEX, text_to_grid_dict

In [16]:
# Constants
# Grid character that marks a cell holding a paper roll
PAPER_ROLL = "@"

In [None]:
# Functions
def find_reachable_paper_rolls_iterative(grid: dict[complex, str]) -> list[complex]:
    """Return the positions of all paper rolls that have fewer than 4 neighboring paper rolls.

    Args:
        grid: Mapping from a complex grid position to the character stored at that cell.

    Returns:
        The qualifying positions, in the iteration order of ``grid``.
    """
    reachable: list[complex] = []

    for cell, symbol in grid.items():
        # Only paper rolls can be reachable
        if symbol != PAPER_ROLL:
            continue

        # Positions missing from the grid (e.g. beyond the border) never count as paper rolls
        neighbor_total = 0
        for offset in OCTAGONAL_OFFSETS_COMPLEX:
            if grid.get(cell + offset) == PAPER_ROLL:
                neighbor_total += 1

        if neighbor_total < 4:
            reachable.append(cell)

    return reachable


def calc_solution_a_iterative(input_data: str) -> int:
    """Count the paper rolls that have fewer than 4 neighboring paper rolls (dictionary-based).

    Args:
        input_data: Raw puzzle text, parsed with ``text_to_grid_dict``.

    Returns:
        The number of positions reported by ``find_reachable_paper_rolls_iterative``.
    """
    grid = text_to_grid_dict(input_data)
    reachable_positions = find_reachable_paper_rolls_iterative(grid)
    return len(reachable_positions)

There seems to be something wrong with fetching the example answers: 
```python
print(example.answer_a)
> x.x.@@@.x.
```
So I just compared the output of the function to the expected answer manually.

In [None]:
# Correctness check
# The expected value 13 is hard-coded because the parsed `answer_a` of the example
# came back as a grid row instead of a number, so it cannot be compared against.
calc_solution_a_iterative(example.input_data) == 13

True

In [None]:
# Performance check
# NOTE(review): time_solution presumably returns the runtime per call in milliseconds
# (the message below labels it "ms per run") — confirm in common.utils.perf_check.
iterative_time_a = time_solution(calc_solution_a_iterative, input_data)
print(f"The iterative implementation takes {iterative_time_a:.1f} ms per run.")

The iterative implementation takes 12.5 ms per run.


### Vectorized approach
Let's see if we can speed things up with NumPy and SciPy's `convolve2d`. We'll convert the grid to a 2D array and use convolution to count neighbors efficiently.

In [None]:
# Imports
import numpy as np
from common.utils.numpy_grid import OCTAGONAL_KERNEL, text_to_array_grid
from scipy.signal import convolve2d

In [None]:
# Functions
def find_reachable_paper_rolls_vectorized(
    paper_roll_arr: np.ndarray[tuple[int, int], np.dtype[np.int8]],
) -> np.ndarray[tuple[int, int], np.dtype[np.bool]]:
    """Mark the paper rolls that have fewer than 4 neighboring paper rolls, using 2D convolution.

    Args:
        paper_roll_arr: 2D array holding 1 where a paper roll is present and 0 elsewhere.

    Returns:
        Boolean array of the same shape, True exactly where a paper roll has fewer than 4 neighbors.
    """
    # Cells that currently hold a paper roll
    is_paper_roll = paper_roll_arr == 1

    # Neighbor count per cell; boundary="fill" pads the border so off-grid cells add nothing.
    # NOTE(review): assumes OCTAGONAL_KERNEL has a zero center, so a roll does not count itself.
    neighbor_counts = convolve2d(paper_roll_arr, OCTAGONAL_KERNEL, mode="same", boundary="fill")
    has_few_neighbors = neighbor_counts < 4

    return is_paper_roll & has_few_neighbors


def calc_solution_a_vectorized(input_data: str) -> int:
    """Count the paper rolls that have fewer than 4 neighboring paper rolls (convolution-based).

    Args:
        input_data: Raw puzzle text, parsed with ``text_to_array_grid``.

    Returns:
        The number of True cells in the mask from ``find_reachable_paper_rolls_vectorized``.
    """
    # Character grid -> binary grid (1 where PAPER_ROLL, 0 otherwise)
    char_grid = text_to_array_grid(input_data)
    paper_rolls = (char_grid == PAPER_ROLL).astype(np.int8)

    # Every True cell in the mask is one reachable paper roll
    reachable_mask = find_reachable_paper_rolls_vectorized(paper_rolls)
    return int(np.sum(reachable_mask))

In [None]:
# Correctness check
# Same hard-coded expected value (13) as for the iterative version, because the
# parsed `answer_a` of the example is not usable.
calc_solution_a_vectorized(example.input_data) == 13

True

In [None]:
# Performance check
vectorized_time_a = time_solution(calc_solution_a_vectorized, input_data)
print(f"The vectorized implementation takes {vectorized_time_a:.2f} ms per run.")
# The speed-up reuses `iterative_time_a`, so the iterative performance cell must have run first
print(f"This is {iterative_time_a / vectorized_time_a:.1f}x faster than the iterative version.")

The vectorized implementation takes 1.07 ms per run.
This is 11.8x faster than the iterative version.


In [None]:
# Submit answer
# Assigning to `puzzle.answer_a` sends the answer; the response is printed as the cell output
puzzle.answer_a = calc_solution_a_vectorized(input_data)

[32mThat's the right answer!  You are one gold star closer to decorating the North Pole. [Continue to Part Two][0m


## Part b

### Iterative approach
Let's start with the iterative approach again. We'll simply loop: find the reachable paper rolls, remove them, and repeat until no more can be removed.

In [None]:
# Functions
def calc_solution_b_iterative(input_data: str) -> int:
    """Count how many paper rolls can be removed in total, repeatedly scanning a dictionary grid.

    In every round, all currently reachable paper rolls (fewer than 4 neighboring paper rolls)
    are removed at once. The rounds stop as soon as a scan finds no reachable roll.

    Args:
        input_data: Raw puzzle text, parsed with ``text_to_grid_dict``.

    Returns:
        The total number of paper rolls removed over all rounds.
    """
    grid = text_to_grid_dict(input_data)
    removed_count = 0

    # An empty result list ends the loop
    while reachable := find_reachable_paper_rolls_iterative(grid):
        # Clear every reachable roll of this round in one go
        grid.update(dict.fromkeys(reachable, "."))
        removed_count += len(reachable)

    return removed_count

In [None]:
# Correctness check
# `example.answer_b` is compared as a string, hence the str() around the integer result
str(calc_solution_b_iterative(example.input_data)) == example.answer_b

True

In [None]:
# Performance check
# Kept in `iterative_time_b` so the later vectorized cells can report their speed-up against it
iterative_time_b = time_solution(calc_solution_b_iterative, input_data)
print(f"The iterative implementation takes {iterative_time_b:.0f} ms per run.")

The iterative implementation takes 304 ms per run.


### Vectorized approach
Initially, I reused the looping logic from the iterative approach and just replaced the neighbor-counting function with the convolution-based one. However, this means that the convolution is performed for each loop iteration, which is not very efficient.

In [None]:
# Functions
def calc_solution_b_vectorized(input_data: str) -> int:
    """Find number of paper rolls that can be removed from the grid by using 2D convolution.

    In every round, ``find_reachable_paper_rolls_vectorized`` recomputes the neighbor counts
    with a fresh convolution, and all reachable paper rolls are removed at once. The rounds
    stop as soon as no paper roll is reachable.

    Args:
        input_data: Raw puzzle text, parsed with ``text_to_array_grid``.

    Returns:
        The total number of paper rolls removed over all rounds.
    """
    # Create binary array (1 where PAPER_ROLL, 0 otherwise)
    paper_rolls = (text_to_array_grid(input_data) == PAPER_ROLL).astype(np.int8)

    # Running total of removed rolls
    total_removed_rolls = 0

    while True:
        # Boolean mask of the rolls that are reachable in the current grid state
        reachable_rolls_mask = find_reachable_paper_rolls_vectorized(paper_rolls)

        # Break if no more rolls can be removed
        if (removable_roll_count := int(np.sum(reachable_rolls_mask))) == 0:
            break

        # Remove reachable rolls (in place, so the next round sees the updated grid)
        paper_rolls[reachable_rolls_mask] = 0

        total_removed_rolls += removable_roll_count

    return total_removed_rolls

In [None]:
# Correctness check
# `example.answer_b` is compared as a string, hence the str() around the integer result
str(calc_solution_b_vectorized(example.input_data)) == example.answer_b

True

In [None]:
# Performance check
vectorized_time_b = time_solution(calc_solution_b_vectorized, input_data)
print(f"The vectorized implementation takes {vectorized_time_b:.2f} ms per run.")
# The speed-up reuses `iterative_time_b`, so the iterative performance cell must have run first
print(f"This is {iterative_time_b / vectorized_time_b:.0f}x faster than the iterative implementation.")

The vectorized implementation takes 36.44 ms per run.
This is 8x faster than the iterative implementation.


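### Optimized vectorized approach
Instead of running the convolution again in every loop iteration, we convolve once at the start and then only decrement the neighbor counts of the cells around the rolls that were just removed.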

In [30]:
# Imports
from common.utils.numpy_grid import OCTAGONAL_OFFSETS_TUPLE, shift2d

In [None]:
def calc_solution_b_vectorized_convolve_once(input_data: str) -> int:
    """Find number of paper rolls that can be removed from the grid, only convolving once at the start.

    The neighbor counts are computed with a single convolution and then maintained
    incrementally: after each round, the mask of removed rolls is shifted by every offset in
    ``OCTAGONAL_OFFSETS_TUPLE`` and subtracted from the counts.

    Args:
        input_data: Raw puzzle text, parsed with ``text_to_array_grid``.

    Returns:
        The total number of paper rolls removed over all rounds.
    """
    # Create binary array (1 where PAPER_ROLL, 0 otherwise)
    paper_rolls = (text_to_array_grid(input_data) == PAPER_ROLL).astype(np.int8)

    # Initial neighbor counts
    neighbor_counts = convolve2d(paper_rolls, OCTAGONAL_KERNEL, mode="same", boundary="fill").astype(np.int16)

    # Running total of removed rolls
    total_removed_rolls = 0

    while True:
        # Rolls that are still present and currently have fewer than 4 neighbors
        removable_mask = (paper_rolls == 1) & (neighbor_counts < 4)

        # Break if no more rolls can be removed
        if (removable_roll_count := int(removable_mask.sum())) == 0:
            break

        # Remove the paper rolls
        paper_rolls[removable_mask] = 0

        total_removed_rolls += removable_roll_count

        # Cast boolean mask to int for updating neighbor counts
        removable_int = removable_mask.astype(np.int8)

        # NOTE(review): this assumes shift2d fills vacated cells with 0 instead of wrapping
        # around the edges; otherwise counts on the opposite border would be decremented too.
        # Confirm in common.utils.numpy_grid.
        for dx, dy in OCTAGONAL_OFFSETS_TUPLE:
            # Decrement neighbor counts for the neighbors of removed rolls
            neighbor_counts -= shift2d(removable_int, dx=dx, dy=dy)

    return total_removed_rolls

In [None]:
# Correctness check
# `example.answer_b` is compared as a string, hence the str() around the integer result
str(calc_solution_b_vectorized_convolve_once(example.input_data)) == example.answer_b

True

In [None]:
# Performance check
vectorized_optimized_time_b = time_solution(calc_solution_b_vectorized_convolve_once, input_data)
print(f"The optimized vectorized implementation takes {vectorized_optimized_time_b:.2f} ms per run.")
# Both ratios reuse timings from earlier cells (`iterative_time_b`, `vectorized_time_b`),
# so those performance cells must have run first
print(f"This is {iterative_time_b / vectorized_optimized_time_b:.1f}x faster than the iterative implementation.")
print(
    f"This is {vectorized_time_b / vectorized_optimized_time_b:.1f}x faster than the "
    "previous vectorized implementation that uses convolution to count neighbors at every step in the loop."
)

The optimized vectorized implementation takes 8.34 ms per run.
This is 36.4x faster than the iterative implementation.
This is 4.4x faster than the previous vectorized implementation that uses convolution to count neighbors at every step in the loop.


In [None]:
# Submit answer
# Assigning to `puzzle.answer_b` sends the answer; the response is printed as the cell output
puzzle.answer_b = calc_solution_b_vectorized_convolve_once(input_data)

[32mThat's the right answer!  You are one gold star closer to decorating the North Pole.You have completed Day 4! You can [Shareon
  Bluesky
Twitter
Mastodon] this victory or [Return to Your Advent Calendar].[0m
