## Setup

In [46]:
# Get raw advent-of-code data
from aocd.models import Puzzle

# Puzzle handle for Advent of Code 2025, day 9.
puzzle = Puzzle(year=2025, day=9)
# Full puzzle input; reused by the performance checks and answer submissions below.
input_data = puzzle.input_data
# First example exposed by aocd; its input_data feeds the correctness checks below.
example = puzzle.examples[0]

In [47]:
import sys
from pathlib import Path

# Put the parent of the current working directory on the import path.
# NOTE(review): presumably this is where the `common` package imported below
# lives — verify against the repository layout.
sys.path.append(str(Path.cwd().parent))

from common.utils.perf_check import check_time

## Part a
Looks like pair-wise distance calculations are needed again. I'll start with the scipy pdist implementation this time.

In [48]:
# Imports
import numpy as np
from common.utils.numpy_grid import find_closest_pair_indices

In [None]:
# Functions
def parse_input(input_data: str) -> np.ndarray:
    """Parse comma-separated coordinate lines into a 2-D integer array.

    Args:
        input_data: Raw puzzle text with one comma-separated coordinate tuple per line.

    Returns:
        An ``int32`` array with one row per input line and one column per coordinate.
    """
    # ndmin=2 keeps a single-line input as one row; without it loadtxt squeezes
    # the result to a 1-D vector and row indexing (tiles[a]) stops meaning "one tile".
    return np.loadtxt(input_data.splitlines(), delimiter=",", dtype=np.int32, ndmin=2)


def calculate_furthest_box_area(input_data: str) -> int:
    """Calculate the inclusive area of the rectangle spanned by the furthest tile pair.

    Args:
        input_data: Raw puzzle text with one comma-separated tile coordinate per line.

    Returns:
        The product of the inclusive side lengths (``|delta| + 1`` per axis) of the
        rectangle whose opposite corners are the two tiles of the first pair
        returned by ``find_closest_pair_indices(..., invert_order=True)``.

    NOTE(review): the pair with the greatest distance is not necessarily the pair
    spanning the largest area. Under a Euclidean metric, corners (0, 0)/(10, 0) are
    further apart than (0, 0)/(6, 6) yet cover 11 versus 49 tiles. Confirm which
    metric and ordering find_closest_pair_indices uses before relying on this for
    arbitrary inputs.
    """
    # Load input data
    tiles = parse_input(input_data)

    # Get the furthest box pairs using a common scipy-based function
    furthest_box_pairs = find_closest_pair_indices(tiles, invert_order=True)

    # The first returned pair is taken as the furthest one
    a, b = furthest_box_pairs[0]

    # Widen to int64 before any arithmetic and add a broadcast scalar 1 so the
    # inclusive side lengths work for any number of coordinate columns.
    corner_a = tiles[a].astype(np.int64)
    corner_b = tiles[b].astype(np.int64)
    side_lengths = np.abs(corner_a - corner_b) + 1

    # Accumulate explicitly in int64: narrowing the sides to int32 first lets the
    # product wrap on platforms whose default integer is 32 bits wide.
    return int(np.prod(side_lengths, dtype=np.int64))

Again something went wrong with fetching the example answer. I've pasted the correct answer below.

In [66]:
# Correctness check: the expected area (50) is hard-coded here rather than read
# from the example object; the cell displays the result of the comparison.
calculate_furthest_box_area(example.input_data) == 50

True

In [67]:
# Performance check: time the part-a solver on the full puzzle input.
# NOTE(review): the print below labels the value as milliseconds per run; the
# actual unit and repeat count are defined by check_time in
# common.utils.perf_check — confirm there.
scipy_time_a = check_time(calculate_furthest_box_area, input_data)
print(f"The scipy implementation takes {scipy_time_a:.1f} ms per run.")


The scipy implementation takes 2.8 ms per run.


In [68]:
# Submit answer: assign the part-a result computed from the full input to the
# puzzle object. The recorded output below shows the site's response, so
# re-running this cell presumably triggers a new submission attempt.
puzzle.answer_a = calculate_furthest_box_area(input_data)

[32mThat's the right answer!  You are one gold star closer to decorating the North Pole. [Continue to Part Two][0m


## Part b
I think this requires more of a grid-based approach rather than pair-wise distances.

In [None]:
# Red tiles keyed by coordinate tuple, inserted in (column 0, then column 1) order.
red_tiles = parse_input(input_data)
grid = {
    tuple(tile): 1
    for tile in red_tiles[np.lexsort((red_tiles[:, 1], red_tiles[:, 0]))]
}
grid


Memory usage of grid: 18.1 KB


In [None]:
# Print each coordinate of the first tile stored in the grid.
for coordinate in list(grid)[0]:
    print(coordinate)

1828
47303


In [23]:
class UnionFind:
    """Disjoint-set forest over the integers ``0 .. n-1``.

    Lookups shorten the path they walk (every visited node is re-pointed to its
    grandparent) and merges hang the smaller tree beneath the larger one.
    """

    def __init__(self, n: int):
        # roots[i] is the parent of i; a node that is its own parent is a root.
        self.roots = [node for node in range(n)]
        # sizes[r] is the number of nodes in the tree rooted at r (valid for roots only).
        self.sizes = [1 for _ in range(n)]
        # Number of disjoint sets currently present.
        self.components = n

    def find(self, x: int) -> int:
        """Return the root of the set containing ``x``, shortening the path on the way."""
        parents = self.roots
        node = x
        while True:
            parent = parents[node]
            if parent == node:
                return node
            # Re-point the node at its grandparent, then continue from there.
            grandparent = parents[parent]
            parents[node] = grandparent
            node = grandparent

    def union(self, a: int, b: int) -> bool:
        """Merge the sets containing ``a`` and ``b``.

        Returns:
            ``True`` when two distinct sets were merged, ``False`` when ``a`` and
            ``b`` already shared a root.
        """
        keep, absorb = self.find(a), self.find(b)
        if keep == absorb:
            return False

        # The surviving root must belong to the tree that is at least as large.
        if self.sizes[keep] < self.sizes[absorb]:
            keep, absorb = absorb, keep

        self.roots[absorb] = keep
        self.sizes[keep] += self.sizes[absorb]
        self.components -= 1
        return True

In [None]:
def find_final_box_pair_union_find(box_pairs: list[tuple[int, int]], boxes_count: int) -> tuple[int, int]:
    """Return the pair whose connection first leaves a single circuit.

    Pairs are connected in the order given, using a UnionFind over
    ``boxes_count`` boxes.

    Raises:
        ValueError: If the pairs run out before everything is connected.
    """
    circuits = UnionFind(boxes_count)
    for pair in box_pairs:
        first, second = pair
        merged = circuits.union(first, second)
        # Only a pair that actually merged two circuits can be the final one.
        if merged and circuits.components == 1:
            return first, second
    msg = "There were not enough connections to connect all boxes into a single circuit."
    raise ValueError(msg)


def find_last_connection_union_find(input_data: str) -> int:
    """Connect boxes closest-pair-first and return the X product of the final link.

    All box pairs are sorted by Euclidean distance and connected in that order
    until a single circuit remains.

    Args:
        input_data: Raw puzzle text with one comma-separated box coordinate per line.

    Returns:
        The product of the first (X) coordinates of the two boxes whose connection
        merges everything into one circuit.

    Raises:
        ValueError: Propagated from find_final_box_pair_union_find when the boxes
            cannot be joined into a single circuit (for example fewer than two boxes).
    """
    # Imported locally so this cell does not depend on distance helpers that are
    # neither defined nor imported anywhere in this notebook.
    from scipy.spatial.distance import pdist

    # Load input data; atleast_2d keeps a single parsed line as one box
    boxes = np.atleast_2d(parse_input(input_data))
    boxes_count = len(boxes)

    # Condensed pairwise distances; pdist emits them in the same (i < j) order
    # that np.triu_indices(n, k=1) enumerates.
    dists = pdist(boxes)
    i_idx, j_idx = np.triu_indices(boxes_count, k=1)

    # Sort all pairs by distance (stable, so equal distances keep index order)
    order = np.argsort(dists, kind="stable")
    sorted_box_pairs = list(zip(i_idx[order].tolist(), j_idx[order].tolist()))

    # Find the final box pair that connects all boxes into a single circuit
    box_a, box_b = find_final_box_pair_union_find(sorted_box_pairs, boxes_count)

    # Multiply as Python ints so the product cannot overflow the int32 coordinates
    return int(boxes[box_a][0]) * int(boxes[box_b][0])

In [25]:
# Correctness check: the integer result is stringified before being compared
# with example.answer_b; the cell displays the result of the comparison.
str(find_last_connection_union_find(example.input_data)) == example.answer_b

True

In [None]:
# Performance check: time the union-find solver on the full input (number=5 is
# passed through to check_time). The timing is kept in a variable because the
# k-NN performance cell further down divides by it to report a speed-up.
time_union_find_b = check_time(find_last_connection_union_find, input_data, number=5)
print(f"The union-find approach takes {time_union_find_b:.2f} ms per run.")

The union-find approach takes 50.33 ms per run.


### Kruskal's algorithm with k-NN graph
The union-find approach still takes about 50 ms per run, which is decent, but I think we can do better. Going all in on k-NN graphs should help here.

In [None]:
import heapq
from math import log2

from scipy.spatial import cKDTree


In [68]:
def find_last_connection_knn(input_data: str) -> int:
    """Find the connection that completes a single circuit using a growing k-NN graph.

    Edges to each box's k nearest neighbours are processed shortest-first with a
    union-find structure. Whenever the edges run out before everything is
    connected, k is doubled and the newly discovered edges are processed.

    Args:
        input_data: Raw puzzle text with one comma-separated box coordinate per line.

    Returns:
        The product of the first (X) coordinates of the two boxes whose connection
        leaves a single circuit.

    Raises:
        ValueError: If there are fewer than two boxes, or if the edges are
            exhausted without connecting all boxes.

    NOTE(review): edges are distance-ordered only within one refill batch. An edge
    that first appears at a larger k can be shorter than edges already processed,
    so equality with a full sort over all pairs is not guaranteed for every input.
    Confirm against find_last_connection_union_find when in doubt.
    """
    # Load input data (int64 so coordinate products cannot overflow)
    boxes = np.atleast_2d(parse_input(input_data)).astype(np.int64)
    N = len(boxes)
    if N < 2:
        # Nothing to connect; also avoids log2(0) on empty input
        msg = "Could not connect all points."
        raise ValueError(msg)

    # Build a k-D tree and union-find structure for efficient nearest neighbour search
    tree = cKDTree(boxes)
    uf = UnionFind(N)

    heap: list[tuple[float, int, int]] = []
    seen: set[int] = set()  # pid = a*N + b for unordered pair (a<b)

    # Start k at the power of two at or just above sqrt(N), capped at N-1
    k = min(N - 1, int(2 ** (-(-log2(N) // 2))))
    queried_k = 0  # largest k whose neighbour lists were already pushed

    while uf.components > 1:
        if not heap:
            if queried_k >= k:
                # Every edge known at this k has been consumed: widen the search
                # instead of repeating a query that cannot yield anything new.
                if k >= N - 1:
                    msg = "No edges available to connect points"
                    raise ValueError(msg)
                k = min(N - 1, k * 2)

            # Fetch k+1 nearest neighbours (the query point is among them). Shape (N, k+1)
            dists, idxs = tree.query(boxes, k=k + 1)
            rows = np.repeat(np.arange(N), k + 1)
            cols = idxs.ravel()
            ds = dists.ravel()

            # Drop self-matches by index rather than by position: with duplicate
            # coordinates the query point is not guaranteed to come first.
            not_self = rows != cols
            rows, cols, ds = rows[not_self], cols[not_self], ds[not_self]

            # Order each endpoint pair as (low, high); keep the full index width,
            # since a 16-bit cast would wrap for more than 32767 boxes.
            low = np.minimum(rows, cols).tolist()
            high = np.maximum(rows, cols).tolist()

            fresh_edges = []
            for a, b, d in zip(low, high, ds.tolist()):
                # Unique pair id
                pid = a * N + b
                if pid in seen:
                    continue
                seen.add(pid)
                fresh_edges.append((d, a, b))

            # The heap is empty here, so heapify the whole batch in one go
            heap = fresh_edges
            heapq.heapify(heap)
            queried_k = k

            if not heap:
                # No new edges at this k; the next pass widens k or gives up
                continue

        # Process the smallest edge
        _, a, b = heapq.heappop(heap)

        # If there is only one component (connected circuit) left, return the current pair
        if uf.union(a, b) and uf.components == 1:
            return int(boxes[a, 0]) * int(boxes[b, 0])

    msg = "Could not connect all points."
    raise ValueError(msg)


In [53]:
# Correctness check: the integer result is stringified before being compared
# with example.answer_b; the cell displays the result of the comparison.
str(find_last_connection_knn(example.input_data)) == example.answer_b

True

In [69]:
# Performance check: time the k-NN solver and report the speed-up.
# Relies on time_union_find_b, so the union-find performance cell must have
# been run first in the same kernel.
time_knn_b = check_time(find_last_connection_knn, input_data, number=10)
print(f"The k-NN approach takes {time_knn_b:.2f} ms per run.")
print(f"This is {time_union_find_b / time_knn_b:.1f}x faster than the union-find implementation.")

The k-NN approach takes 15.32 ms per run.
This is 3.3x faster than the union-find implementation.


In [None]:
# Submit answer
# NOTE(review): `puzzle` is created with day=9 in the setup cell, while the
# recorded output of this cell mentions Day 8 and the solver multiplies the X
# coordinates of a final connection. Confirm this cell belongs in this notebook
# before re-running it, since assigning answer_b appears to submit to the site.
puzzle.answer_b = find_last_connection_knn(input_data)

coerced int64 value np.int64(8465902405) to '8465902405'


[32mThat's the right answer!  You are one gold star closer to decorating the North Pole.You have completed Day 8! You can [Shareon
  Bluesky
Twitter
Mastodon] this victory or [Return to Your Advent Calendar].[0m
