## Setup

In [3]:
# Get raw advent-of-code data
from aocd.models import Puzzle

# Load the 2025 day 5 puzzle, then keep its full input and the first entry of its examples
puzzle = Puzzle(year=2025, day=5)
input_data = puzzle.input_data
example = puzzle.examples[0]

In [7]:
import sys
from pathlib import Path

sys.path.append(str(Path.cwd().parent))

from common.utils.perf_check import time_solution

## Part a

### Iterative approach
Let's see how far we get with Python builtins first.

In [4]:
# Functions
def parse_input_to_lists(input_data: str, *, return_ids: bool = True) -> tuple[list[tuple[int, ...]], list[int]]:
    """Split the puzzle input into fresh ingredient ranges and available ingredient IDs.

    The input consists of two sections separated by a blank line: one ``start-end``
    range per line, followed by one ingredient ID per line.

    Args:
        input_data: Raw puzzle input containing both sections.
        return_ids: When False, the ID section is not parsed and an empty list is returned for it.

    Returns:
        A pair ``(ranges, ids)``: ``ranges`` holds one tuple of ints per range line,
        ``ids`` holds the ingredient IDs as ints (empty if ``return_ids`` is False).
    """
    range_block, id_block = input_data.split("\n\n")

    # Each range line looks like "start-end"; convert both bounds to int
    ranges = [tuple(int(bound) for bound in line.split("-")) for line in range_block.splitlines()]
    ids = [int(line) for line in id_block.splitlines()] if return_ids else []

    return ranges, ids


def find_available_fresh_ingredients_iterative(input_data: str) -> int:
    """Count the available ingredient IDs that fall inside at least one fresh range.

    Args:
        input_data: Raw puzzle input (range section, blank line, ID section).

    Returns:
        The number of available IDs covered by any range; range bounds are inclusive.
    """
    ranges, ingredient_ids = parse_input_to_lists(input_data)

    # `any` stops at the first matching range, so each ID is counted at most once
    return sum(
        any(low <= ingredient_id <= high for low, high in ranges)
        for ingredient_id in ingredient_ids
    )

In [None]:
# Correctness check
str(find_available_fresh_ingredients_iterative(example.input_data)) == example.answer_a

True

In [9]:
# Performance check
iterative_time_a = time_solution(find_available_fresh_ingredients_iterative, input_data)
print(f"The iterative implementation takes {iterative_time_a:.1f} ms per run.")

The iterative implementation takes 3.6 ms per run.


### Vectorized approach
Let's see if we can speed things up with NumPy.

In [None]:
# Imports
import numpy as np

In [None]:
# Functions
def parse_input_to_np_arrays(
    input_data: str, *, return_ids: bool = True
) -> tuple[np.ndarray[tuple[int, int], np.dtype[np.int64]], np.ndarray[tuple[int], np.dtype[np.int64]]]:
    """Parse input data into fresh ingredient ranges and available ingredient IDs.

    The input consists of two sections separated by a blank line: one ``start-end``
    range per line, followed by one ingredient ID per line.

    Args:
        input_data: Raw puzzle input containing both sections.
        return_ids: When False, the ID section is not parsed and an empty array is returned for it.

    Returns:
        A pair ``(ranges, ids)``: ``ranges`` is a 2-D int64 array with one ``[start, end]``
        row per range line, ``ids`` is a 1-D int64 array of ingredient IDs
        (empty if ``return_ids`` is False).
    """
    ranges_str, ids_str = input_data.split("\n\n")

    # ndmin=2 keeps one row per range even when only a single range is listed; without it
    # np.loadtxt collapses a one-line input to a 1-D array and column indexing such as
    # `ranges[:, 0]` fails.
    ranges = np.loadtxt(ranges_str.splitlines(), delimiter="-", dtype=np.int64, ndmin=2)
    ids = np.fromstring(ids_str, dtype=np.int64, sep="\n") if return_ids else np.array([], dtype=np.int64)

    return ranges, ids


def find_available_fresh_ingredients_vectorized(input_data: str) -> int:
    """Count the available ingredient IDs covered by at least one fresh range, using NumPy broadcasting.

    Args:
        input_data: Raw puzzle input (range section, blank line, ID section).

    Returns:
        The number of available IDs covered by any range; range bounds are inclusive.
    """
    ranges, ids = parse_input_to_np_arrays(input_data)

    lower_bounds, upper_bounds = ranges[:, 0], ranges[:, 1]
    # Add a trailing axis so every ID is compared against every range (one row per ID)
    id_column = ids[:, None]

    # in_range[i, j] is True when ID i lies within range j
    in_range = (lower_bounds <= id_column) & (id_column <= upper_bounds)

    # An ID is fresh if any range contains it; summing the booleans counts the fresh IDs
    is_fresh = in_range.any(axis=1)
    return int(is_fresh.sum())

In [None]:
# Correctness check
str(find_available_fresh_ingredients_vectorized(example.input_data)) == example.answer_a

True

In [23]:
# Performance check
vectorized_time_a = time_solution(find_available_fresh_ingredients_vectorized, input_data)
print(
    f"The vectorized implementation takes {vectorized_time_a:.2f} ms per run."
    f"\nThis is {iterative_time_a / vectorized_time_a:.1f}x faster than the iterative version."
)

The vectorized implementation takes 0.35 ms per run.
This is 10.2x faster than the iterative version.


In [None]:
# Submit answer
puzzle.answer_a = find_available_fresh_ingredients_vectorized(input_data)

## Part b

### Iterative approach
Let's start with a pure-Python, iterative approach again.

In [47]:
# Functions
def find_all_fresh_ingredients_iterative(input_data: str) -> int:
    """Count the distinct ingredient IDs covered by the fresh ranges.

    Ranges are sorted by start and swept once; overlapping ranges are merged so that
    IDs covered by several ranges are counted only once.

    Args:
        input_data: Raw puzzle input (range section, blank line, ID section).

    Returns:
        The number of IDs covered by the union of all ranges (bounds inclusive),
        or 0 when the range section is empty.
    """
    # Parse input data (the ID section is not needed here)
    fresh_ingredient_ranges, _ = parse_input_to_lists(input_data, return_ids=False)

    # Nothing to merge: avoid indexing into an empty list below
    if not fresh_ingredient_ranges:
        return 0

    ranges = sorted(fresh_ingredient_ranges)

    total = 0
    current_start, current_end = ranges[0]
    for start, end in ranges[1:]:
        if start <= current_end:
            # Overlaps the range being built: extend its end if this one reaches further
            current_end = max(current_end, end)
        else:
            # Gap found: the range being built is complete, so count it and start a new one
            total += current_end - current_start + 1
            current_start, current_end = start, end

    # Count the last merged range, which the loop never closes
    return total + current_end - current_start + 1

In [None]:
# Correctness check
str(find_all_fresh_ingredients_iterative(example.input_data)) == example.answer_b

True

In [51]:
# Performance check
iterative_time_b = time_solution(find_all_fresh_ingredients_iterative, input_data)
print(f"The iterative implementation takes {iterative_time_b:.2f} ms per run.")

The iterative implementation takes 0.13 ms per run.


In [126]:
# Submit answer
puzzle.answer_b = find_all_fresh_ingredients_iterative(input_data)

That's the right answer! You are one gold star closer to decorating the North Pole. You have completed Day 5! You can [Share on Bluesky, Twitter, Mastodon] this victory or [Return to Your Advent Calendar].


### Vectorized approach
Let's see how far we get with NumPy again. This is essentially a vectorized version of the logic from the iterative approach.

In [None]:
# Functions
def find_all_fresh_ingredients_vectorized(input_data: str) -> int:
    """Count the distinct ingredient IDs covered by the fresh ranges, merging overlaps with NumPy.

    The start and end columns are sorted independently of each other, so a sorted start
    is generally no longer paired with its original end. This leaves the union of the
    ranges unchanged: how many ranges cover a given ID depends only on how many starts
    are <= the ID and how many ends are < the ID, and neither count changes when a
    column is reordered. Because the ends are ascending after sorting, comparing each
    start with the previous end is enough to detect a gap.

    Args:
        input_data: Raw puzzle input (range section, blank line, ID section).

    Returns:
        The number of IDs covered by the union of all ranges (bounds inclusive).
    """
    fresh_ranges, _ = parse_input_to_np_arrays(input_data, return_ids=False)

    # Sort each column on its own (see docstring for why this is valid)
    sorted_starts = np.sort(fresh_ranges[:, 0])
    sorted_ends = np.sort(fresh_ranges[:, 1])

    # A new merged range begins wherever a start lies beyond the previous end;
    # index 0 always opens the first merged range
    gap_positions = np.flatnonzero(sorted_starts[1:] > sorted_ends[:-1]) + 1
    group_offsets = np.concatenate(([0], gap_positions))

    # Largest end within each merged group, paired with the group's first start
    group_ends = np.maximum.reduceat(sorted_ends, group_offsets)
    group_starts = sorted_starts[group_offsets]

    # Inclusive length of every merged range, summed
    return int((group_ends - group_starts + 1).sum())

In [None]:
# Correctness check
str(find_all_fresh_ingredients_vectorized(example.input_data)) == example.answer_b

True

In [54]:
# Performance check
vectorized_time_b = time_solution(find_all_fresh_ingredients_vectorized, input_data)

print(f"The vectorized implementation takes {vectorized_time_b:.3f} ms per run.")
print(f"This is {iterative_time_b / vectorized_time_b:.1f}x faster than the iterative implementation.")

The vectorized implementation takes 0.078 ms per run.
This is 1.6x faster than the iterative implementation.


In [55]:
# Submit answer
puzzle.answer_b = find_all_fresh_ingredients_vectorized(input_data)