# --- Day 12: Hill Climbing Algorithm ---

[Puzzle Description](https://adventofcode.com/2022/day/12)

In [14]:
import numpy as np
import string

In [15]:
# Read the heightmap: one list of single-character cells per input line.
# Reading stops at the first blank line (or end of file).
rows = []
with open("day_12_input.txt") as file:
    for raw_line in file:
        line = raw_line.strip()
        if not line:
            break
        rows.append(list(line))

grid = np.array(rows)

# (row, col) coordinates of the goal "E" and the start "S" markers.
end_state = np.argwhere(grid == "E").squeeze()
start_state = np.argwhere(grid == "S").squeeze()

# One Q-value per (row, col, action), initialised to zero.
# (The former trailing `* 10` multiplied zeros and had no effect.)
q_values = np.zeros((*grid.shape, 4))

# Elevation of every cell character: "a" -> 0 ... "z" -> 25; the start marker
# sits at the lowest elevation and the goal marker at the highest.
height = {letter: level for level, letter in enumerate(string.ascii_lowercase)}
height["S"] = height["a"]
height["E"] = height["z"]

In [16]:
# Learning rate of the Q-update.  `alpha` was not assigned in any visible cell,
# so this cell raised NameError on a fresh kernel.  Every move here is
# deterministic, so 1.0 (replace the old estimate by the new target) is used;
# the value behind the recorded outputs is not visible in the notebook.
alpha = 1.0

# Row/column offset per action index: 0 = up, 1 = right, 2 = down, 3 = left.
moves = np.array([[-1, 0], [0, 1], [1, 0], [0, -1]])

n_rows, n_cols = grid.shape
n_cells = n_rows * n_cols
blocked_penalty = 10 * n_cells  # cost of walking off the grid or climbing > 1
goal_bonus = 5 * n_cells  # reward for stepping onto the goal cell

for n in range(n_cells):
    # Snapshot of the positive part of the state values before this sweep.
    # Positive values only arise on cells with a known route to the goal.
    positive_before = np.maximum(q_values.max(axis=2), 0)

    for i in range(n_rows):
        for j in range(n_cols):
            state = np.array([i, j])
            # The goal is terminal: its Q-values stay at zero.
            if np.all(state == end_state):
                continue
            for action in range(4):
                reward = -1
                new_state = state + moves[action]
                on_grid = 0 <= new_state[0] < n_rows and 0 <= new_state[1] < n_cols
                # The height lookup is only evaluated for on-grid targets.
                if (
                    not on_grid
                    or height[grid[tuple(new_state)]] - height[grid[tuple(state)]] > 1
                ):
                    # Illegal move: stay in place and pay a large penalty.
                    reward -= blocked_penalty
                    new_state = state
                if np.all(new_state == end_state):
                    reward += goal_bonus
                q_values[i, j, action] += alpha * (
                    reward
                    + q_values[tuple(new_state)].max()
                    - q_values[i, j, action]
                )

    # Stop once a full sweep leaves every positive state value unchanged.
    # With alpha = 1.0 this means every cell that can reach the goal holds its
    # exact shortest-path value.  The previous rule (stop as soon as the start
    # cell had any positive Q-value) could fire before the shortest route had
    # propagated and said nothing about the other cells.
    if np.array_equal(np.maximum(q_values.max(axis=2), 0), positive_before):
        break

## Part One

In [17]:
# Follow the greedy policy (highest Q-value action) from the start marker to
# the goal and count the moves.

# Row/column offset per action index: 0 = up, 1 = right, 2 = down, 3 = left.
moves = np.array([[-1, 0], [0, 1], [1, 0], [0, -1]])

state = start_state
steps = 0
# A shortest path never revisits a cell, so it has fewer moves than there are
# cells; exceeding that means the greedy policy is cycling.
max_steps = grid.size

while not np.all(state == end_state):
    if steps >= max_steps:
        raise RuntimeError(
            "Greedy walk did not reach the goal; the Q-values do not encode a path."
        )
    action = q_values[tuple(state)].argmax()
    state = state + moves[action]
    steps += 1

print(f"Part One: {steps}")

Part One: 456


## Part Two

In [18]:
# Pick the lowest-elevation cell with the best state value, then follow the
# greedy policy from it to the goal and count the moves.

# Row/column offset per action index: 0 = up, 1 = right, 2 = down, 3 = left.
moves = np.array([[-1, 0], [0, 1], [1, 0], [0, -1]])

# Cells at elevation 0 ("a" and the start marker "S").
is_lowest = np.vectorize(height.get)(grid) == 0
# Exclude every other cell with -inf.  Multiplying by the mask would give those
# cells the value 0, which beats any lowest cell whose value is negative.
avail_values = np.where(is_lowest, q_values.max(axis=2), -np.inf)
# Take the row and column of one and the same maximal cell.  Separate per-axis
# argmaxes can combine the row of one tied cell with the column of another.
i, j = np.unravel_index(avail_values.argmax(), avail_values.shape)
state = np.array([i, j])
steps = 0
# A shortest path never revisits a cell, so it has fewer moves than there are
# cells; exceeding that means the greedy policy is cycling.
max_steps = grid.size

while not np.all(state == end_state):
    if steps >= max_steps:
        raise RuntimeError(
            "Greedy walk did not reach the goal; the Q-values do not encode a path."
        )
    action = q_values[tuple(state)].argmax()
    state = state + moves[action]
    steps += 1

print(f"Part Two: {steps}")

Part Two: 454
