# Day 16
## Part 1

In [17]:
import re

def parse_data(s):
    """Parse the puzzle text into tunnel and flow-rate lookups.

    Each non-empty line must look like
    ``Valve XX has flow rate=N; tunnel(s) lead(s) to valve(s) A, B, ...``.

    Args:
        s: the raw puzzle input; surrounding blank lines are ignored.

    Returns:
        A ``(tunnels, flows)`` tuple where ``tunnels`` maps a valve name to
        the list of valves it connects to and ``flows`` maps a valve name to
        its integer flow rate.

    Raises:
        ValueError: if a line does not match the expected format.
    """
    pattern = re.compile(
        r"Valve (\w\w) has flow rate=(\d+); tunnels? leads? to valves? (.*)"
    )
    tunnels = {}
    flows = {}
    for raw_line in s.strip().splitlines():
        # Strip so stray indentation or trailing spaces cannot break the
        # match or leak into the last valve name captured by ``(.*)``.
        line = raw_line.strip()
        m = pattern.match(line)
        if m is None:
            raise ValueError(f"Unrecognised valve description: {raw_line!r}")
        valve = m.group(1)
        flows[valve] = int(m.group(2))
        tunnels[valve] = m.group(3).split(", ")
    return (tunnels, flows)

# Sample valve descriptions in the line format parse_data's regex matches;
# it covers both the plural "tunnels lead to valves" wording and the
# singular "tunnel leads to valve" wording.
test_string = """
Valve AA has flow rate=0; tunnels lead to valves DD, II, BB
Valve BB has flow rate=13; tunnels lead to valves CC, AA
Valve CC has flow rate=2; tunnels lead to valves DD, BB
Valve DD has flow rate=20; tunnels lead to valves CC, AA, EE
Valve EE has flow rate=3; tunnels lead to valves FF, DD
Valve FF has flow rate=0; tunnels lead to valves EE, GG
Valve GG has flow rate=0; tunnels lead to valves FF, HH
Valve HH has flow rate=22; tunnel leads to valve GG
Valve II has flow rate=0; tunnels lead to valves AA, JJ
Valve JJ has flow rate=21; tunnel leads to valve II
"""

# Parse once; later cells unpack this (tunnels, flows) pair with *test_data.
test_data = parse_data(test_string)
test_data

({'AA': ['DD', 'II', 'BB'],
  'BB': ['CC', 'AA'],
  'CC': ['DD', 'BB'],
  'DD': ['CC', 'AA', 'EE'],
  'EE': ['FF', 'DD'],
  'FF': ['EE', 'GG'],
  'GG': ['FF', 'HH'],
  'HH': ['GG'],
  'II': ['AA', 'JJ'],
  'JJ': ['II']},
 {'AA': 0,
  'BB': 13,
  'CC': 2,
  'DD': 20,
  'EE': 3,
  'FF': 0,
  'GG': 0,
  'HH': 22,
  'II': 0,
  'JJ': 21})

Parsing that was surprisingly annoying.

Hmm, the valves with zero flow can be removed from the tunnels, apart from the starting position AA, and replaced with weighted paths representing the minutes taken to move from valve to valve. These weights should be the shortest distance between each valve. Remove paths back to AA as they're not needed.

Thinking about it, while we're at it add 1 to each weight as we're only visiting these valves to turn them on, which will take an extra minute.

In [44]:
from collections import deque, defaultdict

def trim_tunnels(tunnels, flows):
    """Collapse the tunnel graph to weighted hops between useful valves.

    A breadth-first search is run from 'AA' and from every valve with a
    positive flow rate, recording the fewest moves needed to reach each other
    such valve. Entries leading back to 'AA' are then dropped, and every
    weight is increased by one to account for the minute spent opening the
    destination valve.

    Args:
        tunnels: mapping of valve name -> list of directly connected valves.
        flows: mapping of valve name -> flow rate.

    Returns:
        A ``defaultdict(dict)`` mapping each source valve to
        ``{destination: minutes to walk there and open it}``.
    """
    working_valves = {v for v in flows if flows[v] > 0}
    working_valves.add('AA')
    # A source never records itself, so this is the most destinations any
    # search can find; reaching it lets the BFS stop early.
    wanted = len(working_valves) - 1
    trimmed = defaultdict(dict)

    for v in working_valves:
        q = deque((u, 1) for u in tunnels[v])
        visited = {v}
        # Touching trimmed[v] registers the source even if nothing is found.
        found = trimmed[v]

        while q and len(found) < wanted:
            valve, minutes = q.popleft()
            if valve in visited:
                continue
            visited.add(valve)
            # BFS order guarantees the first arrival is the shortest route.
            if valve in working_valves:
                found[valve] = minutes
            q.extend(
                (u, minutes + 1) for u in tunnels[valve] if u not in visited
            )

    for hops in trimmed.values():
        # Returning to the start is never useful, so drop those entries.
        hops.pop('AA', None)
        for u in hops:
            hops[u] += 1  # one extra minute to open the valve on arrival

    return trimmed


trim_tunnels(*test_data)

defaultdict(dict,
            {'AA': {'DD': 2, 'BB': 2, 'CC': 3, 'EE': 3, 'JJ': 3, 'HH': 6},
             'HH': {'EE': 4, 'DD': 5, 'CC': 6, 'BB': 7, 'JJ': 8},
             'JJ': {'DD': 4, 'BB': 4, 'CC': 5, 'EE': 5, 'HH': 8},
             'EE': {'DD': 2, 'CC': 3, 'HH': 4, 'BB': 4, 'JJ': 5},
             'DD': {'CC': 2, 'EE': 2, 'BB': 3, 'JJ': 4, 'HH': 5},
             'CC': {'DD': 2, 'BB': 2, 'EE': 3, 'JJ': 5, 'HH': 6},
             'BB': {'CC': 2, 'DD': 3, 'EE': 4, 'JJ': 4, 'HH': 7}})

So now the flow gained from visiting a node is the valve's flow rate multiplied by the time remaining (out of 30 minutes) after travelling to the node and switching on the valve. That doesn't get away from this being a Travelling Salesman Problem with a twist, which needs an optimal solution.

Let's try a dynamic programming approach. Create a recursive cached function that calculates the optimal flow for a location, remaining unvisited valves, and time left and hope that makes it more tractable.

In [51]:
from functools import cache
def create_optimal_flow(tunnels, flows):
    """Build a memoised solver bound to one weighted tunnel map.

    Args:
        tunnels: mapping of valve -> {reachable valve: minutes to walk there
            and open it}.
        flows: mapping of valve -> flow rate.

    Returns:
        A function ``optimal_flow(position, unvisited, time_left)`` giving the
        largest total flow obtainable from ``position`` using only valves in
        ``unvisited`` within ``time_left`` minutes. ``unvisited`` must be
        hashable (e.g. a ``frozenset``) because results are cached.
    """
    @cache
    def optimal_flow(position, unvisited, time_left):
        # For each valve that is still allowed and reachable with time to
        # spare: the flow it contributes once open, plus the best that can
        # be done from there afterwards.
        outcomes = (
            (time_left - cost) * flows[target]
            + optimal_flow(target, unvisited - {target}, time_left - cost)
            for target, cost in tunnels[position].items()
            if target in unvisited and cost < time_left
        )
        # Nothing left worth opening contributes zero.
        return max(outcomes, default=0)

    return optimal_flow

                    
def part_1(tunnels, flows):
    """Return the best total flow for one worker starting at 'AA' with 30 minutes.

    Args:
        tunnels: mapping of valve -> list of directly connected valves.
        flows: mapping of valve -> flow rate.
    """
    solver = create_optimal_flow(trim_tunnels(tunnels, flows), flows)
    # Every valve except the start is a candidate; valves absent from the
    # trimmed map are simply never offered as destinations by the solver.
    candidates = frozenset(name for name in tunnels if name != 'AA')
    return solver('AA', candidates, 30)


part_1(*test_data)

1651

Bloody hell! Worked first time.

In [52]:
# Read the puzzle input inside a context manager so the file handle is
# closed as soon as the text has been read.
with open('input') as f:
    data = parse_data(f.read())
part_1(*data)

2265

In [53]:
%%timeit
part_1(*data)

160 ms ± 3.42 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Part 2
The approach I took in part 1 is particularly unsuitable for part 2. Hmm.

Actually no it isn't. Partition the valves between me and the elephant for all possible partitions and solve as above. It will take a while as the number of partitions is 

In [62]:
2**(len([x for x in data[1] if data[1][x] != 0]) - 1)

16384

but that will no doubt be quicker than coding a better solution.

In [89]:
from itertools import product

def part_2(tunnels, flows):
    """Return the best combined flow for two workers with 26 minutes each.

    The positive-flow valves are split between the two workers in every
    possible way; each side is solved independently from 'AA' and the best
    combined total is kept. The first working valve is always assigned to
    the first worker, since swapping the two sides gives the same sum, which
    halves the number of splits to try.

    Args:
        tunnels: mapping of valve -> list of directly connected valves.
        flows: mapping of valve -> flow rate.

    Returns:
        The maximum combined flow, or 0 when no valve has a positive flow.
    """
    trimmed_tunnels = trim_tunnels(tunnels, flows)
    let_it_flow = create_optimal_flow(trimmed_tunnels, flows)
    working_valves = [v for v in flows if flows[v] > 0]
    if not working_valves:
        # Nothing to open; without this guard the split below would fail
        # on the empty list.
        return 0

    anchor, *others = working_valves
    working_valves_set = frozenset(working_valves)
    max_flow = 0
    for p in product([True, False], repeat=len(others)):
        me = frozenset(
            {anchor} | {v for v, is_me in zip(others, p) if is_me}
        )
        elephant = working_valves_set - me
        flow = let_it_flow('AA', me, 26) + let_it_flow('AA', elephant, 26)
        max_flow = max(max_flow, flow)
    return max_flow
        
part_2(*test_data)

1707

In [90]:
part_2(*data)

2811

That one wasn't too bad if you spend a day and a half thinking about it. 

In [91]:
%%timeit
part_2(*data)

18.5 s ± 240 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
