In [17]:
import sys
import re
import numpy as np
import pandas as pd
from collections import Counter
from pyvis.network import Network
from anytree import Node, RenderTree, AsciiStyle, PreOrderIter
from collections import deque

pd.set_option('display.max_columns', None)


In [70]:

# Sample Input:
# Valve AA has flow rate=0; tunnels lead to valves DD, II, BB
# Valve BB has flow rate=13; tunnels lead to valves CC, AA
# Valve CC has flow rate=2; tunnels lead to valves DD, BB
# Valve DD has flow rate=20; tunnels lead to valves CC, AA, EE
# Valve EE has flow rate=3; tunnels lead to valves FF, DD
# Valve FF has flow rate=0; tunnels lead to valves EE, GG
# Valve GG has flow rate=0; tunnels lead to valves FF, HH
# Valve HH has flow rate=22; tunnel leads to valve GG
# Valve II has flow rate=0; tunnels lead to valves AA, JJ
# Valve JJ has flow rate=21; tunnel leads to valve II


# Rules:
# 1. It takes 1 minute to open a valve.
# 2. All valves start closed.
# 3. It takes 1 minute to move to a new valve.
# 4. You start at valve AA
# 5. When a valve is open, it begins releasing pressure at its given flow rate once per minute
# 6. There are 30 minutes in which to release as much pressure as possible.
# 7. A valve does not have to be opened to move to a different tunnel
# 8. A valve stays open once it is opened.

def load_input(filename):
    """
    Loads the input file and returns a dictionary of valves.

    Each non-blank line is expected to look like
    ``Valve AA has flow rate=0; tunnels lead to valves DD, II, BB``
    (the singular wording ``tunnel leads to valve GG`` is accepted as well).

    Args:
        filename: Path of the puzzle input text file.

    Returns:
        dict mapping valve name -> {'flow_rate': int, 'tunnels': list of the
        neighbouring valve names}.

    Blank lines are skipped silently. Any other line that does not match the
    expected format is reported on stdout and skipped.
    """
    line_pattern = re.compile(
        r'Valve (\w+) has flow rate=(\d+); tunnels? leads? to valves? (.*)')
    valves = {}
    with open(filename) as f:
        for raw_line in f:
            # Strip first so trailing spaces / '\r' never leak into a tunnel
            # name and an empty trailing line is not reported as an error.
            line = raw_line.strip()
            if not line:
                continue
            m = line_pattern.match(line)
            if m:
                valve_name = m.group(1)
                flow_rate = int(m.group(2))
                tunnels = [name.strip() for name in m.group(3).split(',') if name.strip()]
                valves[valve_name] = {'flow_rate': flow_rate, 'tunnels': tunnels}
            else:
                print('Error parsing line: {}'.format(line))
    return valves

# Load the puzzle input; swap the two lines below to run against the sample data instead.
# NOTE: this rebinds the name `input`, shadowing Python's built-in input() for the rest of the notebook.
# input = load_input('day16-sampleinput.txt')
input = load_input('day16-input.txt')
display(input)

# Tabular view of the same data: after the transpose there is one row per valve,
# with the columns 'flow_rate' and 'tunnels'.
input_df = pd.DataFrame(input).T
display(input_df)

{'QP': {'flow_rate': 0, 'tunnels': ['IS', 'DG']},
 'MC': {'flow_rate': 0, 'tunnels': ['XX', 'QQ']},
 'OT': {'flow_rate': 7, 'tunnels': ['OE', 'BL', 'DJ', 'JS', 'LS']},
 'CZ': {'flow_rate': 0, 'tunnels': ['IC', 'ZL']},
 'GI': {'flow_rate': 0, 'tunnels': ['OM', 'GF']},
 'YB': {'flow_rate': 0, 'tunnels': ['DQ', 'MX']},
 'EJ': {'flow_rate': 0, 'tunnels': ['GB', 'ES']},
 'IS': {'flow_rate': 19, 'tunnels': ['AS', 'OB', 'QP']},
 'WI': {'flow_rate': 21, 'tunnels': ['SS', 'AK']},
 'JS': {'flow_rate': 0, 'tunnels': ['OT', 'HV']},
 'UR': {'flow_rate': 0, 'tunnels': ['OM', 'ZI']},
 'UC': {'flow_rate': 0, 'tunnels': ['QX', 'NG']},
 'BL': {'flow_rate': 0, 'tunnels': ['YW', 'OT']},
 'AK': {'flow_rate': 0, 'tunnels': ['WI', 'AL']},
 'QQ': {'flow_rate': 16, 'tunnels': ['MC', 'WH', 'MS', 'IY']},
 'PW': {'flow_rate': 0, 'tunnels': ['ZL', 'EK']},
 'AS': {'flow_rate': 0, 'tunnels': ['IS', 'MS']},
 'ZL': {'flow_rate': 9, 'tunnels': ['CD', 'QX', 'PW', 'CZ', 'PQ']},
 'OB': {'flow_rate': 0, 'tunnels': ['HS', '

Unnamed: 0,flow_rate,tunnels
QP,0,"[IS, DG]"
MC,0,"[XX, QQ]"
OT,7,"[OE, BL, DJ, JS, LS]"
CZ,0,"[IC, ZL]"
GI,0,"[OM, GF]"
YB,0,"[DQ, MX]"
EJ,0,"[GB, ES]"
IS,19,"[AS, OB, QP]"
WI,21,"[SS, AK]"
JS,0,"[OT, HV]"


In [71]:
def find_shortest_path(valves):
    """
    Computes a shortest route between every ordered pair of connected valves.

    A breadth-first search is run from each valve in turn, following the
    'tunnels' lists.

    Args:
        valves: dict mapping valve name -> dict with a 'tunnels' entry that
            lists the names of the directly connected valves.

    Returns:
        dict of dicts: result[start][destination] is the list of valves
        stepped through to get from start to destination (start excluded,
        destination included), so its length is the number of moves needed.
        The start valve itself and unreachable valves are absent.

    Raises:
        KeyError: if a tunnel refers to a valve that is not a key of `valves`.
    """
    valve_paths = {}
    for start in valves:
        shortest_paths = {}
        # Marking valves when they are enqueued (not when dequeued) keeps each
        # valve in the queue at most once; the first route found by BFS is
        # already a shortest one.
        visited = {start}
        queue = deque([(start, [])])
        while queue:
            node, path = queue.popleft()  # O(1), unlike list.pop(0)
            for tunnel in valves[node]['tunnels']:
                if tunnel in visited:
                    continue
                visited.add(tunnel)
                next_path = path + [tunnel]
                shortest_paths[tunnel] = next_path
                queue.append((tunnel, next_path))
        valve_paths[start] = shortest_paths
    return valve_paths

# Shortest routes between valves, keyed as valve_paths[start][destination].
valve_paths = find_shortest_path(input)

In [None]:
# Draw the tunnel map as an interactive pyvis graph written to an HTML file.
net_output = 'day16-graph.html'
net = Network(notebook=True, cdn_resources='remote', height='100%', width='100%')

# Every valve becomes a labelled node. All nodes are added before any edge.
#   - the starting valve AA: red circle with a thick border
#   - valves with a positive flow rate: light-blue circle
#   - everything else: plain text label (borderWidth is set to its flow rate)
for valve, details in input.items():
    if valve == 'AA':
        node_style = dict(shape='circle', color='red', borderWidth=20)
    elif details['flow_rate'] > 0:
        node_style = dict(shape='circle', color='lightblue')
    else:
        node_style = dict(shape='text', borderWidth=details['flow_rate'])
    net.add_node(valve, label=valve, **node_style)

# Every tunnel becomes an edge between the two valves it joins.
for valve, details in input.items():
    for tunnel in details['tunnels']:
        net.add_edge(valve, tunnel)

# Uncomment to expose the physics controls in the rendered page:
# net.show_buttons(filter_=['physics'])
net.barnes_hut(spring_length=0, spring_strength=0.115, damping=0.17)
net.show(net_output)

In [99]:
# Square matrix of move counts: distance.loc[start, destination] is the length
# of the stored route; cells without a stored route keep their initial NaN.
distance = pd.DataFrame(index=valve_paths.keys(), columns=valve_paths.keys())
for origin, routes in valve_paths.items():
    for target, hops in routes.items():
        distance.loc[origin, target] = len(hops)

# Drop every destination whose valve has a flow rate of zero, both from
# valve_paths (mutated in place) and from the distance matrix.
# A stricter pruning that also dropped routes passing through a valve with a
# higher flow rate than the route's end point was tried and is disabled.
for origin, routes in valve_paths.items():
    zero_flow_targets = [target for target in routes if input[target]['flow_rate'] == 0]
    for target in zero_flow_targets:
        routes.pop(target)
        distance.loc[origin, target] = np.nan


flow_rate    60
tunnels      60
dtype: int64

In [75]:
def flow_frame(current_valve, valve_paths, input_df, distance, total_time=26):
    """
    Builds a table of the moves available from `current_valve`.

    Args:
        current_valve: Name of the valve the moves start from.
        valve_paths: dict of dicts; the keys of valve_paths[current_valve]
            are the candidate destination valves.
        input_df: DataFrame indexed by valve name with a 'flow_rate' column.
        distance: DataFrame of move counts, indexed as
            distance.loc[source, destination].
        total_time: Time budget in minutes that 'remaining_time' is measured
            against. Defaults to 26 (Part 2); pass 30 for Part 1.

    Returns:
        DataFrame with one row per destination and the columns
        'destination', 'time_taken' (moves plus one minute to open the valve),
        'flow_rate', 'source', 'remaining_time' (total_time - time_taken) and
        'pressure_released' (remaining_time * flow_rate), ordered by
        time_taken ascending, then pressure_released and flow_rate descending.
    """
    # Materialise the keys once: a plain list is an unambiguous indexer.
    destinations = list(valve_paths[current_valve])
    relative_flow = distance.loc[current_valve, destinations].to_frame().rename(columns={current_valve: 'time_taken'})
    relative_flow['time_taken'] = relative_flow['time_taken'] + 1  # +1 minute to open the valve
    relative_flow['flow_rate'] = input_df.loc[destinations, 'flow_rate']
    # This first sort fixes the integer labels assigned by reset_index below.
    relative_flow = relative_flow.sort_values(by=['time_taken', 'flow_rate'], ascending=[True, False])
    relative_flow = relative_flow.reset_index().rename(columns={'index': 'destination'})
    relative_flow['source'] = current_valve
    relative_flow['remaining_time'] = total_time - relative_flow['time_taken']
    relative_flow['pressure_released'] = relative_flow['remaining_time'] * relative_flow['flow_rate']
    return relative_flow.sort_values(by=['time_taken', 'pressure_released', 'flow_rate'], ascending=[True, False, False])

# display(distance)

# Collect the candidate moves ("segments") the route search may use.
# The frame for the starting valve AA is kept in full.
frames = []
aa = flow_frame('AA', valve_paths, input_df, distance)
# display('AA', aa)
frames.append(aa)

# For every valve with a positive flow rate keep only the first 6 rows of its frame,
# i.e. the 6 best moves in flow_frame's ordering (least time first, then most pressure).
# NOTE(review): this cap prunes candidate moves, so the search that consumes
# potential_paths can only find routes built from these segments.
non_zero_nodes = set(input_df[input_df['flow_rate'] > 0].index)
for current_valve in non_zero_nodes:
    current_valve_frame = flow_frame(current_valve, valve_paths, input_df, distance).head(6)
    # display(current_valve, current_valve_frame)
    frames.append(current_valve_frame)

# One combined table of all segments; 'path' starts empty and is filled in per route later.
potential_paths = pd.concat(frames).sort_values(by=['time_taken', 'pressure_released', 'flow_rate'], ascending=[True, False, False])
potential_paths['path'] = ''
# display(potential_paths)

print("Potential path segments: {}".format(len(potential_paths)))

# Walk the valve graph outward from AA, extending every partial route by each
# candidate move to a valve not yet on that route, until no move fits in the remaining time.
# Each DataFrame on the work stack holds the candidate next moves of ONE partial route:
#   - 'path' is the quoted, comma-separated list of valves on the route (starting valve first),
#   - 'remaining_time' is the time left after making the move ('time_taken' already
#     includes the minute spent opening the valve),
#   - 'pressure_released' is remaining_time * flow_rate for that move's destination,
#   - 'total_pressure_released' is the running sum of pressure_released along the route.
current_valve = 'AA'
start_path = potential_paths[potential_paths['source'] == current_valve].copy()
start_path['path'] = start_path.apply(lambda x: '"{}","{}"'.format(x['source'], x['destination']), axis=1)
start_path['total_pressure_released'] = start_path['pressure_released']
# display('start', start_path)
# walked_paths collects every frame that has been processed; walking_paths is the work stack.
walked_paths = deque()
walking_paths = deque([start_path])
while len(walking_paths) > 0:
    # pop() takes the most recently appended frame, so routes are explored depth-first.
    current_path = walking_paths.pop()
    walked_paths.append(current_path)
    # print("current path\n", current_path)
    for i in range(len(current_path)):
        destination = current_path.iloc[i]['destination']
        source_path = current_path.iloc[i]['path']
        # NOTE(review): eval() is used here only to turn the quoted, comma-separated
        # path string back into a list of valve names; consider a plain split instead.
        eval_list = 'list([{}])'.format(current_path.iloc[i]['path'])
        visited_path = eval(eval_list)
        # Candidate moves that start where this row ends and do not revisit a valve on the route.
        destination_path = potential_paths[(potential_paths['source'] == destination) & (~potential_paths['destination'].isin(visited_path))].copy()
        if len(destination_path) == 0:
            continue
        destination_path['path'] = destination_path.apply(lambda x: '{},"{}"'.format(source_path, x['destination']), axis=1)
        # Re-base the time and pressure figures on what is left after the route so far.
        destination_path['remaining_time'] = current_path.iloc[i]['remaining_time'] - destination_path['time_taken']
        destination_path['pressure_released'] = destination_path['remaining_time'] * destination_path['flow_rate']
        destination_path['total_pressure_released'] = destination_path['pressure_released'] + current_path.iloc[i]['total_pressure_released']
        # Discard moves that would run past the time budget.
        destination_path = destination_path[destination_path['remaining_time'] >= 0].copy()
        # print("destination paths ({})\n".format(len(destination_path)), destination_path)
        if len(destination_path) > 0:
            walking_paths.append(destination_path)
    
# Flatten all processed frames into one table, best cumulative pressure first.
walked_paths = pd.concat(walked_paths).sort_values(by='total_pressure_released', ascending=False)
display(walked_paths.head(20))


Potential path segments: 105


Unnamed: 0,destination,time_taken,flow_rate,source,remaining_time,pressure_released,path,total_pressure_released
4,NG,4,13,ZL,3,39,"""AA"",""YW"",""OM"",""VX"",""WI"",""ZL"",""NG""",1302
1,OT,3,7,IC,1,7,"""AA"",""YW"",""OM"",""VX"",""WI"",""ZL"",""IC"",""OT""",1290
2,GB,3,6,IC,1,6,"""AA"",""YW"",""OM"",""VX"",""WI"",""ZL"",""IC"",""GB""",1289
3,HV,3,4,IC,1,4,"""AA"",""YW"",""OM"",""VX"",""WI"",""ZL"",""IC"",""HV""",1287
1,IC,3,5,ZL,4,20,"""AA"",""YW"",""OM"",""VX"",""WI"",""ZL"",""IC""",1283
5,OT,5,7,ZL,2,14,"""AA"",""YW"",""OM"",""VX"",""WI"",""ZL"",""OT""",1277
1,ZL,4,9,WI,7,63,"""AA"",""YW"",""OM"",""VX"",""WI"",""ZL""",1263
1,IS,4,19,NG,0,0,"""AA"",""YW"",""OM"",""VX"",""WI"",""NG"",""IS""",1252
2,ZL,4,9,NG,0,0,"""AA"",""YW"",""OM"",""VX"",""WI"",""NG"",""ZL""",1252
5,NG,7,13,WI,4,52,"""AA"",""YW"",""OM"",""VX"",""WI"",""NG""",1252


In [76]:
"""
My long path: AA II JJ *  II AA BB *  CC *
My short path: AA JJ BB CC
Elephant path: AA DD *  EE FF GG HH *  GG FF EE * 12 
Elephant short path: AA DD HH EE

1707 pressure
"""
# Work on a copy of the walked routes so walked_paths itself stays untouched.
short_time = walked_paths.copy()

# Two disabled experiments used to live here: subtracting a fixed "training
# time" from remaining_time, and re-deriving total_pressure_released by summing
# the pressure of every prefix of each path.

# Split the quoted, comma-separated 'path' once and derive both columns from it:
#   path_len   - number of entries after the leading start valve
#   path_steps - those entries themselves (still wrapped in double quotes)
path_parts = short_time['path'].str.split(',')
short_time['path_len'] = path_parts.str.len() - 1
short_time['path_steps'] = path_parts.str[1:]

short_time


Unnamed: 0,destination,time_taken,flow_rate,source,remaining_time,pressure_released,path,total_pressure_released,path_len,path_steps
4,NG,4,13,ZL,3,39,"""AA"",""YW"",""OM"",""VX"",""WI"",""ZL"",""NG""",1302,6,"[""YW"", ""OM"", ""VX"", ""WI"", ""ZL"", ""NG""]"
1,OT,3,7,IC,1,7,"""AA"",""YW"",""OM"",""VX"",""WI"",""ZL"",""IC"",""OT""",1290,7,"[""YW"", ""OM"", ""VX"", ""WI"", ""ZL"", ""IC"", ""OT""]"
2,GB,3,6,IC,1,6,"""AA"",""YW"",""OM"",""VX"",""WI"",""ZL"",""IC"",""GB""",1289,7,"[""YW"", ""OM"", ""VX"", ""WI"", ""ZL"", ""IC"", ""GB""]"
3,HV,3,4,IC,1,4,"""AA"",""YW"",""OM"",""VX"",""WI"",""ZL"",""IC"",""HV""",1287,7,"[""YW"", ""OM"", ""VX"", ""WI"", ""ZL"", ""IC"", ""HV""]"
1,IC,3,5,ZL,4,20,"""AA"",""YW"",""OM"",""VX"",""WI"",""ZL"",""IC""",1283,6,"[""YW"", ""OM"", ""VX"", ""WI"", ""ZL"", ""IC""]"
...,...,...,...,...,...,...,...,...,...,...
1,YW,4,8,AA,22,176,"""AA"",""YW""",176,1,"[""YW""]"
2,OT,4,7,AA,22,154,"""AA"",""OT""",154,1,"[""OT""]"
3,GB,4,6,AA,22,132,"""AA"",""GB""",132,1,"[""GB""]"
0,IC,3,5,AA,23,115,"""AA"",""IC""",115,1,"[""IC""]"


In [89]:
# Number of rows in short_time, i.e. how many walked route rows are available for pairing.
len(short_time)


17773

In [108]:


# half_path and top_n are only used by a disabled experiment that picked the
# top_n routes for each path length around half_path; they are kept so that
# experiment can be revived interactively.
half_path = len(non_zero_nodes) // 2
top_n = 130

# Candidate routes for the two walkers: the leading rows of short_time
# (which is ordered by total_pressure_released, highest first).
path1_options = short_time.head(200)
path2_options = short_time.head(1000)

display('list sizes: {} {}'.format(len(path1_options), len(path2_options)))

# Build each path2 valve set and read its pressure once, instead of doing
# .iloc row lookups and set construction inside the 200 x 1000 double loop.
path2_candidates = [
    (steps, set(steps), pressure)
    for steps, pressure in zip(path2_options['path_steps'], path2_options['total_pressure_released'])
]

# find 1 path from path1_options and 1 path from path2_options that have non-overlapping path_steps
disjointed_pairs = []
for path1, path1_pressure in zip(path1_options['path_steps'], path1_options['total_pressure_released']):
    path1_valves = set(path1)
    for path2, path2_valves, path2_pressure in path2_candidates:
        if path1_valves.isdisjoint(path2_valves):
            disjointed_pairs.append({'path1': path1, 'path2': path2, 'total_pressure_released': path1_pressure + path2_pressure})

# find the pair with the highest total pressure released
# (explicit columns keep sort_values working even when no disjoint pair exists)
pd.DataFrame(disjointed_pairs, columns=['path1', 'path2', 'total_pressure_released']).sort_values(by='total_pressure_released', ascending=False)

'list sizes: 200 1000'

Unnamed: 0,path1,path2,total_pressure_released
0,"[""YW"", ""OM"", ""VX"", ""WI"", ""ZL"", ""IC"", ""OT""]","[""HV"", ""MX"", ""QQ"", ""DG"", ""IS"", ""NG""]",2207
2,"[""YW"", ""OM"", ""VX"", ""WI"", ""ZL"", ""IC"", ""GB""]","[""HV"", ""MX"", ""QQ"", ""DG"", ""IS"", ""NG""]",2206
13,"[""YW"", ""OM"", ""VX"", ""WI"", ""IC"", ""OT""]","[""ZL"", ""NG"", ""IS"", ""DG"", ""QQ"", ""FM""]",2202
5,"[""YW"", ""OM"", ""VX"", ""WI"", ""ZL"", ""IC""]","[""HV"", ""MX"", ""QQ"", ""DG"", ""IS"", ""NG""]",2200
21,"[""YW"", ""OM"", ""VX"", ""WI"", ""IC"", ""GB""]","[""ZL"", ""NG"", ""IS"", ""DG"", ""QQ"", ""FM""]",2200
...,...,...,...
935,"[""ZL"", ""WI"", ""VX"", ""OM""]","[""MX"", ""QQ"", ""DG"", ""IS"", ""NG""]",1974
990,"[""OT"", ""OM"", ""VX"", ""WI"", ""YW""]","[""MX"", ""QQ"", ""DG"", ""IS"", ""NG""]",1971
972,"[""YW"", ""OM"", ""VX"", ""ZL"", ""IC""]","[""MX"", ""QQ"", ""DG"", ""IS"", ""NG""]",1971
1008,"[""OT"", ""OM"", ""VX"", ""WI"", ""YW"", ""GB""]","[""MX"", ""QQ"", ""DG"", ""IS"", ""NG""]",1971
