In [1]:
import re

import numpy as np
from aocd import get_data, submit

# Puzzle day and year; passed to aocd's get_data() when fetching the input.
DAY = 7
YEAR = 2022

In [2]:
# Fetch the puzzle input for the configured day/year through aocd.
raw = get_data(day=DAY, year=YEAR)

# print(raw)

In [3]:
def parse_data(data):
    """Split a terminal transcript into one group of lines per command.

    The text is cut at every ``"$ "`` prompt. Each non-empty piece becomes a
    list holding the command itself followed by the output lines it produced,
    with empty lines removed.

    Args:
        data: the transcript as a single string.

    Returns:
        A list of lists of strings, one inner list per command.
    """
    groups = []
    for chunk in data.split("$ "):
        if chunk == "":  # e.g. the empty piece before the very first prompt
            continue
        lines = []
        for line in chunk.split("\n"):
            if line != "":
                lines.append(line)
        groups.append(lines)
    return groups


# One entry per command: [command, *output lines], with the "$ " prompt removed.
data = parse_data(raw)

In [4]:
class Node:
    """One entry of the reconstructed filesystem tree.

    A node with ``size`` set is handled as a file; a node whose ``content`` is
    a dict (child name -> child ``Node``) is handled as a directory.

    Attributes are stored exactly as passed in:
        name: entry name.
        size: file size, or ``None``.
        content: dict of children, or ``None``.
        parent: the containing node, or ``None``.
        depth: nesting level supplied by the caller.
    """

    def __init__(self, name, size=None, content=None, parent=None, depth=0):
        self.name, self.size = name, size
        self.content, self.parent = content, parent
        self.depth = depth

    def add_content(self, node):
        """Store ``node`` among this node's children, keyed by ``node.name``.

        Nothing happens when ``content`` is not a dict.
        """
        if not isinstance(self.content, dict):
            return
        self.content[node.name] = node

    def disk_size(self):
        """Return this node's size as computed by the module-level ``calc_size``."""
        return calc_size(self)


def calc_size(node):
    """Return the size of ``node``, summing recursively over children.

    A node with ``size`` set returns that value directly. Otherwise the sizes
    of all values in ``node.content`` are added up, so an empty dict gives 0.
    """
    if node.size is None:
        total = 0
        for child in node.content.values():
            total += calc_size(child)
        return total
    return node.size

In [5]:
# Patterns for the transcript lines. Raw strings are used so that `\d`, `\s`
# and `\$` reach the regex engine instead of being parsed as (invalid) string
# escape sequences, which newer Python versions warn about.
FILE_REGEX = re.compile(r"(^\d+)\s(.+)$")  # "<size> <name>"
DIR_REGEX = re.compile(r"^dir\s(.+)$")  # "dir <name>"
# Not referenced by the loop below (parse_data already strips the "$ " prompt).
CMD_REGEX = re.compile(r"^\$\s(cd|ls)\s*(.*)")
CD_REGEX = re.compile(r"cd\s(.*)")  # "cd <target>", compiled once

root = Node(name="/", content={})
current = root

# create tree
# Every command group is processed; "cd /" is handled explicitly, so the
# transcript no longer has to start with it and may repeat it later on.
for line in data:
    cmd, *content = line
    if cmd.startswith("cd"):
        name = CD_REGEX.search(cmd).group(1)
        if name == "/":
            current = root
        elif name == "..":
            # Going up from the root stays at the root instead of leaving
            # `current` as None and crashing on the next command.
            if current.parent is not None:
                current = current.parent
        else:
            current = current.content[name]
    else:
        for c in content:
            file_re = FILE_REGEX.search(c)
            dir_re = DIR_REGEX.search(c)
            if file_re is not None:
                size, name = file_re.group(1), file_re.group(2)
                current.add_content(
                    Node(name=name, size=int(size), parent=current, depth=current.depth + 1)
                )
            elif dir_re is not None:
                name = dir_re.group(1)
                # Listing a directory a second time must not replace an
                # already populated subdirectory with an empty one.
                if name not in current.content:
                    current.add_content(
                        Node(name=name, content={}, parent=current, depth=current.depth + 1)
                    )
            else:
                raise ValueError("unrecognised listing line: {!r}".format(c))

# Part 1

In [6]:
def collect(node, candidates=None, limit=100000):
    """Gather the total sizes of all directories no larger than ``limit``.

    The tree is walked depth-first starting at ``node``. Nodes whose
    ``content`` is ``None`` (files) are skipped; every other node contributes
    its ``disk_size()`` when that size is at most ``limit``.

    Args:
        node: node to start from; must provide ``content`` and ``disk_size()``.
        candidates: optional list to append to. A fresh list is created when
            omitted, so separate calls never share results (the previous
            ``candidates=[]`` default accumulated across calls).
        limit: largest directory size that is still collected.

    Returns:
        The list of collected sizes (the same object as ``candidates`` when
        one was passed in).
    """
    if candidates is None:
        candidates = []

    if node.content is None:  # skip files
        return candidates

    size = node.disk_size()  # computed once; it is a recursive walk
    if size <= limit:
        candidates.append(size)

    for child in node.content.values():
        collect(child, candidates, limit)

    return candidates


# Part 1 answer: sum of the directory sizes gathered by collect() from the root.
result = sum(collect(root))
result

1490523

In [7]:
# submit(result, part="a", day=DAY, year=YEAR)

# Part 2

In [8]:
total = 70000000  # overall disk capacity
required = 30000000  # free space that has to be available
used = root.disk_size()  # space taken by everything below the root
unused = total - used  # free space right now
to_free = required - unused  # extra space that still has to be reclaimed


def collect(node, candidates=None):
    """Gather every directory at or below ``node`` together with its size.

    The tree is walked depth-first. Nodes whose ``content`` is ``None``
    (files) are skipped; every other node is recorded as a
    ``[node, node.disk_size()]`` pair.

    Args:
        node: node to start from; must provide ``content`` and ``disk_size()``.
        candidates: optional list to append to. A fresh list is created when
            omitted, so separate calls never share results (the previous
            ``candidates=[]`` default accumulated across calls).

    Returns:
        The list of ``[node, size]`` pairs (the same object as ``candidates``
        when one was passed in).
    """
    if candidates is None:
        candidates = []

    if node.content is None:  # skip files
        return candidates

    candidates.append([node, node.disk_size()])

    for child in node.content.values():
        collect(child, candidates)

    return candidates


# All directories with their sizes, smallest first. Plain Python is enough
# here; no NumPy object array of mixed [Node, int] pairs is needed.
candidates = sorted(collect(root), key=lambda pair: pair[-1])  # sort by size
big_enough = [size for _, size in candidates if size >= to_free]
if not big_enough:
    # Previously this surfaced as an opaque IndexError from array indexing.
    raise ValueError("no directory frees at least {} units".format(to_free))
result = big_enough[0]  # first one above threshold
result

12390492

In [9]:
# submit(result, part="b", day=DAY, year=YEAR)