In [9]:
# Load the entire contents of the file named 'input' as text, with
# leading and trailing whitespace removed.
with open('input') as f:
    data = f.read().strip()

In [10]:
class CWDTracker:
    """Tracks a current working directory as a '/'-terminated path string."""

    def __init__(self):
        # Empty until the first cd() call; cd('/') sets it to the root path.
        self.cwd = ''

    def cd(self, path):
        """Update ``cwd`` for one ``cd`` argument.

        '/' jumps to the root, '..' moves to the parent directory, and any
        other value is appended to the current directory as a child name.
        """
        if path == '/':
            self.cwd = path
            return
        if path == '..':
            # Same computation as the static helper below.
            self.cwd = self.parent_dir(self.cwd)
            return
        self.cwd = self.cwd + path + '/'

    def abspath(self, name):
        """Return ``name`` prefixed with the current working directory."""
        return self.cwd + name

    @staticmethod
    def parent_dir(path):
        """Return the '/'-terminated parent directory of ``path``.

        Trailing slashes on ``path`` are ignored, so '/a/b' and '/a/b/' both
        give '/a/'. The parent of '/' is '/'.
        """
        trimmed = path.rstrip('/')
        # Everything before the last '/', or '' when there is no '/' left.
        head, _, _ = trimmed.rpartition('/')
        return head + '/'

class FileTree:
    """Flat registry of tree nodes, keyed by their path string."""

    def __init__(self):
        self.path2node = {}
        # Initialised here but not read or updated by any method of this class.
        self.id_tracker = 0

    def add_node(self, path, node_type, size=0):
        """Register a node at ``path`` unless that path is already known.

        The new node's ``parent`` is the already-registered node stored under
        ``CWDTracker.parent_dir(path)``; the root path '/' gets ``None``.

        Raises:
            KeyError: if the parent path has not been registered yet.
        """
        if path in self.path2node:
            return  # first registration wins; repeats are ignored

        node = Node(path, node_type, size=size)
        if path == '/':
            node.parent = None
        else:
            node.parent = self.path2node[CWDTracker.parent_dir(path)]
        self.path2node[path] = node

    def get_node(self, path):
        """Return the node registered under ``path`` (KeyError if absent)."""
        return self.path2node[path]

class Node:
    """A single tree entry: either a directory ('dir') or a file ('file')."""

    def __init__(self, path, node_type, size=0):
        assert node_type in ('dir', 'file')
        self.path = path
        self.node_type = node_type
        self.size = size
        # Number of '/'-separated components in the path, empty ones included:
        # '/' has depth 2, '/a/' and '/a/x' both have depth 3.
        self.depth = path.count('/') + 1
        # Left unset here; the code that creates the node assigns it.
        self.parent = None

In [11]:
def parse_line(line, filetree, wd_tracker):
    """Apply one line of terminal history to the tree and the cwd tracker.

    Recognised line shapes (whitespace separated):
      * ``$ cd <path>`` - change directory, then register the new cwd as a dir
      * ``$ ls``        - no state change
      * ``dir <name>``  - register a directory inside the cwd
      * ``<size> <name>`` - register a file of ``int(size)`` inside the cwd

    Blank lines are ignored.

    Args:
        line: one line of history text.
        filetree: object exposing ``add_node(path=, node_type=, size=)``.
        wd_tracker: object exposing ``cd(path)``, ``cwd`` and ``abspath(name)``.

    Raises:
        ValueError: if a ``cd``, ``dir`` or file line has the wrong number of
            fields, or a file size is not an integer.
    """
    splitted = line.split()

    if not splitted:  # blank line: nothing to parse
        return

    if splitted[0] == '$':  # found command
        command = splitted[1]
        if command == 'cd':
            _, _, path = splitted
            wd_tracker.cd(path)
            filetree.add_node(path=wd_tracker.cwd, node_type='dir')
        # 'ls' (and any other command) changes no state; its output lines
        # are handled by the branches below on later calls.

    elif splitted[0] == 'dir':  # found directory
        _, name = splitted
        # Directories are keyed with a trailing '/', the same form that
        # cd registers via wd_tracker.cwd, so a listed directory and a
        # visited directory share one node instead of two.
        dir_path = wd_tracker.abspath(name) + '/'
        filetree.add_node(path=dir_path, node_type='dir')

    else:  # found file
        size, name = splitted
        file_path = wd_tracker.abspath(name)
        filetree.add_node(path=file_path, node_type='file', size=int(size))


def run_history(lines, filetree, wd_tracker):
    """Feed every entry of ``lines``, in order, through ``parse_line``.

    ``filetree`` and ``wd_tracker`` are passed to each call unchanged, so
    they accumulate state across the whole history.
    """
    for history_line in lines:
        parse_line(history_line, filetree, wd_tracker)

# Part 1

In [12]:
# init parser
filetree = FileTree()
wd_tracker = CWDTracker()

run_history(data.splitlines(), filetree, wd_tracker)

# Roll sizes up from the leaves to the root. A node must be added to its
# parent only after all of its own children have been added to it.
# Node.depth is identical for a directory and the files directly inside it,
# so depth alone does not order them: the second key element puts files
# ahead of directories at the same depth instead of relying on insertion
# order and sort stability.
deepest_nodes = sorted(
    filetree.path2node.values(),
    key=lambda x: (x.depth, x.node_type == 'file'),
    reverse=True,
)

for node in deepest_nodes:
    # Only the root has no parent; skip it by identity rather than by
    # assuming it sorts last.
    if node.parent is not None:
        node.parent.size += node.size

In [13]:
# Directories whose accumulated size is at most 100000, as (path, size) pairs;
# the cell's displayed value is the sum of those sizes.
selected_nodes = [
    (node.path, node.size)
    for node in filetree.path2node.values()
    if node.node_type == 'dir' and node.size <= 100000
]
sum(size for _, size in selected_nodes)

1513699

# Part 2

In [14]:
total_space = 70000000
total_needed = 30000000

root_node = filetree.get_node('/')
free_space = total_space - root_node.size

# Additional space that has to be freed to reach total_needed.
needed = total_needed - free_space

# Candidate directories, smallest first. Files are excluded explicitly:
# path2node holds file nodes too, and a single large file must not be
# reported as a directory to delete.
sufficiently_large_nodes = [
    (node.path, node.size)
    for node in sorted(filetree.path2node.values(), key=lambda x: x.size)
    if node.node_type == 'dir' and node.size >= needed
]

In [15]:
# Show the (path, size) candidates; the list is ordered by ascending size.
sufficiently_large_nodes

[('/pntzm/mbtsvblj/csqcnmtc/mbtsvblj/', 7991939),
 ('/pntzm/mbtsvblj/zcrrtlh/dhsmmlt/wffbp/mbtsvblj/mbtsvblj/', 8113815),
 ('/thfgwwsp/', 8389675),
 ('/pntzm/mbtsvblj/csqcnmtc/', 9405481),
 ('/pntzm/mbtsvblj/zcrrtlh/dhsmmlt/wffbp/mbtsvblj/', 10264814),
 ('/pntzm/mbtsvblj/zcrrtlh/dhsmmlt/wffbp/', 12324512),
 ('/pntzm/mbtsvblj/zcrrtlh/dhsmmlt/', 13246500),
 ('/pntzm/mbtsvblj/zcrrtlh/', 14682970),
 ('/pntzm/mbtsvblj/', 27924493),
 ('/pntzm/', 29757629),
 ('/', 46876531)]

In [16]:
# Size of the first entry, i.e. the smallest candidate in the size-sorted list.
sufficiently_large_nodes[0][1]

7991939