In [20]:
class TreeNode:
    def __init__(self, filename=None, filesize=None, parent=None):
        self.filename = filename # data
        self.filesize = filesize
        self.parent = parent
        self.children = {} # references to other nodes: filename to TreeNode objects

    def add_child(self, child_node):
        # creates parent-child relationship
        self.children[child_node.filename] = child_node

    def traverse(self):
        # moves through each node referenced from self downwards
        nodes_to_visit = [self]
        while len(nodes_to_visit) > 0:
            current_node = nodes_to_visit.pop()
            print(current_node.filename)
            nodes_to_visit += current_node.children.values()
            
    def total_size(self):
        # compute total size of the subtree from self downwards
        total_size = 0
        nodes_to_visit = [self]
        while len(nodes_to_visit) > 0:
            current_node = nodes_to_visit.pop()
            if current_node.filesize != None:
                total_size += int(current_node.filesize)
            nodes_to_visit += current_node.children.values()
        return total_size
    
    def full_path(self):
        if self.filename == '/':
            return '/'
        cur_node = self
        dirs = [self.filename]
        while cur_node.parent != None:
            cur_node = cur_node.parent
            dirs.append(cur_node.filename)
        dirs.reverse()
        return '/'.join(dirs)[1:]  # grossss

def make_tree(my_input):
    root = TreeNode(filename='/')
    cur_node = root
    
    for x in my_input:
#         print(x)
#         print(cur_node.full_path())
#         print('\n')
        
        # if we're in change-directory mode, use this to navigate dirs as needed
        if x.startswith('$ cd'):
            dirname = x[5:]
            if dirname == '/':  # back to root
                cur_node = root
            elif dirname != '..':
                # child dir should already have been created as the result of an ls command below
                assert dirname in cur_node.children.keys()
                cur_node = cur_node.children[dirname]
            else:
                # go up a level
                assert dirname == '..'
                cur_node = cur_node.parent

        elif x.startswith('$ ls'):
            continue

        else:
            # we're in "ls" mode, make new files as needed
            filesize, filename = x.split(' ')
            if filesize == 'dir':
                # add if we don't already know about it
                if filename not in cur_node.children.keys():
                    new_node = TreeNode(filename=filename, parent=cur_node)
                    cur_node.add_child(new_node)
                continue
                
            new_node = TreeNode(filename=filename, filesize=filesize, parent=cur_node)
            cur_node.add_child(new_node)
        
    return root

In [21]:
example_input = """$ cd /
$ ls
dir a
14848514 b.txt
8504156 c.dat
dir d
$ cd a
$ ls
dir e
29116 f
2557 g
62596 h.lst
$ cd e
$ ls
584 i
$ cd ..
$ cd ..
$ cd d
$ ls
4060174 j
8033020 d.log
5626152 d.ext
7214296 k
"""

example_input = example_input.split('\n')[:-1]
example_input

['$ cd /',
 '$ ls',
 'dir a',
 '14848514 b.txt',
 '8504156 c.dat',
 'dir d',
 '$ cd a',
 '$ ls',
 'dir e',
 '29116 f',
 '2557 g',
 '62596 h.lst',
 '$ cd e',
 '$ ls',
 '584 i',
 '$ cd ..',
 '$ cd ..',
 '$ cd d',
 '$ ls',
 '4060174 j',
 '8033020 d.log',
 '5626152 d.ext',
 '7214296 k']

In [22]:
tree = make_tree(example_input)

In [23]:
# find all directories whose total size is <= 100000

def find_small_dirs(root):
    name_to_size = {}
    nodes_to_visit = [root]
    while len(nodes_to_visit) > 0:
        current_node = nodes_to_visit.pop()
        if current_node.filesize is not None:  # not a directory so we don't care
            continue
        total_size = current_node.total_size()
        if total_size <= 100000:
            name_to_size[current_node.filename] = total_size
        nodes_to_visit += current_node.children.values()
    return name_to_size

sum(find_small_dirs(tree).values())

95437

In [24]:
my_input = open('input7.txt').read().split('\n')[:-1]
my_tree = make_tree(my_input)
sum(find_small_dirs(my_tree).values())

1646948

## Ignore stuff below this line

In [25]:
def get_node_by_dir(node, dirname):
    dirs = dirname.split('/')[1:]
    print(dirs)

get_node_by_dir(my_tree, '/wlqhpwqv/zfhnw/zdv/pjdhn/rvbw/cjdhwbv')

['wlqhpwqv', 'zfhnw', 'zdv', 'pjdhn', 'rvbw', 'cjdhwbv']


In [26]:
len(my_input)

1010

In [27]:
sum(find_small_dirs(my_tree).values())

1646948

In [28]:
dirs = [x for x in my_input if x.startswith('dir')]
print(len(dirs))
print(len(set(dirs)))

186
140


In [29]:
# how many leaves are there in the input?