In [87]:
import json
import os
import posixpath
import string
from collections import defaultdict
from dataclasses import dataclass
from string import ascii_letters
from textwrap import dedent
from typing import List, Optional, Union

import numpy as np
import pandas as pd

from aoc2022.utils import read_aoc_day_data_file

In [7]:
# Sample terminal transcript: lines starting with "$" are commands, every
# other line is output of the preceding ``ls``.
data = "\n".join(
    [
        "$ cd /",
        "$ ls",
        "dir a",
        "14848514 b.txt",
        "8504156 c.dat",
        "dir d",
        "$ cd a",
        "$ ls",
        "dir e",
        "29116 f",
        "2557 g",
        "62596 h.lst",
        "$ cd e",
        "$ ls",
        "584 i",
        "$ cd ..",
        "$ cd ..",
        "$ cd d",
        "$ ls",
        "4060174 j",
        "8033020 d.log",
        "5626152 d.ext",
        "7214296 k",
    ]
)


In [3]:
@dataclass
class File:
    """A single file entry described by a name, an extension, and a size.

    NOTE(review): nothing in the visible cells instantiates this class;
    ``DataParser`` accumulates file sizes in plain dicts instead.
    """
    # File name; presumably without the extension since that has its own
    # field -- confirm against the eventual caller.
    name: str
    # File extension; whether it carries the leading dot is not established here.
    extension: str
    # File size as an integer; the unit is not stated in this notebook.
    size: int


class Dir:
    """A directory node that may contain other ``Dir`` and ``File`` entries.

    Args:
        name: Name of the directory.
        parent: The enclosing directory, or ``None`` (the default) when the
            directory has no parent, e.g. the root.
        children: Entries held by this directory, or ``None`` (the default)
            when none were supplied.
    """
    def __init__(
        self,
        name: str,
        parent: Optional['Dir'] = None,
        children: Optional[List[Union['Dir', 'File']]] = None,
    ):
        self.name = name
        self.parent = parent
        # Stored exactly as passed (no copy, no None -> [] conversion) so
        # callers that already pass explicit values see unchanged attributes.
        self.children = children

In [86]:
# $ cd /
# $ ls
# dir a
# 14848514 b.txt
# 8504156 c.dat
# dir d
# $ cd a
# $ ls
# dir e
# 29116 f
# 2557 g
# 62596 h.lst
# $ cd e
# $ ls
# 584 i
# $ cd ..

class DataParser:
    """Parse a terminal transcript of ``cd``/``ls`` commands into a directory tree.

    Attributes:
        process_list: The non-blank lines of the transcript, in order.
        parsed_process_list: One entry per command, either ``["cd", target]``
            or ``["ls", absolute_path, *listing_lines]``.
        tree: Nested dicts rooted at ``"/"``. Each directory dict maps
            sub-directory names to their own dicts and keeps, under the
            ``"dir_size"`` key, the summed size of the files listed *directly*
            in that directory (sub-directories are not included).

    NOTE(review): directory names and the ``"dir_size"`` counter share one
    dict, so a directory literally named ``dir_size`` would collide.
    """

    def __init__(self, data: str):
        # Blank lines carry no information and would break the
        # first-character checks performed while parsing.
        self.process_list = [line for line in data.splitlines() if line.strip()]
        self.parsed_process_list = self.parse_process_list()
        self.tree = {"/": {"dir_size": 0}}

        self.generate_dir_tree_structure()

    def parse_process_list(self) -> List[List[str]]:
        """Group the transcript lines into commands.

        Returns:
            A list with one entry per ``$`` line: ``["cd", target]`` for a
            change of directory, or ``["ls", absolute_path, *listing_lines]``
            for a listing, where ``absolute_path`` is the directory that was
            current when ``ls`` ran.

        Raises:
            ValueError: If a line in command position does not start with
                ``$``, if the command is neither ``cd`` nor ``ls``, or if
                ``cd`` has no target.
        """
        parsed_commands: List[List[str]] = []
        # Paths in the transcript are POSIX-style regardless of the host OS,
        # so resolve them with posixpath instead of os.path.abspath (which
        # also depends on the real working directory of the process).
        cwd = "/"
        idx = 0
        total = len(self.process_list)

        # Iterate up to and including the last line so that a command on the
        # final line is not silently dropped.
        while idx < total:
            line = self.process_list[idx]
            if not line.startswith("$"):
                raise ValueError("Should not be processing a non-command.")

            parts = line.split()
            command = parts[1] if len(parts) > 1 else ""

            if command == "cd":
                if len(parts) < 3:
                    raise ValueError(f"'cd' is missing its target: {line!r}")
                target = parts[2]
                cwd = posixpath.normpath(posixpath.join(cwd, target))
                parsed_commands.append(["cd", target])
                idx += 1

            elif command == "ls":
                ls_cmd = ["ls", cwd]
                idx += 1
                # Everything up to the next command is output of this ls.
                while idx < total and not self.process_list[idx].startswith("$"):
                    ls_cmd.append(self.process_list[idx])
                    idx += 1
                parsed_commands.append(ls_cmd)

            else:
                # Failing loudly avoids looping forever on an unknown command.
                raise ValueError(f"Unsupported command: {line!r}")

        return parsed_commands

    def generate_dir_tree_structure(self) -> None:
        """Build ``self.tree`` from the ``ls`` entries in ``parsed_process_list``.

        The tree is rebuilt from scratch on every call. ``cd`` entries are
        skipped because each ``ls`` entry already carries its absolute path.
        A directory that is listed more than once is only counted the first
        time, so its files are not double-counted and its already-populated
        sub-directories are not reset.

        Raises:
            ValueError: If a listing line is neither ``dir <name>`` nor
                ``<integer size> <name>``.
        """
        self.tree = {"/": {"dir_size": 0}}
        listed_paths = set()

        for cmd in self.parsed_process_list:
            if cmd[0] != "ls":
                continue

            path, entries = cmd[1], cmd[2:]
            if path in listed_paths:
                continue
            listed_paths.add(path)

            # Walk from the root to the listed directory, creating any
            # directory that was entered without having been listed first.
            branch = self.tree["/"]
            for part in path.split("/"):
                if part:
                    branch = branch.setdefault(part, {"dir_size": 0})

            for entry in entries:
                # Split on the first space only, so names containing spaces
                # or several dots are handled.
                kind, _, name = entry.partition(" ")
                if kind == "dir":
                    branch.setdefault(name, {"dir_size": 0})
                else:
                    branch["dir_size"] += int(kind)

# Parse the sample transcript held in ``data`` and show the resulting tree as the cell output.
DataParser(data).tree

{'/': {'dir_size': 23352670,
  'a': {'dir_size': 94269, 'e': {'dir_size': 584}},
  'd': {'dir_size': 24933642}}}

In [None]:
# Reference value: the command list that ``parse_process_list`` builds for the
# sample transcript -- ``["cd", target]`` entries and
# ``["ls", absolute_path, *listing_lines]`` entries.
# NOTE(review): this cell has no execution count and looks like pasted output
# rather than code -- confirm whether it should become an assertion or markdown.
[['cd', '/'],
 ['ls', '/', 'dir a', '14848514 b.txt', '8504156 c.dat', 'dir d'],
 ['cd', 'a'],
 ['ls', '/a', 'dir e', '29116 f', '2557 g', '62596 h.lst'],
 ['cd', 'e'],
 ['ls', '/a/e', '584 i'],
 ['cd', '..'],
 ['cd', '..'],
 ['cd', 'd'],
 ['ls', '/d', '4060174 j', '8033020 d.log', '5626152 d.ext', '7214296 k']]

In [48]:
# Scratch check: a nested dict fetched from ``tree`` is a reference, not a copy,
# so writing through it is visible from the outer dict.
tree = {"A": {"B": {}}}
part_of_tree = tree["A"]
part_of_tree["C"] = ["D"]

# Display the outer dict; "C" now shows up under "A".
tree

{'A': {'B': {}, 'C': ['D']}}

In [89]:
# Load the full puzzle input; the relative path is resolved against the
# working directory of the running kernel.
# NOTE(review): ``read_aoc_day_data_file`` is imported but not used in the
# cells visible here -- it may be the intended loader; confirm.
with open("../data/day_07.txt", encoding="utf-8") as file:
    d = file.read()

In [91]:
# Parse the puzzle input held in ``d`` and show the resulting tree as the cell output.
DataParser(d).tree

{'/': {'dir_size': 474107,
  'cvt': {'dir_size': 835544,
   'bbgsthsd': {'dir_size': 455345},
   'chhdjtlw': {'dir_size': 1084924,
    'mbdrgfzs': {'dir_size': 164331},
    'pbmcnpzf': {'dir_size': 102120}},
   'cpcfcc': {'dir_size': 361609,
    'dqc': {'dir_size': 100486,
     'cpcfcc': {'dir_size': 0, 'hvfvt': {'dir_size': 596769}},
     'qmhnvmh': {'dir_size': 469100},
     'snqqcjlw': {'dir_size': 182851}},
    'glm': {'dir_size': 188702},
    'jbszm': {'dir_size': 187645},
    'vcptbw': {'dir_size': 0, 'mgqd': {'dir_size': 426391}},
    'zrtm': {'dir_size': 736469, 'zft': {'dir_size': 603170}}},
   'dch': {'dir_size': 313336,
    'djfww': {'dir_size': 732482,
     'lbrhbc': {'dir_size': 1229072, 'djfww': {'dir_size': 24794}}},
    'fzqgwwnf': {'dir_size': 233720},
    'hqvmq': {'dir_size': 167073,
     'cdld': {'dir_size': 293421},
     'cpcfcc': {'dir_size': 595426},
     'djfww': {'dir_size': 201830},
     'qnc': {'dir_size': 305356,
      'zrtm': {'dir_size': 0,
       'bdhdrr'