In [72]:
from dataclasses import dataclass, field
from typing import List, Optional, Union
from collections import defaultdict
import json
import os
import string
from string import ascii_letters
from textwrap import dedent

import numpy as np
import pandas as pd

In [83]:
@dataclass
class File:
    """Represent a File which will have a name, extension, and size."""
    name: str
    extension: str
    size: int


@dataclass
class Directory:
    """Represent a directory which may contain other ``Directory``s and ``File``s."""
    def __init__(self, name: str, parent: Optional['Directory'], children: Optional[List[Union['Directory', File]]]):
        self.name = name
        self.parent = parent
        self.children = children

@dataclass
class FileSystem:
    """Represents the file system containing ``Directory``s and ``File``s."""

    dirs: List[Directory]

    def add_child_directory(self, target_dir_name: str, parent_dir_name: Directory):
        if target_dir_name not in [d.name for d in self.dirs if isinstance(d, Directory)]:
            parent_dir = [d for d in self.dirs if d.name == parent_dir_name][0]
            self.dirs.append(Directory(name=target_dir_name, parent=parent_dir, children=[]))
    
    def add_files_to_directory(self, target_dir_name: str, file: File):
        target_dir = [d for d in self.dirs if d.name == target_dir_name][0]
        if file.name not in [f for f in target_dir.children if isinstance(f, File)]:
            target_dir.children.append(file)
        
    


In [78]:
def parse_process_list(input_stream) -> List[List[str]]:
    """Parse input data into commands."""
    current_line_idx = 0
    current_line = None
    parsed_commands_list = []
    cwd = ""
    process_list = list(input_stream.splitlines())
    while current_line_idx < len(process_list) - 1:
        current_line = process_list[current_line_idx]
        
        if current_line[0] == "$":
            # Line is a command.

            if current_line[2:4] == "cd":
                # Change directory...
                split_command = current_line.split(" ")
                cwd = os.path.abspath(os.path.join(cwd, split_command[2]))
                parsed_commands_list.append(split_command[1:3])

                current_line_idx += 1

            elif current_line[2:4] == "ls":
                # List directory... 
                ls_cmd = ["ls", cwd]
                
                while (
                    current_line_idx < len(process_list) - 1
                    and process_list[current_line_idx + 1][0] != "$"         
                ):
                    current_line_idx += 1
                    current_line = process_list[current_line_idx]
                    ls_cmd.append(current_line)
                
                parsed_commands_list.append(ls_cmd)
                current_line_idx += 1
            else:
                current_line_idx += 1
        else:
            raise ValueError("Should not be processing a non-command.")

    return parsed_commands_list

In [79]:
data = """$ cd /
$ ls
dir a
14848514 b.txt
8504156 c.dat
dir d
$ cd a
$ ls
dir e
29116 f
2557 g
62596 h.lst
$ cd e
$ ls
584 i
$ cd ..
$ cd ..
$ cd d
$ ls
4060174 j
8033020 d.log
5626152 d.ext
7214296 k""".strip()

In [103]:
parsed_cmds = parse_process_list(data)

fs = FileSystem(dirs=[Directory(name="/", parent=None, children=[])])
cwd = '/'
for cmd in parsed_cmds:
    if cmd[0] == "cd":
        new_dir_loc = os.path.abspath(os.path.join(cwd, cmd[1]))
        fs.add_child_directory(new_dir_loc, parent_dir_name=cwd)
        cwd = new_dir_loc
    if cmd[0] == "ls":
        cwd = cmd[1]
        for item in cmd[2:]:
            if item[:4] == "dir ":
                fs.add_child_directory(item[4:], cwd)
            else:
                fs.add_files_to_directory(cwd, File(name=item, extension="0", size="0"))

In [106]:
for d in fs.dirs:
    if not any([isinstance(child, Directory) for child in d.children]) and d.children != []:
        print(d.name, d.children)
        

/ [File(name='14848514 b.txt', extension='0', size='0'), File(name='8504156 c.dat', extension='0', size='0')]
/a [File(name='29116 f', extension='0', size='0'), File(name='2557 g', extension='0', size='0'), File(name='62596 h.lst', extension='0', size='0')]
/a/e [File(name='584 i', extension='0', size='0')]
/d [File(name='4060174 j', extension='0', size='0'), File(name='8033020 d.log', extension='0', size='0'), File(name='5626152 d.ext', extension='0', size='0'), File(name='7214296 k', extension='0', size='0')]
