# FileCube
FileCube provides a simple interface for constructing directory trees defined by the Cartesian product of lists. For example, passing
```
    ['dir1','dir2'],
    ['dirA','dirB','dirC']
```
as ordered arguments results in:

```

root
├── dir1
│   ├── dirA
│   ├── dirB
│   └── dirC
└── dir2
    ├── dirA
    ├── dirB
    └── dirC
        
```
This may be helpful in a data science project where, for example, the first level holds different datasets and the second level different ML models. The module organises these into a tidy hierarchy.

In [97]:
from treelib import Tree
from collections import OrderedDict
from pathlib import Path

class FileCube():
    """
    In-memory directory tree built from the product of several lists of names.

    Every level of the tree repeats the same list of child names under each
    node of the level above. The tree is stored in ``self.dirs`` (a treelib
    ``Tree``); each node's ``data`` is the ``pathlib.Path`` of that directory,
    built by joining the names from the root down.
    """

    def __init__(self, root_node_name, ordered_dict):
        """
        Build the tree from a root name and an ordered mapping of levels.

        Parameters
        ----------
        root_node_name : name (path) of the root directory.
        ordered_dict : OrderedDict mapping a level name to the iterable of
            directory names used at that level, outermost level first. After
            construction the nodes of each level are also available as a list
            under ``self.<level name>``.

        Raises
        ------
        TypeError : if ``ordered_dict`` is not an ``OrderedDict``.
        ValueError : if a level name would overwrite an existing attribute
            of the instance (``dirs`` or one of the methods).
        """
        # Validate before building anything. An explicit raise (instead of an
        # assert) keeps the check active when Python runs with -O.
        if not isinstance(ordered_dict, OrderedDict):
            raise TypeError(
                'ordered_dict must be an OrderedDict, got {}'.format(
                    type(ordered_dict).__name__))

        self.dirs = Tree()

        # Levels are exposed through setattr below; refuse names that would
        # silently replace ``dirs`` or a method of the class.
        for name in ordered_dict:
            if hasattr(self, name):
                raise ValueError(
                    'Level name {!r} clashes with an existing attribute'.format(name))

        # Node identifiers are consecutive integers, starting with the root.
        next_id = 0
        root_node = self.dirs.create_node(
            root_node_name,
            identifier=next_id,
            data=Path(root_node_name))
        next_id += 1

        parent_nodes = [root_node]
        for name, values in ordered_dict.items():
            level_nodes = []
            # Attach every name of this level under every node of the
            # previous level.
            for parent_node in parent_nodes:
                for child in values:
                    new_node = self.dirs.create_node(
                        child,
                        parent=parent_node,
                        identifier=next_id,
                        data=parent_node.data / child)
                    next_id += 1
                    level_nodes.append(new_node)

            # The nodes just created become the parents of the next level.
            parent_nodes = level_nodes
            setattr(self, name, level_nodes)

    def makedirs(self, exist_ok=True):
        """
        Write the directories to disk.

        Creates the directory of every node in the tree, including missing
        parents. ``exist_ok`` is forwarded to ``Path.mkdir``.
        """
        for node in self.dirs.all_nodes():
            node.data.mkdir(parents=True, exist_ok=exist_ok)

            # to implement... pass arguments through to makedirs

        print('\nWritten directory tree -- (exist_ok={})'.format(exist_ok))

    def check_faithful(self, rootdir, subset=False):
        """
        Check whether the directories in memory exist on disk.

        The directories found recursively under ``rootdir`` (plus ``rootdir``
        itself) are compared with the paths stored in the tree. Tree paths
        start with the root node name given at construction, so ``rootdir``
        has to be spelled the same way for the two sets to match.

        Parameters
        ----------
        rootdir : directory on disk to scan.
        subset : if False (default) the directories on disk must be exactly
            those of the tree; if True it is enough that every directory of
            the tree exists on disk, and extra directories are allowed.

        Raises
        ------
        ValueError : if the comparison fails.
        """

        def collect_dirs(directory, found):
            # Depth-first walk that records every sub-directory.
            for entry in Path(directory).iterdir():
                if entry.is_dir():
                    found.append(entry)
                    collect_dirs(entry, found)
            return found

        on_disk = set(collect_dirs(rootdir, []))
        on_disk.add(Path(rootdir))
        in_memory = {node.data for node in self.dirs.all_nodes()}

        if subset:
            # Every in-memory directory must be present on disk.
            faithful = in_memory <= on_disk
            message = '\nIn-memory tree exists on disk'
        else:
            faithful = in_memory == on_disk
            message = '\nDirectory identical to in-memory tree'

        if not faithful:
            raise ValueError('Discrepancy in directories')
        print(message)





In [98]:
from collections import OrderedDict


# Name of the top-level directory of the example project.
root_dir = 'project_name'

# One entry per directory level, outermost level first.
ordered_dict = OrderedDict([
    ('timestamps', ['01-01-2022', '02-01-2022']),
    ('datasets', ['dataset_1', 'dataset_2']),
    ('model_parameters', ['param_val_1', 'param_val_2', 'param_val_3']),
])

# Build the in-memory tree, display it, then create the directories on disk.
t = FileCube(root_dir, ordered_dict)
t.dirs.show()
t.makedirs()

project_name
├── 01-01-2022
│   ├── dataset_1
│   │   ├── param_val_1
│   │   ├── param_val_2
│   │   └── param_val_3
│   └── dataset_2
│       ├── param_val_1
│       ├── param_val_2
│       └── param_val_3
└── 02-01-2022
    ├── dataset_1
    │   ├── param_val_1
    │   ├── param_val_2
    │   └── param_val_3
    └── dataset_2
        ├── param_val_1
        ├── param_val_2
        └── param_val_3


Written directory tree -- (exist_ok=True)


In [99]:
# Compare the in-memory tree with the directories found under root_dir,
# in subset mode; a ValueError is raised if the comparison fails.
t.check_faithful(root_dir, subset=True)


In-memory tree exists on disk
