## Converting a Directory Tree to a DataFrame

<br>

### Development Environment

In [None]:
# Install the `directory_tree` package, which provides the `display_tree` function imported in the next cell.
%pip install directory_tree

In [63]:
import re
import pandas as pd
from directory_tree import display_tree

### Directory Tree in HuggingFace Transformers

In [113]:
# Folder to scan, given as a relative path.
customPath = 'transformers'
# Capture the rendered tree in a variable: the result is printed below and later
# split on newlines by make_dircetory_dictionary. With show_hidden=True the output
# below includes dot-entries such as .git/ and .circleci/; max_depth=5 matches the
# five depth columns (depth1..depth5) built further down.
directory_tree = display_tree(customPath, max_depth=5, string_rep=True, show_hidden=True)
print(directory_tree)

transformers/
├── .circleci/
│   ├── config.yml
│   ├── create_circleci_config.py
│   └── TROUBLESHOOT.md
├── .coveragerc
├── .git/
│   ├── config
│   ├── description
│   ├── HEAD
│   ├── hooks/
│   │   ├── applypatch-msg.sample
│   │   ├── commit-msg.sample
│   │   ├── fsmonitor-watchman.sample
│   │   ├── post-update.sample
│   │   ├── pre-applypatch.sample
│   │   ├── pre-commit.sample
│   │   ├── pre-merge-commit.sample
│   │   ├── pre-push.sample
│   │   ├── pre-rebase.sample
│   │   ├── pre-receive.sample
│   │   ├── prepare-commit-msg.sample
│   │   ├── push-to-checkout.sample
│   │   └── update.sample
│   ├── index
│   ├── info/
│   │   └── exclude
│   ├── logs/
│   │   ├── HEAD
│   │   └── refs/
│   │       ├── heads/
│   │       │   └── main
│   │       └── remotes/
│   │           └── origin/
│   ├── objects/
│   │   ├── info/
│   │   └── pack/
│   │       ├── pack-9f66381849476d086d8361fe4d5cb50b96c410cd.idx
│   │       └── pack-9f66381849476d086d8361fe4d5cb50b96c410cd.pack

### Convert the Directory Tree to a DataFrame

In [120]:
def make_dircetory_dictionary(directory_tree, max_depth=5):
    """Group the lines of a rendered directory tree by nesting depth.

    Parameters
    ----------
    directory_tree : str
        Multi-line tree text. The first line is treated as the root (depth 0).
        Every following entry is expected to look like ``<indent>├── name`` or
        ``<indent>└── name``, where ``<indent>`` consists of ``│`` and space
        characters, four characters per nesting level.
    max_depth : int, default 5
        Deepest level that gets its own bucket. Entries nested deeper than
        this are placed in the last bucket.

    Returns
    -------
    list of dict
        ``max_depth + 1`` dictionaries ordered from depth 0 to ``max_depth``.
        Each maps the line index within the tree text to the unmodified line.

    Raises
    ------
    ValueError
        If ``max_depth`` is negative.
    """
    if max_depth < 0:
        raise ValueError("max_depth must be >= 0")

    branches = directory_tree.split("\n")
    directory_dicts = [{} for _ in range(max_depth + 1)]

    for idx, branch_text in enumerate(branches):
        if idx == 0:
            depth = 0
        else:
            # Depth is derived only from the indentation in front of the
            # connector. Counting leftover punctuation across the whole line
            # would let characters such as '.', '-' or '_' inside a file name
            # push the entry into a deeper bucket.
            connector = re.match(r"([│ ]*)[├└]", branch_text)
            if connector is None:
                # No connector found (e.g. a blank trailing line): keep the
                # line in the depth-1 bucket so that every line is still
                # assigned to exactly one bucket.
                depth = 1
            else:
                depth = len(connector.group(1)) // 4 + 1

        directory_dicts[min(depth, max_depth)][idx] = branch_text

    return directory_dicts

In [121]:
def check_length_of_dictionary_and_number_of_branches(branches, directory_dicts):
    """Print whether the depth dictionaries account for every tree line.

    Prints ``True`` when the total number of entries across all dictionaries
    in ``directory_dicts`` equals ``len(branches)``, otherwise ``False``.
    Nothing is returned.
    """
    expected_total = len(branches)
    classified_total = sum(len(bucket) for bucket in directory_dicts)
    print(expected_total == classified_total)

In [122]:
def next_key(dict, key):
    """Return the key that follows ``key`` in iteration order, or ``False``.

    ``False`` is returned both when ``key`` is the last key and when ``key``
    is not present at all.
    """
    remaining = iter(dict)
    # A membership test on an iterator consumes it up to and including the
    # first match, so the next item (if any) is the successor of `key`.
    if key in remaining:
        return next(remaining, False)
    return False

In [143]:
def make_dircetory_list(branches, directory_dicts):
    """Expand each depth dictionary into a list aligned with the tree lines.

    For every dictionary in ``directory_dicts`` a list of ``len(branches)``
    empty strings is created, and each ``{index: text}`` entry is written to
    its index. The resulting lists are returned in the same order as the
    dictionaries.
    """
    row_count = len(branches)

    def _as_column(bucket):
        # Start from an all-blank column and drop each entry into its row.
        column = [""] * row_count
        for row, text in bucket.items():
            column[row] = text
        return column

    return [_as_column(bucket) for bucket in directory_dicts]

In [139]:
def make_dircetory_dataframe(directory_lists):
    """Build a DataFrame with one column per depth level.

    Parameters
    ----------
    directory_lists : list of list of str
        One list per depth, ordered from depth 0 upward. All lists must have
        the same length (one slot per tree line).

    Returns
    -------
    pandas.DataFrame
        Columns are named ``depth0``, ``depth1``, ... in the order the lists
        were given. The number of columns follows the number of lists, so the
        function is not tied to exactly six depth levels.
    """
    columns = {f"depth{level}": values for level, values in enumerate(directory_lists)}
    directory_df = pd.DataFrame(columns)

    return directory_df

In [125]:
directory_dicts = make_dircetory_dictionary(directory_tree)
# `branches` is otherwise only a local variable inside make_dircetory_dictionary,
# so it has to be created at notebook scope before it can be passed on; without
# this line the cell raises NameError on a fresh kernel.
branches = directory_tree.split("\n")
check_length_of_dictionary_and_number_of_branches(branches, directory_dicts)

True


In [144]:
# Split the tree text right here (the same split make_dircetory_dictionary uses)
# so this cell does not depend on a `branches` variable left over in the kernel.
directory_lists = make_dircetory_list(directory_tree.split("\n"), directory_dicts)
directory_df = make_dircetory_dataframe(directory_lists)

In [172]:
# Save the table as an Excel workbook; the relative file name is resolved against
# the current working directory.
directory_df.to_excel("directory_df.xlsx")

In [170]:
# Show every row for this one display only. option_context restores the previous
# 'display.max_rows' setting when the block exits, instead of changing it for
# every later cell in the session.
with pd.option_context('display.max_rows', None):
    display(directory_df)

Unnamed: 0,depth0,depth1,depth2,depth3,depth4,depth5
0,transformers/,,,,,
1,,├── .circleci/,,,,
2,,,│ ├── config.yml,,,
3,,,│ ├── create_circleci_config.py,,,
4,,,│ └── TROUBLESHOOT.md,,,
5,,├── .coveragerc,,,,
6,,├── .git/,,,,
7,,,│ ├── config,,,
8,,,│ ├── description,,,
9,,,│ ├── HEAD,,,


### Reference

<b>Paper</b>
<br>[Hugo Touvron et al. LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971)