In [2]:
import pandas as pd
import random

# ノード名の生成
def generate_node_name(depth, parent_name):
    if depth == 0:
        return "1"
    else:
        num_children = 2 if depth < 6 else 0
        child_names = [f"{parent_name}.{i+1}" for i in range(num_children)]
        return child_names

# ツリーの構築
def build_tree(depth, parent_name=""):
    if depth > 6:
        return []
    
    node_name = generate_node_name(depth, parent_name)
    if isinstance(node_name, str):
        node_name = [node_name]
    
    nodes = []
    for name in node_name:
        nodes.append((name, parent_name))
        children = build_tree(depth + 1, name)
        nodes.extend(children)
    return nodes

# ランダムに二分木を生成
tree_nodes = build_tree(0)

# DataFrameの作成
df = pd.DataFrame(tree_nodes, columns=["ID", "parentID"])
df['parentID'] = df['parentID'].replace("", None)  # ルートノードの親IDをNoneに設定
df['scientificName'] = df['ID']  # scientificNameをIDと同じに設定
df['rank'] = "node"  # 全てのrankを"node"に設定

# 結果を表示
df


Unnamed: 0,ID,parentID,scientificName,rank
0,1,,1,node
1,1.1,1,1.1,node
2,1.1.1,1.1,1.1.1,node
3,1.1.1.1,1.1.1,1.1.1.1,node
4,1.1.1.1.1,1.1.1.1,1.1.1.1.1,node
...,...,...,...,...
58,1.2.2.2.1.1,1.2.2.2.1,1.2.2.2.1.1,node
59,1.2.2.2.1.2,1.2.2.2.1,1.2.2.2.1.2,node
60,1.2.2.2.2,1.2.2.2,1.2.2.2.2,node
61,1.2.2.2.2.1,1.2.2.2.2,1.2.2.2.2.1,node


In [11]:
df['value'] = 0  # 初期値を0に設定
# リーフノードの値を1に設定
def set_leaf_values(df):
    df.loc[df['children'].apply(len) == 0, 'value'] = 1

# ノードの値を子孫ノードの値の合計に設定
def update_values(df):
    node_dict = df.set_index('n').to_dict('index')
    
    def calculate_value(node):
        if node_dict[node]['value'] != 0:
            return node_dict[node]['value']
        
        children = node_dict[node]['children']
        if not children:
            return node_dict[node]['value']
        
        value = 0
        for child in children:
            value += calculate_value(child)
        
        node_dict[node]['value'] += value
        return node_dict[node]['value']
    
    # valueが0のノードのみ計算する
    for node in df[df['value'] == 0]['n']:
        calculate_value(node)
    
    # ノードのvalueを更新
    df.update(pd.DataFrame.from_dict(node_dict, orient='index'))

# リーフノードに値を設定
set_leaf_values(df)
update_values(df)
df

KeyError: 'children'

In [3]:
import json
import os

# DataFrameを構造体に変換
def build_tree_structure(df):
    ROOT = 'ROOT of LIVES'

    # CoL ID -> index の対応表
    index = {id: i for i, id in enumerate(df.ID)}
    index[ROOT] = -1

    # Life の木構造
    lives = [{'n': n, 'name': name, 'parent': -1, 'children': []}
             for n, name in enumerate(df.scientificName)]
    lives.append({'n': -1, 'name': ROOT, 'parent': -1, 'children': []})  # ToL[-1]

    # 学名 -> index の対応表
    lookup = {life['name']: life['n'] for life in lives}

    orphans = []

    for _, life in df.iterrows():
        _life = lives[index[life.ID]]
        try:
            if pd.isna(life.parentID): 
                print(f'Root of life: {life.scientificName}')
            parent = lives[index[life.parentID]]
            _life['parent'] = parent['n']
            parent['children'].append(_life['n'])
        except: 
            orphans.append({'id': life.ID, 'name': life.scientificName})  # parentID が登録されていない Life は無視する

    return dict(lives=lives, index=index, lookup=lookup, orphans=orphans)

tree_structure = build_tree_structure(df)

# ディレクトリが存在しない場合は作成
os.makedirs(os.path.dirname('data/simple.json'), exist_ok=True)

# JSONファイルに保存
with open('data/simple.json', 'w') as f:
    json.dump(tree_structure, f, ensure_ascii=False, indent=2)

Root of life: 1


In [5]:
class Simple:
    #PROJECT = os.path.dirname(os.path.abspath(__file__))  # 現在のスクリプトファイルのディレクトリ
    data_path = os.path.join('data', 'simple.json')
    
    with open(data_path, 'r') as f:
        ToL = json.load(f)

    def __init__(self):
        ToL = self.ToL
        self.lives = ToL['lives']
        self.index = ToL['index']
        self.lookup = ToL['lookup']
        self.orphans = ToL['orphans']

    def life(self, name=None, n=None):
        if type(name) == str: n = self.lookup[name]
        if type(n) == int:
            return self.lives[n]

    def subtree(self, name=None, n=None, depth=2^32):
        if type(name) == str: life = self.life(name=name)
        elif type(n) == int: life = self.life(n=n)
        else: return
        
        if depth <= 0: return copy.deepcopy(life)
        life = dict(n=life['n'], parent=life['parent'], name=life['name'],
                    children=[self.subtree(n=c, depth=depth-1) for c in life['children']])
        return life
    
    def subtrees(self, ns=[], depth=1):
        return dict(zip(ns, [self.subtree(n=n, depth=depth) for n in ns]))

In [6]:
try: sim
except:
    sim = Simple()

In [7]:
sim.life(n=1)

{'n': 1, 'name': '1.1', 'parent': 0, 'children': [2, 17]}