<a href="https://colab.research.google.com/github/okuno-mari/okuno-Phylogenetic-tree-generator/blob/main/PhyligeneticTreeGenerator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas matplotlib --upgrade --force-reinstall

Collecting pandas
  Downloading pandas-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/91.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting matplotlib
  Downloading matplotlib-3.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting numpy>=1.23.2 (from pandas)
  Downloading numpy-2.3.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting python-dateutil>=2.8.2 (from pandas)
  Downloading python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import math

def _read_phylum_table(file_path):
    """Load ``file_path`` as an Excel sheet, falling back to CSV.

    Returns the loaded DataFrame, or None when neither reader can parse the
    file (the reasons are printed).
    """
    try:
        df = pd.read_excel(file_path)
    except Exception as e_excel:
        # Broad on purpose: whatever made the Excel reader fail, the CSV
        # reader below still gets its chance.
        print(f"'{file_path}' をExcelファイルとして読み込めませんでした: {e_excel}")
    else:
        print(f"'{file_path}' をExcelファイルとして読み込みました。")
        return df

    try:
        df = pd.read_csv(file_path)
    except Exception as e_csv:
        print(f"エラー: '{file_path}' をCSVファイルとしても読み込めませんでした: {e_csv}")
        print("ファイルが存在するか、正しい形式か確認してください。")
        return None
    print(f"'{file_path}' をCSVファイルとして読み込みました。")
    return df


def _build_phylum_tree(df):
    """Build the parent -> children mapping from the table and pick a root.

    Returns ``(children_of, root_name)``. ``root_name`` is None when no root
    can be determined. When several top-level nodes exist they are grouped
    under a synthetic root named "Life".
    """
    children_of = {}
    top_level = []
    child_names = set()

    for raw_parent, raw_child in zip(df['祖先'], df['門名']):
        if pd.isna(raw_child):
            continue
        child = str(raw_child).strip()
        if not child:
            continue

        # Blank cells are read as NaN rather than '', so they must be mapped
        # to "no parent" before str() turns them into the text 'nan'.
        parent = '' if pd.isna(raw_parent) else str(raw_parent).strip()
        if parent == 'なし' or parent == '':
            siblings = top_level
        else:
            siblings = children_of.setdefault(parent, [])
        # Repeated rows must not create the same edge twice.
        if child not in siblings:
            siblings.append(child)
        child_names.add(child)

    if not top_level:
        # No row is explicitly marked as parentless: use every ancestor that
        # never appears as somebody's child (first-seen order).
        top_level = [name for name in children_of if name not in child_names]

    if not top_level:
        return children_of, None
    if len(top_level) == 1:
        return children_of, top_level[0]
    children_of["Life"] = top_level
    return children_of, "Life"


def _has_cycle(root, children_of):
    """Return True when some node reachable from ``root`` is its own descendant."""
    on_path = set()
    finished = set()

    def visit(node):
        if node in on_path:
            return True
        if node in finished:
            return False
        on_path.add(node)
        for child in children_of.get(node, []):
            if visit(child):
                return True
        on_path.discard(node)
        finished.add(node)
        return False

    return visit(root)


def create_and_draw_phylogenetic_tree_custom_matplotlib(
    file_path, output_image_path="phylogenetic_tree_custom_matplotlib.png"
):
    """
    Build a phylogenetic tree from an Excel/CSV table and save it as an image.

    The table must contain the columns '祖先' (ancestor), '門名' (phylum name),
    '特性' and '種数' (species count); '特性' is only checked for presence.
    A row whose ancestor is 'なし' or blank is a top-level node. Each node with
    a numeric species count is drawn as a circle whose radius grows with the
    count, leaves are spread vertically and inner nodes sit at the mean height
    of their children.

    Args:
        file_path: Path of the Excel or CSV file to read.
        output_image_path: Where the rendered PNG is written.

    Returns:
        None. Progress and error messages are printed; on any input problem
        (unreadable file, missing column, no root, cyclic ancestry) the
        function returns without drawing.
    """
    df = _read_phylum_table(file_path)
    if df is None:
        return

    required_columns = ['祖先', '門名', '特性', '種数']
    for col in required_columns:
        if col not in df.columns:
            print(f"エラー: 必須の列 '{col}' がファイル '{file_path}' に見つかりません。")
            print(f"現在の列名: {df.columns.tolist()}")
            return

    children_of, root_node_name = _build_phylum_tree(df)
    if root_node_name is None:
        print("エラー: ルートノードを特定できませんでした。データを確認してください。")
        print("Children of nodes:", children_of)
        return

    # The recursive layout passes below would never terminate on a cycle.
    if _has_cycle(root_node_name, children_of):
        print("エラー: 祖先関係に循環が含まれています。データを確認してください。")
        return

    # Coerce once so text cells become NaN instead of breaking the arithmetic;
    # keys use the same stripped names as the tree nodes.
    numeric_species = pd.to_numeric(df['種数'], errors='coerce')
    node_species_counts = {
        str(name).strip(): count
        for name, count in zip(df['門名'], numeric_species)
        if pd.notna(name) and pd.notna(count)
    }
    valid_species = numeric_species.dropna()
    max_species = valid_species.max() if not valid_species.empty else 1000

    # x coordinate of a node = its depth below the root.
    depth_map = {}

    def calculate_depths(node, current_depth):
        depth_map[node] = current_depth
        for child in children_of.get(node, []):
            calculate_depths(child, current_depth + 1)

    calculate_depths(root_node_name, 0)

    known_nodes = set(children_of)
    for kids in children_of.values():
        known_nodes.update(kids)
    unreachable = sorted(known_nodes.difference(depth_map))
    if unreachable:
        print(f"警告: ルートから到達できないノードは描画されません: {unreachable}")

    # Leaves get evenly spaced rows in depth-first order (children by name).
    leaf_y_positions = {}
    current_y_pos = 0
    y_spacing_factor = 3.0

    def assign_leaf_y(node):
        nonlocal current_y_pos
        if not children_of.get(node):
            leaf_y_positions[node] = current_y_pos * y_spacing_factor
            current_y_pos += 1
        else:
            for child in sorted(children_of[node]):
                assign_leaf_y(child)

    assign_leaf_y(root_node_name)

    node_coords = {
        leaf: (depth_map.get(leaf, 0), y_pos)
        for leaf, y_pos in leaf_y_positions.items()
    }

    # Post-order guarantees children are placed before their parent.
    y_calc_order = []
    visited_y_calc = set()

    def get_post_order(node):
        if node in visited_y_calc:
            return
        visited_y_calc.add(node)
        for child in sorted(children_of.get(node, [])):
            get_post_order(child)
        y_calc_order.append(node)

    get_post_order(root_node_name)

    for node in y_calc_order:
        if node in node_coords:
            continue
        child_ys = [
            node_coords[child][1]
            for child in children_of.get(node, [])
            if child in node_coords
        ]
        if child_ys:
            node_coords[node] = (depth_map.get(node, 0), sum(child_ys) / len(child_ys))

    all_ys = [y for _, y in node_coords.values()]
    all_xs = [x for x, _ in node_coords.values()]
    min_y_data = min(all_ys, default=0)
    max_y_data = max(all_ys, default=1)
    min_x_data = min(all_xs, default=0)
    max_x_data = max(all_xs, default=0)

    x_padding_left = 0.5
    x_padding_right = 3.0
    y_padding_bottom = 0.1 * y_spacing_factor
    y_padding_top = 0.1 * y_spacing_factor
    units_per_inch_x = 0.6
    units_per_inch_y = 0.6

    fig_width = (max_x_data + x_padding_left + x_padding_right) / units_per_inch_x
    fig_height = (max_y_data - min_y_data + y_padding_bottom + y_padding_top) / units_per_inch_y

    # Never go below an 18 x 28 inch canvas.
    final_fig_width = max(18, fig_width)
    final_fig_height = max(28, fig_height)

    node_name_fontsize = 18
    species_text_fontsize = 14
    base_radius = 0.5
    max_visual_radius = 1.5
    text_offset_x = max_visual_radius * 1.0

    fig, ax = plt.subplots(figsize=(final_fig_width, final_fig_height))
    try:
        ax.set_aspect('equal')
        ax.set_xlim(min_x_data - x_padding_left, max_x_data + x_padding_right)
        ax.set_ylim(min_y_data - y_padding_bottom, max_y_data + y_padding_top)

        # Elbow connectors: horizontal at the parent's height, then vertical
        # at the child's depth.
        for parent_node, children_list in children_of.items():
            if parent_node not in node_coords:
                continue
            px, py = node_coords[parent_node]
            for child_node in children_list:
                if child_node not in node_coords:
                    continue
                cx, cy = node_coords[child_node]
                ax.plot([px, cx], [py, py], color='black', linewidth=1.5)
                ax.plot([cx, cx], [py, cy], color='black', linewidth=1.5)

        for node_name, (x, y) in node_coords.items():
            # A node named "Life" (the name given to the synthetic root) gets
            # neither a label nor a circle.
            if node_name == "Life":
                continue

            if not children_of.get(node_name):
                # Leaf: label to the right of the circle.
                ax.text(
                    x + text_offset_x, y, node_name,
                    va='center', ha='left', fontsize=node_name_fontsize, color='black'
                )
            else:
                # Inner node: label to the left, in gray.
                ax.text(
                    x - text_offset_x, y, node_name,
                    va='center', ha='right', fontsize=node_name_fontsize, color='gray'
                )

            if node_name in node_species_counts:
                species_count = node_species_counts[node_name]
                scaled_radius = (species_count / max_species) * (max_visual_radius - base_radius) \
                    if max_species else base_radius
                circle_radius = min(base_radius + scaled_radius, max_visual_radius)
                circle = plt.Circle((x, y), circle_radius, color='red', alpha=0.7, zorder=2)
                ax.add_patch(circle)
                ax.text(
                    x, y, str(int(species_count)),
                    ha='center', va='center', fontsize=species_text_fontsize,
                    color='white', zorder=3
                )

        ax.axis('off')
        fig.savefig(output_image_path, dpi=600, bbox_inches='tight')
        print(f"改善された系統樹が {output_image_path} に保存されました。")
    finally:
        # Release the figure even when saving fails.
        plt.close(fig)


# Run the generator on 'phylum_data.xlsx', a path relative to the current
# working directory; the function prints its own progress and error messages.
create_and_draw_phylogenetic_tree_custom_matplotlib('phylum_data.xlsx')


'phylum_data.xlsx' をExcelファイルとして読み込みました。
改善された系統樹が phylogenetic_tree_custom_matplotlib.png に保存されました。
