In [4]:
import random
# Set a seed for reproducibility
random.seed(42)

class TreeNode:
    """One node of a rooted tree.

    Attributes:
        name: Label of the node, or None when it has no label.
        weight: Branch length attached to the node, or None when absent.
        children: Ordered list of child nodes; empty for a leaf.
    """

    def __init__(self, name=None, weight=None):
        # A fresh list per instance, so nodes never share their children.
        self.children = list()
        self.name, self.weight = name, weight

def parse_newick(newick_str):
    """Parse a Newick string into a tree of ``TreeNode`` objects.

    The parser walks the string one character at a time:

    * ``(`` opens a group: a placeholder child is created under the current
      node and becomes the current node.
    * ``,`` finishes the current sibling and starts a new placeholder sibling
      under the node on top of the stack.
    * ``)`` finishes the current sibling and returns to the enclosing node.
    * ``:`` switches from reading a label to reading a branch length.
    * ``;`` ends the tree; anything after it is ignored.

    Placeholder nodes are named ``Internal_<depth>_<counter>`` until a label
    from the input overwrites the name. Branch lengths are converted with
    ``float``. Quoted labels and bracketed comments are not interpreted.

    A label and/or branch length written after the final ``)`` (i.e. on the
    root itself, as in ``(a,b)R:0.5;``) is stored on the returned root.

    Args:
        newick_str: The tree in Newick notation.

    Returns:
        The root ``TreeNode``. Its ``name`` and ``weight`` stay ``None``
        unless the input labels the root.

    Raises:
        ValueError: If a branch length cannot be converted to ``float``.
        IndexError: If a ``,`` or ``)`` appears outside any parentheses.
    """
    stack = []
    root = TreeNode()
    current_node = root
    name_buffer = []
    weight_buffer = []
    internal_node_counter = 1
    reading_weight = False
    for char in newick_str:
        if char == ';':
            # End of the tree: do not let the terminator (or anything after
            # it) leak into the label / branch-length buffers.
            break
        if char == '(':
            new_node = TreeNode(name=f"Internal_{len(stack) + 1}_{internal_node_counter}")
            internal_node_counter += 1
            current_node.children.append(new_node)
            stack.append(current_node)
            current_node = new_node
        elif char == ',':
            _flush_label(current_node, name_buffer, weight_buffer)
            reading_weight = False
            new_node = TreeNode(name=f"Internal_{len(stack) + 1}_{internal_node_counter}")
            internal_node_counter += 1
            stack[-1].children.append(new_node)
            current_node = new_node
        elif char == ')':
            _flush_label(current_node, name_buffer, weight_buffer)
            reading_weight = False
            current_node = stack.pop()
        elif char == ':':
            reading_weight = True
        elif reading_weight:
            weight_buffer.append(char)
        else:
            name_buffer.append(char)
    # Whatever is still buffered belongs to the node we ended on; for a
    # balanced input that is the root, whose label used to be dropped.
    _flush_label(current_node, name_buffer, weight_buffer)
    return root


def _flush_label(node, name_chars, weight_chars):
    """Store any buffered label / branch length on ``node`` and empty both buffers."""
    if name_chars:
        node.name = ''.join(name_chars).strip()
        name_chars.clear()
    if weight_chars:
        node.weight = float(''.join(weight_chars).strip())
        weight_chars.clear()

def delete_internal_nodes_randomly(node, parent=None, root=None, probability=0.2):
    """Randomly collapse internal nodes of the subtree rooted at ``node``.

    The subtree is processed bottom-up. Each internal node (a node with at
    least one child) is removed with the given probability; its children are
    appended to the end of its parent's child list. The removed node's own
    ``weight`` is discarded, it is not added to the promoted children.

    The node the traversal starts from has no parent and is therefore never
    removed. A random number is still drawn for it (unless it is passed as
    ``root``), which keeps seeded runs identical to earlier results.

    Args:
        node: Node whose subtree is processed in place; ``None`` is ignored.
        parent: Parent of ``node``; ``None`` for the starting node.
        root: Optional node that must never be removed.
        probability: Chance in ``[0, 1]`` of removing each internal node.

    Returns:
        None. The tree is modified in place.
    """
    if node is None or not node.children:
        return

    # Iterate over a snapshot: collapsing a child appends to node.children.
    for child in list(node.children):
        delete_internal_nodes_randomly(child, node, root, probability)

    if node is not root and random.random() < probability:
        if parent is not None:
            parent.children.extend(node.children)
            parent.children.remove(node)

# def insert_internal_node(node, parent=None, root=None):
#     if node is None or not node.children or len(node.children) <= 2:
#         return

#     if random.random() < 0.2:
#         new_node = TreeNode(name=f"Internal_{random.randint(1, 10)}")
#         # Move the last two children to the new internal node
#         new_node.children.append(node.children.pop())
#         new_node.children.append(node.children.pop())
#         node.children.append(new_node)

#     for child in node.children:
#         insert_internal_node(child, node)

def insert_internal_node(node, parent=None, root=None, probability=0.4):
    """Randomly group children of multifurcating nodes under new internal nodes.

    The subtree rooted at ``node`` is visited top-down. At every node with
    more than two children, with the given probability, two randomly chosen
    children are moved under a newly created ``TreeNode`` (named
    ``Internal_<1..10>``, no weight) which is appended as the last child.
    At most one new node is inserted per visited node.

    Nodes with one or two children are left as they are, but the traversal
    still continues into their subtrees so that deeper multifurcations are
    reached as well.

    Args:
        node: Node whose subtree is processed in place; ``None`` is ignored.
        parent: Unused; kept for signature compatibility.
        root: Unused; kept for signature compatibility.
        probability: Chance in ``[0, 1]`` of inserting a node at each
            multifurcation.

    Returns:
        None. The tree is modified in place.
    """
    if node is None or not node.children:
        return

    # The length check comes first so no random number is consumed for
    # nodes that cannot be modified.
    if len(node.children) > 2 and random.random() < probability:
        new_node = TreeNode(name=f"Internal_{random.randint(1, 10)}")
        # Randomly select two children to move to the new internal node
        random_children_indices = random.sample(range(len(node.children)), 2)
        # Pop the larger index first so the smaller one stays valid.
        for index in sorted(random_children_indices, reverse=True):
            new_node.children.append(node.children.pop(index))
        node.children.append(new_node)

    for child in node.children:
        insert_internal_node(child, node, root, probability)


# NOTE: a function with this same name is defined again further down in this
# cell; Python keeps the later definition, so this one is effectively shadowed.
def save_all_trees_to_file(file_name, newick_str, num_iterations):
    """Write randomly perturbed copies of a Newick tree to a text file.

    For each iteration the input string is parsed afresh, internal nodes are
    randomly deleted, new internal nodes are randomly inserted, and the
    resulting tree is written as one ';'-terminated Newick line.

    Args:
        file_name: Path of the file to create or overwrite.
        newick_str: Source tree in Newick notation.
        num_iterations: Number of perturbed trees (lines) to write.
    """
    with open(file_name, 'w') as out:
        for _ in range(num_iterations):
            tree = parse_newick(newick_str)
            # Deletion runs before insertion.
            delete_internal_nodes_randomly(tree)
            insert_internal_node(tree)

            serialized = tree_to_newick(tree) + ';'
            out.write(serialized + "\n")



def tree_to_newick(node):
    """Serialize the subtree rooted at ``node`` to Newick text.

    Leaves are written as ``name`` or ``name:weight``. Internal nodes are
    written as a parenthesised, comma-separated list of their children,
    followed by ``:weight`` when a weight is set; their names are omitted.
    No trailing ``;`` is added.
    """
    has_weight = node.weight is not None

    if not node.children:
        # Leaf: the label, with the branch length appended when known.
        if has_weight:
            return f"{node.name}:{node.weight}"
        return node.name

    rendered_children = [tree_to_newick(kid) for kid in node.children]
    group = "(" + ','.join(rendered_children) + ")"
    if has_weight:
        return f"{group}:{node.weight}"
    return group


def save_all_trees_to_file(file_name, newick_str, num_iterations):
    """Generate perturbed variants of a Newick tree and store them line by line.

    Every iteration re-parses ``newick_str``, randomly removes internal
    nodes, randomly inserts new internal nodes, and writes the resulting
    tree followed by ';' on its own line.

    Args:
        file_name: Path of the output file; it is created or overwritten.
        newick_str: Source tree in Newick notation.
        num_iterations: How many perturbed trees to write.
    """
    with open(file_name, 'w') as handle:
        for _iteration in range(num_iterations):
            perturbed = parse_newick(newick_str)
            delete_internal_nodes_randomly(perturbed)
            insert_internal_node(perturbed)

            # One tree per line, terminated with ';'.
            print(tree_to_newick(perturbed) + ';', file=handle)


# Alternative, smaller reference tree (currently disabled):
#ref_tree = '((a:0.1,b:0.2)Internal_1_1:0.3,(c:0.4,d:0.5)Internal_1_2:0.6,e:0.7);'
# Reference tree in Newick format; the (g,h) group carries a branch length but no label.
ref_tree = '((a:0.1,b:0.2)Internal_1_1:0.3,(g:.1,h:.2):.3,(c:0.4,d:0.5)Internal_1_2:0.6,e:0.7);'
# Output file; it is overwritten and receives one perturbed tree per line
all_trees_file_name = '3.txt'

# Number of perturbed trees to generate from ref_tree
k = 10
save_all_trees_to_file(all_trees_file_name, ref_tree, k)


In [None]:
pip install toytree

In [9]:
import toytree
import numpy as np
# Five-leaf example pair. NOTE: these two strings are never used, because
# `ref_tree` and `generated` are rebound to toytree objects at the end of this cell.
ref_tree = '((a:0.1,b:0.2):0.3,(c:0.4,d:0.5):0.6,e:0.7);'

generated = '(e:0.7,c:0.4,(d:0.5,(a:0.1,b:0.2):0.3));'

# Seven-leaf reference tree; two of its internal nodes carry labels.
ref2 = '((a:0.1,b:0.2)Internal_1_1:0.3,(g:0.1,h:0.2):0.3,(c:0.4,d:0.5)Internal_1_2:0.6,e:0.7);'


# Seven-leaf tree to compare against ref2 — presumably one line copied from the
# generated output file; TODO confirm its origin.
gen2 = "((c:0.4,d:0.5):0.6,e:0.7,a:0.1,(b:0.2,(g:0.1,h:0.2):0.3));"

# Build the toytree objects that the following cells draw.
# NOTE(review): confirm that tree_format=1 is the right setting for input with named internal nodes.
ref_tree = toytree.tree(ref2, tree_format=1)
generated = toytree.tree(gen2, tree_format=1)

In [10]:
# Keep `ref_tree` bound to the tree: rebinding it to the first element of the
# draw() result (presumably the canvas — see the toytree docs) made this cell
# fail when it was run a second time.
ref_canvas, axes, mark = ref_tree.draw(height=300, width = 400,tree_style='o',node_sizes=10);

In [11]:
# Keep `generated` bound to the tree: rebinding it to the first element of the
# draw() result (presumably the canvas — see the toytree docs) made this cell
# fail when it was run a second time.
generated_canvas, axes, mark = generated.draw(height=300, width = 400,tree_style='o',node_sizes=10);