In [None]:
import random
# Seed the module-level random generator so the random draws made later in
# this file (random.random / random.choice) repeat identically on every run.
random.seed(42)
class TreeNode:
    """A mutable node of a rooted tree.

    Equality is deliberately left as the default identity comparison: other
    code in this file relies on ``list.remove(node)`` and ``child != node``
    acting on the exact node object rather than on look-alike nodes.

    Attributes:
        name: Label of the node, or None when it has none.
        weight: Numeric value attached to the node (the number written after
            ':' in Newick text), or None when absent.
        children: Child nodes in order; an empty list marks a leaf.
    """

    def __init__(self, name=None, weight=None):
        self.name = name
        self.weight = weight
        # Created per instance so that no two nodes ever share a child list.
        self.children = []

    def __repr__(self):
        # Report only the child count: printing whole subtrees would make the
        # repr of any node near the top unreadable.
        return (
            f"{type(self).__name__}(name={self.name!r}, "
            f"weight={self.weight!r}, children={len(self.children)})"
        )

def parse_newick(newick_str):
    """Parse a Newick string into a tree of ``TreeNode`` objects.

    The returned root stands for the outermost parenthesised group, so for
    ``"(a,b);"`` the root's children are the leaves ``a`` and ``b``.  Every
    node other than the root is created with a placeholder name of the form
    ``Internal_<depth>_<counter>``; the placeholder is overwritten when the
    text supplies a label, which means unlabelled internal nodes keep it.

    Text following the last ``)`` (a root label and/or ``:weight``) is
    assigned to the root, and a string without parentheses such as ``"a;"``
    yields a root carrying that label.  Parsing stops at the first ``;``.

    Args:
        newick_str: Newick text; the terminating ``;`` is optional.

    Returns:
        The root ``TreeNode``.

    Raises:
        ValueError: If the text after a ``:`` is not a valid float.
        IndexError: If a ``,`` or ``)`` appears with no open ``(``.
    """
    root = TreeNode()
    current_node = root
    stack = []  # ancestors of current_node, outermost first
    name_buffer = []
    weight_buffer = []
    internal_node_counter = 1
    reading_weight = False

    def flush_into(node):
        """Assign any buffered label / weight to *node* and empty the buffers."""
        if name_buffer:
            node.name = ''.join(name_buffer).strip()
            name_buffer.clear()
        if weight_buffer:
            node.weight = float(''.join(weight_buffer).strip())
            weight_buffer.clear()

    for char in newick_str:
        if char == '(':
            # Opening a group creates its first child and descends into it.
            new_node = TreeNode(name=f"Internal_{len(stack) + 1}_{internal_node_counter}")
            internal_node_counter += 1
            current_node.children.append(new_node)
            stack.append(current_node)
            current_node = new_node
        elif char == ',':
            # Finish the current sibling and start the next one under the
            # same parent (the top of the stack).
            flush_into(current_node)
            reading_weight = False
            new_node = TreeNode(name=f"Internal_{len(stack) + 1}_{internal_node_counter}")
            internal_node_counter += 1
            stack[-1].children.append(new_node)
            current_node = new_node
        elif char == ')':
            # Finish the last sibling; what follows labels the enclosing group.
            flush_into(current_node)
            reading_weight = False
            current_node = stack.pop()
        elif char == ';':
            # End of the tree: never let the terminator leak into a buffer.
            break
        elif char == ':':
            reading_weight = True
        elif reading_weight:
            weight_buffer.append(char)
        else:
            name_buffer.append(char)

    # Whatever is still buffered belongs to the node being read when the
    # input ended -- normally the root's own label / weight.
    flush_into(current_node)
    return root

def delete_internal_nodes_randomly(node, parent=None, root=None, probability=0.2):
    """Randomly collapse internal nodes of the subtree under *node*, in place.

    The subtree is processed bottom-up.  For every internal node (a node with
    children) other than *root*, one ``random.random()`` draw is made; when
    the draw is below *probability* and the node has a parent, the node is
    removed and its children are appended to the end of the parent's child
    list.  The removed node's own ``weight`` is discarded; the children keep
    theirs.  A node called without a parent (the top of the traversal) is
    never removed, although a draw is still made for it.

    Args:
        node: Subtree to process; ``None`` and leaves are ignored.
        parent: Parent of *node*, or None when *node* is the top node.
        root: Optional node that must never be removed; it is honoured at
            any depth of the recursion.
        probability: Chance in [0, 1] that an eligible node is removed.

    Returns:
        None.  The tree is modified in place.
    """
    if node is None or not node.children:
        return

    # Iterate over a snapshot: collapsing a child rewrites node.children.
    # Grandchildren spliced in here were already handled one level down.
    for child in list(node.children):
        delete_internal_nodes_randomly(child, node, root, probability)

    if node is not root and random.random() < probability:
        if parent is not None:
            parent.children.extend(node.children)
            parent.children.remove(node)


def tree_to_newick(node):
    """Serialize the subtree rooted at *node* as Newick text (no trailing ';').

    A leaf is written as ``name`` or ``name:weight``; an internal node as
    ``(child,child,...)`` followed by its name (if any) and ``:weight`` (if
    any).  A missing name (``None``) is written as an empty label, so the
    result is always a string.

    Args:
        node: Object exposing ``name``, ``weight`` and ``children``.

    Returns:
        The Newick representation of the subtree as a ``str``.
    """
    # Treat a missing name as an empty label instead of returning None or
    # emitting the literal text "None".
    label = "" if node.name is None else node.name
    weight_part = "" if node.weight is None else f":{node.weight}"

    if not node.children:  # Leaf node
        return f"{label}{weight_part}"

    children_str = ','.join(tree_to_newick(child) for child in node.children)
    return f"({children_str}){label}{weight_part}"

def save_all_trees_to_file(file_name, newick_str, num_iterations):
    """Write *num_iterations* randomly modified copies of a tree to one file.

    Each round parses *newick_str* afresh (the deletion step mutates the tree
    in place), applies ``delete_internal_nodes_randomly`` and writes the
    result as one ';'-terminated Newick line.  *file_name* is opened in 'w'
    mode, so an existing file is overwritten.
    """
    with open(file_name, 'w') as handle:
        for _ in range(num_iterations):
            tree = parse_newick(newick_str)
            delete_internal_nodes_randomly(tree)
            # print() supplies the line break after the terminating ';'.
            print(tree_to_newick(tree) + ';', file=handle)

# Smaller alternative input with labelled internal nodes (disabled):
# newick_str = '((a:0.1,b:0.2)Internal_1_1:0.3,(c:0.4,d:0.5)Internal_1_2:0.6,e:0.7);'
# Input tree in Newick format: 20 leaves labelled 1-20, with ':<weight>' values.
newick_str = '(2:0.6777,((10:0.7562,((1:0.8030,(12:0.6086,15:0.6774):0.9596):1.2179,(8:0.5632,(5:1.0052,(6:1.2852,16:0.5543):0.9160):0.6712):0.6512):0.8763):1.1659,(7:0.8885,18:1.3717):2.3191):0.7332,((4:1.4749,(11:1.1367,(13:0.8061,(14:0.8402,17:1.8708):1.4004):0.5387):2.2528):0.9152,(3:0.7242,(19:0.5383,(9:0.7981,20:2.0009):0.7158):0.7773):0.7671):0.7414);'
# Output path; save_all_trees_to_file opens it in 'w' mode, replacing any existing file.
all_trees_file_name = 'all_modified_trees100.txt'

# Generate k randomly modified trees and write them to one file, one Newick line per tree.
k = 100
save_all_trees_to_file(all_trees_file_name, newick_str, k)


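# Alternative deletion strategies follow: first a variant with a 5% threshold, then a variant that combines a 20% draw with a 5% fallback draw.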

In [None]:
def delete_internal_nodes_randomly(node, parent=None, root=None, probability=0.05):
    """Randomly dissolve internal nodes into a sibling, in place.

    The subtree under *node* is processed bottom-up.  One ``random.random()``
    draw is made for every internal node (a node with children).  When the
    draw is below *probability*, the node has a parent, and the parent has at
    least one other internal child, the node's children are appended to one
    such sibling chosen with ``random.choice`` and the node is removed from
    its parent.  If there is no other internal sibling the node stays.  The
    removed node's own ``weight`` is discarded.

    Args:
        node: Subtree to process; ``None`` and leaves are ignored.
        parent: Parent of *node*, or None when *node* is the top node.
        root: Unused; kept so the signature stays unchanged.
        probability: Chance in [0, 1] that an internal node is selected.

    Returns:
        None.  The tree is modified in place.
    """
    if node is None or not node.children:
        return

    # Iterate over a snapshot: dissolving a child rewrites node.children.
    for child in list(node.children):
        delete_internal_nodes_randomly(child, node, root, probability)

    # The draw comes first so it is consumed for every internal node,
    # including the parentless top node.
    if random.random() < probability and parent is not None:
        # Only siblings that are themselves internal may adopt the children.
        internal_siblings = [
            sibling for sibling in parent.children
            if sibling is not node and sibling.children
        ]
        if internal_siblings:
            adopter = random.choice(internal_siblings)
            adopter.children.extend(node.children)
            parent.children.remove(node)


In [None]:
def delete_internal_nodes_randomly(node, parent=None, root=None):
    """Randomly dissolve internal nodes into a sibling, using two draws.

    The subtree is handled bottom-up.  For each internal node a first draw
    selects it when below 0.2; only if that fails, a second draw selects it
    when below 0.05 (about 0.24 overall).  A selected node that has a parent
    with another internal child hands its children to one such sibling,
    picked with ``random.choice``, and is removed from the parent; otherwise
    nothing changes.  ``root`` is accepted but not used.
    """
    if node is None or not node.children:
        return

    # Snapshot the children: the recursive calls may rewrite node.children.
    for kid in list(node.children):
        delete_internal_nodes_randomly(kid, node)

    # Short-circuit `or`: the second draw happens only when the first one
    # did not select the node.
    selected = random.random() < 0.2 or random.random() < 0.05
    if not selected or not parent:
        return

    # Candidate adopters: the parent's other children that are internal.
    adopters = [sib for sib in parent.children if sib != node and sib.children]
    if not adopters:
        return

    adopter = random.choice(adopters)
    adopter.children.extend(node.children)
    parent.children.remove(node)
