In [69]:

import random


def gen_num_non_leafs(min_depth: int, min_degree: int, max_degree: int, num_leafs: int, min_num_non_leafs: int = 1, max_num_non_leafs: int = None):
    """
    Draw a random number of non-leaf nodes for a tree with 'num_leafs' leafs.

    The smallest admissible count is the largest of 'min_depth', 'min_num_non_leafs' and
    ceil((num_leafs - 1) / (max_degree - 1)). The largest admissible count is the larger of
    'min_depth' and floor((num_leafs - 1) / (min_degree - 1)), capped by 'max_num_non_leafs' when given.

    :param min_depth: The minimum depth of the tree; also used as a floor for the count.
    :param min_degree: The minimum number of children per non-leaf node.
    :param max_degree: The maximum number of children per non-leaf node.
    :param num_leafs: The number of leafs of the tree.
    :param min_num_non_leafs: The minimum number of non-leaf nodes.
    :param max_num_non_leafs: The optional maximum number of non-leaf nodes.
    :return: A random integer between the two limits (both inclusive).
    """
    surplus_total = num_leafs - 1
    min_surplus_per_node = min_degree - 1
    max_surplus_per_node = max_degree - 1

    # Ceiling division: one extra node is needed whenever there is a remainder.
    full_nodes, remainder = divmod(surplus_total, max_surplus_per_node)
    fewest_by_degree = full_nodes + (1 if remainder else 0)
    fewest = max(min_depth, min_num_non_leafs, fewest_by_degree)

    most = max(min_depth, surplus_total // min_surplus_per_node)
    if max_num_non_leafs is not None:
        most = min(most, max_num_non_leafs)

    return random.randint(fewest, most)


def gen_random_composition(n, lower_bound, upper_bound, size):
    """
    Splits the integer 'n' into a composition, i.e. a list of 'size' random integers that sum up to 'n', where each integer is >= lower_bound and <= upper_bound.

    :param n: The integer to be split.
    :param lower_bound: The minimum value of each split integer.
    :param upper_bound: The maximum value of each split integer.
    :param size: The number of integers to split 'n' into.
    :return: A list of 'size' integers that sum up to 'n'.
    :raises ValueError: If 'size' is negative, if lower_bound > upper_bound, or if 'n' is outside [lower_bound * size, upper_bound * size].
    """
    if size < 0:
        raise ValueError("Size cannot be negative.")

    # Validate the bounds first: the feasibility check below would otherwise always fire
    # before this one and hide the more specific message.
    if lower_bound > upper_bound:
        raise ValueError("Lower bound cannot be greater than upper bound.")

    # Check if it's possible to split 'n' under given constraints
    if n < lower_bound * size or n > upper_bound * size:
        raise ValueError("Impossible to split the integer under given constraints.")

    # With equal bounds there is exactly one composition (the feasibility check already guarantees the sum).
    if lower_bound == upper_bound:
        return [lower_bound] * size

    composition = []
    remaining_sum = n
    # 'remaining_numbers_count' is the number of pieces still to be drawn AFTER the current one.
    for remaining_numbers_count in range(size - 1, -1, -1):
        # Keep the current piece within the range that leaves the rest of the sum reachable.
        current_value_lower_bound = max(lower_bound, remaining_sum - upper_bound * remaining_numbers_count)
        current_value_upper_bound = min(upper_bound, remaining_sum - lower_bound * remaining_numbers_count)
        current_piece = random.randint(current_value_lower_bound, current_value_upper_bound)
        composition.append(current_piece)
        remaining_sum -= current_piece

    # Shuffle so that the position of a piece does not depend on the order in which it was drawn.
    random.shuffle(composition)

    return composition


def swap_degree_values_outer(depth_degree_allocation : list[list[int]], leafs_counts : list[int], current_it : int, degree_values_list : list[int]):
    """
    Exchange the largest not-yet-processed degree value (index >= current_it in 'degree_values_list')
    with the last degree value of a randomly chosen depth, provided the former is strictly larger.

    The deepest allocated depth is never chosen. Both 'depth_degree_allocation' and
    'degree_values_list' are modified in place, and the extra children gained by the chosen depth
    are added to 'leafs_counts' one level below it.

    :return: The depth at which new leafs became available, or None if no depth qualifies.
    """
    deepest_depth = len(depth_degree_allocation) - 1

    # Index of the largest pending value (the first one on ties).
    pending_indices = range(current_it, len(degree_values_list))
    donor_index = max(pending_indices, key=degree_values_list.__getitem__)
    donor_value = degree_values_list[donor_index]

    candidate_depths = []
    for depth, degrees in enumerate(depth_degree_allocation):
        if donor_value > degrees[-1] and depth < deepest_depth:
            candidate_depths.append(depth)

    if not candidate_depths:
        return None

    picked_depth = random.choice(candidate_depths)
    picked_row = depth_degree_allocation[picked_depth]
    replaced_value = picked_row[-1]
    gained_leafs = donor_value - replaced_value

    # Perform the exchange, then restore the descending order of the row.
    degree_values_list[donor_index] = replaced_value
    picked_row[-1] = donor_value
    picked_row.sort(reverse=True)

    leaf_depth = picked_depth + 1
    leafs_counts[leaf_depth] += gained_leafs
    return leaf_depth


def swap_degree_values_inner(depth_degree_allocation : list[list[int]], leafs_counts : list[int]):
    """
    Exchange two degree values that are already allocated: the last value of a shallower depth
    with the first value of a deeper depth, when the former is strictly smaller.

    Shallow depths are scanned from the top and, for each of them, deep depths from the bottom;
    the first qualifying pair is used. After the exchange, degree values located below the deeper
    depth are shifted to the corresponding levels below the shallower depth.
    'depth_degree_allocation' and 'leafs_counts' are modified in place.

    :return: The depth at which new leafs became available, or None if no pair qualifies.
    """
    level_count = len(depth_degree_allocation)

    # First (shallow, deep) pair in scan order whose exchange increases the shallow value.
    pair = next(
        (
            (upper, lower)
            for upper in range(level_count - 1)
            for lower in range(level_count - 1, upper, -1)
            if depth_degree_allocation[upper][-1] < depth_degree_allocation[lower][0]
        ),
        None,
    )
    if pair is None:
        return None
    shallow, deep = pair

    shallow_row = depth_degree_allocation[shallow]
    deep_row = depth_degree_allocation[deep]
    small_value, large_value = shallow_row[-1], deep_row[0]
    moved_count = large_value - small_value

    shallow_row[-1] = large_value
    deep_row[0] = small_value
    shallow_row.sort(reverse=True)
    deep_row.sort(reverse=True)

    # Walk down level by level: the leading 'moved_count' degree values below the deep depth
    # are re-homed below the shallow depth; their sum is the count for the next level.
    for offset in range(1, level_count - deep):
        source_row = depth_degree_allocation[deep + offset]
        shifted = source_row[:moved_count]
        target_row = depth_degree_allocation[shallow + offset]
        target_row.extend(shifted)
        depth_degree_allocation[deep + offset] = source_row[moved_count:]
        target_row.sort(reverse=True)
        moved_count = sum(shifted)

    leaf_depth = shallow + level_count - deep

    # Whatever is left of the running count shows up as new leafs at that depth.
    leafs_counts[leaf_depth] += moved_count

    return leaf_depth


def add_value_cut_outer(depth_degree_allocation : list[list[int]], leafs_counts : list[int], current_it : int, degree_values_list : list[int], min_degree: int, max_degree: int):
    """
    Move part of the largest not-yet-processed degree value (index >= current_it in
    'degree_values_list') onto the last degree value of a randomly chosen depth.

    The donor value never drops below 'min_degree' and the receiving value never exceeds
    'max_degree' (no upper limit when 'max_degree' is None). The deepest allocated depth is never
    chosen. 'depth_degree_allocation', 'degree_values_list' and 'leafs_counts' are modified in place.

    :return: The depth at which new leafs became available, or None if nothing can be moved.
    """
    tree_depth = len(depth_degree_allocation)

    arg_max_it = max(range(current_it, len(degree_values_list)), key=lambda i: degree_values_list[i])

    # The most the donor can give away while staying >= min_degree.
    donor_spare = degree_values_list[arg_max_it] - min_degree
    if donor_spare < 1:
        return None

    depths_options = [d for d, v in enumerate(depth_degree_allocation) if (max_degree is None or v[-1] + 1 <= max_degree) and d < tree_depth - 1]

    if not depths_options:
        return None

    selected_depth = random.choice(depths_options)
    selected_row = depth_degree_allocation[selected_depth]

    # 'max_degree' may be None (unbounded); in that case only the donor limits the amount.
    if max_degree is None:
        max_added = donor_spare
    else:
        max_added = min(max_degree - selected_row[-1], donor_spare)
    added_leafs_count = random.randint(1, max_added)

    degree_values_list[arg_max_it] -= added_leafs_count

    selected_row[-1] += added_leafs_count
    # Keep the row in descending order, as the other alteration functions do after changing a value.
    selected_row.sort(reverse=True)

    chosen_leaf_depth = selected_depth + 1

    leafs_counts[chosen_leaf_depth] += added_leafs_count

    return chosen_leaf_depth


def add_value_cut_inner(depth_degree_allocation : list[list[int]], leaves_counts : list[int], min_degree: int, max_degree: int):
    """
    Move part of the first degree value of a deeper depth onto the last degree value of a
    shallower depth, both already allocated.

    Shallow depths are scanned from the top (skipping those whose last value is already at
    'max_degree') and deep depths from the bottom; the first pair where the deep value can give
    away at least 1 while staying >= 'min_degree' is used. 'max_degree' may be None (no upper limit).
    After the move, degree values located below the deeper depth are shifted to the corresponding
    levels below the shallower depth. 'depth_degree_allocation' and 'leaves_counts' are modified in place.

    :return: The depth at which new leafs became available, or None if no pair qualifies.
    """
    tree_depth = len(depth_degree_allocation)

    chosen_i, chosen_j = None, None
    for i in range(0, tree_depth - 1):
        if max_degree is not None and depth_degree_allocation[i][-1] + 1 > max_degree:
            continue
        for j in range(tree_depth - 1, i, -1):
            if depth_degree_allocation[j][0] - 1 >= min_degree:
                chosen_i, chosen_j = i, j
                break
        if chosen_j is not None:
            break

    if chosen_j is None:
        return None

    # The most the deep value can give away while staying >= min_degree.
    donor_spare = depth_degree_allocation[chosen_j][0] - min_degree
    # 'max_degree' may be None (unbounded); in that case only the donor limits the amount.
    if max_degree is None:
        max_added = donor_spare
    else:
        max_added = min(max_degree - depth_degree_allocation[chosen_i][-1], donor_spare)
    added_leafs_count = random.randint(1, max_added)

    depth_degree_allocation[chosen_i][-1] += added_leafs_count
    depth_degree_allocation[chosen_i].sort(reverse=True)

    depth_degree_allocation[chosen_j][0] -= added_leafs_count
    depth_degree_allocation[chosen_j].sort(reverse=True)

    # Incrementally update the levels below chosen_j: the leading 'added_leafs_count' degree
    # values of level chosen_j + k are moved to level chosen_i + k, and their sum becomes the
    # count to move at the next level.
    for k in range(1, tree_depth - chosen_j):
        moved_values = depth_degree_allocation[chosen_j + k][:added_leafs_count]
        depth_degree_allocation[chosen_i + k] += moved_values
        depth_degree_allocation[chosen_j + k] = depth_degree_allocation[chosen_j + k][added_leafs_count:]
        depth_degree_allocation[chosen_i + k].sort(reverse=True)
        added_leafs_count = sum(moved_values)

    chosen_leaf_depth = chosen_i + tree_depth - chosen_j

    leaves_counts[chosen_leaf_depth] += added_leafs_count

    return chosen_leaf_depth


def alter_depth_degree_allocation(depth_degree_allocation : list[list[int]], leafs_counts : list[int], current_it : int, degree_values_list : list[int], min_degree: int, max_degree: int):
    """
    Try the four alteration strategies in a fixed order and stop at the first one that succeeds.

    Order: swap_degree_values_outer, swap_degree_values_inner, add_value_cut_outer, add_value_cut_inner.

    :return: The depth returned by the first successful strategy, or None if all of them return None.
    """
    # Wrapped in lambdas so that a strategy only runs if all previous ones failed.
    strategies = (
        lambda: swap_degree_values_outer(depth_degree_allocation, leafs_counts, current_it, degree_values_list),
        lambda: swap_degree_values_inner(depth_degree_allocation, leafs_counts),
        lambda: add_value_cut_outer(depth_degree_allocation, leafs_counts, current_it, degree_values_list, min_degree, max_degree),
        lambda: add_value_cut_inner(depth_degree_allocation, leafs_counts, min_degree, max_degree),
    )
    for attempt in strategies:
        leaf_depth = attempt()
        if leaf_depth is not None:
            return leaf_depth
    return None


def accumulate_degree_value(depth_degree_allocation : list[list[int]], leafs_counts : list[int], max_degree: int, current_it : int, degree_values_list : list[int]):
    """
    Spread the degree value at 'degree_values_list[current_it]' over the degree values already
    present in 'depth_degree_allocation'.

    Allocated values are visited depth by depth, in list order, and each one is raised as far as
    'max_degree' allows until nothing is left to distribute. The increase is also added to
    'leafs_counts' one level below, except for the deepest allocated depth.
    'depth_degree_allocation' and 'leafs_counts' are modified in place.

    :return: The amount that could not be distributed (0 when everything was absorbed).
    """
    level_count = len(depth_degree_allocation)
    leftover = degree_values_list[current_it]
    depth, slot = 0, 0
    while leftover > 0 and depth < level_count:
        row = depth_degree_allocation[depth]
        before = row[slot]
        after = min(max_degree, before + leftover)
        gained = after - before
        row[slot] = after
        if depth + 1 < level_count:
            leafs_counts[depth + 1] += gained
        leftover -= gained
        # Advance to the next value, moving on to the next depth at the end of a row.
        slot += 1
        if slot >= len(row):
            depth, slot = depth + 1, 0
    return leftover


def assign_depth_degree(degree_values_list : list[int], min_depth: int = 1, max_depth: int = None, min_degree : int = 2, max_degree : int = None, strict_num_internal_nodes : bool = False, min_num_internals : int = 1):
    """
    Place every degree value of 'degree_values_list' (one per internal node) at a depth of the tree.

    The first 'min_depth' values each open a new depth, forming a chain. Every later value is
    placed at a random depth that still has a free leaf slot. When no slot is free, the allocation
    is altered via alter_depth_degree_allocation; if that fails too, the value is merged into
    already placed nodes via accumulate_degree_value, unless 'strict_num_internal_nodes' is set or
    the number of internal nodes is already at 'min_num_internals'.

    NOTE(review): new slots are opened while the chosen depth is <= max_depth, so a node can
    still be placed at depth max_depth + 1 — confirm this is the intended (soft) limit.

    :return: A tuple (depth -> descending list of degree values, number of internal nodes kept),
             or None when the values cannot be placed.
    """
    assert min_depth >= 1
    assert max_depth is None or min_depth is None or max_depth >= min_depth
    assert min_degree >= 2
    assert max_degree is None or max_degree >= min_degree
    if max_degree is None:
        max_degree = float("inf")

    levels : list[list[int]] = [] # depth -> degree values of the nodes placed at that depth
    open_slots : list[int] = [1] # depth -> number of leafs at that depth; the root position is the single initial slot
    kept_internal_nodes = len(degree_values_list)

    for it in range(len(degree_values_list)):
        if len(levels) < min_depth:
            # Still building the mandatory chain: open the next depth.
            target_depth = len(levels)
        else:
            candidate_depths = [depth for depth, free in enumerate(open_slots) if free > 0]
            if candidate_depths:
                target_depth = random.choice(candidate_depths)
            else:
                target_depth = alter_depth_degree_allocation(levels, open_slots, it, degree_values_list, min_degree, max_degree)

        if target_depth is None:
            # No slot could be freed: merge this node into the existing ones, if permitted.
            if strict_num_internal_nodes or kept_internal_nodes <= min_num_internals:
                return None
            if accumulate_degree_value(levels, open_slots, max_degree, it, degree_values_list) > 0:
                return None
            kept_internal_nodes -= 1
            continue

        if len(levels) <= target_depth:
            levels.append([])

        # Read the value only now: the alteration step above may have changed degree_values_list.
        degree_value = degree_values_list[it]
        target_row = levels[target_depth]
        target_row.append(degree_value)
        target_row.sort(reverse=True)

        open_slots[target_depth] -= 1

        if max_depth is not None and target_depth > max_depth:
            continue

        # The children of the new node are leaf slots one level below it.
        child_depth = target_depth + 1
        if child_depth >= len(open_slots):
            open_slots.append(degree_value)
        else:
            open_slots[child_depth] += degree_value

    return levels, kept_internal_nodes


def form_tree(depth_degree_allocation: list[list[int]]):
    """
    Form a tree from a depth -> degree allocation.

    Node ids are consecutive integers starting at 0 (the root). For every degree value listed at
    a depth, a random node that currently has no children is picked at that depth and given that
    many children.

    The root has depth 0 and every child has its parent's depth + 1.

    :param depth_degree_allocation: depth -> list of degree values of the internal nodes at that depth.
    :return: A tuple (root_id, parent_info, depth_info, children_info, frontiers) where
             parent_info maps a non-root node to its parent, depth_info maps a node to its depth,
             children_info maps an internal node (and always the root) to its list of children, and
             frontiers[d] lists the nodes of depth d that were left without children.
    :raises ValueError: If a depth lists more degree values than there are nodes available at that depth.
    """
    parent_info : dict[int, int] = {}
    depth_info : dict[int, int] = {}
    children_info : dict[int, list[int]] = {}
    global_id_iterator = 0
    root_id = global_id_iterator
    children_info[root_id] = []
    depth_info[root_id] = 0
    frontiers: list[list[int]] = [[] for _ in range(len(depth_degree_allocation) + 1)]
    frontiers[0] = [root_id]
    for depth, degree_values in enumerate(depth_degree_allocation):
        for degree in degree_values:
            if not frontiers[depth]:
                raise ValueError(f"No childless node left at depth {depth} to attach children to.")
            parent_id_index = random.randint(0, len(frontiers[depth]) - 1)
            # The picked node becomes internal, so it leaves the frontier of its depth.
            parent_id = frontiers[depth].pop(parent_id_index)
            children_info.setdefault(parent_id, [])
            for _ in range(degree):
                global_id_iterator += 1
                frontiers[depth + 1].append(global_id_iterator)
                parent_info[global_id_iterator] = parent_id
                # Children live one level below their parent.
                depth_info[global_id_iterator] = depth + 1
                children_info[parent_id].append(global_id_iterator)
    return root_id, parent_info, depth_info, children_info, frontiers



def gen_random_tree(num_leafs : int, min_depth : int = 1, max_depth : int = None, num_non_leafs : int = None, min_degree : int = 2, max_degree : int = None, min_num_non_leafs : int = 1, max_num_non_leafs : int = None):
    """
    Generates a random tree with the given number of leafs, minimum depth, maximum depth, non_leafs_count, and the given maximum and minimum number of children possible per non-leaf node.

    Priority is given to the minimum depth, then to the number of non-leaf nodes, then to the minimum children count, then max children count, then finally max depth.

    When 'num_non_leafs' is not given, it is drawn at random and the generator is allowed to merge
    non-leaf nodes that cannot be placed.
    NOTE(review): a merge appears to change the resulting number of leafs — verify against accumulate_degree_value.

    :return: A tuple (depth_info, parent_info, children_info, leaf_nodes, frontiers, num_non_leafs,
             tree_depth, avg_branching_factor), where 'num_non_leafs' is the number of non-leaf
             nodes actually present in the tree and 'avg_branching_factor' is the number of
             child links divided by that number.
    :raises ValueError: If the constraints cannot be satisfied.
    """
    assert min_degree >= 2
    assert max_degree is None or max_degree >= min_degree
    assert min_depth >= 1
    assert max_depth is None or max_depth >= min_depth

    if max_degree is None:
        max_degree = num_leafs

    # An explicitly requested count must be honoured exactly; a generated one may be reduced.
    strict_num_non_leafs = num_non_leafs is not None

    if num_non_leafs is None:
        num_non_leafs = gen_num_non_leafs(min_depth, min_degree, max_degree, num_leafs, min_num_non_leafs, max_num_non_leafs)

    degree_values_list = gen_random_composition(
        n=num_leafs + num_non_leafs - 1,
        lower_bound=min_degree,
        upper_bound=max_degree,
        size=num_non_leafs,
    )

    allocation = assign_depth_degree(
        degree_values_list=degree_values_list,
        min_depth=min_depth,
        max_depth=max_depth,
        min_degree=min_degree,
        max_degree=max_degree,
        strict_num_internal_nodes=strict_num_non_leafs,
        min_num_internals=min_num_non_leafs,
    )
    # assign_depth_degree signals failure with None; report it instead of failing on the unpacking below.
    if allocation is None:
        raise ValueError("Impossible to arrange the non-leaf nodes into a tree under given constraints.")
    depth_degree_allocation, internal_nodes_count = allocation

    root_id, parent_info, depth_info, children_info, frontiers = form_tree(depth_degree_allocation)

    tree_depth = max(depth_info.values())

    # A leaf is a node without a (non-empty) child list.
    leaf_nodes = [node_id for node_id in depth_info if not children_info.get(node_id)]

    # Every non-root node has exactly one entry in parent_info, i.e. one child link.
    avg_branching_factor = len(parent_info) / internal_nodes_count

    return depth_info, parent_info, children_info, leaf_nodes, frontiers, internal_nodes_count, tree_depth, avg_branching_factor



In [74]:
# Sample run: 1000 leafs where every non-leaf node has exactly two children.
(
    depth_info,
    parent_info,
    children_info,
    leaf_nodes,
    frontiers,
    num_non_leafs,
    tree_depth,
    avg_branching_factor,
) = gen_random_tree(1000, min_degree=2, max_degree=2)

In [73]:
# Number of entries in children_info, i.e. nodes that have a child list.
len(children_info)

99

In [None]:
from andortree.tree_types import Node, NodeType, reverse_node_type

In [None]:
def gen_conventional_node_ids(leaf_nodes : list[int], children_info : dict[int, list[int]], root_node_id : int = 0) -> tuple[dict[int, int], list[int], int]:
    """
    Generate new node ids, based on this convention:
    - leaf nodes are numbered first, from 0 to len(leaf_nodes) - 1 (inclusive), in random order
    - then, a dummy node is added. Its new id is len(leaf_nodes); its old id is the placeholder -1.
    - then the non-leaf, non-root nodes (in random order), starting at len(leaf_nodes) + 1
    - then finally, the root node, which gets the largest id

    The input list 'leaf_nodes' is not modified.

    :return: A tuple (old id -> new id map, new leaf node ids, new root node id).
    """
    middle_nodes = [node_id for node_id, child_ids in children_info.items() if node_id != root_node_id and len(child_ids) > 0]
    random.shuffle(middle_nodes)
    # Shuffle a copy so that the caller's list keeps its order.
    shuffled_leaf_nodes = list(leaf_nodes)
    random.shuffle(shuffled_leaf_nodes)
    # -1 stands in for the dummy node, which has no old id.
    node_ids_sorted = shuffled_leaf_nodes + [-1] + middle_nodes + [root_node_id]
    new_node_ids_map = {old_node_id: new_node_id for new_node_id, old_node_id in enumerate(node_ids_sorted)}
    new_root_node_id = len(node_ids_sorted) - 1
    new_leaf_nodes = list(range(len(shuffled_leaf_nodes)))
    return new_node_ids_map, new_leaf_nodes, new_root_node_id

In [None]:
def reevaluate_tree(depth_info : dict[int, int], parent_info : dict[int, int], children_info : dict[int, list[int]], new_node_ids_map : dict[int, int]) -> tuple[dict[int, int], dict[int, int], dict[int, list[int]]]:
    """
    Reevaluate tree information based on the new node_id mapping.

    Only nodes that appear as keys of 'new_node_ids_map' are carried over; parents and children
    are translated through the same map.

    :param depth_info: old node id -> depth.
    :param parent_info: old node id -> old parent id.
    :param children_info: old node id -> list of old children ids.
    :param new_node_ids_map: old node id -> new node id.
    :return: A tuple (new_depth_info, new_parent_info, new_children_info) keyed by the new node ids.
    :raises KeyError: If a referenced parent or child has no entry in 'new_node_ids_map'.
    """
    new_parent_info = {}
    new_children_info = {}
    new_depth_info = {}
    for old_node_id, new_node_id in new_node_ids_map.items():
        if old_node_id in parent_info:
            new_parent_info[new_node_id] = new_node_ids_map[parent_info[old_node_id]]
        if old_node_id in children_info:
            new_children_info[new_node_id] = [new_node_ids_map[child_id] for child_id in children_info[old_node_id]]
        if old_node_id in depth_info:
            new_depth_info[new_node_id] = depth_info[old_node_id]
    return new_depth_info, new_parent_info, new_children_info

In [None]:
def assign_node_type(depth_info: dict[int, int], leaf_nodes : list[int], children_info: dict[int, int], root_node_id : int, strict_and_or: bool = True, root_node_type : NodeType = None):
    """
    Build a node id -> node type map for the tree.

    The root gets 'root_node_type' (picked at random between AND and OR when not given). Every
    node in 'leaf_nodes', and every node of 'children_info' without children, is a LEAF. The other
    nodes either follow the parity of their depth ('strict_and_or': even depth -> root type, odd
    depth -> the reversed root type) or get AND / OR at random.
    """
    if root_node_type is None:
        root_node_type = random.choice([NodeType.AND, NodeType.OR])
    flipped_root_type = reverse_node_type(root_node_type)

    node_type_info = {root_node_id: root_node_type}
    node_type_info.update((leaf_id, NodeType.LEAF) for leaf_id in leaf_nodes)

    for node_id, child_ids in children_info.items():
        if child_ids is None or len(child_ids) == 0:
            node_type_info[node_id] = NodeType.LEAF
        elif node_id != root_node_id:
            # The root keeps the type assigned above.
            if strict_and_or:
                on_even_depth = depth_info[node_id] % 2 == 0
                node_type_info[node_id] = root_node_type if on_even_depth else flipped_root_type
            else:
                node_type_info[node_id] = random.choice([NodeType.AND, NodeType.OR])

    return node_type_info

In [None]:
def get_tree_info(tree_info_size : int, depth_info : dict[int, int], parent_info : dict[int, int], children_info : dict[int, list[int]], node_type_info : dict[int, NodeType]):
    """
    Build one Node per id in range(tree_info_size), in id order.

    Parent, children and depth are None for ids missing from the respective dict; the node
    type must be present in 'node_type_info' for every id.
    """
    tree_info = []
    for node_id in range(tree_info_size):
        node = Node(
            node_id=node_id,
            node_type=NodeType(node_type_info[node_id]),
            parent_id=parent_info.get(node_id),
            children_ids=children_info.get(node_id),
            depth=depth_info.get(node_id),
        )
        tree_info.append(node)
    return tree_info

In [None]:
def gen_tree_info_full(num_leafs : int, min_depth : int = 1, max_depth : int = None, num_non_leafs : int = None, min_num_children : int = 2, max_num_children : int = None, root_node_type : NodeType = None, strict_and_or : bool = True):
    """
    Generate a random tree, with minimum depth, maximum depth, number of non-leaf nodes, and the maximum and minimum number of children per non-leaf node.

    Return a list of Node objects, where each Node object contains information about the node's id, type, parent id, children ids, and depth.

    The node ids are assigned according to the following convention:
    - leaf nodes are numbered first, from 0 to num_leafs - 1 (inclusive)
    - then, a dummy node is added. Its id is num_leafs.
    - then non-leaf nodes, from num_leafs + 1 to num_leafs + num_non_leafs - 1 (inclusive)
    - then finally, the root node

    :return: A tuple (tree_info, depth_info, parent_info, children_info, leaf_nodes, root_node_id,
             num_non_leafs, tree_depth, avg_branching_factor), all expressed in the new node ids.
    """
    # gen_random_tree returns eight values; the frontiers are not needed here.
    depth_info, parent_info, children_info, leaf_nodes, _frontiers, num_non_leafs, tree_depth, avg_branching_factor = gen_random_tree(num_leafs=num_leafs, min_depth=min_depth, max_depth=max_depth, num_non_leafs=num_non_leafs, min_degree=min_num_children, max_degree=max_num_children)
    # The generated tree always uses id 0 for its root.
    new_node_ids_map, leaf_nodes, root_node_id = gen_conventional_node_ids(leaf_nodes=leaf_nodes, children_info=children_info, root_node_id=0)
    depth_info, parent_info, children_info = reevaluate_tree(depth_info, parent_info, children_info, new_node_ids_map)
    node_type_info = assign_node_type(depth_info=depth_info, leaf_nodes=leaf_nodes, children_info=children_info, root_node_id=root_node_id, strict_and_or=strict_and_or, root_node_type=root_node_type)
    # The id right after the leafs is reserved for the dummy node.
    node_type_info[num_leafs] = NodeType.DUMMY
    # leafs + non-leafs + the dummy node
    tree_info_size = num_leafs + num_non_leafs + 1
    tree_info = get_tree_info(tree_info_size, depth_info, parent_info, children_info, node_type_info)
    return tree_info, depth_info, parent_info, children_info, leaf_nodes, root_node_id, num_non_leafs, tree_depth, avg_branching_factor

In [None]:
# Sample run: 10000 leafs, depth of at least 20, 10 to 11 children per non-leaf node.
(
    tree_info,
    depth_info,
    parent_info,
    children_info,
    leaf_nodes,
    root_node_id,
    strict_num_non_leafs,
    tree_depth,
    avg_branching_factor,
) = gen_tree_info_full(10000, min_depth=20, min_num_children=10, max_num_children=11)
# Displayed as the cell result.
(
    len(tree_info),
    tree_depth,
    avg_branching_factor,
    tree_depth * avg_branching_factor,
)