In [33]:
def infer_binary_tree_depth(ecd):
    """
    Work out how many levels a binary tree needs in order to hold every
    entry of the encoding `ecd` (an empty encoding yields depth 0).

    Returns a tuple `(depth, length)` where `length` is `len(ecd)` and `depth`
    is the smallest value such that `2 ** depth - 1 >= length`.
    """
    size = len(ecd)
    depth = 0
    # (1 << depth) - 1 is the node capacity of a tree with `depth` full levels;
    # keep adding levels until the encoding fits
    while (1 << depth) - 1 < size:
        depth += 1
    return depth, size


def get_level_to_pre_mapping(d):
    """
    Obtain the level-order to pre-order (as well as the pre-order to level-order)
    mapping for a binary tree of depth `d` in which every level is full, i.e.
    a tree with `2 ** d - 1` nodes.

    A mapping is defined as an array such that, when queried by index from the
    source order, it returns the index in the target order.

    Returns a tuple `(level_to_pre_map, pre_to_level_map)`. For `d == 0` (the
    empty tree) both mappings are empty lists.
    """
    n = 2 ** d - 1
    pre_to_level_map, level_to_pre_map = [], [0] * n
    # An empty tree has no root to visit: seeding the stack with node 0 would
    # index into an empty `level_to_pre_map` and raise IndexError.
    stack = [0] if n > 0 else []
    # A simple DFS gives the mapping from pre-order to level-order; the reverse
    # mapping is filled in during the same pass by flipping index and value.
    while stack:
        el = stack.pop()
        # the number of nodes visited so far is this node's pre-order index
        level_to_pre_map[el] = len(pre_to_level_map)
        pre_to_level_map.append(el)
        left, right = el * 2 + 1, el * 2 + 2
        if left < n:
            # push right first so that the left child is popped (visited) first
            stack.append(right)
            stack.append(left)

    return level_to_pre_map, pre_to_level_map


def convert_level_order_to_pre_order(level_order_ecd):
    """
    Re-arrange a level-order encoding into the corresponding pre-order encoding.

    The tree depth is inferred from the length of `level_order_ecd`; every
    token is then written to the pre-order slot that its level-order position
    maps to. Returns a new list of the same length as the input.
    """
    depth, size = infer_binary_tree_depth(level_order_ecd)
    to_pre_index = get_level_to_pre_mapping(depth)[0]
    reordered = [0] * size
    # the mapping is at least as long as the encoding, so zip pairs every
    # token with the pre-order slot of its level-order position
    for pre_idx, token in zip(to_pre_index, level_order_ecd):
        reordered[pre_idx] = token
    return reordered


def convert_pre_order_to_level_order(pre_order_ecd):
    """
    Re-arrange a pre-order encoding into the corresponding level-order encoding.

    The tree depth is inferred from the length of `pre_order_ecd`; every token
    is then written to the level-order slot that its pre-order position maps
    to. Returns a new list of the same length as the input.
    """
    depth, size = infer_binary_tree_depth(pre_order_ecd)
    to_level_index = get_level_to_pre_mapping(depth)[1]
    reordered = [0] * size
    # the mapping is at least as long as the encoding, so zip pairs every
    # token with the level-order slot of its pre-order position
    for level_idx, token in zip(to_level_index, pre_order_ecd):
        reordered[level_idx] = token
    return reordered

In [107]:
# x1 * (10 + x1), written in level-order: the root "*", its children "x1" and
# "+", then the two child slots under the leaf "x1" (filled with "-1",
# presumably marking unused slots) followed by the children of "+" ("10", "x1")
fake_level_order = ["*", "x1", "+", "-1", "-1", "10", "x1"]

In [108]:
# round-trip check, step 1: level-order -> pre-order
fake_pre_order = convert_level_order_to_pre_order(fake_level_order)
fake_pre_order

['*', 'x1', '-1', '-1', '+', '10', 'x1']

In [109]:
# round-trip check, step 2: pre-order -> level-order; the result should equal
# `fake_level_order`
recovered_level_order = convert_pre_order_to_level_order(fake_pre_order)
recovered_level_order

['*', 'x1', '+', '-1', '-1', '10', 'x1']

## Part II: Keeping track of the next tokens to assign

The easiest way to implement this is to first obtain the mappings (from above), then maintain the level-order encoding as usual, and use the mappings to translate the position of the next token to assign into its pre-order index.

In [104]:
# both index mappings for a depth-3 tree (2 ** 3 - 1 = 7 nodes)
level_to_pre_map, pre_to_level_map = get_level_to_pre_mapping(3)

In [106]:
# entry i is the pre-order index of the node stored at level-order index i
level_to_pre_map

[0, 1, 4, 2, 3, 5, 6]

In [98]:
def find_first_placeholder(ecd, placeholder=-2):
    """
    Return the index of the first entry of `ecd` that equals `placeholder`,
    or -1 when no entry matches.
    """
    matches = (
        position
        for position, token in enumerate(ecd)
        if token == placeholder
    )
    # the generator is lazy, so scanning stops at the first match
    return next(matches, -1)

def ecd_update_one_step(pre_order_ecd, level_order_ecd, action, level_to_pre_map, pre_to_level_map, placeholder=-2,
                        binary_ops=("*", "+"), unary_ops=("sin", "cos")):
    """
    Assign `action` to the next open slot of a partially built tree, updating
    the pre-order and the level-order encodings in place.

    The next open slot is the first `placeholder` found in `pre_order_ecd`.
    After the assignment, new placeholders are opened for the children of the
    assigned node: both children when `action` is in `binary_ops`, only the
    left child when it is in `unary_ops`, and none otherwise.

    Parameters:
        pre_order_ecd: pre-order encoding, modified in place.
        level_order_ecd: level-order encoding, modified in place.
        action: the token to assign.
        level_to_pre_map: level-order index -> pre-order index.
        pre_to_level_map: pre-order index -> level-order index.
        placeholder: marker of a slot that is waiting for a token.
        binary_ops: tokens that open two child slots.
        unary_ops: tokens that open one (left) child slot.

    Raises:
        ValueError: if `pre_order_ecd` contains no placeholder.
        IndexError: if `action` needs child slots that lie outside the tree.
    """
    # find the next token to assign from pre-order ecd
    assign_idx_pre = find_first_placeholder(pre_order_ecd, placeholder)
    if assign_idx_pre == -1:
        # without this guard the -1 sentinel would be used as a negative index
        # and silently overwrite the last slot of both encodings
        raise ValueError("no placeholder left to assign in pre_order_ecd")
    assign_idx_level = pre_to_level_map[assign_idx_pre]

    # find the children indices (in level-order) that the action opens
    left_idx_level = assign_idx_level * 2 + 1
    right_idx_level = assign_idx_level * 2 + 2
    if action in binary_ops:
        child_idxs_level = (left_idx_level, right_idx_level)
    elif action in unary_ops:
        child_idxs_level = (left_idx_level,)
    else:
        child_idxs_level = ()

    # validate before touching the encodings so that a failure does not leave
    # them half-updated
    if child_idxs_level and child_idxs_level[-1] >= len(level_to_pre_map):
        raise IndexError(
            "action {!r} needs child slots beyond the last level of the tree".format(action)
        )

    pre_order_ecd[assign_idx_pre] = action
    level_order_ecd[assign_idx_level] = action

    for child_idx_level in child_idxs_level:
        level_order_ecd[child_idx_level] = placeholder
        pre_order_ecd[level_to_pre_map[child_idx_level]] = placeholder


In [101]:
import random

# tokens that open no child slots when assigned (used as leaves)
features = ["x1", "x2"]
# operator tokens: "*" and "+" open two child slots, "sin" and "cos" open one
functions = ["*", "+", "sin", "cos"]

def simulate_tree_construction(depth=3, placeholder=-2, seed=None):
    """
    Build a random expression tree token by token, printing the pre-order and
    level-order encodings after every assignment.

    Construction starts with a single placeholder at the root and repeatedly
    fills the first placeholder of the pre-order encoding. Nodes that still
    have child slots inside the tree draw from `features + functions`; nodes
    on the last level draw from `features` only. Slots that are never opened
    keep the value -1.

    Parameters:
        depth: number of levels of the tree (it has `2 ** depth - 1` slots).
        placeholder: marker of a slot that is waiting for a token.
        seed: optional seed for a reproducible run. When None (the default)
            the module-level `random` generator is used, as before.

    Returns:
        The tuple `(pre_order_ecd, level_order_ecd)`.
    """
    # a private generator makes seeded runs reproducible without disturbing
    # the global `random` state
    rng = random if seed is None else random.Random(seed)
    level_to_pre_map, pre_to_level_map = get_level_to_pre_mapping(depth)
    n = 2 ** depth - 1
    pre_order_ecd, level_order_ecd = [-1] * n, [-1] * n
    pre_order_ecd[0] = level_order_ecd[0] = placeholder
    next_token_pre = find_first_placeholder(pre_order_ecd, placeholder)
    while next_token_pre != -1:
        next_token_level = pre_to_level_map[next_token_pre]
        if 2 * next_token_level + 1 < n:
            # the node has child slots inside the tree: any token is allowed
            action = rng.choice(features + functions)
        else:
            # last level: an operator would need children outside the tree
            action = rng.choice(features)
        ecd_update_one_step(pre_order_ecd, level_order_ecd, action,
                            level_to_pre_map, pre_to_level_map, placeholder)
        next_token_pre = find_first_placeholder(pre_order_ecd, placeholder)
        print("action to assign:", action)
        print("pre-order:", pre_order_ecd)
        print("level-order:", level_order_ecd)
    return pre_order_ecd, level_order_ecd

In [102]:
# one random construction with the default depth of 3; tokens are drawn with
# `random.choice`, so the output varies between runs
simulate_tree_construction()

action to assign: +
pre-order: ['+', -2, -1, -1, -2, -1, -1]
level-order: ['+', -2, -2, -1, -1, -1, -1]
action to assign: sin
pre-order: ['+', 'sin', -2, -1, -2, -1, -1]
level-order: ['+', 'sin', -2, -2, -1, -1, -1]
action to assign: x1
pre-order: ['+', 'sin', 'x1', -1, -2, -1, -1]
level-order: ['+', 'sin', -2, 'x1', -1, -1, -1]
action to assign: *
pre-order: ['+', 'sin', 'x1', -1, '*', -2, -2]
level-order: ['+', 'sin', '*', 'x1', -1, -2, -2]
action to assign: x2
pre-order: ['+', 'sin', 'x1', -1, '*', 'x2', -2]
level-order: ['+', 'sin', '*', 'x1', -1, 'x2', -2]
action to assign: x1
pre-order: ['+', 'sin', 'x1', -1, '*', 'x2', 'x1']
level-order: ['+', 'sin', '*', 'x1', -1, 'x2', 'x1']


(['+', 'sin', 'x1', -1, '*', 'x2', 'x1'],
 ['+', 'sin', '*', 'x1', -1, 'x2', 'x1'])