# main functions

In [None]:
import random
import numpy as np
from collections import defaultdict
from typing import List, Tuple

# ---------------------------------------------------------------------------
# Configurations and constants

# Operator definitions
# Operators that take a single operand (stored in ``Node.left``).
UNARY_OPERATORS = [
    'sin', 'cos', 'exp', 'log', 'sqrt', 'tan',
    'tanh', 'sinh', 'cosh', 'abs', 'log10', 'log2',
]
# Operators that take two operands (``Node.left`` and ``Node.right``).
BINARY_OPERATORS = ['+', '-', '*', '/', '**', 'mod']

# Relative sampling weight of every operator. The weights are passed to
# ``random.choices`` and therefore need not sum to 1. Insertion order is kept
# identical to the historical table because callers build lists from the keys.
OPERATOR_WEIGHTS = dict([
    ('+', 0.3),
    ('-', 0.3),
    ('*', 0.2),
    ('/', 0.1),     # less frequent divisions
    ('**', 0.05),   # rare powers
    ('sin', 0.15),
    ('cos', 0.15),
    ('exp', 0.05),  # very rare exponential
    ('log', 0.1),
    ('sqrt', 0.1),
    ('tan', 0.05),
    ('tanh', 0.05),
    ('sinh', 0.05),
    ('cosh', 0.05),
    ('abs', 0.05),
    ('log10', 0.05),
    ('log2', 0.05),
    ('mod', 0.05),
])

class MutationConfig:
    """Constants shared by the mutation and crossover operators."""

    # Relative weight of each mutation kind; fed to ``random.choices``.
    MUTATION_WEIGHTS = dict(
        SUBTREE=0.3,
        OPERATOR=0.2,
        VALUE=0.2,
        HOIST=0.1,
        EXPANSION=0.1,
        COLLAPSE=0.1,
    )

    # (min, max) depth of freshly generated replacement subtrees.
    SUBTREE_DEPTH_RANGE = (1, 4)
    # Relative size of the random step applied to numeric leaves.
    VALUE_STEP_FACTOR = 0.1
    # Not referenced by the code visible in this file.
    MUTATION_DECAY = 0.5
    # Hard limits enforced after mutation / crossover.
    MAX_TREE_DEPTH = 10
    MAX_TREE_NODES = 40


    



# ---------------------------------------------------------------------------
# Node class definition
class Node:
    """One node of an expression tree: an operator node or a leaf."""

    def __init__(self, value=None, op=None, left: 'Node' = None, right: 'Node' = None):
        # Leaves carry ``value`` (a number or a variable name such as "x[0]");
        # operator nodes carry ``op`` (e.g. "+", "sin", "mod").
        self.value, self.op = value, op
        self.left, self.right = left, right

    def evaluate(self, x: np.ndarray):
        """
        Evaluate the expression rooted at this node on ``x``.

        Operators are looked up in SAFE_OPERATIONS and leaf values go through
        clip_value. Any exception raised along the way yields 0. A node that
        has neither a known operator nor a value yields None.
        """
        try:
            operator = self.op
            if operator in BINARY_OPERATORS:
                if not self.left or not self.right:
                    return 0
                lhs = self.left.evaluate(x)
                rhs = self.right.evaluate(x)
                return SAFE_OPERATIONS[operator](lhs, rhs)
            if operator in UNARY_OPERATORS:
                if not self.left:
                    return 0
                operand = self.left.evaluate(x)
                return SAFE_OPERATIONS[operator](operand)
            leaf = self.value
            if isinstance(leaf, str) and leaf.startswith("x["):
                # "x[<i>]" -> take row <i> of the input.
                return clip_value(x[int(leaf[2:-1])])
            if leaf is not None:
                return clip_value(leaf)
        except Exception:
            return 0
        return None

    def copy(self) -> 'Node':
        """Return a deep copy of the subtree rooted at this node."""
        left_copy = self.left.copy() if self.left else None
        right_copy = self.right.copy() if self.right else None
        return Node(value=self.value, op=self.op, left=left_copy, right=right_copy)

    def __str__(self) -> str:
        """Render the subtree: infix for two children, call form for one."""
        if self.op and self.left:
            if self.right:
                return f"({self.left} {self.op} {self.right})"
            return f"{self.op}({self.left})"
        return str(self.value)


def compile_tree(root: Node):
    """
    Turn an expression tree into a callable ``f(x)``.

    The tree is rendered as a Python expression string, wrapped in a lambda
    and compiled with ``eval``. Function names in the expression resolve to
    the safe_* helpers. If ``eval`` fails, a fallback returning zeros shaped
    like ``x[0]`` is returned instead. A malformed tree (operator without a
    left operand) raises ValueError while the string is being built.
    """
    # Binary operators that are rendered as function calls instead of infix.
    call_style_ops = {'/': 'safe_divide', '**': 'safe_pow', 'mod': 'safe_mod'}

    def render(node: Node) -> str:
        if not node.op:
            # Leaf: variables are already strings such as 'x[0]'.
            return node.value if isinstance(node.value, str) else str(node.value)
        if node.left is None:
            raise ValueError("Operator node without operand.")
        lhs = render(node.left)
        if node.right is None:
            # Unary operator: the name is resolved through the eval globals.
            return f"{node.op}({lhs})"
        rhs = render(node.right)
        if node.op in call_style_ops:
            return f"{call_style_ops[node.op]}({lhs}, {rhs})"
        # +, -, * keep their infix syntax.
        return f"({lhs} {node.op} {rhs})"

    func_str = f"lambda x: {render(root)}"

    # Names available inside the evaluated expression.
    safe_globals = {
        "np": np,
        "safe_divide": safe_divide,
        "safe_pow": safe_pow,
        "safe_mod": safe_mod,
        "sin": safe_sin,
        "cos": safe_cos,
        "exp": safe_exp,
        "log": safe_log,
        "sqrt": safe_sqrt,
        "tan": safe_tan,
        "tanh": safe_tanh,
        "sinh": safe_sinh,
        "cosh": safe_cosh,
        "abs": safe_abs,
        "log10": safe_log10,
        "log2": safe_log2
    }

    # NOTE(review): eval is applied to a string generated from the tree; this
    # is only safe as long as leaf values never come from untrusted input.
    try:
        return eval(func_str, safe_globals)
    except Exception:
        # Malformed expression: fall back to an all-zero predictor.
        def fallback(x):
            return np.zeros_like(x[0])
        return fallback


# ---------------------------------------------------------------------------
# Safe functions and mathematical utilities
# Bounds applied to leaf values and to non-finite predictions.
CLIP_MIN = -1e6
CLIP_MAX = 1e6


def clip_value(value):
    """Clamp ``value`` (scalar or array) to the range [CLIP_MIN, CLIP_MAX]."""
    lower, upper = CLIP_MIN, CLIP_MAX
    return np.clip(value, lower, upper)

def safe_divide(a, b):
    """
    Divide ``a`` by ``b``, yielding 0 wherever the divisor is zero.

    Scalars behave as before (``a / b`` or ``0``). Array inputs are handled
    element-wise with broadcasting; the previous ``b != 0`` test raised
    "truth value of an array is ambiguous" for any multi-element divisor.
    """
    if np.ndim(a) == 0 and np.ndim(b) == 0:
        return a / b if b != 0 else 0

    numer, denom = np.broadcast_arrays(np.asarray(a, dtype=float),
                                       np.asarray(b, dtype=float))
    quotient = np.zeros(numer.shape, dtype=float)
    # Positions with a zero divisor are skipped and keep their initial 0.
    np.divide(numer, denom, out=quotient, where=(denom != 0))
    return quotient

def safe_sin(x):
    """Sine of ``x`` after clamping the argument to [-1000, 1000]; result clamped likewise."""
    bounded = np.clip(x, -1000, 1000)
    return np.clip(np.sin(bounded), -1000, 1000)

def safe_cos(x):
    """Cosine of ``x`` after clamping the argument to [-1000, 1000]; result clamped likewise."""
    bounded = np.clip(x, -1000, 1000)
    return np.clip(np.cos(bounded), -1000, 1000)

def safe_sinh(x):
    """Hyperbolic sine with the argument clamped to [-100, 100] and the result to [-1000, 1000]."""
    bounded = np.clip(x, -100, 100)
    return np.clip(np.sinh(bounded), -1000, 1000)

def safe_cosh(x):
    """Hyperbolic cosine with the argument clamped to [-100, 100] and the result to [-1000, 1000]."""
    bounded = np.clip(x, -100, 100)
    return np.clip(np.cosh(bounded), -1000, 1000)

def safe_tan(x):
    """
    Tangent of ``x`` with a penalty value of 1e6 at the poles.

    An argument is treated as a pole when ``x mod pi`` is close to ``pi / 2``.
    Elsewhere the argument is clamped to [-1000, 1000] and the result to
    [-1000, 1000]. Works element-wise on arrays; the previous
    ``if np.isclose(...)`` raised on any multi-element array.
    """
    near_pole = np.isclose(np.mod(x, np.pi), np.pi / 2)
    bounded = np.clip(np.tan(np.clip(x, -1000, 1000)), -1000, 1000)
    if np.ndim(x) == 0:
        # Scalar path keeps the historical return values.
        return float(10**6) if near_pole else bounded
    return np.where(near_pole, float(10**6), bounded)

def safe_log10(x):
    """
    Base-10 logarithm returning the penalty value 1e6 for non-positive input.

    Valid results are clamped to [-1000, 1000]. Arrays are processed
    element-wise; the previous ``if x <= 0`` raised on multi-element arrays.
    """
    penalty = float(10**6)
    if np.ndim(x) == 0:
        # Scalar path keeps the historical behaviour.
        if x <= 0:
            return penalty
        return np.clip(np.log10(x), -1000, 1000)

    values = np.asarray(x, dtype=float)
    out = np.full(values.shape, penalty)
    # NaN is not "<= 0", so it stays in the valid set and propagates as NaN,
    # exactly as it does on the scalar path.
    valid = ~(values <= 0)
    out[valid] = np.clip(np.log10(values[valid]), -1000, 1000)
    return out

def safe_pow(base, exp):
    """
    Power function with clamping and penalty values.

    ``base`` is clamped to [-1000, 1000] and ``exp`` to [-40, 40]. The penalty
    value 1e6 is returned where the real power is undefined: zero raised to a
    negative exponent, or a negative base with a non-integer exponent.
    Every other result is clamped to [-1000, 1000].

    Fixes over the previous version: it called the non-existent
    ``np.isinteger`` (AttributeError for every negative base) and used array
    comparisons inside ``if``, which raised on multi-element arrays. Inputs
    are promoted to float, so integer bases with negative exponents now give
    the real quotient instead of raising inside NumPy.
    """
    scalar_call = np.ndim(base) == 0 and np.ndim(exp) == 0
    b = np.clip(np.asarray(base, dtype=float), -1000, 1000)
    e = np.clip(np.asarray(exp, dtype=float), -40, 40)

    zero_to_negative = (b == 0) & (e < 0)
    negative_to_fraction = (b < 0) & (e != np.floor(e))
    undefined = zero_to_negative | negative_to_fraction

    # The undefined positions produce inf/nan warnings; they are overwritten below.
    with np.errstate(all='ignore'):
        powered = np.clip(np.power(b, e), -1000, 1000)

    result = np.where(undefined, float(10**6), powered)
    return float(result) if scalar_call else result

def safe_log2(x):
    """
    Base-2 logarithm returning the penalty value 1e6 for non-positive input.

    Valid results are clamped to [-1000, 1000]. Arrays are processed
    element-wise; the previous ``if x <= 0`` raised on multi-element arrays.
    """
    penalty = float(10**6)
    if np.ndim(x) == 0:
        # Scalar path keeps the historical behaviour.
        if x <= 0:
            return penalty
        return np.clip(np.log2(x), -1000, 1000)

    values = np.asarray(x, dtype=float)
    out = np.full(values.shape, penalty)
    # NaN is not "<= 0", so it propagates as NaN like on the scalar path.
    valid = ~(values <= 0)
    out[valid] = np.clip(np.log2(values[valid]), -1000, 1000)
    return out

def safe_mod(x, y):
    """
    Modulo returning the penalty value 1e6 wherever the divisor is zero.

    Both operands are clamped to [-1000, 1000] before ``np.mod`` and the
    result is clamped to the same range. Arrays are processed element-wise
    with broadcasting; the previous ``if y == 0`` raised on multi-element
    arrays.
    """
    penalty = float(10**6)
    if np.ndim(x) == 0 and np.ndim(y) == 0:
        # Scalar path keeps the historical behaviour.
        if y == 0:
            return penalty
        return np.clip(np.mod(np.clip(x, -1000, 1000), np.clip(y, -1000, 1000)),
                       -1000, 1000)

    dividend = np.clip(np.asarray(x, dtype=float), -1000, 1000)
    raw_divisor = np.asarray(y, dtype=float)
    zero_divisor = raw_divisor == 0
    # Substitute a harmless divisor at the zero positions; they are overwritten below.
    divisor = np.where(zero_divisor, 1.0, np.clip(raw_divisor, -1000, 1000))
    remainder = np.clip(np.mod(dividend, divisor), -1000, 1000)
    return np.where(zero_divisor, penalty, remainder)

def safe_tanh(x):
    """Hyperbolic tangent with argument and result both clamped to [-1000, 1000]."""
    bounded = np.clip(x, -1000, 1000)
    return np.clip(np.tanh(bounded), -1000, 1000)

def safe_exp(x):
    """Exponential with the argument clamped to [-100, 100] and the result to [-1000, 1000]."""
    bounded = np.clip(x, -100, 100)
    return np.clip(np.exp(bounded), -1000, 1000)

def safe_log(x):
    """
    Natural logarithm returning the penalty value 1e6 for non-positive input.

    Valid results are clamped to [-1000, 1000]. Arrays are processed
    element-wise; the previous ``if x <= 0`` raised on multi-element arrays.
    """
    penalty = float(10**6)
    if np.ndim(x) == 0:
        # Scalar path keeps the historical behaviour.
        if x <= 0:
            return penalty
        return np.clip(np.log(x), -1000, 1000)

    values = np.asarray(x, dtype=float)
    out = np.full(values.shape, penalty)
    # NaN is not "<= 0", so it propagates as NaN like on the scalar path.
    valid = ~(values <= 0)
    out[valid] = np.clip(np.log(values[valid]), -1000, 1000)
    return out

def safe_sqrt(x):
    """Square root with negative input treated as 0 and the result clamped to [-1000, 1000]."""
    non_negative = np.maximum(x, 0)
    return np.clip(np.sqrt(non_negative), -1000, 1000)

def safe_abs(x):
    """Absolute value clamped to [-1000, 1000]."""
    magnitude = np.abs(x)
    return np.clip(magnitude, -1000, 1000)

# Lookup table from operator symbol to its implementation, used by
# Node.evaluate. Binary operators map to two-argument callables, unary
# operators to one-argument callables.
SAFE_OPERATIONS = {
    # Plain arithmetic needs no guarding.
    '+': lambda a, b: a + b,
    '-': lambda a, b: a - b,
    '*': lambda a, b: a * b,
    '/': safe_divide,
    # Unary functions.
    'sin': safe_sin,
    'cos': safe_cos,
    'exp': safe_exp,
    'log': safe_log,
    'sqrt': safe_sqrt,
    'tan': safe_tan,
    'tanh': safe_tanh,
    'sinh': safe_sinh,
    'cosh': safe_cosh,
    'abs': safe_abs,
    'log10': safe_log10,
    'log2': safe_log2,
    # Remaining binary operators.
    '**': safe_pow,
    'mod': safe_mod
}

# ---------------------------------------------------------------------------
# Helper functions for tree manipulation

def get_random_node(node: Node, include_root: bool = True) -> Node:
    """
    Pick a uniformly random node from the tree rooted at ``node``.

    Nodes are gathered in pre-order (node, left subtree, right subtree).
    With include_root=False the root is dropped from the candidates, unless
    it is the only node.
    """
    candidates = []
    pending = [node]
    while pending:
        current = pending.pop()
        if current is None:
            continue
        candidates.append(current)
        # Push right first so that the left subtree is visited first.
        pending.append(current.right)
        pending.append(current.left)

    if len(candidates) > 1 and not include_root:
        candidates = candidates[1:]
    return random.choice(candidates)

def get_random_node_with_parent(root: Node) -> Tuple[Node, Node, bool]:
    """
    Pick a random ``(node, parent, is_left)`` triple from the tree.

    ``is_left`` tells whether ``node`` is the left child of ``parent``.
    The root is reported with ``parent`` and ``is_left`` both None.
    """
    entries = []
    pending = [(root, None, None)]
    while pending:
        current, parent, is_left = pending.pop()
        if current is None:
            continue
        entries.append((current, parent, is_left))
        # Right goes on the stack first so the left subtree is listed first.
        pending.append((current.right, current, False))
        pending.append((current.left, current, True))
    return random.choice(entries)

def replace_node(root: Node, target: Node, new_subtree: Node) -> bool:
    """
    Swap ``target`` for ``new_subtree`` somewhere below ``root``.

    Only children are inspected, so ``root`` itself is never replaced.
    Returns True as soon as one replacement has been made, False otherwise.
    """
    if root is None:
        return False

    # Direct children first (left before right) ...
    for side in ("left", "right"):
        if getattr(root, side) == target:
            setattr(root, side, new_subtree)
            return True

    # ... then descend, again left before right, stopping at the first hit.
    return any(
        replace_node(child, target, new_subtree)
        for child in (root.left, root.right)
    )

def get_random_leaf_with_parent(root: Node) -> Tuple[Node, Node, bool]:
    """
    Pick a random leaf as a ``(leaf, parent, is_left)`` triple.

    A leaf is a node with no children. Returns ``(None, None, None)`` when
    the tree has no leaves (i.e. ``root`` is None).
    """
    found = []
    pending = [(root, None, None)]
    while pending:
        current, parent, is_left = pending.pop()
        if current is None:
            continue
        if current.left is None and current.right is None:
            found.append((current, parent, is_left))
            continue
        # Right goes on the stack first so leaves are listed left to right.
        pending.append((current.right, current, False))
        pending.append((current.left, current, True))

    return random.choice(found) if found else (None, None, None)

# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# Function to sanitize tree

def sanitize_tree(node: Node, max_depth: int, n_variables: int) -> Node:
    """
    Repair a tree so that it respects ``max_depth`` and has no missing operands.

    - A missing node is replaced by a freshly generated random tree.
    - Once the depth budget is used up, operator nodes become variable leaves.
    - Unary operators lose any right child.

    NOTE: a later cell of this file defines ``sanitize_tree`` again with an
    extra ``current_depth`` parameter; once that cell runs it replaces this one.
    """
    if node is None:
        return create_random_tree(0, max_depth, n_variables)

    if max_depth <= 0:
        if node.op is None:
            return node
        # Out of depth budget: collapse the operator into a random variable.
        return Node(value=f"x[{random.randint(0, n_variables - 1)}]")

    remaining = max_depth - 1
    if node.op in BINARY_OPERATORS:
        node.left = sanitize_tree(node.left, remaining, n_variables)
        node.right = sanitize_tree(node.right, remaining, n_variables)
    elif node.op in UNARY_OPERATORS:
        node.left = sanitize_tree(node.left, remaining, n_variables)
        node.right = None
    return node

# ---------------------------------------------------------------------------
# Genetic Operators: Crossover and Mutations

def crossover(parent1: Node, parent2: Node, n_variables: int) -> Node:
    """
    Build an offspring from a copy of ``parent1`` and a subtree of ``parent2``.

    A random target node is chosen in the copy and a random donor node in
    ``parent2``:
      - if the target is an operator node, it is replaced by a copy of the
        donor subtree;
      - if the target is a leaf, no subtree is transplanted; the leaf is
        re-drawn instead (a variable gets a new index, anything else gets a
        new constant).
    The result always goes through ``sanitize_tree``.

    The donor copy is repaired according to its OWN operator arity. The
    previous version used the target's arity, which attached a random right
    child to unary donors (later rendered as invalid infix such as
    ``(a sin b)``) and hidden children to leaf donors.
    """
    offspring = parent1.copy()
    # The parent link is needed to splice the new subtree in.
    target, parent, is_left = get_random_node_with_parent(offspring)
    donor = get_random_node(parent2, include_root=True)
    new_subtree = donor.copy()

    target_is_operator = (target.op in BINARY_OPERATORS
                          or target.op in UNARY_OPERATORS)
    if not target_is_operator:
        # Leaf target: perturb it in place instead of transplanting.
        if isinstance(target.value, str) and target.value.startswith("x["):
            target.value = f"x[{random.randint(0, n_variables-1)}]"
        else:
            target.value = generate_constant()
        return sanitize_tree(offspring, MutationConfig.MAX_TREE_DEPTH, n_variables)

    # Make the transplanted subtree's root consistent with its own operator.
    fill_depth = MutationConfig.SUBTREE_DEPTH_RANGE[1]
    if new_subtree.op in BINARY_OPERATORS:
        if new_subtree.left is None:
            new_subtree.left = create_random_tree(0, fill_depth, n_variables)
        if new_subtree.right is None:
            new_subtree.right = create_random_tree(0, fill_depth, n_variables)
    elif new_subtree.op in UNARY_OPERATORS:
        if new_subtree.left is None:
            new_subtree.left = create_random_tree(0, fill_depth, n_variables)
        new_subtree.right = None
    elif new_subtree.op is None:
        # A leaf must not carry children.
        new_subtree.left = None
        new_subtree.right = None

    # Splice the donor copy in place of the target.
    if parent is None:
        offspring = new_subtree
    elif is_left:
        parent.left = new_subtree
    else:
        parent.right = new_subtree

    return sanitize_tree(offspring, MutationConfig.MAX_TREE_DEPTH, n_variables)


def subtree_mutation(individual: Node, max_depth: int, n_variables: int) -> Node:
    """
    Replace a random node of a copy of ``individual`` with a new random subtree.

    The depth of the new subtree is drawn from
    MutationConfig.SUBTREE_DEPTH_RANGE. ``max_depth`` is accepted but not used.
    """
    mutated = individual.copy()
    target = get_random_node(mutated, include_root=True)

    shallowest, deepest = MutationConfig.SUBTREE_DEPTH_RANGE[0], MutationConfig.SUBTREE_DEPTH_RANGE[1]
    new_subtree = create_random_tree(0, random.randint(shallowest, deepest), n_variables)

    # Replacing the root means the new subtree is the whole individual.
    if mutated == target:
        return new_subtree
    replace_node(mutated, target, new_subtree)
    return mutated

def point_mutation(individual: Node, n_variables: int) -> Node:
    """
    Mutate a single random node of a copy of ``individual``.

    Operator nodes get a different operator, chosen among those matching the
    node's current number of children; missing operands are generated.
    Numeric leaves are nudged by a relative random step and variable leaves
    get a new index. The result goes through ``sanitize_tree``.
    """
    mutated = individual.copy()
    target = get_random_node(mutated, include_root=True)

    if target.op is None:
        current = target.value
        if isinstance(current, (int, float)):
            step = MutationConfig.VALUE_STEP_FACTOR
            # Scale the step by the value itself (by 1 when the value is 0).
            target.value = current + random.uniform(-step, step) * (current if current != 0 else 1)
        elif isinstance(current, str) and current.startswith("x["):
            target.value = f"x[{random.randint(0, n_variables - 1)}]"
    else:
        # Candidate operators depend on how many children the node has.
        if target.left is None:
            candidates = list(OPERATOR_WEIGHTS.keys())
        elif target.right is None:
            candidates = [op for op in OPERATOR_WEIGHTS.keys() if op in UNARY_OPERATORS]
        else:
            candidates = [op for op in OPERATOR_WEIGHTS.keys() if op in BINARY_OPERATORS]

        # Prefer an operator different from the current one when possible.
        if target.op in candidates and len(candidates) > 1:
            candidates.remove(target.op)
        new_op = random.choice(candidates)
        target.op = new_op

        # Generate whatever operands the new operator still lacks.
        if new_op in BINARY_OPERATORS:
            if target.left is None:
                target.left = create_random_tree(0, MutationConfig.SUBTREE_DEPTH_RANGE[1], n_variables)
            if target.right is None:
                target.right = create_random_tree(0, MutationConfig.SUBTREE_DEPTH_RANGE[1], n_variables)
        elif new_op in UNARY_OPERATORS:
            if target.left is None:
                target.left = create_random_tree(0, MutationConfig.SUBTREE_DEPTH_RANGE[1], n_variables)

    return sanitize_tree(mutated, MutationConfig.MAX_TREE_DEPTH, n_variables)

def hoist_mutation(individual: Node) -> Node:
    """
    Promote a random subtree to be the whole individual.

    The root is excluded from the draw whenever the tree has more than one
    node, so the result is normally smaller than the input.
    """
    working_copy = individual.copy()
    chosen = get_random_node(working_copy, include_root=False)
    return chosen.copy()

def expansion_mutation(individual: Node, max_depth: int, n_variables: int) -> Node:
    """
    Grow a copy of ``individual`` by turning a random leaf into a random subtree.

    The depth of the new subtree is drawn from
    MutationConfig.SUBTREE_DEPTH_RANGE. ``max_depth`` is accepted but not used.
    """
    mutated = individual.copy()
    leaf, parent, is_left = get_random_leaf_with_parent(mutated)
    if leaf is None:
        return mutated

    depth_bounds = MutationConfig.SUBTREE_DEPTH_RANGE
    grown = create_random_tree(0, random.randint(depth_bounds[0], depth_bounds[1]), n_variables)

    # A leaf without parent is the root: the new subtree replaces everything.
    if parent is None:
        return grown
    if is_left:
        parent.left = grown
    else:
        parent.right = grown
    return mutated

def collapse_mutation(individual: Node) -> Node:
    """
    Shrink a copy of ``individual`` by replacing a random internal node with a
    constant leaf drawn uniformly from [-10, 10].

    A tree without internal nodes is returned unchanged (as a copy).
    """
    mutated = individual.copy()

    # Collect every node that has at least one child, in pre-order.
    internal = []
    pending = [(mutated, None, None)]
    while pending:
        current, parent, is_left = pending.pop()
        if current is None:
            continue
        if not (current.left is None and current.right is None):
            internal.append((current, parent, is_left))
        pending.append((current.right, current, False))
        pending.append((current.left, current, True))

    if not internal:
        return mutated

    _, parent, is_left = random.choice(internal)
    replacement = Node(value=random.uniform(-10, 10))
    if parent is None:
        return replacement
    if is_left:
        parent.left = replacement
    else:
        parent.right = replacement
    return mutated

# ---------------------------------------------------------------------------
# Functions for random tree generation

def generate_constant():
    """
    Draw a random constant.

    Half of the time the value is uniform in [-1, 1], otherwise it is
    uniform in [-10, 10].
    """
    bound = 1 if random.random() < 0.5 else 10
    return random.uniform(-bound, bound)

def create_random_tree(depth: int, max_depth: int, n_variables: int) -> Node:
    """
    Recursively build a random expression tree.

    A leaf is produced when ``depth`` has reached ``max_depth`` or, below the
    root, with probability 0.5. Leaves are variables 70% of the time and
    constants otherwise. Operator nodes are drawn according to
    OPERATOR_WEIGHTS; binary operators get two random children, every other
    operator gets one.
    """
    stop_here = depth >= max_depth or (depth > 0 and random.random() < 0.5)
    if stop_here:
        if random.random() < 0.7:
            return Node(value=f"x[{random.randint(0, n_variables - 1)}]")
        return Node(value=generate_constant())

    symbols = list(OPERATOR_WEIGHTS.keys())
    weights = list(OPERATOR_WEIGHTS.values())
    op = random.choices(symbols, weights=weights, k=1)[0]

    first_child = create_random_tree(depth + 1, max_depth, n_variables)
    if op in BINARY_OPERATORS:
        second_child = create_random_tree(depth + 1, max_depth, n_variables)
        return Node(op=op, left=first_child, right=second_child)
    # Unary operators, and any operator not listed as binary, take one operand.
    return Node(op=op, left=first_child)

def tournament_selection(population, fitness_scores, tournament_size=3):
    """
    Return the fittest of ``tournament_size`` distinct random individuals.

    Lower fitness is better. Contestants are drawn without replacement with
    ``np.random.choice``.
    """
    contenders = np.random.choice(len(population), size=tournament_size, replace=False)
    contender_scores = [fitness_scores[idx] for idx in contenders]
    best_position = np.argmin(contender_scores)
    return population[contenders[best_position]]


def calculate_fitness(tree: Node, x: np.ndarray, y_true: np.ndarray) -> float:
    """
    Mean squared error of ``tree`` on ``(x, y_true)``; lower is better.

    The tree is compiled once and applied to the whole input. NaN predictions
    count as 0 and infinities as CLIP_MAX / CLIP_MIN. Any exception makes the
    individual maximally unfit (``inf``).
    """
    try:
        predict = compile_tree(tree)
        raw_prediction = predict(x)
        prediction = np.nan_to_num(raw_prediction, nan=0.0, posinf=CLIP_MAX, neginf=CLIP_MIN)
        residual = y_true - prediction
        return np.mean(np.square(residual))
    except Exception:
        return float('inf')



# ---------------------------------------------------------------------------
# End of module


In [3]:

def tree_depth(node: Node) -> int:
    """Return the number of levels in the tree (0 for None, 1 for a single node)."""
    levels = 0
    frontier = [node] if node is not None else []
    while frontier:
        levels += 1
        # Move one level down, keeping only the children that exist.
        frontier = [
            child
            for parent in frontier
            for child in (parent.left, parent.right)
            if child is not None
        ]
    return levels

def count_nodes(node: Node) -> int:
    """Return the total number of nodes in the tree (0 for None)."""
    total = 0
    pending = [node]
    while pending:
        current = pending.pop()
        if current is None:
            continue
        total += 1
        pending.append(current.left)
        pending.append(current.right)
    return total

def prune_tree(node: Node, max_depth: int, max_nodes: int) -> Node:
    """
    Shrink a tree (in place) until it respects both size limits.

    Depth is counted like ``tree_depth``: a single node has depth 1.
      1. Any node sitting at level ``max_depth`` that still has children is
         replaced by a constant leaf, so only the excess levels are removed.
      2. While the tree has more than ``max_nodes`` nodes, the deepest node
         that has children is collapsed into a constant leaf.

    The previous version compared the *height* of each subtree with
    ``max_depth``, which turned an over-deep individual into one constant at
    the root, and its node-count branch cut a single child without ensuring
    the limit was met.

    Returns the pruned root (None for a None input).
    """
    if node is None:
        return None

    def make_leaf() -> Node:
        return Node(value=random.uniform(-10, 10))

    def has_children(candidate: Node) -> bool:
        return candidate.left is not None or candidate.right is not None

    def trim(current: Node, level: int) -> Node:
        # Cut everything hanging below the last allowed level.
        if current is None:
            return None
        if level >= max_depth and has_children(current):
            return make_leaf()
        current.left = trim(current.left, level + 1)
        current.right = trim(current.right, level + 1)
        return current

    def deepest_internal(root: Node):
        # Return (node, parent, is_left) of the deepest node with children.
        best, best_level = None, 0
        pending = [(root, None, None, 1)]
        while pending:
            current, parent, is_left, level = pending.pop()
            if not has_children(current):
                continue
            if level > best_level:
                best, best_level = (current, parent, is_left), level
            if current.left is not None:
                pending.append((current.left, current, True, level + 1))
            if current.right is not None:
                pending.append((current.right, current, False, level + 1))
        return best

    node = trim(node, 1)

    remaining = count_nodes(node)
    while remaining > max_nodes:
        found = deepest_internal(node)
        if found is None:
            # A single leaf cannot be reduced any further.
            break
        victim, parent, is_left = found
        # The victim's subtree disappears and one leaf takes its place.
        remaining -= count_nodes(victim) - 1
        if parent is None:
            node = make_leaf()
        elif is_left:
            parent.left = make_leaf()
        else:
            parent.right = make_leaf()

    return node

# --- Helper function to apply a mutation chosen from those implemented ---
def apply_mutation(individual, mutation_prob, max_depth, n_variables):
    """
    Mutate ``individual`` with probability ``mutation_prob``.

    The mutation kind is drawn according to MutationConfig.MUTATION_WEIGHTS.
    If the mutated tree exceeds MAX_TREE_DEPTH or MAX_TREE_NODES it is pruned.
    When no mutation happens the very same object is returned.
    """
    if random.random() > mutation_prob:
        return individual

    # Each kind maps to its operator and the arguments that operator expects.
    with_depth = (individual, max_depth, n_variables)
    with_variables = (individual, n_variables)
    dispatch = {
        'SUBTREE': (subtree_mutation, with_depth),
        'OPERATOR': (point_mutation, with_variables),
        'VALUE': (point_mutation, with_variables),
        'HOIST': (hoist_mutation, (individual,)),
        'EXPANSION': (expansion_mutation, with_depth),
        'COLLAPSE': (collapse_mutation, (individual,)),
    }

    kinds = list(MutationConfig.MUTATION_WEIGHTS.keys())
    weights = list(MutationConfig.MUTATION_WEIGHTS.values())
    mutation_type = random.choices(kinds, weights=weights, k=1)[0]

    operator_fn, arguments = dispatch[mutation_type]
    mutated = operator_fn(*arguments)

    # Keep the offspring within the configured size limits.
    too_deep = tree_depth(mutated) > MutationConfig.MAX_TREE_DEPTH
    if too_deep or count_nodes(mutated) > MutationConfig.MAX_TREE_NODES:
        mutated = prune_tree(mutated, MutationConfig.MAX_TREE_DEPTH, MutationConfig.MAX_TREE_NODES)

    return mutated



In [5]:
def function_to_string(root: Node, mse: float, func_name: str) -> str:
    """
    Render a tree as the source code of a NumPy function.

    The result has the form::

        def <func_name>(x: np.ndarray) -> np.ndarray:  # mse: <mse>
            return <expression>

    NOTE(review): the expression uses plain ``/``, ``**`` and ``np.*`` calls,
    whereas compile_tree evaluates trees through the safe_* helpers, so the
    emitted function can differ from what was scored (e.g. on division by
    zero). Raises ValueError for an operator node without a left operand.
    """
    def render(node: Node) -> str:
        if not node.op:
            # Leaf: variable names are emitted verbatim, numbers via str().
            return node.value if isinstance(node.value, str) else str(node.value)
        if node.left is None:
            raise ValueError("Operator node without operand.")

        first = render(node.left)
        if node.right is not None:
            second = render(node.right)
            if node.op == 'mod':
                return f"np.mod({first}, {second})"
            # Every other binary operator is valid Python infix syntax.
            return f"({first} {node.op} {second})"

        # Single operand: sign operators stay symbolic, the rest are np.* calls.
        if node.op in ('-', '+'):
            return f"({node.op}{first})"
        return f"np.{node.op}({first})"

    body = render(root)
    header = f"def {func_name}(x: np.ndarray) -> np.ndarray:  # mse: {mse:.4e}\n"
    return header + f"    return {body}\n"


def sanitize_tree(node: Node, max_depth: int, n_variables: int, current_depth: int = 0) -> Node:
    """
    Recursively check and repair the structure of a tree (in place).

      - A missing node becomes a constant leaf.
      - Leaves are never replaced; any children attached to a leaf are removed.
      - An operator node at or beyond ``max_depth`` becomes a constant leaf.
      - Binary operators get both operands (missing ones are generated).
      - Unary operators get their operand and lose any right child.

    ``current_depth`` is the depth of ``node`` (0 for the root).

    Fixes over the previous version: leaves located at the depth limit were
    overwritten with fresh constants although the docstring promised leaves
    stay untouched, and a right child left on a unary operator was kept,
    making the node look binary to the code that renders expressions.
    """
    if node is None:
        return Node(value=generate_constant())

    if node.op is None:
        # A leaf keeps its value; it must not carry hidden subtrees.
        node.left = None
        node.right = None
        return node

    # Operators are not allowed this deep: cut the branch here.
    if current_depth >= max_depth:
        return Node(value=generate_constant())

    child_depth = current_depth + 1

    def repair(child: Node) -> Node:
        if child is None:
            return create_random_tree(child_depth, max_depth, n_variables)
        return sanitize_tree(child, max_depth, n_variables, child_depth)

    if node.op in BINARY_OPERATORS:
        node.left = repair(node.left)
        node.right = repair(node.right)
    elif node.op in UNARY_OPERATORS:
        node.left = repair(node.left)
        node.right = None
    return node


# main algorithm

In [6]:
import os
import glob


def genetic_algorithm(filepath: str):
    """
    Runs the genetic algorithm on all 'problem_#.npz' files present in the folder
    specified by `filepath` and saves the obtained functions in a Python file named 's323914.py'.

    The function writes a definition for each problem in the file, in the format:

        import numpy as np

        def f1(x: np.ndarray) -> np.ndarray:  # mse: 0.1234
            return <formula>

    Where the number in the function (f1, f2, ...) corresponds to the number extracted from the filename
    (e.g. "problem_1.npz") and the comment indicates the obtained fitness value (mse).

    Each .npz file must contain the arrays 'x' and 'y'; the first axis of 'x'
    is taken as the number of variables. A problem for which no individual
    ever reaches a finite fitness is reported and left out of the output file.
    """
    # Algorithm constants
    POPULATION_SIZE = 1500
    MAX_DEPTH = 7
    GENERATIONS = 150
    TOURNAMENT_SIZE = 15
    ELITISM_RATE = 0.05
    MUTATION_PROB = 0.5
    # Stop once more than this many consecutive generations bring no improvement.
    PATIENCE = 10
    ELITE_COUNT = max(1, int(POPULATION_SIZE * ELITISM_RATE))

    # Find all files matching pattern "problem_*.npz" in specified folder
    problem_files = glob.glob(os.path.join(filepath, "problem_*.npz"))
    solutions = []  # List of tuples: (problem_number, best_fitness, best_tree)

    for prob_file in sorted(problem_files):
        base = os.path.basename(prob_file)
        try:
            prob_number = int(base.split('_')[1].split('.')[0])
        except (IndexError, ValueError):
            print(f"Non-conforming filename, skipping: {prob_file}")
            continue

        print(f"Solving problem {prob_number}...")

        # Load the arrays and close the archive right away.
        with np.load(prob_file) as data:
            x_train = data['x']
            y_train = data['y']
        N_VARIABLES = x_train.shape[0]

        # Initialize population
        population = [create_random_tree(0, MAX_DEPTH, N_VARIABLES) for _ in range(POPULATION_SIZE)]

        best_fitness = float('inf')
        best_tree = None
        # Reset for every problem. Previously this counter was never
        # initialised, so a first generation without improvement raised
        # UnboundLocalError.
        no_improvement = 0

        for _generation in range(GENERATIONS):
            # Calculate fitness for each individual
            fitness_scores = [calculate_fitness(ind, x_train, y_train) for ind in population]

            # Sort population by fitness (lower is better)
            sorted_indices = np.argsort(fitness_scores)
            elites = [population[i] for i in sorted_indices[:ELITE_COUNT]]

            # Update best_tree if necessary
            top = sorted_indices[0]
            if fitness_scores[top] < best_fitness:
                best_fitness = fitness_scores[top]
                best_tree = population[top]
                no_improvement = 0
            else:
                no_improvement += 1

            if no_improvement > PATIENCE:
                print(f"No improvement for {no_improvement} consecutive generations. Early stopping!")
                break

            # Elites survive unchanged; the rest of the population is bred.
            new_population = elites.copy()
            while len(new_population) < POPULATION_SIZE:
                p1 = tournament_selection(population, fitness_scores, TOURNAMENT_SIZE)
                p2 = tournament_selection(population, fitness_scores, TOURNAMENT_SIZE)
                child = crossover(p1, p2, n_variables=N_VARIABLES)
                child = apply_mutation(child, mutation_prob=MUTATION_PROB, max_depth=MAX_DEPTH, n_variables=N_VARIABLES)
                new_population.append(child)

            # Update population for next generation
            population = new_population

        if best_tree is None:
            # Nothing can be written for this problem.
            print(f"Problem {prob_number} - no individual with finite fitness was found, skipping.")
            continue

        print(f"Problem {prob_number} - Best individual: {best_tree} with fitness: {best_fitness}")
        solutions.append((prob_number, best_fitness, best_tree))

    # Write output file "s323914.py" with obtained functions
    output_filename = "s323914.py"
    with open(output_filename, "w") as f:
        f.write("import numpy as np\n\n")
        # For each problem write function definition (f1, f2, ...)
        for prob_number, best_fitness, best_code in sorted(solutions, key=lambda x: x[0]):
            f.write(function_to_string(best_code, best_fitness, f"f{prob_number}"))
    print(f"Solutions saved in {output_filename}")


# train

In [7]:


# Run the whole pipeline on every problem_*.npz file found in the "data"
# folder; the resulting functions are written to s323914.py.
genetic_algorithm("data")


Risoluzione del problema 1...
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning tree
Pruning 

KeyboardInterrupt: 