# main functions

In [67]:
import random
import numpy as np
from collections import defaultdict
from typing import List, Tuple

# ---------------------------------------------------------------------------
# Constants and configurations

# Clip limits for safe operations.
# Used by clip_value(), as the upper input bound of the safe logarithms, and
# as the replacement for -inf/+inf when predictions are cleaned in
# calculate_fitness().
CLIP_MIN = -1e6
CLIP_MAX = 1e6

# Operator definitions.
# Membership in these lists decides arity during tree generation and repair:
# UNARY_OPERATORS take a single operand (stored in Node.left),
# BINARY_OPERATORS take two (Node.left and Node.right).
UNARY_OPERATORS = ['sin', 'cos', 'exp', 'log', 'sqrt', 'tan', 'tanh', 'sinh', 'cosh', 'abs', 'log10', 'log2']
BINARY_OPERATORS = ['+', '-', '*', '/', '**', 'mod']

# Operator weights for random selection.
# These are relative weights handed to random.choices(); they are not
# probabilities and do not need to sum to 1.  The key order is the order in
# which operators are offered to random.choices(), so reordering entries
# changes the results obtained for a fixed random seed.
OPERATOR_WEIGHTS = {
    '+': 0.3,
    '-': 0.3,
    '*': 0.2,
    '/': 0.1,    # Less frequent divisions
    '**': 0.05,  # Rare powers
    'sin': 0.15,
    'cos': 0.15,
    'exp': 0.05, # Very rare exponential
    'log': 0.1,
    'sqrt': 0.1,
    'tan': 0.05,
    'tanh': 0.05,
    'sinh': 0.05,
    'cosh': 0.05,
    'abs': 0.05,
    'log10': 0.05,
    'log2': 0.05,
    'mod': 0.05
}

# Mutation configuration
class MutationConfig:
    """Namespace of tunable constants shared by the genetic operators."""

    # Relative weights used by apply_mutation() to pick a mutation kind via
    # random.choices().  'OPERATOR' and 'VALUE' both dispatch to
    # point_mutation().
    MUTATION_WEIGHTS = {
        'SUBTREE': 0.3,
        'OPERATOR': 0.2,
        'VALUE': 0.2,
        'HOIST': 0.1,
        'EXPANSION': 0.1,
        'COLLAPSE': 0.1
    }

    # Inclusive (min, max) depth for freshly generated subtrees; the upper
    # bound is also used directly when a missing operand has to be created.
    SUBTREE_DEPTH_RANGE = (1, 4)
    # Maximum relative change applied to a numeric constant by point_mutation().
    VALUE_STEP_FACTOR = 0.1
    # NOTE(review): not referenced anywhere in the visible code.
    MUTATION_DECAY = 0.5
    MAX_TREE_DEPTH = 10   # Maximum depth limit
    MAX_TREE_NODES = 40   # Maximum nodes limit

# ---------------------------------------------------------------------------
# Safe mathematical operations

def clip_value(value):
    """Clamp *value* (scalar or array) to the global [CLIP_MIN, CLIP_MAX] range."""
    lower_bound, upper_bound = CLIP_MIN, CLIP_MAX
    return np.clip(value, lower_bound, upper_bound)

def safe_divide(a, b):
    """Divide *a* by *b* element-wise, yielding 0 wherever *b* is zero.

    Works for scalars and for NumPy arrays (with broadcasting).  The previous
    scalar-only test ``b != 0`` raised "truth value of an array is ambiguous"
    as soon as the divisor was an array, which is the normal case when a
    compiled expression is evaluated on a whole dataset.

    Args:
        a: Numerator, scalar or array-like.
        b: Denominator, scalar or array-like.

    Returns:
        A Python float when both inputs are scalars, otherwise a float array
        with the broadcast shape of the inputs.
    """
    numerator, denominator = np.broadcast_arrays(
        np.asarray(a, dtype=float), np.asarray(b, dtype=float)
    )
    # Pre-filled with zeros: positions excluded by `where` keep that value.
    quotient = np.zeros(numerator.shape, dtype=float)
    np.divide(numerator, denominator, out=quotient, where=(denominator != 0))
    return quotient if quotient.ndim else float(quotient)

def safe_sin(x):
    """Sine of *x* with the argument limited to [-1000, 1000].

    The result is clipped to [-1000, 1000] as well, like every other safe
    operator in this module.
    """
    bounded = np.clip(x, -1000, 1000)
    return np.clip(np.sin(bounded), -1000, 1000)

def safe_cos(x):
    """Cosine of *x* with the argument limited to [-1000, 1000].

    The result is additionally clipped to [-1000, 1000].
    """
    bounded = np.clip(x, -1000, 1000)
    return np.clip(np.cos(bounded), -1000, 1000)

def safe_sinh(x):
    """Hyperbolic sine with input limited to [-100, 100] and output to [-1000, 1000]."""
    bounded = np.clip(x, -100, 100)
    return np.clip(np.sinh(bounded), -1000, 1000)

def safe_cosh(x):
    """Hyperbolic cosine with input limited to [-100, 100] and output to [-1000, 1000]."""
    bounded = np.clip(x, -100, 100)
    return np.clip(np.cosh(bounded), -1000, 1000)

def safe_tan(x):
    """Tangent with input limited to [-1000, 1000] and output to [-1000, 1000].

    NaN and +inf results are mapped to 1000, -inf to -1000, before clipping.
    """
    bounded = np.clip(x, -1000, 1000)
    tangent = np.nan_to_num(np.tan(bounded), nan=1000.0, posinf=1000.0, neginf=-1000.0)
    return np.clip(tangent, -1000, 1000)

def safe_tanh(x):
    """Hyperbolic tangent with input and output limited to [-1000, 1000]."""
    bounded = np.clip(x, -1000, 1000)
    return np.clip(np.tanh(bounded), -1000, 1000)

def safe_exp(x):
    """Exponential with input limited to [-100, 100] and output to [-1000, 1000]."""
    bounded = np.clip(x, -100, 100)
    return np.clip(np.exp(bounded), -1000, 1000)

def safe_log(x):
    """Natural logarithm of *x* after forcing it into [1e-8, CLIP_MAX].

    Non-positive inputs therefore evaluate to log(1e-8); the result is
    clipped to [-1000, 1000].
    """
    positive = np.clip(x, 1e-8, CLIP_MAX)
    return np.clip(np.log(positive), -1000, 1000)

def safe_log10(x):
    """Base-10 logarithm of *x* after forcing it into [1e-8, CLIP_MAX].

    The result is clipped to [-1000, 1000].
    """
    positive = np.clip(x, 1e-8, CLIP_MAX)
    return np.clip(np.log10(positive), -1000, 1000)

def safe_log2(x):
    """Base-2 logarithm of *x* after forcing it into [1e-8, CLIP_MAX].

    The result is clipped to [-1000, 1000].
    """
    positive = np.clip(x, 1e-8, CLIP_MAX)
    return np.clip(np.log2(positive), -1000, 1000)

def safe_sqrt(x):
    """Square root where negative inputs are treated as 0; output capped at 1000."""
    non_negative = np.maximum(x, 0)
    return np.clip(np.sqrt(non_negative), -1000, 1000)

def safe_abs(x):
    """Absolute value of *x*, clipped to [-1000, 1000]."""
    magnitude = np.abs(x)
    return np.clip(magnitude, -1000, 1000)


def safe_pow(base, exp):
    """Protected power that works element-wise on scalars and arrays.

    The base is clipped to [-100, 100] and the exponent to [-15, 15].  For
    each element:

      * ``0 ** negative`` yields 0.0 instead of dividing by zero;
      * a negative base with a non-integer exponent (complex result) yields
        ``-CLIP_MIN``, exactly as the scalar implementation did;
      * everything else is ``base ** exp`` clipped to [-1000, 1000].

    The earlier implementation used plain ``if base == 0 and exp < 0``, which
    raises "truth value of an array is ambiguous" for array operands.  Inputs
    are now converted to float, so integer operands no longer hit NumPy's
    "integers to negative integer powers" error path.

    Args:
        base: Scalar or array-like base.
        exp: Scalar or array-like exponent.

    Returns:
        A Python float for scalar inputs, otherwise a float array with the
        broadcast shape of the inputs.
    """
    base_arr, exp_arr = np.broadcast_arrays(
        np.clip(np.asarray(base, dtype=float), -100, 100),
        np.clip(np.asarray(exp, dtype=float), -15, 15),
    )

    zero_to_negative = (base_arr == 0) & (exp_arr < 0)
    complex_result = (base_arr < 0) & ~np.isclose(exp_arr, np.round(exp_arr))
    computable = ~(zero_to_negative | complex_result)

    # Start from zeros so that the excluded 0 ** negative positions stay 0.0.
    result = np.zeros(base_arr.shape, dtype=float)
    np.power(base_arr, exp_arr, out=result, where=computable)
    result = np.clip(result, -1000, 1000)

    if complex_result.any():
        # Same sentinel the scalar version returned for complex results.
        result = np.where(complex_result, -CLIP_MIN, result)

    return result if result.ndim else float(result)


def safe_mod(x, y):
    """Protected modulo that works element-wise on scalars and arrays.

    Both operands are clipped to [-1000, 1000]; wherever the divisor is zero
    the result is 0.0.  The earlier ``if y == 0`` test only worked for scalar
    divisors and raised for arrays.

    Args:
        x: Dividend, scalar or array-like.
        y: Divisor, scalar or array-like.

    Returns:
        A Python float for scalar inputs, otherwise a float array with the
        broadcast shape of the inputs, clipped to [-1000, 1000].
    """
    dividend, divisor = np.broadcast_arrays(
        np.clip(np.asarray(x, dtype=float), -1000, 1000),
        np.clip(np.asarray(y, dtype=float), -1000, 1000),
    )
    # Pre-filled with zeros: positions with a zero divisor keep that value.
    remainder = np.zeros(dividend.shape, dtype=float)
    np.mod(dividend, divisor, out=remainder, where=(divisor != 0))
    remainder = np.clip(remainder, -1000, 1000)
    return remainder if remainder.ndim else float(remainder)

# Map operators to their safe implementations.
# Keys are the operator symbols stored in Node.op; values are callables that
# take one argument (unary operators) or two (binary operators).
# NOTE(review): this table is not referenced by the code visible in this
# file -- compile_tree() builds its own name table -- so it may be unused.
SAFE_OPERATIONS = {
    '+': lambda a, b: a + b,
    '-': lambda a, b: a - b,
    '*': lambda a, b: a * b,
    '/': safe_divide,
    'sin': safe_sin,
    'cos': safe_cos,
    'exp': safe_exp,
    'log': safe_log,
    'sqrt': safe_sqrt,
    'tan': safe_tan,
    'tanh': safe_tanh,
    'sinh': safe_sinh,
    'cosh': safe_cosh,
    'abs': safe_abs,
    'log10': safe_log10,
    'log2': safe_log2,
    '**': safe_pow,
    'mod': safe_mod
}

# ---------------------------------------------------------------------------
# Node class definition

class Node:
    """A node of an expression tree.

    Leaves carry ``value`` (a numeric constant or a variable string such as
    "x[0]") and have ``op`` set to None.  Internal nodes carry ``op`` and one
    operand in ``left`` (unary) or operands in ``left`` and ``right``
    (binary).
    """

    def __init__(self, value=None, op=None, left: 'Node' = None, right: 'Node' = None):
        self.value, self.op = value, op
        self.left, self.right = left, right

    def copy(self) -> 'Node':
        """Return a deep copy of the subtree rooted at this node."""
        left_clone = self.left.copy() if self.left else None
        right_clone = self.right.copy() if self.right else None
        return Node(value=self.value, op=self.op, left=left_clone, right=right_clone)

    def __str__(self) -> str:
        """Render the subtree in infix (binary) / prefix call (unary) notation."""
        # Anything that is not an operator with at least a left operand is
        # printed as its raw value.
        if not self.op or not self.left:
            return str(self.value)
        if self.right:
            return f"({self.left} {self.op} {self.right})"
        return f"{self.op}({self.left})"

# ---------------------------------------------------------------------------
# Tree generation and manipulation functions

def generate_constant():
    """Draw a random constant: half of the time from [-1, 1], otherwise from [-10, 10]."""
    span = 1 if random.random() < 0.5 else 10
    return random.uniform(-span, span)

def create_random_tree(depth: int, max_depth: int, n_variables: int) -> Node:
    """Grow a random expression tree for symbolic regression.

    Args:
        depth: Depth of the node being created (0 for the root).
        max_depth: Depth at which only leaves are produced.
        n_variables: Number of input variables; leaves reference
            ``x[0]`` .. ``x[n_variables - 1]``.

    Returns:
        The root of the generated (sub)tree.
    """
    # The root (depth 0) is always an operator unless max_depth is already
    # reached; deeper nodes become leaves with probability 0.5.
    if depth >= max_depth or (depth > 0 and random.random() < 0.5):
        use_variable = random.random() < 0.7
        if use_variable:
            index = random.randint(0, n_variables - 1)
            return Node(value=f"x[{index}]")
        return Node(value=generate_constant())

    (op,) = random.choices(
        list(OPERATOR_WEIGHTS),
        weights=list(OPERATOR_WEIGHTS.values()),
        k=1,
    )
    child_depth = depth + 1

    if op in BINARY_OPERATORS:
        left = create_random_tree(child_depth, max_depth, n_variables)
        right = create_random_tree(child_depth, max_depth, n_variables)
        return Node(op=op, left=left, right=right)

    # Unary operators, and any operator not listed in either arity list,
    # receive a single operand.
    operand = create_random_tree(child_depth, max_depth, n_variables)
    return Node(op=op, left=operand)

def get_random_node(node: Node, include_root: bool = True) -> Node:
    """Pick a uniformly random node of the tree rooted at *node*.

    With ``include_root=False`` the root is excluded, unless the tree consists
    of the root only.
    """
    collected = []
    pending = [node]
    # Explicit-stack pre-order walk (node, left subtree, right subtree).
    while pending:
        current = pending.pop()
        if current is None:
            continue
        collected.append(current)
        pending.append(current.right)
        pending.append(current.left)

    skip_root = (not include_root) and len(collected) > 1
    candidates = collected[1:] if skip_root else collected
    return random.choice(candidates)

def get_random_node_with_parent(root: Node) -> Tuple[Node, Node, bool]:
    """Pick a random node and report where it hangs.

    Returns:
        ``(node, parent, is_left)``; for the root, ``parent`` and ``is_left``
        are both None.
    """
    entries = []
    pending = [(root, None, None)]
    # Explicit-stack pre-order walk that remembers each node's parent link.
    while pending:
        current, parent, is_left = pending.pop()
        if current is None:
            continue
        entries.append((current, parent, is_left))
        pending.append((current.right, current, False))
        pending.append((current.left, current, True))
    return random.choice(entries)

def get_random_leaf_with_parent(root: Node) -> Tuple[Node, Node, bool]:
    """Pick a random leaf (a node without children) and report where it hangs.

    Returns:
        ``(leaf, parent, is_left)``, or ``(None, None, None)`` when the tree
        is empty.
    """
    found = []
    pending = [(root, None, None)]
    while pending:
        current, parent, is_left = pending.pop()
        if current is None:
            continue
        if current.left is None and current.right is None:
            found.append((current, parent, is_left))
            continue
        # Right is pushed first so the left subtree is visited first.
        pending.append((current.right, current, False))
        pending.append((current.left, current, True))

    if not found:
        return (None, None, None)
    return random.choice(found)

def replace_node(root: Node, target: Node, new_subtree: Node) -> bool:
    """Replace the node object *target* below *root* with *new_subtree*.

    The search matches on object identity (``is``), not equality, so it keeps
    replacing exactly the requested node even if ``Node`` ever gains an
    ``__eq__`` that compares structure.  The root itself cannot be replaced
    by this function because it has no parent link to rewrite; callers must
    handle that case themselves.

    Args:
        root: Root of the tree to search.
        target: The exact node object to replace.
        new_subtree: Subtree to put in its place.

    Returns:
        True if *target* was found as a child somewhere below *root* and
        replaced, False otherwise.
    """
    if root is None:
        return False
    if root.left is target:
        root.left = new_subtree
        return True
    if root.right is target:
        root.right = new_subtree
        return True
    return (replace_node(root.left, target, new_subtree)
            or replace_node(root.right, target, new_subtree))


def sanitize_tree(node: Node, max_depth: int, n_variables: int, current_depth: int = 0) -> Node:
    """Recursively repair the structure of an expression tree.

    Rules applied:
      - A missing node becomes a random constant leaf.
      - A leaf (``op is None``) is kept as it is, at any depth; stray
        children attached to a leaf are dropped because they are never
        evaluated and only inflate the node count.
      - An operator node at ``current_depth >= max_depth`` is replaced by a
        random constant leaf to stop the tree from growing deeper.
      - A binary operator gets both operands; a unary operator gets its left
        operand and loses any right operand (a unary operator with two
        operands cannot be compiled).  Missing operands are regenerated with
        create_random_tree(), existing ones are sanitized recursively.

    The function previously also overwrote valid leaves sitting at the depth
    limit with random constants, contrary to its documented behaviour.

    Args:
        node: Root of the (sub)tree to repair; may be None.
        max_depth: Depth from which operator nodes are no longer allowed.
        n_variables: Number of input variables available to new subtrees.
        current_depth: Depth of *node* within the whole tree.

    Returns:
        The repaired (sub)tree root; this is *node* itself unless it had to
        be replaced by a new leaf.
    """
    if node is None:
        return Node(value=generate_constant())

    if node.op is None:
        # Leaves never have operands.
        node.left = None
        node.right = None
        return node

    if current_depth >= max_depth:
        return Node(value=generate_constant())

    if node.op in BINARY_OPERATORS:
        operand_slots = ('left', 'right')
    elif node.op in UNARY_OPERATORS:
        operand_slots = ('left',)
        node.right = None
    else:
        # Unknown operator: arity cannot be decided, leave it untouched.
        operand_slots = ()

    child_depth = current_depth + 1
    for slot in operand_slots:
        child = getattr(node, slot)
        if child is None:
            repaired = create_random_tree(child_depth, max_depth, n_variables)
        else:
            repaired = sanitize_tree(child, max_depth, n_variables, child_depth)
        setattr(node, slot, repaired)
    return node



def tree_depth(node: Node) -> int:
    """Return the number of levels in the tree (0 if empty, 1 for a single leaf)."""
    levels = 0
    frontier = [node] if node is not None else []
    # Level-by-level sweep: every non-empty frontier is one more level.
    while frontier:
        levels += 1
        frontier = [
            child
            for current in frontier
            for child in (current.left, current.right)
            if child is not None
        ]
    return levels

def count_nodes(node: Node) -> int:
    """Return the total number of nodes in the tree rooted at *node*."""
    total = 0
    pending = [node]
    while pending:
        current = pending.pop()
        if current is None:
            continue
        total += 1
        pending.append(current.left)
        pending.append(current.right)
    return total

def _prune_collapse_to_leaf(node: Node) -> None:
    """Turn *node* into a random-constant leaf in place, dropping its children."""
    node.op = None
    node.left = None
    node.right = None
    node.value = random.uniform(-10, 10)


def _prune_scan(root: Node):
    """Return ``(node_count, deepest_internal_node)`` for the tree at *root*."""
    total = 0
    deepest, deepest_level = None, 0
    pending = [(root, 1)]
    while pending:
        current, level = pending.pop()
        total += 1
        children = [c for c in (current.left, current.right) if c is not None]
        if children and level > deepest_level:
            deepest, deepest_level = current, level
        pending.extend((child, level + 1) for child in children)
    return total, deepest


def prune_tree(node: Node, max_depth: int, max_nodes: int) -> Node:
    """Shrink a tree in place until it respects the depth and node limits.

    Depth is counted in levels (a single leaf has depth 1), in the same way
    as the depth measured by tree_depth().

    1. Every internal node sitting at level ``max_depth`` or deeper is turned
       into a random constant leaf, so only the over-deep parts are cut.
    2. While the tree still has more than ``max_nodes`` nodes, the deepest
       remaining internal node is turned into a leaf.

    The previous version replaced the *entire* tree with a single constant as
    soon as it was too deep, and its node-count branch could leave the tree
    above ``max_nodes``.

    Args:
        node: Root of the tree to prune; may be None.
        max_depth: Maximum allowed number of levels.
        max_nodes: Maximum allowed number of nodes.

    Returns:
        The same root object (pruned in place), or None if *node* is None.
    """
    if node is None:
        return None

    # Pass 1: enforce the depth limit.
    pending = [(node, 1)]
    while pending:
        current, level = pending.pop()
        if current.left is None and current.right is None:
            continue
        if level >= max_depth:
            _prune_collapse_to_leaf(current)
            continue
        for child in (current.left, current.right):
            if child is not None:
                pending.append((child, level + 1))

    # Pass 2: enforce the node budget, sacrificing the deepest structure first.
    while True:
        total, deepest_internal = _prune_scan(node)
        if total <= max_nodes or deepest_internal is None:
            break
        _prune_collapse_to_leaf(deepest_internal)

    return node

# ---------------------------------------------------------------------------
# Genetic operators

def crossover(parent1: Node, parent2: Node, n_variables: int) -> Node:
    """Create an offspring from *parent1* using material from *parent2*.

    A random node of a copy of *parent1* is selected as the target:

      - if the target is an operator node, it is replaced by a copy of a
        random subtree of *parent2*;
      - if the target is a leaf, no subtree is grafted (this limits growth);
        instead the leaf is re-drawn: a variable becomes another random
        variable, anything else becomes a new random constant.

    The earlier version tried to "complete" the donor according to the
    *target's* arity.  That attached children to donor leaves (invisible to
    evaluation but counted as nodes) and gave unary donors a second operand,
    which produced expressions such as ``(a sin b)`` that cannot be compiled.
    A donor copied from a valid tree is already well-formed, so it is now
    grafted unchanged.

    Args:
        parent1: Tree that provides the offspring's skeleton (not modified).
        parent2: Tree that provides the donor subtree (not modified).
        n_variables: Number of input variables.

    Returns:
        The offspring, passed through sanitize_tree().
    """
    offspring = parent1.copy()
    target, parent, is_left = get_random_node_with_parent(offspring)
    # Drawn unconditionally so the random stream is consumed as before.
    donor = get_random_node(parent2, include_root=True)

    if target.op is None:
        if isinstance(target.value, str) and target.value.startswith("x["):
            target.value = f"x[{random.randint(0, n_variables - 1)}]"
        else:
            target.value = generate_constant()
    else:
        new_subtree = donor.copy()
        if parent is None:
            offspring = new_subtree
        elif is_left:
            parent.left = new_subtree
        else:
            parent.right = new_subtree

    return sanitize_tree(offspring, MutationConfig.MAX_TREE_DEPTH, n_variables)

def subtree_mutation(individual: Node, max_depth: int, n_variables: int) -> Node:
    """Replace a random subtree of a copy of *individual* with a fresh random tree.

    The node to replace is selected together with its parent link, so the
    replacement is a direct assignment; the previous version compared nodes
    with ``==`` and then searched the whole tree a second time to find the
    parent.

    Args:
        individual: Tree to mutate (not modified).
        max_depth: Accepted for interface compatibility; the depth of the new
            subtree is drawn from ``MutationConfig.SUBTREE_DEPTH_RANGE``.
        n_variables: Number of input variables.

    Returns:
        The mutated copy (or the new subtree itself when the root was chosen).
    """
    mutated = individual.copy()
    _target, parent, is_left = get_random_node_with_parent(mutated)

    shallowest, deepest = MutationConfig.SUBTREE_DEPTH_RANGE
    new_depth = random.randint(shallowest, deepest)
    new_subtree = create_random_tree(0, new_depth, n_variables)

    if parent is None:
        # The root was selected: the new subtree becomes the whole individual.
        return new_subtree
    if is_left:
        parent.left = new_subtree
    else:
        parent.right = new_subtree
    return mutated

def point_mutation(individual: Node, n_variables: int) -> Node:
    """Mutate one randomly chosen node of a copy of *individual*.

    - Operator node: the operator is swapped for another one of the same
      arity (binary if both operands exist, unary if only the left one does,
      any operator otherwise); operands the new operator needs but that are
      missing are generated.
    - Numeric leaf: the constant is shifted by up to
      ``VALUE_STEP_FACTOR`` times its own magnitude (or times 1 if it is 0).
    - Variable leaf: another random variable is selected.

    Returns:
        The mutated copy, passed through sanitize_tree().
    """
    mutated = individual.copy()
    target = get_random_node(mutated, include_root=True)

    if target.op is None:
        leaf_value = target.value
        if isinstance(leaf_value, (int, float)):
            step = MutationConfig.VALUE_STEP_FACTOR
            scale = leaf_value if leaf_value != 0 else 1
            target.value = leaf_value + random.uniform(-step, step) * scale
        elif isinstance(leaf_value, str) and leaf_value.startswith("x["):
            target.value = f"x[{random.randint(0, n_variables - 1)}]"
        return sanitize_tree(mutated, MutationConfig.MAX_TREE_DEPTH, n_variables)

    # Restrict the candidates to operators matching the node's current shape.
    has_left = target.left is not None
    has_right = target.right is not None
    if has_left and has_right:
        arity_pool = BINARY_OPERATORS
    elif has_left:
        arity_pool = UNARY_OPERATORS
    else:
        arity_pool = None
    candidates = [op for op in OPERATOR_WEIGHTS if arity_pool is None or op in arity_pool]

    # Prefer an actual change whenever an alternative exists.
    if len(candidates) > 1 and target.op in candidates:
        candidates.remove(target.op)
    target.op = random.choice(candidates)

    if target.op in BINARY_OPERATORS:
        required_slots = ('left', 'right')
    elif target.op in UNARY_OPERATORS:
        required_slots = ('left',)
    else:
        required_slots = ()
    for slot in required_slots:
        if getattr(target, slot) is None:
            filler = create_random_tree(0, MutationConfig.SUBTREE_DEPTH_RANGE[1], n_variables)
            setattr(target, slot, filler)

    return sanitize_tree(mutated, MutationConfig.MAX_TREE_DEPTH, n_variables)

def hoist_mutation(individual: Node) -> Node:
    """Return a copy of a random subtree of *individual*, promoting it to root.

    The root itself is only eligible when the tree is a single node.  The
    subtree is copied once; the previous version first deep-copied the whole
    tree and then copied the selected subtree again, doing the work twice for
    the same result.  *individual* is not modified.
    """
    chosen = get_random_node(individual, include_root=False)
    return chosen.copy()

def expansion_mutation(individual: Node, max_depth: int, n_variables: int) -> Node:
    """Grow a copy of *individual* by replacing a random leaf with a random subtree.

    ``max_depth`` is part of the signature but the depth of the new subtree
    is drawn from ``MutationConfig.SUBTREE_DEPTH_RANGE``.
    """
    mutated = individual.copy()
    leaf, parent, is_left = get_random_leaf_with_parent(mutated)
    if leaf is None:
        return mutated

    shallowest, deepest = MutationConfig.SUBTREE_DEPTH_RANGE
    replacement = create_random_tree(0, random.randint(shallowest, deepest), n_variables)

    if parent is None:
        # The tree was a single leaf: the new subtree is the whole individual.
        return replacement
    setattr(parent, 'left' if is_left else 'right', replacement)
    return mutated

def collapse_mutation(individual: Node) -> Node:
    """Shrink a copy of *individual* by turning a random internal node into a constant leaf.

    A tree without internal nodes is returned as an unchanged copy.
    """
    mutated = individual.copy()

    internal = []
    pending = [(mutated, None, None)]
    # Pre-order walk collecting every node that has at least one child.
    while pending:
        current, parent, is_left = pending.pop()
        if current is None:
            continue
        if current.left is not None or current.right is not None:
            internal.append((current, parent, is_left))
        pending.append((current.right, current, False))
        pending.append((current.left, current, True))

    if not internal:
        return mutated

    _victim, parent, is_left = random.choice(internal)
    new_leaf = Node(value=random.uniform(-10, 10))
    if parent is None:
        return new_leaf
    setattr(parent, 'left' if is_left else 'right', new_leaf)
    return mutated

# --- Helper function to apply a mutation chosen from those implemented ---
def apply_mutation(individual, mutation_prob, max_depth, n_variables):
    """Mutate *individual* with probability *mutation_prob*.

    The mutation kind is drawn according to
    ``MutationConfig.MUTATION_WEIGHTS``.  If the result exceeds
    ``MAX_TREE_DEPTH`` or ``MAX_TREE_NODES`` it is handed to prune_tree().

    Returns:
        *individual* itself when no mutation happens, otherwise the mutated
        (and possibly pruned) tree.
    """
    if random.random() > mutation_prob:
        return individual

    kinds = list(MutationConfig.MUTATION_WEIGHTS)
    weights = list(MutationConfig.MUTATION_WEIGHTS.values())
    (chosen_kind,) = random.choices(kinds, weights=weights, k=1)

    # Each mutation kind maps to its operator and the arguments it expects.
    dispatch = {
        'SUBTREE': (subtree_mutation, (individual, max_depth, n_variables)),
        'OPERATOR': (point_mutation, (individual, n_variables)),
        'VALUE': (point_mutation, (individual, n_variables)),
        'HOIST': (hoist_mutation, (individual,)),
        'EXPANSION': (expansion_mutation, (individual, max_depth, n_variables)),
        'COLLAPSE': (collapse_mutation, (individual,)),
    }
    operator_fn, operator_args = dispatch[chosen_kind]
    mutated = operator_fn(*operator_args)

    depth_limit = MutationConfig.MAX_TREE_DEPTH
    node_limit = MutationConfig.MAX_TREE_NODES
    if tree_depth(mutated) > depth_limit or count_nodes(mutated) > node_limit:
        mutated = prune_tree(mutated, depth_limit, node_limit)

    return mutated

# ---------------------------------------------------------------------------
# Evaluation and selection



def compile_tree(root: Node):
    """Convert an expression tree into a callable ``f(x)``.

    The tree is rendered as a Python expression and evaluated as
    ``lambda x: <expr>`` in a namespace that maps every operator name to its
    protected implementation.

    Fix: the protected binary operators are now looked up in an explicit
    table.  The old name construction turned ``'/'`` into ``safe_/``, an
    invalid identifier, so every tree containing a division failed to compile
    and was silently replaced by the all-zero fallback.

    Args:
        root: Root of the expression tree.

    Returns:
        A callable taking ``x`` (indexable as ``x[i]``).  If the expression
        cannot be compiled, a fallback returning ``np.zeros_like(x[0])``.

    Raises:
        ValueError: If an operator node has no left operand.
    """
    protected_calls = {'/': 'safe_divide', '**': 'safe_pow', 'mod': 'safe_mod'}

    def node_to_str(node: Node) -> str:
        if not node.op:
            return str(node.value)
        if node.left is None:
            raise ValueError("Operator node without operand.")

        left_str = node_to_str(node.left)
        if node.right is None:
            # Unary operator: rendered as a call resolved through safe_globals.
            return f"{node.op}({left_str})"

        right_str = node_to_str(node.right)
        if node.op in protected_calls:
            return f"{protected_calls[node.op]}({left_str}, {right_str})"
        return f"({left_str} {node.op} {right_str})"

    expr_str = node_to_str(root)
    func_str = f"lambda x: {expr_str}"

    safe_globals = {
        "np": np,
        "safe_divide": safe_divide,
        "safe_pow": safe_pow,
        "safe_mod": safe_mod,
        "sin": safe_sin,
        "cos": safe_cos,
        "exp": safe_exp,
        "log": safe_log,
        "sqrt": safe_sqrt,
        "tan": safe_tan,
        "tanh": safe_tanh,
        "sinh": safe_sinh,
        "cosh": safe_cosh,
        "abs": safe_abs,
        "log10": safe_log10,
        "log2": safe_log2
    }

    try:
        # NOTE: eval() runs text generated from the tree.  That is acceptable
        # for trees built by this module; never compile trees whose operators
        # or values come from untrusted input.
        compiled_func = eval(func_str, safe_globals)
    except Exception:
        # Best effort: an uncompilable individual predicts zeros everywhere.
        def fallback(x):
            return np.zeros_like(x[0])
        compiled_func = fallback
    return compiled_func

def calculate_fitness(tree: Node, x: np.ndarray, y_true: np.ndarray) -> float:
    """Return the mean squared error of *tree* on ``(x, y_true)``; lower is better.

    NaN predictions count as 0 and infinite ones as CLIP_MAX / CLIP_MIN.  Any
    exception raised while compiling or evaluating the tree yields
    ``float('inf')``.
    """
    try:
        predict = compile_tree(tree)
        raw_prediction = predict(x)
        cleaned = np.nan_to_num(raw_prediction, nan=0.0, posinf=CLIP_MAX, neginf=CLIP_MIN)
        residual = y_true - cleaned
        return np.mean(np.square(residual))
    except Exception:
        return float('inf')

def tournament_selection(population, fitness_scores, tournament_size=3):
    """Select one individual by tournament (lowest fitness wins).

    ``tournament_size`` distinct individuals are drawn at random and the one
    with the smallest fitness score is returned.  The tournament size is
    capped at the population size; previously a tournament larger than the
    population made ``np.random.choice(..., replace=False)`` raise.

    Args:
        population: Sequence of individuals.
        fitness_scores: Sequence of scores aligned with *population*.
        tournament_size: Number of contestants to draw.

    Returns:
        The winning individual.

    Raises:
        ValueError: If *population* is empty.
    """
    population_size = len(population)
    if population_size == 0:
        raise ValueError("Cannot run a tournament on an empty population.")

    contestants_count = max(1, min(tournament_size, population_size))
    contestants = np.random.choice(population_size, size=contestants_count, replace=False)
    best_position = np.argmin([fitness_scores[i] for i in contestants])
    return population[contestants[best_position]]


#-----------------------------------------------------------------
# Function to write the solution


def function_to_string(root: Node, mse: float, func_name: str) -> str:
    """Render an expression tree as the source of a NumPy-only Python function.

    The generated text has the form::

        def <func_name>(x: np.ndarray) -> np.ndarray:  # mse: <mse>
            return <expression>

    The expression uses plain ``np.*`` functions.  ``log``/``log10``/``log2``
    get a ``+1e-8`` offset, and their argument (like that of ``sqrt``) is
    wrapped in ``np.abs`` unless it is a non-negative constant.  A division
    by the constant zero gets a ``1e-8`` offset in the denominator.

    Fix: a negative constant used as the base of ``**`` is now parenthesised.
    ``(-2.0 ** x[0])`` is parsed by Python as ``-(2.0 ** x[0])``, which is
    not the expression stored in the tree.

    Args:
        root: Root of the expression tree.
        mse: Fitness value written into the trailing comment.
        func_name: Name of the generated function.

    Returns:
        The function definition as a string ending with a newline.

    Raises:
        ValueError: If an operator node has no left operand.
    """
    # Unary operators that map one-to-one onto a NumPy function.
    plain_unary = {
        'sin': 'np.sin',
        'cos': 'np.cos',
        'exp': 'np.exp',
        'tan': 'np.tan',
        'tanh': 'np.tanh',
        'sinh': 'np.sinh',
        'cosh': 'np.cosh',
        'abs': 'np.abs'
    }
    # Logarithms, whose argument must be kept strictly positive.
    guarded_logs = {'log': 'np.log', 'log10': 'np.log10', 'log2': 'np.log2'}

    def is_numeric_leaf(n: Node) -> bool:
        return n.op is None and isinstance(n.value, (int, float))

    def is_nonnegative_constant(n: Node) -> bool:
        return is_numeric_leaf(n) and n.value >= 0

    def is_zero_constant(n: Node) -> bool:
        return is_numeric_leaf(n) and n.value == 0

    def is_negative_constant(n: Node) -> bool:
        return is_numeric_leaf(n) and n.value < 0

    def node_to_str(node: Node) -> str:
        """Recursively render *node* as a NumPy expression."""
        if not node.op:
            # Leaf: variables are stored as strings, constants as numbers.
            return node.value if isinstance(node.value, str) else str(node.value)
        if node.left is None:
            # Operator node without child (anomalous situation)
            raise ValueError("Operator node without operand.")

        left_str = node_to_str(node.left)

        # Binary operator case (two children)
        if node.right is not None:
            right_str = node_to_str(node.right)
            if node.op == '/':
                if is_zero_constant(node.right):
                    return f"(({left_str})/(({right_str})+(1e-8)))"
                return f"({left_str} / {right_str})"
            if node.op == '**':
                if is_negative_constant(node.left):
                    # ** binds tighter than unary minus: keep the sign with the base.
                    left_str = f"({left_str})"
                return f"({left_str} ** {right_str})"
            if node.op == 'mod':
                return f"np.mod({left_str}, {right_str})"
            return f"({left_str} {node.op} {right_str})"

        # Unary operator case (single child)
        operand_str = left_str
        if node.op == '+':
            return f"(+{operand_str})"
        if node.op == '-':
            return f"(-{operand_str})"
        if node.op in guarded_logs:
            log_fn = guarded_logs[node.op]
            if is_nonnegative_constant(node.left):
                return f"{log_fn}({operand_str}+1e-8)"
            return f"{log_fn}(np.abs({operand_str})+1e-8)"
        if node.op == 'sqrt':
            if is_nonnegative_constant(node.left):
                return f"np.sqrt({operand_str})"
            return f"np.sqrt(np.abs({operand_str}))"
        if node.op in plain_unary:
            return f"{plain_unary[node.op]}({operand_str})"
        # Otherwise assume there exists a np.<op> function
        return f"np.{node.op}({operand_str})"

    expr_str = node_to_str(root)
    func_str = (
        f"def {func_name}(x: np.ndarray) -> np.ndarray:  # mse: {mse:.4e}\n"
        f"    return {expr_str}\n"
    )
    return func_str



# main algorithm

In [68]:
import os
import glob


def genetic_algorithm(filepath: str, problem_numbers=(2,)):
    """
    Runs the genetic algorithm on 'problem_#.npz' files found in the folder
    `filepath` and saves the obtained functions in a Python file named 's323914.py'.

    The function writes a definition for each processed problem, in the format:

        import numpy as np

        def f1(x: np.ndarray) -> np.ndarray:  # mse: 0.1234
            return <formula>

    Where the number in the function (f1, f2, ...) corresponds to the number extracted from the filename
    (e.g. "problem_1.npz") and the comment indicates the obtained fitness value (mse).

    Args:
        filepath: Folder containing the 'problem_#.npz' files.
        problem_numbers: Iterable of problem numbers to process.  The default
            ``(2,)`` reproduces the filter that used to be hard-coded in the
            loop; pass ``None`` to process every problem file found.

    Changes with respect to the previous version: the problem filter is a
    parameter, each ``.npz`` file is closed after loading, problems for which
    no individual was ever selected are skipped instead of crashing while
    writing, and solutions are ordered by problem number only (tuples are no
    longer compared down to the tree objects).
    """
    # Algorithm hyperparameters
    POPULATION_SIZE = 1500
    MAX_DEPTH = 7
    GENERATIONS = 150
    TOURNAMENT_SIZE = 15
    ELITISM_RATE = 0.05
    ELITE_COUNT = max(1, int(POPULATION_SIZE * ELITISM_RATE))
    MUTATION_PROB = 0.5

    wanted = None if problem_numbers is None else set(problem_numbers)

    # Find and sort problem files
    problem_files = sorted(glob.glob(os.path.join(filepath, "problem_*.npz")))
    solutions = []

    # Process each problem file
    for prob_file in problem_files:
        # Extract problem number from filename
        try:
            prob_number = int(os.path.basename(prob_file).split('_')[1].split('.')[0])
        except (IndexError, ValueError):
            print(f"Non-conforming filename, skipping: {prob_file}")
            continue

        if wanted is not None and prob_number not in wanted:
            continue

        print(f"Processing problem {prob_number}...")

        # Load and prepare data; the context manager closes the archive.
        with np.load(prob_file) as data:
            x_train = data['x']
            y_train = data['y']
        # Variables are indexed along the first axis of x (x[i] in the trees).
        N_VARIABLES = x_train.shape[0]

        # Initialize population
        population = [create_random_tree(0, MAX_DEPTH, N_VARIABLES) for _ in range(POPULATION_SIZE)]
        best_fitness = float('inf')
        best_tree = None
        no_improvement = 0

        # Evolution loop
        for generation in range(GENERATIONS):
            # Evaluate population
            fitness_scores = [calculate_fitness(ind, x_train, y_train) for ind in population]
            sorted_indices = np.argsort(fitness_scores)

            # Select elites (carried over unchanged into the next generation)
            elites = [population[i] for i in sorted_indices[:ELITE_COUNT]]

            # Update best solution
            generation_best = sorted_indices[0]
            if fitness_scores[generation_best] < best_fitness:
                best_fitness = fitness_scores[generation_best]
                best_tree = population[generation_best]
                no_improvement = 0
            else:
                no_improvement += 1

            # Early stopping check
            if no_improvement > 20:
                print("No improvement for 20 consecutive generations. Early stopping!")
                break

            # Generate new population: elites first, then bred children.
            new_population = elites.copy()
            while len(new_population) < POPULATION_SIZE:
                p1 = tournament_selection(population, fitness_scores, TOURNAMENT_SIZE)
                p2 = tournament_selection(population, fitness_scores, TOURNAMENT_SIZE)
                child = crossover(p1, p2, n_variables=N_VARIABLES)
                child = apply_mutation(child, mutation_prob=MUTATION_PROB, max_depth=MAX_DEPTH, n_variables=N_VARIABLES)
                new_population.append(child)

            population = new_population

        if best_tree is None:
            # Every individual had infinite fitness: nothing can be written.
            print(f"Problem {prob_number} - no valid individual found, skipping.")
            continue

        print(f"Problem {prob_number} - Best individual: {best_tree} with fitness: {best_fitness:.4e}")
        solutions.append((prob_number, best_fitness, best_tree))

    # Write solutions to output file
    output_filename = "s323914.py"
    with open(output_filename, "w") as f:
        f.write("import numpy as np\n\n")
        for prob_number, best_fitness, best_code in sorted(solutions, key=lambda entry: entry[0]):
            code = function_to_string(best_code, best_fitness, f"f{prob_number}")
            f.write(code)
    print(f"Solutions saved in {output_filename}")


# train

In [69]:


# Run the search on the problem files stored in the sibling "data" folder.
# NOTE(review): the raw string uses a Windows path separator; on other
# platforms "..\data" is not interpreted as a parent-directory path.
genetic_algorithm(r"..\data")


Processing problem 2...
Problem 2 - Best individual: (cosh((exp(((x[1] + x[2]) - -0.21774089038211697)) * cosh(exp((x[0] - -5.534411412299692))))) * (((((((cosh(-8.543501863929627) - -8.256038024026491) - -8.543501863929627) - -9.228786531584895) - -8.543501863929627) - exp((((x[1] + x[2]) - -0.18699689876638592) - -2.093755963686612))) * x[0]) + ((((((cosh(-8.543501863929627) - -9.730753058026401) - exp(x[2])) - -8.256038024026491) - exp((((x[0] + x[0]) - 1.5379362597415547) - 0.190992556774237))) - exp(((x[1] + x[2]) - -0.3239359296775852))) * x[0]))) with fitness: 1.1565e+13
Solutions saved in s323914.py
