In [12]:
import numpy as np
import random

# Generazione dei Dati
def generate_data(test_size=10_000, train_size=1_000):
    """
    Genera i dati di input x e output y utilizzando una funzione vera (true_f),
    salva i dati di train in un file .npz.
    """
    # Funzione "vera" per generare y
    def true_f(x: np.ndarray) -> np.ndarray:
        return x[0] + np.sin(x[1]) / 5  # Cambia questa funzione se necessario

    # Generazione di x_validation
    x_validation = np.vstack(
        [
            np.random.random_sample(size=test_size) * 2 * np.pi - np.pi,  # Range [-π, π]
            np.random.random_sample(size=test_size) * 2 - 1,  # Range [-1, 1]
        ]
    )
    y_validation = true_f(x_validation)

    # Suddivisione in train e test
    train_indexes = np.random.choice(test_size, size=train_size, replace=False)
    x_train = x_validation[:, train_indexes]
    y_train = y_validation[train_indexes]

    # Verifica
    assert np.all(y_train == true_f(x_train)), "Errore nei dati generati!"

    # Salvataggio nel file .npz
    np.savez("problem_0.npz", x=x_train, y=y_train)
    print("Dati generati e salvati in 'problem_0.npz'")


In [36]:
import numpy as np
import random

# Costanti e Operatori
FUNCTIONS = ['+', '-', '*', '/', '**']
UNARY_FUNCTIONS = ['sin', 'cos', 'exp', 'log']
VARIABLES = ['x[0]', 'x[1]']
CONSTANTS = ['0', '1', '0.5', '2']  # Convert to strings

class Node:
    def __init__(self, value, children=None):
        # Advanced type conversion
        if isinstance(value, (int, float)):
            value = str(value)
        
        # Comprehensive validation
        if (value not in VARIABLES and 
            value not in CONSTANTS and 
            value not in FUNCTIONS and 
            value not in UNARY_FUNCTIONS):
            print(f"WARNING: Unexpected value: {value}")
            print(f"Valid VARIABLES: {VARIABLES}")
            print(f"Valid CONSTANTS: {CONSTANTS}")
            print(f"Valid FUNCTIONS: {FUNCTIONS}")
            print(f"Valid UNARY_FUNCTIONS: {UNARY_FUNCTIONS}")
            raise ValueError(f"Invalid node value: {value}")
        
        self.value = value
        self.children = children or []

    def evaluate(self, x):
        if self.value in VARIABLES:  # Variabili (e.g., x[0], x[1])
            return eval(self.value)
        elif self.value in CONSTANTS:  # Costanti
            return float(self.value)
        elif self.value in FUNCTIONS:  # Operatori binari (+, -, *, /
            left_val = self.children[0].evaluate(x)
            right_val = self.children[1].evaluate(x)
            if self.value == '/' and right_val == 0:  # Gestione divisione per zero
                return 1.0
            return eval(f"{left_val} {self.value} {right_val}")
        elif self.value in UNARY_FUNCTIONS:  # Funzioni unarie (sin, cos)
            operand = self.children[0].evaluate(x)
            return eval(f"np.{self.value}({operand})")
        else:
            raise ValueError(f"Valore del nodo sconosciuto: {self.value}")

    def to_numpy(self):
        """Converte l'albero in una stringa compatibile con NumPy."""
        if self.value in VARIABLES:
            return self.value
        elif self.value in CONSTANTS:
            return str(self.value)
        elif self.value in FUNCTIONS:
            return f"({self.children[0].to_numpy()} {self.value} {self.children[1].to_numpy()})"
        elif self.value in UNARY_FUNCTIONS:
            return f"np.{self.value}({self.children[0].to_numpy()})"

In [37]:

# Fitness
def fitness(tree, x, y):
    predictions = np.array([tree.evaluate(xi) for xi in x.T])
    return np.mean((predictions - y) ** 2)

# Main Loop Genetic Programming
def genetic_programming(x, y, population_size=50, generations=100):
    # Inizializzazione popolazione
    population = []
    for _ in range(population_size):
        tree = generate_random_tree()
        
        # Debugging: Print out each tree's nodes
        print("\nNew Tree Nodes:")
        for node in tree_to_list(tree):
            print(f"Node value: {node.value}")
            if node.value not in VARIABLES + CONSTANTS + FUNCTIONS + UNARY_FUNCTIONS:
                print(f"INVALID NODE: {node.value}")
                print("VARIABLES:", VARIABLES)
                print("CONSTANTS:", CONSTANTS)
                print("FUNCTIONS:", FUNCTIONS)
                print("UNARY_FUNCTIONS:", UNARY_FUNCTIONS)
    for tree in population:
        for node in tree_to_list(tree):  # Definisci una funzione che converte un albero in lista di nodi
            assert node.value in VARIABLES + CONSTANTS + FUNCTIONS + UNARY_FUNCTIONS, f"Valore non valido: {node.value}"

    
    for generation in range(generations):
        # Calcolo fitness
        fitness_scores = [fitness(tree, x, y) for tree in population]
        
        # Miglior individuo
        best_index = np.argmin(fitness_scores)
        best_tree = population[best_index]
        print(f"Generation {generation}: Best fitness = {fitness_scores[best_index]:.6f}")
        
        # Selezione e riproduzione
        new_population = []
        for _ in range(population_size // 2):
            parent1, parent2 = random.choices(population, weights=[1 / (f + 1) for f in fitness_scores], k=2)
            child1, child2 = crossover(parent1, parent2)
            new_population.extend([mutate(child1), mutate(child2)])
        
        population = new_population
    
    return best_tree

# Crossover (semplificato)
def crossover(parent1, parent2):
    # Scambia sottoalberi tra i due genitori
    return parent1, parent2  # Da implementare

# Mutazione
def mutate(tree, mutation_rate=0.1):
    if random.random() < mutation_rate:
        return generate_random_tree()
    return tree


In [38]:
def tree_to_list(tree):
    """
    Recursively converts a tree to a list of nodes.
    
    Args:
        tree (Node): The root node of the tree
    
    Returns:
        list: A list of all nodes in the tree
    """
    nodes = [tree]
    
    for child in tree.children:
        nodes.extend(tree_to_list(child))
    
    return nodes

def crossover(parent1, parent2):
    """
    Perform crossover between two parent trees by swapping random subtrees.
    
    Args:
        parent1 (Node): First parent tree
        parent2 (Node): Second parent tree
    
    Returns:
        tuple: Two child trees
    """
    # Create a deep copy of the trees to avoid modifying original trees
    def copy_tree(node):
        if not node.children:
            return Node(node.value)
        return Node(node.value, [copy_tree(child) for child in node.children])
    
    child1, child2 = copy_tree(parent1), copy_tree(parent2)
    
    # Get lists of nodes for both trees
    nodes1 = tree_to_list(child1)
    nodes2 = tree_to_list(child2)
    
    # Randomly select subtrees to swap
    if nodes1 and nodes2:
        subtree1 = random.choice(nodes1)
        subtree2 = random.choice(nodes2)
        
        # Swap subtrees
        subtree1.value, subtree2.value = subtree2.value, subtree1.value
        subtree1.children, subtree2.children = subtree2.children, subtree1.children
    
    return child1, child2

In [29]:
# Costanti e Operatori
FUNCTIONS = ['+', '-', '*', '/', '**']
UNARY_FUNCTIONS = ['sin', 'cos', 'exp', 'log']
VARIABLES = ['x[0]', 'x[1]']
CONSTANTS = [0, 1, 0.5, 2]

In [33]:


# Esecuzione
generate_data()
problem = np.load("problem_0.npz")
x_train = problem['x']
y_train = problem['y']

best_tree = genetic_programming(x_train, y_train)

# Esportazione in un file .py
formula = best_tree.to_numpy()
with open("s323914.py", "w") as f:
    f.write("import numpy as np\n")
    f.write(f"def f(x: np.ndarray) -> np.ndarray:\n    return {formula}\n")

Dati generati e salvati in 'problem_0.npz'

New Tree Nodes:
Node value: /
Node value: +
Node value: /
Node value: x[1]
Node value: 0.5
INVALID NODE: 0.5
VARIABLES: ['x[0]', 'x[1]']
CONSTANTS: [0, 1, 0.5, 2]
FUNCTIONS: ['+', '-', '*', '/', '**']
UNARY_FUNCTIONS: ['sin', 'cos', 'exp', 'log']
Node value: /
Node value: x[1]
Node value: 2
INVALID NODE: 2
VARIABLES: ['x[0]', 'x[1]']
CONSTANTS: [0, 1, 0.5, 2]
FUNCTIONS: ['+', '-', '*', '/', '**']
UNARY_FUNCTIONS: ['sin', 'cos', 'exp', 'log']
Node value: cos
Node value: -
Node value: 2
INVALID NODE: 2
VARIABLES: ['x[0]', 'x[1]']
CONSTANTS: [0, 1, 0.5, 2]
FUNCTIONS: ['+', '-', '*', '/', '**']
UNARY_FUNCTIONS: ['sin', 'cos', 'exp', 'log']
Node value: 0
INVALID NODE: 0
VARIABLES: ['x[0]', 'x[1]']
CONSTANTS: [0, 1, 0.5, 2]
FUNCTIONS: ['+', '-', '*', '/', '**']
UNARY_FUNCTIONS: ['sin', 'cos', 'exp', 'log']

New Tree Nodes:
Node value: sin
Node value: /
Node value: -
Node value: 2
INVALID NODE: 2
VARIABLES: ['x[0]', 'x[1]']
CONSTANTS: [0, 1, 0.5, 2]

ValueError: attempt to get argmin of an empty sequence

## Evaluation

In [None]:
problem = np.load('problem_0.npz')
x = problem['x']
y = problem['y']
x.shape
print(f"MSE (train): {100*np.square(y_train-d3584.f(x_train)).sum()/len(y_train):g}")
print(f"MSE (real) : {100*np.square(y_validation-d3584.f(x_validation)).sum()/len(y_validation):g}")