## Bài 6: Genetic Programming (GP)

### Cài đặt thuật toán tìm biểu thức gần đúng hàm $f(x) = x^2 + \sin(x)$

In [3]:
import operator, math, random
import numpy as np
# Two-argument operators that may label an internal node.
funcs = [
    operator.add,
    operator.sub,
    operator.mul,
]

# One-argument functions that may label an internal node.
unary_funcs = [
    math.sin,
    math.cos,
]

# Symbolic terminals; the only variable is "x".
terminals = ["x"]
class Node:
    """One node of an expression tree over the single variable ``x``.

    A node is either a leaf (``value`` is the string ``"x"`` or a numeric
    constant) or an operator node (``func`` applied to its ``children``).
    """

    # Infix symbols used by __repr__; any other function is shown as name(args).
    _SYMBOLS = {operator.add: "+", operator.sub: "-", operator.mul: "*"}

    def __init__(self, func=None, children=None, value=None):
        """Store the operator, its operand subtrees and/or the leaf value.

        Args:
            func: Callable applied to the evaluated children (operator nodes).
            children: List of operand ``Node`` objects; defaults to empty.
            value: ``"x"`` or a numeric constant (leaf nodes).
        """
        self.func = func
        self.children = children or []
        self.value = value

    def eval(self, x):
        """Evaluate the subtree rooted at this node for the given ``x``."""
        # `is not None` (not truthiness) so that a constant 0.0 is still a leaf.
        if self.value is not None:
            return x if self.value == "x" else self.value
        # Arity comes from the node itself rather than from a module-level
        # operator list, so any callable works as long as the number of
        # children matches its signature.
        args = [child.eval(x) for child in self.children]
        return self.func(*args)

    def copy(self):
        """Return a deep copy of the subtree rooted at this node."""
        if self.value is not None:
            return Node(value=self.value)
        return Node(self.func, [c.copy() for c in self.children])

    def __repr__(self):
        """Render the subtree as a readable expression, e.g. ``((x * x) + sin(x))``."""
        if self.value is not None:
            return str(self.value)
        parts = [repr(child) for child in self.children]
        symbol = self._SYMBOLS.get(self.func)
        if symbol is not None and len(parts) == 2:
            return f"({parts[0]} {symbol} {parts[1]})"
        name = getattr(self.func, "__name__", repr(self.func))
        return f"{name}({', '.join(parts)})"
def generate_tree(depth=3):
    """Build a random expression tree with at most ``depth`` operator levels.

    At every level there is a 30% chance of stopping early with a leaf. A
    leaf is the variable ``"x"`` with probability 0.7, otherwise a constant
    drawn uniformly from [-2, 2]. Internal nodes are binary (from ``funcs``)
    or unary (from ``unary_funcs``) with equal probability.

    Args:
        depth: Remaining number of operator levels; values <= 0 yield a leaf.

    Returns:
        The root ``Node`` of the generated tree.
    """
    # `<=` instead of `==`: with a negative depth the base case would never be
    # reached and only the 30% leaf chance would bound the recursion.
    if depth <= 0 or random.random() < 0.3:
        if random.random() < 0.7:
            return Node(value="x")
        return Node(value=random.uniform(-2, 2))

    if random.random() < 0.5:
        op = random.choice(funcs)
        return Node(op, [generate_tree(depth - 1), generate_tree(depth - 1)])

    op = random.choice(unary_funcs)
    return Node(op, [generate_tree(depth - 1)])


def fitness(tree, xs, target_fn):
    """Return the mean squared error of ``tree`` against ``target_fn`` on ``xs``.

    Lower is better. An individual that cannot be evaluated (its ``eval``
    raises ``ValueError``, ``OverflowError`` or ``ZeroDivisionError``) or
    whose error is not finite (NaN or inf) gets ``inf``, so it always
    compares as the worst candidate instead of aborting the run or being
    picked by ``np.argmin``, which returns the position of a NaN.

    Args:
        tree: Object exposing ``eval(x)``.
        xs: Iterable of sample points.
        target_fn: Callable giving the reference value at each point.

    Returns:
        The mean squared error, or ``float("inf")`` for a failed or
        non-finite evaluation.
    """
    try:
        raw_preds = [tree.eval(x) for x in xs]
    except (ValueError, OverflowError, ZeroDivisionError):
        # e.g. math.sin/math.cos raise ValueError when handed an infinity.
        return float("inf")

    preds = np.array(raw_preds, dtype=float)
    target = np.array([target_fn(x) for x in xs], dtype=float)
    # Huge predictions may overflow while squaring; that case is reported as
    # inf below, so the floating-point warnings are silenced here.
    with np.errstate(over="ignore", invalid="ignore"):
        mse = np.mean((preds - target) ** 2)
    return mse if np.isfinite(mse) else float("inf")


def crossover(t1, t2):
    """Produce two offspring by swapping one root-level subtree between parents.

    Both parents are copied first and are never modified. If either copy has
    no children (it is a leaf), the two copies are returned unchanged.

    Args:
        t1: First parent; must expose ``copy()`` and ``children``.
        t2: Second parent; same requirements.

    Returns:
        A tuple ``(child1, child2)`` of new trees.
    """
    child1, child2 = t1.copy(), t2.copy()
    if not child1.children or not child2.children:
        return child1, child2

    i = random.randrange(len(child1.children))
    j = random.randrange(len(child2.children))

    # The offspring are already private copies of the parents, so the chosen
    # subtrees can be exchanged directly; copying them again is not needed.
    child1.children[i], child2.children[j] = (
        child2.children[j],
        child1.children[i],
    )
    return child1, child2


def mutate(tree):
    """Randomly replace subtrees of ``tree`` and return the resulting root.

    Each visited node is replaced, with probability 0.2, by a fresh random
    tree from ``generate_tree(2)``. Otherwise its children are mutated
    recursively and ``tree.children`` is rebound to the new list, so the
    argument itself is modified.
    """
    replace_here = random.random() < 0.2
    if replace_here:
        return generate_tree(2)

    if not tree.children:
        return tree

    mutated_children = []
    for child in tree.children:
        mutated_children.append(mutate(child))
    tree.children = mutated_children
    return tree


def f(x):
    """Target function to approximate: x squared plus the sine of x."""
    quadratic_term = x ** 2
    sine_term = math.sin(x)
    return quadratic_term + sine_term


def _tournament(pop, fits):
    """Draw two distinct individuals at random and return the one with lower error."""
    a, b = random.sample(range(len(pop)), 2)
    return pop[a] if fits[a] < fits[b] else pop[b]


def GP(pop_size=30, gens=20):
    """Evolve expression trees that approximate ``f`` on 20 points in [-2, 2].

    Each generation the population is scored with ``fitness``, the best error
    is printed, the best individual is carried over unchanged (elitism), and
    the remaining slots are filled with mutated offspring of
    tournament-selected parents.

    Args:
        pop_size: Number of individuals per generation (at least 1).
        gens: Number of generations to evaluate. With ``gens <= 0`` nothing is
            printed and the best individual of the initial random population
            is returned.

    Returns:
        The best individual of the last evaluated generation.

    Raises:
        ValueError: If ``pop_size`` is smaller than 1.
    """
    if pop_size < 1:
        raise ValueError("pop_size must be at least 1")

    xs = np.linspace(-2, 2, 20)
    pop = [generate_tree(3) for _ in range(pop_size)]
    best = None

    for g in range(gens):
        fits = [fitness(ind, xs, f) for ind in pop]
        best_idx = int(np.argmin(fits))
        print(f"Gen {g+1}, Best Fitness: {fits[best_idx]:.6f}")
        best = pop[best_idx]

        # A generation bred after the final evaluation would never be scored
        # or returned, so stop before building it.
        if g == gens - 1:
            break

        new_pop = [best.copy()]  # elitism: keep the current best unchanged
        while len(new_pop) < pop_size:
            p1 = _tournament(pop, fits)
            p2 = _tournament(pop, fits)
            c1, c2 = crossover(p1, p2)
            new_pop.extend([mutate(c1), mutate(c2)])
        # Offspring arrive in pairs, so trim a possible extra individual.
        pop = new_pop[:pop_size]

    if best is None:
        # No generation was evaluated (gens <= 0): fall back to the best of
        # the initial population instead of failing on an unbound name.
        fits = [fitness(ind, xs, f) for ind in pop]
        best = pop[int(np.argmin(fits))]

    return best

In [4]:
# Run the search with the default settings (pop_size=30, gens=20).
best_tree = GP()
# NOTE(review): only a caption is printed here; the evolved tree is not shown.
print("\nBest tree approximates f(x) = x^2 + sin(x)")

Gen 1, Best Fitness: 1.242331
Gen 2, Best Fitness: 0.241918
Gen 3, Best Fitness: 0.241918
Gen 4, Best Fitness: 0.241918
Gen 5, Best Fitness: 0.241918
Gen 6, Best Fitness: 0.241918
Gen 7, Best Fitness: 0.241918
Gen 8, Best Fitness: 0.241918
Gen 9, Best Fitness: 0.241918
Gen 10, Best Fitness: 0.094383
Gen 11, Best Fitness: 0.094383
Gen 12, Best Fitness: 0.094383
Gen 13, Best Fitness: 0.094383
Gen 14, Best Fitness: 0.094383
Gen 15, Best Fitness: 0.094383
Gen 16, Best Fitness: 0.094383
Gen 17, Best Fitness: 0.094383
Gen 18, Best Fitness: 0.094383
Gen 19, Best Fitness: 0.000000
Gen 20, Best Fitness: 0.000000

Best tree approximates f(x) = x^2 + sin(x)
