In [13]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import resample  # solo para bootstrap

np.random.seed(42)

# Datos complejos (para ver reduccion de variance)
def true_function(x):
    return np.sin(6 * np.pi * x) + np.cos(4 * np.pi * x) + 0.3 * x**3

N = 200
x = np.sort(np.random.uniform(-2, 2, N))
y = true_function(x) + np.random.normal(0, 0.5, N)

x_test = np.linspace(-2, 2, 300)
y_true = true_function(x_test)

In [None]:
def regression_tree_fit(x, y, max_depth=10, min_samples_split=5):
    def find_best_split(x, y):
        best_split = None
        best_rss = np.inf
        for split in np.unique(x):
            left = x < split
            right = ~left
            if np.sum(left) < min_samples_split or np.sum(right) < min_samples_split:
                continue
            rss_left = np.sum((y[left] - np.mean(y[left]))**2)
            rss_right = np.sum((y[right] - np.mean(y[right]))**2)
            rss_total = rss_left + rss_right
            if rss_total < best_rss:
                best_rss = rss_total
                best_split = split
        return best_split

    def build_tree(x, y, depth=0):
        if depth >= max_depth or len(y) < min_samples_split:
            return np.mean(y)
        split = find_best_split(x, y)
        if split is None:
            return np.mean(y)
        left = x < split
        right = ~left
        return {
            'split': split,
            'left': build_tree(x[left], y[left], depth + 1),
            'right': buid_tree(x[right], y[right], depth + 1)
        }

    return build_tree(x, y)
def predict_tree(tree, x):
    if not isinstance(tree, dict):
        return tree
    return predict_tree(tree['left'], x) if x < tree['split'] else predict_tree(tree['right'], x)  

In [15]:
B = 50  # número de árboles (más = mejor reducción de variance)

y_pred_bag = np.zeros(len(x_test))
trees = []

for b in range(B):
    x_boot, y_boot = resample(x, y, replace=True)
    tree_b = regression_tree_fit(x_boot, y_boot, max_depth=10)
    trees.append(tree_b)
    y_pred_b += np.array([predict_tree(tree_b, xi) for xi in x_test])

y_pred_bag /= B  # promedio

plt.figure(figsize=(10, 6))
plt.scatter(x, y, color='blue', alpha=0.4, label='Datos')
plt.plot(x_test, y_true, 'g--', label='Verdadera')
plt.plot(x_test, y_pred_bag, 'r-', linewidth=2, label=f'Bagging ({B} árboles, depth=10)')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Bagging reduce varianza (Capítulo 7)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

NameError: name 'rigth' is not defined