In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

np.random.seed(42)

N = 100
x = np.random.uniform(0, 10, N)
y = 2.0 + 1.5 * x + 0.1 * x*2 - 0.01 * x*3 + np.random.normal(0, 3.0, N)

# Matriz de diseño para grados 1 a 5
grados = range(1, 6)
X_list = []
for d in grados:
    X_d = np.column_stack([x**k for k in range(d + 1)])
    X_list.append(X_d)

In [3]:
def aic_bic(n, rss, k):
    aic = n * np.log(rss / n) + 2 * k
    bic = n * np.log(rss / n) + np.log(n) * k
    return aic, bic

aic_values = []
bic_values = []

for X_d in X_list:
    betas = np.linalg.lstsq(X_d, y, rcond=None)[0]
    y_pred = X_d @ betas
    rss = np.sum((y - y_pred)**2)
    k = X_d.shape[1]  # número de parámetros
    aic, bic = aic_bic(N, rss, k)
    aic_values.append(aic)
    bic_values.append(bic)

print("Grado | AIC | BIC")
for d, a, b in zip(grados, aic_values, bic_values):
    print(f"{d}     | {a:.1f} | {b:.1f}")

best_aic = np.argmin(aic_values) + 1
best_bic = np.argmin(bic_values) + 1
print(f"\nMejor grado por AIC: {best_aic}")
print(f"Mejor grado por BIC: {best_bic}")

Grado | AIC | BIC
1     | 202.2 | 207.4
2     | 200.5 | 208.3
3     | 201.9 | 212.3
4     | 202.7 | 215.8
5     | 204.5 | 220.1

Mejor grado por AIC: 2
Mejor grado por BIC: 1
