In [6]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold

# Seed NumPy's global RNG so the synthetic sample below is reproducible.
np.random.seed(42)

# Synthetic data: a straight line plus Gaussian noise.
N = 100
beta0_true, beta1_true = 2.0, 1.5

# Draw order matters for reproducibility: predictor first, then the noise.
x = np.random.uniform(0, 10, N)
noise = np.random.normal(0, 2.0, N)
y = beta0_true + beta1_true * x + noise

# Design matrix: a column of ones (intercept) followed by the predictor.
X = np.ones((N, 2))
X[:, 1] = x

# Minimal ordinary-least-squares solver.
def ols_fit(X, y):
    """Return the least-squares coefficients for the linear system ``X @ beta = y``.

    Parameters
    ----------
    X : array_like
        Design matrix, one row per observation.
    y : array_like
        Response values, one per row of ``X``.

    Returns
    -------
    numpy.ndarray
        The solution vector reported by ``np.linalg.lstsq``; the residuals,
        rank and singular values it also returns are discarded.
    """
    coefficients, _residuals, _rank, _singular_values = np.linalg.lstsq(
        X, y, rcond=None
    )
    return coefficients

In [7]:
# K-fold cross-validation: fit on k-1 folds, score on the held-out fold.
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)

mse_cv = []  # one held-out MSE per fold

for train_idx, test_idx in kf.split(X):
    # Fit using only the training rows of this split.
    X_train, y_train = X[train_idx], y[train_idx]
    betas = ols_fit(X_train, y_train)

    # Score the fitted line on the rows that were held out.
    X_test, y_test = X[test_idx], y[test_idx]
    y_pred = X_test @ betas
    residuals = y_test - y_pred
    mse = np.mean(residuals**2)
    mse_cv.append(mse)

# Average the per-fold errors into a single CV estimate.
mse_cv_mean = np.mean(mse_cv)
print(f"MSE promedio por {k}-fold CV: {mse_cv_mean:.3f}")

MSE promedio por 5-fold CV: 3.346


In [8]:
# LOOCV (leave-one-out): every observation is held out exactly once.
mse_loocv = []  # squared prediction error of each held-out observation
for i in range(N):
    # Boolean mask selecting every row except the i-th one.
    keep = np.arange(N) != i
    X_train, y_train = X[keep], y[keep]

    betas = ols_fit(X_train, y_train)

    # Predict the single held-out observation and record its squared error.
    y_pred = X[i] @ betas
    residual = y[i] - y_pred
    mse_loocv.append(residual**2)

print(f"MSE promedio por LOOCV: {np.mean(mse_loocv):.3f}")

# Bootstrap (1000 resamples): refit on rows drawn with replacement.
# NOTE: the draws come from NumPy's global RNG, so the printed value depends
# on the RNG state left behind by whatever ran before this cell.
n_boot = 1000
mse_boot = []  # one MSE per bootstrap replicate
for _ in range(n_boot):
    # Resample N row indices with replacement and fit on that resample.
    idx = np.random.choice(N, size=N, replace=True)
    X_boot, y_boot = X[idx], y[idx]
    betas = ols_fit(X_boot, y_boot)

    # Score on the full original sample; this includes the rows that were
    # drawn into the resample, not only the left-out ones.
    y_pred = X @ betas
    residuals = y - y_pred
    mse_boot.append(np.mean(residuals**2))

print(f"MSE promedio por Bootstrap: {np.mean(mse_boot):.3f}")

MSE promedio por LOOCV: 3.356
MSE promedio por Bootstrap: 3.288
