In [3]:

import pandas as pd
import urllib.request
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Lectura de datos y normalización
url = 'http://lib.stat.cmu.edu/datasets/boston'
filename = 'boston_housing.csv'
urllib.request.urlretrieve(url, filename)

df = pd.read_csv(filename, delimiter='', header=None)

scaler = StandardScaler()
X = scaler.fit_transform(df.iloc[:, :-1])
y = df.iloc[:, -1]

# Agregamos una columna de unos para el término de sesgo (bias)
X = np.hstack((np.ones((X.shape[0], 1)), X))

# División de los datos
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Funciones de costo y gradiente
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def cost(theta, X, y, reg_factor):
    m = len(y)
    h = sigmoid(X @ theta)
    cost = (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()
    reg_term = reg_factor / (2 * m) * np.sum(theta[1:]**2)
    return cost + reg_term

def gradient(theta, X, y, reg_factor):
    m = len(y)
    h = sigmoid(X @ theta)
    grad = (1 / m) * X.T @ (h - y)
    reg_term = (reg_factor / m) * np.concatenate(([0], theta[1:]))
    return grad + reg_term

# Función de entrenamiento
def train(X_train, y_train, reg_factor, alpha, num_iters):
    m, n = X_train.shape
    theta = np.zeros(n)
    costs = []
    for i in range(num_iters):
        cost_i = cost(theta, X_train, y_train, reg_factor)
        grad_i = gradient(theta, X_train, y_train, reg_factor)
        theta -= alpha * grad_i
        costs.append(cost_i)
    return theta, costs

# Experimento 1
theta, costs = train(X_train, y_train, reg_factor=0, alpha=0.1, num_iters=1000)

y_pred = sigmoid(X_test @ theta)
mse = ((y_pred - y_test)**2).mean()
print('Experimento 1 - Error cuadrático medio:', mse)

plt.plot(costs)
plt.xlabel('Iteraciones')
plt.ylabel('Costo')
plt.title('Experimento 1 - Costo en función de las iteraciones')
plt.show()

# Experimento 2
theta, costs = train(X_train, y_train, reg_factor=0.1, alpha=0.1, num_iters=1000)

y_pred = sigmoid(X_test @ theta)
mse = ((y_pred - y_test)**2).mean()
print('Experimento 2 - Error cuadrático medio:', mse)

plt.plot(costs)
plt.xlabel('Iteraciones')
plt.ylabel('Costo')
plt.title('Experimento 2 - Costo en función de las iteraciones')
plt.show()

# Experimento 3
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

theta, costs = train(X_test, y_test, reg_factor=0, alpha=0.1, num_iters=1000)

y_pred = sigmoid(X_train @ theta)
mse = ((y_pred - y_train)**2).mean()
print('Experimento 3 - Error cuadrático medio:', mse)

plt.plot(costs)
plt.xlabel('Iteraciones')
plt.ylabel('Costo')
plt.title('Experimento 3 - Costo en función de las iteraciones')
plt.show()

ValueError: ignored