In [14]:
from typing import List, Tuple
import math
import random
import tqdm

In [2]:
def predict(alpha: float, beta: float, x_i: float) -> float:
    """Return the value of the line beta * x_i + alpha at x_i.

    alpha is the intercept and beta is the slope of the line.
    """
    slope_term = beta * x_i
    return slope_term + alpha

In [3]:
def error(alpha: float, beta: float, x_i: float, y_i: float) -> float:
    """Return the signed error of predicting beta * x_i + alpha when the actual value is y_i.

    The result is positive when the prediction is above y_i.
    """
    predicted = predict(alpha, beta, x_i)
    return predicted - y_i

In [4]:
# Alias used throughout the notebook for a plain list of floats.
Vector = List[float]

def sum_of_sqerrors(alpha: float, beta: float, x: Vector, y: Vector) -> float:
    """Return the sum of squared errors of predicting beta * x_i + alpha for each y_i.

    x and y are paired positionally with zip().
    """
    residuals = [error(alpha, beta, x_i, y_i) for x_i, y_i in zip(x, y)]
    return sum(residual ** 2 for residual in residuals)

In [5]:
def mean(xs: List[float]) -> float:
    """Return the arithmetic mean of xs (sum divided by length)."""
    total = sum(xs)
    count = len(xs)
    return total / count

def de_mean(xs: List[float]) -> List[float]:
    """Shift xs by subtracting its mean, so the returned list has mean 0."""
    center = mean(xs)
    shifted = []
    for value in xs:
        shifted.append(value - center)
    return shifted

def variance(xs: List[float]) -> float:
    """Return the sample variance of xs.

    This is almost the average squared deviation from the mean: the sum of
    squared deviations is divided by n - 1 rather than n.

    Raises:
        ValueError: if xs has fewer than two elements, since the n - 1
            divisor would be zero (or the mean undefined).
    """
    n = len(xs)
    if n < 2:
        raise ValueError("variance requires at least two elements")
    deviations = de_mean(xs)
    return sum(d ** 2 for d in deviations) / (n - 1)

def standard_deviation(xs: List[float]) -> float:
    """Return the square root of variance(xs)."""
    spread = variance(xs)
    return math.sqrt(spread)

def covariance(xs: List[float], ys: List[float]) -> float:
    """Return the sample covariance of xs and ys (n - 1 divisor).

    The de-meaned values are paired positionally, multiplied, summed and
    divided by n - 1.

    Raises:
        ValueError: if xs and ys differ in length (zip() would otherwise
            silently drop the unmatched tail while n and the means still
            reflected the full lists), or if there are fewer than two
            points (the n - 1 divisor would be zero).
    """
    n = len(xs)
    if n != len(ys):
        raise ValueError(
            f"covariance requires xs and ys of equal length (got {n} and {len(ys)})"
        )
    if n < 2:
        raise ValueError("covariance requires at least two elements")
    products = (dx * dy for dx, dy in zip(de_mean(xs), de_mean(ys)))
    return sum(products) / (n - 1)

def correlation(xs: List[float], ys: List[float]) -> float:
    """Return covariance(xs, ys) divided by the product of the two standard deviations.

    Returns 0 when either standard deviation is not strictly positive.
    """
    spread_x = standard_deviation(xs)
    spread_y = standard_deviation(ys)
    # Guard clause: with no variation in either input the correlation is taken as zero.
    if not (spread_x > 0 and spread_y > 0):
        return 0
    return covariance(xs, ys) / (spread_x * spread_y)

In [6]:
def least_squares_fit(x: Vector, y: Vector) -> Tuple[float, float]:
    """Given vectors x and y, return the least-squares (alpha, beta) for y ~ beta * x + alpha.

    beta is correlation(x, y) * standard_deviation(y) / standard_deviation(x);
    alpha is then chosen as mean(y) - beta * mean(x).
    """
    scaled_correlation = correlation(x, y) * standard_deviation(y)
    beta = scaled_correlation / standard_deviation(x)
    alpha = mean(y) - beta * mean(x)
    return alpha, beta

In [7]:
# Synthetic data lying exactly on the line y = 3x - 5.
x = list(range(-100, 110, 10))
y = [3 * i - 5 for i in x]

# The fit should recover alpha = -5 and beta = 3. The result is computed
# through square roots and divisions, so compare with a tolerance instead of
# relying on exact floating-point equality.
assert all(
    math.isclose(fitted, expected, rel_tol=1e-9, abs_tol=1e-9)
    for fitted, expected in zip(least_squares_fit(x, y), (-5.0, 3.0))
), "least_squares_fit should recover alpha = -5 and beta = 3"

In [9]:
# Sample data: the two lists are paired positionally by the fitting helpers.
# NOTE(review): by a manual count these two lists do not appear to be the same
# length (84 vs. 85 entries) — confirm with len(). Code that pairs them with
# zip() silently ignores any unmatched tail.
num_friends_good = [49, 41, 40, 25, 21, 21, 19, 19, 18, 18, 16, 15, 15, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1]

daily_minutes_good = [68.75, 51.25, 52.08, 38.36, 44.54, 57.13, 51.4, 41.42, 31.25, 34.76, 54.01, 38.79, 47.59, 49.1, 27.66, 41.03, 36.73, 48.65, 28.12, 46.62, 35.57, 32.98, 35, 26.07, 23.77, 39.73, 40.57, 31.65, 31.21, 36.32, 20.45, 21.93, 26.02, 27.34, 23.49, 46.94, 30.5, 33.8, 24.23, 21.4, 27.94, 32.24, 40.57, 25.07, 19.42, 22.39, 18.42, 46.96, 23.72, 26.41, 26.97, 36.76, 40.32, 35.02, 29.47, 30.2, 31, 36.27, 18.89, 21.57, 27.57, 44.29, 27.91, 31, 33.06, 31.37, 22.26, 25.56, 25.14, 12.75, 31.91, 21.07, 13.58, 31.22, 18.89, 12.65, 17.51, 15, 13.99, 15.38, 4.97, 11.27, 16.1, 11.38, 12.74]

# Fit the regression line to the sample data; alpha and beta become module-level names.
alpha, beta = least_squares_fit(num_friends_good, daily_minutes_good)
# Range checks below are disabled — presumably because this data set does not
# reproduce those reference values; TODO confirm and either restore or remove.
#assert 22.9 < alpha < 23.0
#assert 0.9 < beta < 0.905

In [13]:
def total_sum_of_squares(y: Vector) -> float:
    """Return the total sum of squared deviations of each y_i from the mean of y."""
    centered = de_mean(y)
    return sum(deviation ** 2 for deviation in centered)

def r_squared(alpha: float, beta: float, x: Vector, y: Vector) -> float:
    """Return the proportion of the variance in y that is explained by x.

    Computed as 1 minus the ratio of the model's sum of squared errors to
    the total sum of squares of y.
    """
    unexplained = sum_of_sqerrors(alpha, beta, x, y)
    total = total_sum_of_squares(y)
    return 1.0 - (unexplained / total)

# R-squared of the line defined by the module-level alpha and beta on the sample data.
rsq = r_squared(alpha, beta, num_friends_good, daily_minutes_good)

In [20]:
def gradient_step(v: Vector, gradient: Vector, step_size: float) -> Vector:
    """Move step_size in the direction of gradient, starting from v.

    Returns a new list whose i-th entry is v[i] + step_size * gradient[i];
    v itself is not modified. Pass a negative step_size to move against the
    gradient.

    Raises:
        ValueError: if v and gradient differ in length. zip() would
            otherwise silently truncate to the shorter one and return a
            vector with fewer components than v.
    """
    if len(v) != len(gradient):
        raise ValueError(
            f"v and gradient must have the same length (got {len(v)} and {len(gradient)})"
        )
    return [v_i + step_size * g_i for v_i, g_i in zip(v, gradient)]

In [25]:
# Fit alpha and beta by gradient descent on the sum of squared errors.
num_epochs = 10000
random.seed(0)  # fixed seed so the random starting point is reproducible
guess = [random.random(), random.random()]  # starting [alpha, beta]
learning_rate = 0.00001

# Iterate over the progress bar and use 't' to show the current loss.
with tqdm.trange(num_epochs) as t:
    for _ in t:
        alpha, beta = guess

        # Residuals at the current guess, computed once per epoch and reused
        # for both partial derivatives and for the loss.
        errors = [error(alpha, beta, x_i, y_i)
                  for x_i, y_i in zip(num_friends_good, daily_minutes_good)]

        # Partial derivative of the loss with respect to alpha
        grad_a = sum(2 * e for e in errors)

        # Partial derivative of the loss with respect to beta
        grad_b = sum(2 * e * x_i for e, x_i in zip(errors, num_friends_good))

        # Show the current sum of squared errors in the progress bar description
        loss = sum(e ** 2 for e in errors)
        t.set_description(f"loss: {loss:.3f}")

        # Step against the gradient (negative step size) to reduce the loss
        guess = gradient_step(guess, [grad_a, grad_b], -learning_rate)

# Final result; note this rebinds the module-level alpha and beta.
alpha, beta = guess
print(f"\nAlpha: {alpha:.4f}, Beta: {beta:.4f}")

loss: 5888.364: 100%|██████████| 10000/10000 [00:17<00:00, 563.91it/s]


Alpha: 21.3155, Beta: 1.0184



