### 1. Importamos librerías y cargamos los datos procesados

In [1]:
import os
import random
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Location of the pre-processed train/test splits, read with pandas.
processed_dir = 'data/processed'
train_path = os.path.join(processed_dir, 'train.csv')
test_path = os.path.join(processed_dir, 'test.csv')
train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)


### 2. Declaramos nuestros hiperparámetros

In [2]:
# Model hyperparameters (Greek letters follow the usual factorisation notation).
NUM_FACTORS = 7         # f: length of each user / item factor vector
LEARNING_RATE = 1e-3    # γ: step size applied to every gradient
REGULARIZATION = 1e-1   # λ: weight of the L2 penalty on the factors
NUM_ITERATIONS = 20     # number of training epochs
MIN_RATING = 1.0        # lower bound used when clipping predictions
MAX_RATING = 5.0        # upper bound used when clipping predictions

In [3]:
# 3. Build the rating matrix R as a dense list of lists; 0.0 marks "no rating".
# IDs are treated as 1-based: the largest ID found in either split fixes the
# matrix dimensions, and every ID is shifted down by one to obtain an index.
num_users = int(max(train_df.user_id.max(), test_df.user_id.max()))
num_items = int(max(train_df.book_id.max(), test_df.book_id.max()))
R = [[0.0] * num_items for _ in range(num_users)]

# Walk the three columns in lockstep rather than using DataFrame.iterrows():
# no per-row Series is built, and each value keeps its own column's dtype
# instead of being coerced to a single dtype for the whole row.
for user_id, book_id, rating in zip(
    train_df.user_id, train_df.book_id, train_df.rating
):
    R[int(user_id) - 1][int(book_id) - 1] = float(rating)

# 4. Initialise P (one row per user) and Q (one row per item) with
# random.random() draws; the fixed seed makes the starting point repeatable.
random.seed(42)


def _uniform_rows(n_rows):
    """Return n_rows lists, each holding NUM_FACTORS random.random() draws."""
    return [
        [random.random() for _factor in range(NUM_FACTORS)]
        for _row in range(n_rows)
    ]


P = _uniform_rows(num_users)  # every user row is drawn first ...
Q = _uniform_rows(num_items)  # ... and the item rows are drawn afterwards

# 5. Training: full-batch gradient descent on the regularised squared error.
# Within an epoch every gradient is evaluated against the P and Q left by the
# previous epoch; the steps accumulate in copies that replace P and Q only
# once the epoch is over.

# R never changes during training, so its non-zero cells are collected once
# instead of rescanning every cell of the dense matrix on each epoch.
# Row-major order is kept so the updates accumulate in the same order as a
# full scan of R would produce.
observed_ratings = [
    (u, i, r_ui)
    for u, user_row in enumerate(R)
    for i, r_ui in enumerate(user_row)
    if r_ui != 0
]

for it in range(NUM_ITERATIONS):
    print(f"Iteración {it+1} de {NUM_ITERATIONS}")
    # Epoch-local copies that receive the accumulated updates.
    updated_P = [row.copy() for row in P]
    updated_Q = [row.copy() for row in Q]

    for u, i, r_ui in observed_ratings:
        p_u, q_i = P[u], Q[i]
        new_p_u, new_q_i = updated_P[u], updated_Q[i]

        # Prediction error for this rating under the previous epoch's factors.
        e = r_ui - sum(p_u[k] * q_i[k] for k in range(NUM_FACTORS))

        for k in range(NUM_FACTORS):
            p_uk = p_u[k]
            q_ik = q_i[k]
            # Step direction: error-weighted factor minus the L2 shrinkage.
            grad_p = e * q_ik - REGULARIZATION * p_uk
            grad_q = e * p_uk - REGULARIZATION * q_ik
            new_p_u[k] += LEARNING_RATE * grad_p
            new_q_i[k] += LEARNING_RATE * grad_q

    P, Q = updated_P, updated_Q

# 6. Evaluation on the test split: predict each held-out rating as the dot
# product of its user and item factors, clipped to the valid rating range.
y_true, y_pred = [], []
# Column-wise zip instead of DataFrame.iterrows(): no per-row Series is
# created and each value keeps its own column's dtype.
for user_id, book_id, rating in zip(
    test_df.user_id, test_df.book_id, test_df.rating
):
    p_u = P[int(user_id) - 1]  # IDs are 1-based, factor rows are 0-based
    q_i = Q[int(book_id) - 1]
    y_true.append(rating)
    pred = sum(p_u[k] * q_i[k] for k in range(NUM_FACTORS))
    y_pred.append(float(np.clip(pred, MIN_RATING, MAX_RATING)))

rmse = np.sqrt(mean_squared_error(y_true, y_pred))
mae = mean_absolute_error(y_true, y_pred)
print(f"\nPMF → RMSE: {rmse:.4f}, MAE: {mae:.4f}")


KeyboardInterrupt: 