In [25]:
import urllib.request

import math
import numpy as np

from keras.models import Model
from keras.layers import Embedding, Flatten, Input, Dense, Concatenate, Dot

from funciones import preprocesar_dataframe_animes
np.random.seed = 42

In [26]:
(train_df, test_df, 
 ratings_train_matrix, ratings_test_matrix,
 NUM_USERS, NUM_ANIMES, MIN_RATING, MAX_RATING, SCORES) = \
    preprocesar_dataframe_animes(
    dataframe_path='csv/rating.csv',
    n_user_ratings=5,
    n_anime_ratings=5,
    num_users=1000,
    test_size=0.2, 
    RANDOM_STATE=42 
    )

Cargamos los votos de entrenamiento. La carga de estos datos, por imposición de `keras` no se hace en una matriz como en los modelos de factorización matricial. Se generan dos *arrays* con los códigos de los usuarios y los ítems y un tercer *array* con las votaciones:

In [27]:
X_train = [np.array([], dtype=int), np.array([], dtype=int)]
y_train = np.array([], dtype=int)

In [28]:
for u in range(NUM_USERS):
  for i in range(NUM_ANIMES):
    if ratings_train_matrix[u][i] != None:
        X_train[0] = np.append(X_train[0], int(u))
        X_train[1] = np.append(X_train[1], int(i))
        y_train = np.append(y_train, int(ratings_train_matrix[u][i]))

Cargamos también los votos de test del mismo modo:

In [29]:
X_test = [np.array([], dtype=int), np.array([], dtype=int)]
y_test = np.array([], dtype=int)

for u in range(NUM_USERS):
  for i in range(NUM_ANIMES):
    if ratings_test_matrix[u][i] != None:
        X_test[0] = np.append(X_test[0], int(u))
        X_test[1] = np.append(X_test[1], int(i))
        y_test = np.append(y_test, int(ratings_test_matrix[u][i]))

Los hiper-parámetros de nuestro modelo serán el número de factores latentes (`latent_dim`) y el número de iteraciones del entrenamiento (`epochs`).

In [30]:
latent_dim = 7
epochs = 5

Definimos la arquitectura, en este caso con tres capas densas para el MLP .



In [None]:
user_input = Input(shape=[1])
user_embedding = Embedding(NUM_USERS, latent_dim)(user_input)
user_vec = Flatten()(user_embedding)

item_input = Input(shape=[1])
item_embedding = Embedding(NUM_ANIMES, latent_dim)(item_input)
item_vec = Flatten()(item_embedding)

concat = Concatenate(axis=1)([user_vec, item_vec])
d1 = Dense(32, activation='relu')(concat)
d2 = Dense(16, activation='relu')(d1)
d3 = Dense(8, activation='relu')(d2)
output = Dense(1, activation='relu')(d3)

MLP = Model([user_input, item_input], output)

In [48]:
MLP.compile(optimizer='adam', metrics=['mae'], loss='mean_squared_error')
MLP.summary()
MLP.fit(X_train, y_train, epochs=epochs, verbose=1)

Epoch 1/5
[1m2528/2528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 22.3062 - mae: 3.5645
Epoch 2/5
[1m2528/2528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 1.4517 - mae: 0.9212
Epoch 3/5
[1m2528/2528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 1.4206 - mae: 0.9101
Epoch 4/5
[1m2528/2528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 1.3793 - mae: 0.8955
Epoch 5/5
[1m2528/2528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 1.3664 - mae: 0.8914


<keras.src.callbacks.history.History at 0x1b9c6afa4b0>

In [42]:
y_pred = MLP.predict(X_test)

y_pred = np.clip(y_pred, MIN_RATING, MAX_RATING)
y_pred = np.round(y_pred).astype(int)
y_pred

[1m632/632[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step


array([[9],
       [9],
       [9],
       ...,
       [6],
       [6],
       [6]])

In [44]:
# Métricas de clasificación
# LATENT DIMS = 7
# EPOCHS = 50
# LAYERS = 2 (20,10)
#     n_user_ratings=5
#    n_anime_ratings=5
from sklearn.metrics import precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error

print("Mean Absolute Error", mean_absolute_error(y_test, y_pred))
print("Mean Squared Error", mean_squared_error(y_test, y_pred))
print("Precision Score:", precision_score(y_test, y_pred, average='weighted'))
print("Recall Score:", recall_score(y_test, y_pred, average='weighted'))
print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))

Mean Absolute Error 0.9111342145650775
Mean Squared Error 1.5529481657507798
Precision Score: 0.40122167782405976
Recall Score: 0.3397692955096787
F1 Score: 0.32010929068013444


In [None]:
# Métricas de clasificación
# LATENT DIMS = 7
# EPOCHS = 50
# LAYERS = 2 (20,10)
#     n_user_ratings=5
#    n_anime_ratings=5
from sklearn.metrics import precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error

print("Mean Absolute Error", mean_absolute_error(y_test, y_pred))
print("Mean Squared Error", mean_squared_error(y_test, y_pred))
print("Precision Score:", precision_score(y_test, y_pred, average='weighted'))
print("Recall Score:", recall_score(y_test, y_pred, average='weighted'))
print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))

Mean Absolute Error 0.9336942485348168
Mean Squared Error 1.7528061984702494
Precision Score: 0.36185198349624476
Recall Score: 0.35219032482368134
F1 Score: 0.3469808118791166


In [None]:
# Métricas de clasificación
# LATENT DIMS = 5
# EPOCHS = 10
# LAYERS = 2 (20,10)
from sklearn.metrics import precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error

print("Mean Absolute Error", mean_absolute_error(y_test, y_pred))
print("Mean Squared Error", mean_squared_error(y_test, y_pred))
print("Precision Score:", precision_score(y_test, y_pred, average='weighted'))
print("Recall Score:", recall_score(y_test, y_pred, average='weighted'))
print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))

Mean Absolute Error 0.8696767954773682
Mean Squared Error 1.528359430207907
Precision Score: 0.39503548763397844
Recall Score: 0.37218730241380593
F1 Score: 0.3617941294297779


In [None]:
# Métricas de clasificación
# LATENT DIMS = 5
# EPOCHS = 10
# LAYERS = 2 (20,10)
from sklearn.metrics import precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error

print("Mean Absolute Error", mean_absolute_error(y_test, y_pred))
print("Mean Squared Error", mean_squared_error(y_test, y_pred))
print("Precision Score:", precision_score(y_test, y_pred, average='weighted'))
print("Recall Score:", recall_score(y_test, y_pred, average='weighted'))
print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))

Mean Absolute Error 0.8393892681494531
Mean Squared Error 1.4003523452983924
Precision Score: 0.4053295391904056
Recall Score: 0.37216472142699847
F1 Score: 0.36303433669826013


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Al final el mejor modelo ha sido este: 

 - LATENT DIMS = 5
 - EPOCHS = 10
 - LAYERS = 3 (32,16,8)

In [None]:
# Métricas de clasificación
# LATENT DIMS = 5
# EPOCHS = 10
# LAYERS = 3 (32,16,8)
from sklearn.metrics import precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error

print("Mean Absolute Error", mean_absolute_error(y_test, y_pred))
print("Mean Squared Error", mean_squared_error(y_test, y_pred))
print("Precision Score:", precision_score(y_test, y_pred, average='weighted'))
print("Recall Score:", recall_score(y_test, y_pred, average='weighted'))
print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))

Mean Absolute Error 0.8428760184981282
Mean Squared Error 1.40883065404096
Precision Score: 0.4065756563743282
Recall Score: 0.37209131615650004
F1 Score: 0.36495205357786786


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from sklearn.metrics import ndcg_score
y_pred_classes = np.argmax(y_pred, axis=1)
# Calcular NDCG para cada clase y promediar los resultados
ndcg_scores = []
for class_label in range(6):
    y_test_class = (y_test == class_label).astype(int)
    y_pred_class = (y_pred_classes == class_label).astype(int)
    ndcg_class = ndcg_score([y_test_class], [y_pred_class])
    ndcg_scores.append(ndcg_class)

# Calcular el promedio de los NDCG scores para todas las clases
average_ndcg = np.mean(ndcg_scores)

print("Average NDCG Score:", average_ndcg)

Average NDCG Score: 0.35265568415735427


También hemos probado algunos con más capas pero tampoco cambiaban mucho las cosas.

In [None]:
# Métricas de clasificación
# LATENT DIMS = 5
# EPOCHS = 10
# LAYERS = 4 (64,32,16,8)
from sklearn.metrics import precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error

print("Mean Absolute Error", mean_absolute_error(y_test, y_pred))
print("Mean Squared Error", mean_squared_error(y_test, y_pred))
print("Precision Score:", precision_score(y_test, y_pred, average='weighted'))
print("Recall Score:", recall_score(y_test, y_pred, average='weighted'))
print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))

Mean Absolute Error 0.8568964251633268
Mean Squared Error 1.4248330030096161
Precision Score: 0.4214430733843706
Recall Score: 0.36137414666373047
F1 Score: 0.3440499068992255


In [None]:
# Métricas de clasificación
# LATENT DIMS = 64
# EPOCHS = 10
# LAYERS = 4 (64,32,16,8)
from sklearn.metrics import precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error

print("Mean Absolute Error", mean_absolute_error(y_test, y_pred))
print("Mean Squared Error", mean_squared_error(y_test, y_pred))
print("Precision Score:", precision_score(y_test, y_pred, average='weighted'))
print("Recall Score:", recall_score(y_test, y_pred, average='weighted'))
print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))

Mean Absolute Error 0.9120971885781399
Mean Squared Error 1.6058504000587241
Precision Score: 0.3690601854335305
Recall Score: 0.3469500110107906
F1 Score: 0.34070124313941613


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
