## Cargar Librerías

In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras

## Cargar Datasets

In [2]:
# Read the training and evaluation splits from CSV files in the working directory.
train_data = pd.read_csv(filepath_or_buffer='train_dataset.csv')
test_data = pd.read_csv(filepath_or_buffer='test_dataset.csv')

## Datos Evaluación

In [3]:
# Split the evaluation frame into the regression target and the model inputs.
y_test = test_data['normalized_rating']

# Everything that is neither an identifier, an index nor the target is kept
# as the additional feature matrix.
X_test_features = test_data.drop(
    columns=['user_index', 'book_index', 'normalized_rating', 'user_id', 'book_id']
)

# Integer index columns (fed to the embedding inputs of the models below).
X_test_book = test_data['book_index']
X_test_user = test_data['user_index']

## Datos Entrenamiento

In [4]:
# Split the training frame into the model inputs and the regression target.
X_train_user = train_data['user_index']
X_train_book = train_data['book_index']
X_train_additional = train_data.drop(
    columns=['user_index', 'book_index', 'normalized_rating', 'user_id', 'book_id']
)
y_train = train_data['normalized_rating']

# Embedding tables need one row per index value in 0..max_index.
# Size them from BOTH splits: an index that occurs only in test_data would
# otherwise fall outside the table when the model is used for prediction.
n_users = int(max(train_data['user_index'].max(), test_data['user_index'].max())) + 1
n_books = int(max(train_data['book_index'].max(), test_data['book_index'].max())) + 1

## Modelo

In [13]:
def create_model(num_users, num_books, num_features=104, embedding_size=100):
    """Build and compile the baseline hybrid rating model.

    The model has three inputs, in this order: a user index, a book index and
    a vector of additional features. The two indices go through separate
    embedding layers; the feature vector goes through a small dense stack;
    both representations are concatenated and reduced to a single sigmoid
    output.

    Args:
        num_users: Number of rows of the user embedding table
            (must be larger than the largest user index fed to the model).
        num_books: Number of rows of the book embedding table
            (must be larger than the largest book index fed to the model).
        num_features: Width of the additional feature vector. Defaults to 104,
            the value that used to be hard-coded here.
        embedding_size: Dimension of each embedding vector. Defaults to 100.

    Returns:
        A compiled ``keras.Model`` (Adam optimizer, mean squared error loss,
        mean absolute error metric).
    """
    # Inputs for the collaborative-filtering part (one index per sample).
    user_input = keras.layers.Input(shape=(1,), name='user_input')
    book_input = keras.layers.Input(shape=(1,), name='book_input')

    # Learnable embeddings for users and books.
    user_embedding = keras.layers.Embedding(
        input_dim=num_users, output_dim=embedding_size, name='user_embedding'
    )(user_input)
    book_embedding = keras.layers.Embedding(
        input_dim=num_books, output_dim=embedding_size, name='book_embedding'
    )(book_input)

    # Flatten the embedding outputs so they can be concatenated with dense outputs.
    user_vector = keras.layers.Flatten()(user_embedding)
    book_vector = keras.layers.Flatten()(book_embedding)

    # Collaborative representation: user and book vectors side by side.
    collaborative_vector = keras.layers.Concatenate()([user_vector, book_vector])

    # Input for the additional features; its width is configurable so the
    # model follows the dataset instead of a fixed column count.
    book_features_input = keras.Input(shape=(num_features,), name='book_features_input')

    # Dense stack that processes the additional features.
    x = keras.layers.Dense(128, activation='relu')(book_features_input)
    x = keras.layers.Dense(64, activation='relu')(x)

    # Merge the collaborative and feature-based representations.
    combined_vector = keras.layers.Concatenate()([collaborative_vector, x])

    # Final dense layers; the sigmoid keeps the prediction in (0, 1).
    x = keras.layers.Dense(64, activation='relu')(combined_vector)
    x = keras.layers.Dense(32, activation='relu')(x)
    output = keras.layers.Dense(1, activation='sigmoid')(x)

    model = keras.Model(inputs=[user_input, book_input, book_features_input], outputs=output)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

    return model

## Crear y Entrenar Modelo

In [14]:
# Build the baseline model, sized by the number of user and book indices.
model = create_model(num_users=n_users, num_books=n_books)

# Fit on the three training inputs; validation_split=0.2 reserves 20% of the
# rows for validation.
model.fit(
    x=[X_train_user, X_train_book, X_train_additional],
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10


ValueError: in user code:

    File "c:\Users\ikera\miniconda3\envs\recommendador\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\ikera\miniconda3\envs\recommendador\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\ikera\miniconda3\envs\recommendador\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\ikera\miniconda3\envs\recommendador\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\ikera\miniconda3\envs\recommendador\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\ikera\miniconda3\envs\recommendador\lib\site-packages\keras\engine\input_spec.py", line 216, in assert_input_compatibility
        raise ValueError(

    ValueError: Layer "model_2" expects 2 input(s), but it received 3 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 1) dtype=int64>, <tf.Tensor 'IteratorGetNext:1' shape=(None, 1) dtype=int64>, <tf.Tensor 'IteratorGetNext:2' shape=(None, 104) dtype=float64>]


## Predicción del modelo

In [11]:
# Predict on the evaluation inputs and flatten the result to a 1-D array.
y_pred = model.predict([X_test_user, X_test_book, X_test_features]).flatten()



## Métricas del modelo

In [12]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Compute the regression metrics for the current predictions...
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is derived from the MSE
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# ...and report them in a fixed order: MSE, RMSE, MAE, R-squared.
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"MAE: {mae}")
print(f"R-squared: {r2}")

# Mean Absolute Percentage Error (MAPE)
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent, over non-zero targets.

    Samples whose true value is exactly zero are excluded, because the
    relative error is undefined for them (division by zero).

    Args:
        y_true: Array-like of true values (list, NumPy array, pandas Series...).
        y_pred: Array-like of predicted values with the same number of elements.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100`` computed over the samples
        with a non-zero true value, or NaN when no such sample exists.
    """
    # Convert both inputs to flat float arrays so the comparison is purely
    # positional: no pandas index alignment, plain lists are accepted, and
    # (n, 1) prediction arrays behave like (n,) ones.
    true_values = np.asarray(y_true, dtype=float).ravel()
    predicted_values = np.asarray(y_pred, dtype=float).ravel()

    # Keep only the samples for which the relative error is defined.
    nonzero_mask = true_values != 0
    if not nonzero_mask.any():
        return float('nan')

    kept_true = true_values[nonzero_mask]
    kept_pred = predicted_values[nonzero_mask]
    return np.mean(np.abs((kept_true - kept_pred) / kept_true)) * 100

# Compute and print the MAPE; zero-valued targets are skipped by the helper, which avoids infinite values
mape = mean_absolute_percentage_error(y_test, y_pred)
print(f"MAPE: {mape}%")




MSE: 0.06853379636177609
RMSE: 0.26178960323468936
MAE: 0.19887129442380075
R-squared: -0.11984777840211969
MAPE: 32.610764423944254%


## Modelo optimizado

In [66]:
from tensorflow.keras import regularizers

def create_improved_model(num_users, num_books, num_features,
                          embedding_size=150, l2_strength=0.005,
                          dropout_rate=0.1, learning_rate=0.00005):
    """Build and compile the regularized variant of the rating model.

    User and book indices are embedded, flattened and concatenated directly
    with the additional feature vector. The merged vector goes through three
    L2-regularized dense layers (256, 128, 64 units), with dropout after the
    first two, and ends in a single sigmoid unit.

    Args:
        num_users: Number of rows of the user embedding table.
        num_books: Number of rows of the book embedding table.
        num_features: Width of the additional feature vector.
        embedding_size: Dimension of each embedding vector. Defaults to 150.
        l2_strength: L2 penalty applied to the kernels of the hidden dense
            layers. Defaults to 0.005.
        dropout_rate: Dropout rate used after the first two hidden layers.
            Defaults to 0.1.
        learning_rate: Learning rate of the Adam optimizer. Defaults to 0.00005.

    Returns:
        A compiled ``keras.models.Model`` (Adam optimizer, MSE loss, mean
        absolute error metric) taking ``[user, book, features]`` as inputs.
    """
    # Inputs: user index, book index and the additional feature vector.
    user_input = keras.layers.Input(shape=(1,))
    book_input = keras.layers.Input(shape=(1,))
    features_input = keras.layers.Input(shape=(num_features,))

    # Learnable embeddings for users and books.
    user_embedding = keras.layers.Embedding(input_dim=num_users, output_dim=embedding_size)(user_input)
    book_embedding = keras.layers.Embedding(input_dim=num_books, output_dim=embedding_size)(book_input)

    # Flatten the embedding outputs so they can be concatenated with the features.
    user_vector = keras.layers.Flatten()(user_embedding)
    book_vector = keras.layers.Flatten()(book_embedding)

    merged = keras.layers.Concatenate()([user_vector, book_vector, features_input])

    # Hidden stack: (units, whether dropout follows). Layers are created in the
    # same order as before: Dense, Dropout, Dense, Dropout, Dense.
    hidden = merged
    for units, followed_by_dropout in ((256, True), (128, True), (64, False)):
        hidden = keras.layers.Dense(
            units,
            activation='relu',
            kernel_regularizer=regularizers.l2(l2_strength),
        )(hidden)
        if followed_by_dropout:
            # Dropout is added to reduce overfitting.
            hidden = keras.layers.Dropout(dropout_rate)(hidden)

    # Output: predicted rating squashed into (0, 1) by the sigmoid.
    output = keras.layers.Dense(1, activation='sigmoid')(hidden)

    model = keras.models.Model(inputs=[user_input, book_input, features_input], outputs=output)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mse',
        metrics=['mean_absolute_error'],
    )

    return model

# Size the feature input from the number of columns of the training feature
# frame, then build the model with the tuned hyperparameters.
num_features = len(X_train_additional.columns)
improved_model = create_improved_model(
    num_users=n_users,
    num_books=n_books,
    num_features=num_features,
)

# Train the model and keep the returned History object;
# validation_split=0.2 reserves 20% of the rows for validation.
history = improved_model.fit(
    x=[X_train_user, X_train_book, X_train_additional],
    y=y_train,
    batch_size=2048,
    epochs=100,
    validation_split=0.2,
)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

## Predecir con el nuevo modelo

In [67]:
# Predict with the improved model and flatten the result to a 1-D array.
# Note: this overwrites the baseline model's y_pred.
y_pred = improved_model.predict([X_test_user, X_test_book, X_test_features]).flatten()



## Nuevas Métricas

In [68]:
# Metric report for the improved model's predictions (same metrics as for the
# baseline model, computed on the current y_pred).

# MSE
mse = mean_squared_error(y_test, y_pred)
print(f"MSE: {mse}")

# RMSE
rmse = np.sqrt(mse)
print(f"RMSE: {rmse}")

# MAE
mae = mean_absolute_error(y_test, y_pred)
print(f"MAE: {mae}")

# R-squared
r2 = r2_score(y_test, y_pred)
print(f"R-squared: {r2}")

# Mean Absolute Percentage Error (MAPE)
# mean_absolute_percentage_error is already defined in the first metrics cell
# ("Métricas del modelo"), which also provides the imports used here; defining
# it a second time would only shadow an identical function.
mape = mean_absolute_percentage_error(y_test, y_pred)
print(f"MAPE: {mape}%")

MSE: 0.05320316046716041
RMSE: 0.23065810297312428
MAE: 0.183919363627908
R-squared: 0.13065605269240876
MAPE: 30.31144623862458%


## Exportar resultados para usar en Power BI

In [70]:
# Assemble the evaluation results: the user and book indices, the true rating
# and the rating predicted by the most recently evaluated model.
results_df = pd.DataFrame(
    data={
        'user_index': X_test_user,
        'book_index': X_test_book,
        'true_rating': y_test,
        'predicted_rating': y_pred,
    }
)

# Write the results to a CSV file, without the DataFrame index column.
results_df.to_csv(path_or_buf='predictions.csv', index=False)