# **Importación de paquetes**
---

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout, LSTM
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

2024-07-03 23:56:52.608945: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# **Carga de datos**
---

In [5]:
# Load the variable-selected review dataset and keep only the columns used in
# this notebook (identifier, image and raw-token columns are discarded).
coments = (
    pd.read_csv('../seleccion_vars/coments/DatasetAirbnb_Review_Varselect_v1.csv')
    .drop(columns=['user_id', 'apart_id', 'imagen', 'tokens'])
)

# **Tokenización y padding de comentarios**
---

In [3]:
# Cleaned review text, forced to `str` so non-string entries can be tokenised.
review_texts = coments['text_clean'].astype(str)

# Fit the tokenizer on the reviews with the vocabulary capped at num_words=5000.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(review_texts)

# Turn each review into a list of word indices, then pad/truncate to 100 tokens
# so all sequences share one length.
comment_sequences = tokenizer.texts_to_sequences(review_texts)
comment_input = pad_sequences(comment_sequences, maxlen=100)

In [6]:
# Display the column index of `coments` to check which features are available.
coments.columns

Index(['ubicacion', 'gender', 'rating', 'sentimiento', 'prom_long_word',
       'num_adj', 'text_clean', 'prob_ruido', 'prob_limp', 'prob_ubi',
       'prob_wf', 'prob_park', 'prob_bañ', 'num_toks'],
      dtype='object')

# **Conversión a tensores**
---

In [7]:
def _int_column_tensor(column_name):
    """Return the `coments` column `column_name` as an (n, 1) int32 tensor."""
    column_values = coments[column_name].values.reshape(-1, 1)
    return tf.convert_to_tensor(column_values, dtype=tf.int32)


# One (n, 1) int32 tensor per tabular feature.
# NOTE(review): the `prob_*` columns are converted with dtype int32 as well;
# confirm they hold integer flags and not fractional values.
ubicacion = _int_column_tensor('ubicacion')
genero = _int_column_tensor('gender')
ruido = _int_column_tensor('prob_ruido')
ubi = _int_column_tensor('prob_ubi')
wf = _int_column_tensor('prob_wf')
park = _int_column_tensor('prob_park')
ban = _int_column_tensor('prob_bañ')
num_toks = _int_column_tensor('num_toks')
limp = _int_column_tensor('prob_limp')

# The padded comment sequences are converted as they are (no reshape).
comment_input = tf.convert_to_tensor(comment_input, dtype=tf.int32)

In [8]:
# Regression target: the `rating` column as an (n, 1) float32 tensor.
rating_column = coments['rating'].values.reshape(-1, 1)
ratings = tf.convert_to_tensor(rating_column, dtype=tf.float32)

# **División de los datos en train y test**
---

In [9]:
# Positional hold-out split: the first 80% of the rows are used for training
# and the remaining rows for validation (no shuffling is applied).
split_index = int(len(coments) * 0.8)

# Feature tensors in the order in which they are fed to the model.
feature_tensors = [ubicacion, genero, ruido, ubi, wf, park, ban, num_toks, limp, comment_input]

x_train = [feature[:split_index] for feature in feature_tensors]
y_train = ratings[:split_index]

x_val = [feature[split_index:] for feature in feature_tensors]
y_val = ratings[split_index:]

# **Parámetros de entrada**
---

# **ACABAR DE ADAPTAR**

In [None]:
# Input parameters.
# `user_id` and `apart_id` are dropped from `coments` when the dataset is
# loaded, so the model is built only on the ten feature tensors stored in
# `x_train` / `x_val`.
# Embedding tables are sized as (largest id + 1) so that every encoded id is a
# valid row index (assumes ids are non-negative integer codes — TODO confirm).
num_locations = int(coments['ubicacion'].max()) + 1
num_genders = int(coments['gender'].max()) + 1
embedding_dim = 50
vocab_size = tokenizer.num_words        # vocabulary cap configured on the tokenizer
max_len = int(comment_input.shape[1])   # padded length of every comment sequence

# Scalar features fed to the dense head without an embedding. The order MUST
# match the order of the tensors inside `x_train` / `x_val`.
numeric_feature_names = ['ruido', 'ubi', 'wf', 'park', 'ban', 'num_toks', 'limp']

# Input layers (one per tensor in `x_train`, in the same order).
location_input = Input(shape=(1,), name='location_input')
gender_input = Input(shape=(1,), name='gender_input')
numeric_inputs = [Input(shape=(1,), name=f'{feature}_input') for feature in numeric_feature_names]
comment_input_layer = Input(shape=(max_len,), name='comment_input')

# Embedding layers for the categorical features.
location_embedding = Embedding(input_dim=num_locations, output_dim=embedding_dim, name='location_embedding')(location_input)
gender_embedding = Embedding(input_dim=num_genders, output_dim=embedding_dim, name='gender_embedding')(gender_input)

# Flatten (batch, 1, embedding_dim) -> (batch, embedding_dim).
location_vec = Flatten(name='flatten_location')(location_embedding)
gender_vec = Flatten(name='flatten_gender')(gender_embedding)

# Text processing: word embeddings followed by an LSTM encoder.
comment_embedding = Embedding(input_dim=vocab_size, output_dim=embedding_dim, name='comment_embedding')(comment_input_layer)
comment_lstm = LSTM(124)(comment_embedding)

# Concatenate all features and regress the rating.
concat = Concatenate()([location_vec, gender_vec, *numeric_inputs, comment_lstm])
dense = Dense(64, activation='relu')(concat)
dropout = Dropout(0.5)(dense)
output = Dense(1, activation='linear')(dropout)

# Define the model; the input order mirrors `x_train` / `x_val`.
model = Model(
    inputs=[location_input, gender_input, *numeric_inputs, comment_input_layer],
    outputs=output,
)
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mae', 'mse'])
# `summary()` prints the table itself and returns None, so it is not wrapped in print().
model.summary()

# Keep the model with the lowest validation loss seen during training.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='best_model.h5',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,  # Save the full model, not just the weights
    verbose=1
)

# Training the model
history = model.fit(
    x_train, y_train,
    epochs=100,
    batch_size=64,
    validation_data=(x_val, y_val),
    callbacks=[model_checkpoint_callback],
)

# Predict ratings for the validation split. Per-user / per-item predictions
# are not possible here because the id columns are not part of the features.
predictions = model.predict(x_val)
print("Predicciones de recomendaciones:", predictions.flatten())
