### Load Data

In [23]:
from cornac.eval_methods import RatioSplit
from cornac.data.text import BaseTokenizer
from cornac.data import TextModality
from cornac.data import Reader
import cornac

# Load the MovieLens plot texts together with the ids of the movies they describe,
# then read the 1M ratings through a Reader that is given those ids as its item_set.
plots, movie_ids = cornac.datasets.movielens.load_plot()
plot_reader = Reader(item_set=movie_ids)
ml_1m = cornac.datasets.movielens.load_feedback(variant="1M", reader=plot_reader)

# Wrap the plots in a TextModality so text-aware models (e.g. HFT) can consume them.
# Documents are tokenized on tabs with English stop words removed.
plot_tokenizer = BaseTokenizer(sep="\t", stop_words="english")
item_text_modality = TextModality(
    corpus=plots,
    ids=movie_ids,
    tokenizer=plot_tokenizer,
    max_vocab=5000,
    max_doc_freq=0.5,
)

# Evaluation protocol: seeded ratio split with 10% of the feedback held out for testing.
ratio_split = RatioSplit(
    data=ml_1m,
    test_size=0.1,
    exclude_unknowns=True,
    item_text=item_text_modality,
    verbose=True,
    seed=123,
)

### Create Model

In [31]:
from cornac.metrics import MAE, RMSE, Precision, Recall, NDCG, AUC, MAP, FMeasure
from cornac.hyperopt import GridSearch, RandomSearch, Discrete, Continuous

from cornac.models import Recommender
import tensorflow.keras.backend as K
from tqdm.auto import trange
from cornac import models
import tensorflow as tf
import numpy as np

class ATTREX(Recommender):
    """Dot-product embedding recommender trained with Keras.

    Each user and each item gets an ``embedding_dim``-sized vector; the
    predicted rating is the dot product of the two vectors. The network is
    trained with Adam on a root-mean-squared-error loss.

    Parameters
    ----------
    name : str
        Model name passed to ``Recommender``.
    learning_rate : float
        Learning rate of the Adam optimizer.
    n_epochs : int
        Number of passes over the training data.
    batch_size : int
        Number of (user, item, rating) triples per training batch.
    vocab_size : int or None
        Stored on the instance; not used by the code in this class.
    seed : int or None
        Seed for the embedding initializers. ``None`` leaves them unseeded.
    embedding_dim : int
        Size of the user and item embedding vectors.
    trainable, verbose : bool
        Forwarded to ``Recommender``.
    """

    def __init__(self, name="ATTREX", learning_rate=5e-4, n_epochs=10, batch_size=2048, vocab_size=None, seed=2032, embedding_dim=8, trainable=True, verbose=False):
        Recommender.__init__(self, name=name, trainable=trainable, verbose=verbose)
        self.learning_rate = learning_rate
        self.vocab_size = vocab_size
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.embedding_dim = embedding_dim
        self.seed = seed

        # Lazily computed (num_users x num_items) score matrix, see score().
        self.dot_prods = None

    def __get_model__(self, num_users, num_items):
        """Build and compile the Keras model.

        Parameters
        ----------
        num_users : int
            Number of rows of the user embedding table.
        num_items : int
            Number of rows of the item embedding table.

        Returns
        -------
        tf.keras.Model
            Model with inputs named "user" and "item" and a single output,
            the dot product of the two looked-up embeddings.
        """

        def root_mean_squared_error(y_true, y_pred):
            return K.sqrt(K.mean(K.square(y_pred - y_true)))

        def seeded_uniform(offset):
            # Same distribution as Keras' default "uniform" embedding initializer,
            # but seeded so that `seed` actually controls the initial weights.
            # A distinct offset per table avoids drawing the same random stream twice.
            init_seed = None if self.seed is None else self.seed + offset
            return tf.keras.initializers.RandomUniform(seed=init_seed)

        # User branch: index -> embedding vector.
        # shape must be a tuple: `(1)` is just the int 1, which newer Keras rejects.
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name="user")
        user_embedding = tf.keras.layers.Embedding(
            num_users, self.embedding_dim, embeddings_initializer=seeded_uniform(0), name="user_emb"
        )(user_input)
        user_vec = tf.keras.layers.Flatten()(user_embedding)

        # Item branch: index -> embedding vector.
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name="item")
        item_embedding = tf.keras.layers.Embedding(
            num_items, self.embedding_dim, embeddings_initializer=seeded_uniform(1), name="item_emb"
        )(item_input)
        item_vec = tf.keras.layers.Flatten()(item_embedding)

        # Predicted rating = dot product of the user and item vectors.
        output_layer = tf.keras.layers.Dot(axes=1)([user_vec, item_vec])

        model = tf.keras.Model(inputs=[user_input, item_input], outputs=output_layer)
        model.compile(optimizer=tf.keras.optimizers.Adam(self.learning_rate), loss=root_mean_squared_error)
        return model

    def _to_tf_dataset(self, data_set):
        """Turn a data set's ``uir_tuple`` into a batched, cached, prefetched tf.data pipeline."""
        uir = data_set.uir_tuple
        tfset = tf.data.Dataset.from_tensor_slices(({"user": uir[0], "item": uir[1]}, uir[2]))
        return tfset.batch(self.batch_size).cache().prefetch(tf.data.AUTOTUNE)

    def fit(self, train_set, val_set=None):
        """Train a fresh model on ``train_set``.

        Parameters
        ----------
        train_set : object
            Training data; must expose ``total_users``, ``total_items`` and ``uir_tuple``.
        val_set : object, optional
            Validation data exposing ``uir_tuple``; passed to Keras as validation data.

        Returns
        -------
        self
        """
        Recommender.fit(self, train_set, val_set)

        # A new model is built on every call, so scores cached from a previous
        # fit would be stale: drop them.
        self.dot_prods = None
        self.model = self.__get_model__(train_set.total_users, train_set.total_items)

        train_tfset = self._to_tf_dataset(train_set)
        val_tfset = self._to_tf_dataset(val_set) if val_set is not None else None

        # One Keras fit call per epoch so the tqdm bar advances epoch by epoch.
        with trange(self.n_epochs) as loop:
            for _ in loop:
                self.model.fit(train_tfset, epochs=1, validation_data=val_tfset)

        return self

    def score(self, user_id, item_id=None):
        """Return predicted scores for a user.

        The full user-by-item score matrix is computed from the trained
        embedding tables on first use and cached in ``self.dot_prods``.

        Parameters
        ----------
        user_id : int
            Row index into the user embedding table.
        item_id : int, optional
            Row index into the item embedding table.

        Returns
        -------
        numpy.ndarray or list
            Scores for every item when ``item_id`` is None, otherwise a
            one-element list holding the score of the (user, item) pair.
        """
        if self.dot_prods is None:
            user_embs = self.model.get_layer("user_emb").weights[0]
            item_embs = self.model.get_layer("item_emb").weights[0]
            self.dot_prods = tf.tensordot(user_embs, tf.transpose(item_embs), axes=1).numpy()

        if item_id is None:
            return self.dot_prods[user_id]
        return [self.dot_prods[user_id, item_id]]

# Expose the custom model under cornac.models so it can be referenced like the built-in ones
models.ATTREX = ATTREX
from cornac.models import ATTREX

# Rating metrics (MAE, RMSE) and ranking metrics used to compare the models
metrics = [
    MAE(),
    RMSE(),
    FMeasure(),
    Precision(k=10),
    Recall(k=1),
    Recall(k=5),
    Recall(k=10),
    NDCG(k=10),
    AUC(),
    MAP(),
]

# Custom model
m_attrex = cornac.models.ATTREX(vocab_size=5000, seed=123)

# Baselines
m_mf = cornac.models.MF(max_iter=50, seed=123)
gmf = cornac.models.GMF(
    num_factors=8,
    num_epochs=10,
    learner="adam",
    batch_size=256,
    lr=0.001,
    num_neg=50,
    seed=123,
)
mlp = cornac.models.MLP(
    layers=[64, 32, 16, 8],
    act_fn="tanh",
    learner="adam",
    num_epochs=10,
    batch_size=256,
    lr=0.001,
    num_neg=50,
    seed=123,
)
neumf1 = cornac.models.NeuMF(
    num_factors=8,
    layers=[64, 32, 16, 8],
    act_fn="tanh",
    learner="adam",
    num_epochs=10,
    batch_size=256,
    lr=0.001,
    num_neg=50,
    seed=123,
)
# NeuMF variant whose configuration mirrors the GMF and MLP models above and which is
# handed those two models through pretrain()
neumf2 = cornac.models.NeuMF(
    name="NeuMF_pretrained",
    learner="adam",
    num_epochs=10,
    batch_size=256,
    lr=0.001,
    num_neg=50,
    seed=123,
    num_factors=gmf.num_factors,
    layers=mlp.layers,
    act_fn=mlp.act_fn,
).pretrain(gmf, mlp)

# Optional grid search over the ATTREX learning rate (disabled)
# gs_m_attrex = GridSearch( model=m_attrex, space=[ Discrete("learning_rate", np.linspace(0.0001, 0.5, 2)), ], metric=AUC(), eval_method=ratio_split, )

# Train and evaluate every model on the same split
benchmark_models = [m_attrex, m_mf, gmf, mlp, neumf1, neumf2]
experiment = cornac.Experiment(
    eval_method=ratio_split,
    models=benchmark_models,
    metrics=metrics,
    user_based=False,
)
experiment.run()


[ATTREX] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]


[ATTREX] Evaluation started!


Rating:   0%|          | 0/99343 [00:00<?, ?it/s]

Ranking:   0%|          | 0/5960 [00:00<?, ?it/s]


[MF] Training started!

[MF] Evaluation started!


Rating:   0%|          | 0/99343 [00:00<?, ?it/s]

Ranking:   0%|          | 0/5960 [00:00<?, ?it/s]


[GMF] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]


[GMF] Evaluation started!


Rating:   0%|          | 0/99343 [00:00<?, ?it/s]

Ranking:   0%|          | 0/5960 [00:00<?, ?it/s]


[MLP] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

: 

: 

: 

In [None]:
# Report the best hyper-parameters found by the ATTREX grid search.
# The original cell read `gs_hft` / `gs_cdl`, which are never created in this notebook,
# labelled batch_size as lambda_reg, and used a 2-decimal format that would hide small
# learning rates. Look the search object up defensively so a fresh
# "Restart & Run All" does not stop here when the grid search was not run.
grid_search = globals().get("gs_m_attrex")
best_params = getattr(grid_search, "best_params", None)

if not best_params:
    print("Grid search: no results available (define `gs_m_attrex` and run it in an experiment first)")
else:
    for param_name, param_value in best_params.items():
        print('Grid search: {} = {}'.format(param_name, param_value))

In [5]:
# Fetch the Amazon Toy feedback and the accompanying sentiment data
amazon_toy = cornac.datasets.amazon_toy
data = amazon_toy.load_feedback()
sentiment = amazon_toy.load_sentiment()

In [None]:
# Models to compare: the custom ATTREX recommender against a biased MF baseline.
# Fixes: `ATTEX` was a typo for ATTREX, and `MF` was never imported as a bare name,
# so it is referenced through the cornac package instead.
# NOTE(review): this rebinds `models`, which the model-definition cell imports as the
# `cornac.models` module; the name is kept in case later cells rely on the list.
models = [
    ATTREX(),
    cornac.models.MF(k=10, max_iter=25, learning_rate=0.01, lambda_reg=0.02, use_bias=True, seed=123),
    # Further candidates, currently disabled:
    # cornac.models.PMF(k=10, max_iter=100, learning_rate=0.001, lambda_reg=0.001, seed=123),
    # cornac.models.NeuMF(seed=123),
    # cornac.models.BPR(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.01, seed=123),
    # cornac.models.HFT(k=10, max_iter=200, seed=123),
    # cornac.models.BiVAECF(k=10, n_epochs=100, learning_rate=0.001, seed=123)
]

# put it together in an experiment, voilà!
cornac.Experiment(eval_method=ratio_split, models=models, metrics=metrics, user_based=True).run()