# Example. Training a Bayesian Personalized Ranking Matrix Factorization (BPR-MF) in ToR[e]cSys

In [1]:
from sklearn.model_selection import train_test_split
import torch.utils.data
import torecsys as trs

In [2]:
# Load the MovieLens "latest-small" sample data as an example.
# Uncomment the next line to download the data into ./data first (presumably only needed once):
# trs.data.sampledata.download_ml_data(size="latest-small", dir="./data")
# The loader returns four objects; only the third one (the ratings frame) is used in this notebook.
_, _, ratings_df, _ = trs.data.sampledata.load_ml_data(size="latest-small", force=True)

In [3]:
# One independent index field per id column; each is later handed to the
# collator as the "mapping" argument of its column.
user_index_field, movie_index_field = (
    trs.data.dataloader.IndexField(),
    trs.data.dataloader.IndexField(),
)

In [4]:
# Collator that turns a batch of raw rows into model inputs.
# `schema` assigns a field type to every column, and `kwargs` carries the
# per-column arguments (here: the index field used as "mapping" for each id column).
dataloader_collator = trs.data.dataloader.DataloaderCollator(
    schema={"userId": "indices", "movieId": "indices", "rating": "values"},
    kwargs={
        "userId": {"mapping": user_index_field},
        "movieId": {"mapping": movie_index_field},
    },
)

In [5]:
# Print the collator's field table (field name, field type, arguments) to check the
# schema configured above. Left as a bare expression, so the returned object's repr
# is echoed after the table as well.
dataloader_collator.summary()

+-------------------------------------------+
| Field Name:    Field Type:    Arguments:  |
| userId         indices        mapping     |
| movieId        indices        mapping     |
| rating         values                     |
+-------------------------------------------+


<torecsys.data.dataloader.collate_fn.DataloaderCollator at 0x2c5f32dc188>

In [6]:
# Hyperparameters and data pipeline for the BPR-MF example.

# Single seed for the randomness controlled in this cell, so that
# Restart Kernel -> Run All produces the same train/test split every time.
SEED = 42
torch.manual_seed(SEED)  # seeds torch's global RNG for anything downstream that draws from it

# set hyperparameters of model
# largest raw id + 1; these are passed on as the embedding field sizes
user_size = ratings_df.userId.max() + 1
item_size = ratings_df.movieId.max() + 1

embed_size = 16
num_fields = 2

# split data into training set (90%) and testing set (10%);
# random_state pins the shuffle so the split is reproducible
train_df, test_df = train_test_split(ratings_df, test_size=0.1, random_state=SEED)

# initialize training and testing dataset
columns = ["userId", "movieId", "rating"]
train_set = trs.data.dataset.DataFrameToDataset(train_df, columns=columns)
test_set = trs.data.dataset.DataFrameToDataset(test_df, columns=columns)

# initialize training and testing dataloader
# both loaders batch two rows at a time and collate them with the collator's to_tensor
train_dl = torch.utils.data.DataLoader(
    train_set, batch_size=2, shuffle=False,
    num_workers=0, collate_fn=dataloader_collator.to_tensor)

test_dl = torch.utils.data.DataLoader(
    test_set, batch_size=2, shuffle=False,
    num_workers=0, collate_fn=dataloader_collator.to_tensor)

In [7]:
# initialize the embedding shared by the user and item index fields.
# The embedding width is taken from the `embed_size` hyperparameter instead of a
# hard-coded 1, so the declared hyperparameter actually controls the size of the
# factors fed to the model.
feat_inputs_embedding = trs.inputs.base.MultiIndicesEmbedding(
    embed_size, [user_size, item_size]
)

# declare which batch fields feed this embedding
feat_inputs_embedding.set_schema(inputs=["userId", "movieId"])

# define schema of the wrapper and initialize the inputs wrapper:
# the embedding is exposed to the model under the name "emb_inputs"
schema = {
    "emb_inputs" : feat_inputs_embedding
}
inputs = trs.inputs.Inputs(schema)



In [8]:
# Assemble the trainer with one fluent builder chain: objective, inputs, model,
# negative sampler, loss, optimizer, data loaders and the run limits.
trainer = (
    trs.trainer.TorecsysTrainer()
    .set_objective("LearningToRank")
    .set_inputs(inputs)
    .set_model("MatrixFactorizationModel")
    .set_sequential()
    # NOTE(review): the sampler is keyed on "user_id" with range 1..100, while the
    # collator schema names its fields "userId"/"movieId" - confirm that this is the
    # intended field and range for the negative samples.
    .build_negative_sampler(
        "UniformSampler",
        with_replacement=True,
        user_id={"high": 100, "low": 1},
    )
    .build_criterion("BayesianPersonalizedRankingLoss", reduction="mean")
    .build_optimizer("SGD", lr=1e-4)
    .set_loader("train", train_dl)
    .set_loader("eval", test_dl)
    .set_negative_size(10)
    .set_max_num_epochs(10)
    .set_max_num_iterations(10)
)

In [9]:
# Print the trainer's configuration table (objective, inputs, model, loss, optimizer,
# epochs, negative sampler settings) to verify the setup before training.
trainer.summary()

+----------------------------------------------------+
|      Name:                     Value:              |
| Objective          learningtorank                  |
| Inputs             InputsWrapper                   |
| Model              MatrixFactorizationModel        |
| Loss               BayesianPersonalizedRankingLoss |
| Optimizer          SGD                             |
| Num of epochs      10                              |
| Log directory      {}                              |
| Negative sampler   UniformSampler                  |
| Negative size      10                              |
+----------------------------------------------------+


<torecsys.trainer.trainer.Trainer at 0x2c5f34c6dc8>

In [10]:
# Run training with the configuration built above; the recorded output shows
# alternating "train" and "eval" progress bars.
trainer.fit()

HBox(children=(FloatProgress(value=0.0, description='Current Mode: train, Step Loss: ?', max=10.0, style=Progr…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: eval, Step Loss: ?', max=10.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: train, Step Loss: ?', max=10.0, style=Progr…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: eval, Step Loss: ?', max=10.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: train, Step Loss: ?', max=10.0, style=Progr…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: eval, Step Loss: ?', max=10.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: train, Step Loss: ?', max=10.0, style=Progr…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: eval, Step Loss: ?', max=10.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: train, Step Loss: ?', max=10.0, style=Progr…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: eval, Step Loss: ?', max=10.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: train, Step Loss: ?', max=10.0, style=Progr…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: eval, Step Loss: ?', max=10.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: train, Step Loss: ?', max=10.0, style=Progr…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: eval, Step Loss: ?', max=10.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: train, Step Loss: ?', max=10.0, style=Progr…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: eval, Step Loss: ?', max=10.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: train, Step Loss: ?', max=10.0, style=Progr…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: eval, Step Loss: ?', max=10.0, style=Progre…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: train, Step Loss: ?', max=10.0, style=Progr…




HBox(children=(FloatProgress(value=0.0, description='Current Mode: eval, Step Loss: ?', max=10.0, style=Progre…


