# Example 0. Training a Matrix Factorization Model in ToR[e]cSys

In [1]:
from functools import partial
from sklearn.model_selection import train_test_split
import torch
import torch.utils.data
import torecsys as trs

In [2]:
# Load the MovieLens "latest-small" sample data. Four objects are returned;
# only the third one (the ratings table) is needed for this example.
# Uncomment the next line to download the data first if it is not available locally:
# trs.data.sampledata.download_ml_data(size="latest-small", dir="./data")
_, _, ratings_df, _ = trs.data.sampledata.load_ml_data(
    size="latest-small",
    force=True,
)



In [3]:
# Hyper-parameters of the model.
# The sizes are max(id) + 1 so that the largest user / movie ID is still a
# valid index (the IDs are passed on as indices by the cells below).
user_size = ratings_df["userId"].max() + 1
item_size = ratings_df["movieId"].max() + 1

# Embedding dimensionality and number of input fields (user, item).
embed_size, num_fields = 16, 2

In [4]:
# Split the ratings into a training set (90%) and a testing set (10%).
# A fixed random_state keeps the split identical across re-runs of the
# notebook, so losses from different runs are computed on the same rows.
SPLIT_SEED = 42
train_df, test_df = train_test_split(
    ratings_df, test_size=0.1, random_state=SPLIT_SEED)

# Define the inputs' schema and the collate_fn for the dataloaders.
# Each key is a dataframe column; the first list element is the name the
# batch field is exposed under ("user_id" / "movie_id" are read by the
# embedding, "labels" is the trainer's target). The second element
# presumably selects how the column is converted -- confirm against
# trs.data.dataloader.dict_collate_fn.
schema = {
    "userId": ["user_id", "single_index"],
    "movieId": ["movie_id", "single_index"],
    "rating": ["labels", "values"]
}
collate_fn = partial(trs.data.dataloader.dict_collate_fn, schema=schema)

# Initialize the training and testing datasets from the same three columns.
columns = ["userId", "movieId", "rating"]
train_set = trs.data.dataset.DataFrameToDataset(train_df, columns=columns, names=("B", "N"))
test_set = trs.data.dataset.DataFrameToDataset(test_df, columns=columns, names=("B", "N"))

# Initialize the training and testing dataloaders.
# Only the training loader shuffles; the testing loader keeps a fixed order.
BATCH_SIZE = 1024

train_dl = torch.utils.data.DataLoader(
    train_set, batch_size=BATCH_SIZE, shuffle=True,
    num_workers=0, collate_fn=collate_fn)

test_dl = torch.utils.data.DataLoader(
    test_set, batch_size=BATCH_SIZE, shuffle=False,
    num_workers=0, collate_fn=collate_fn)

In [8]:
# Initialize the embedding for the two index fields (users and items).
# The embedding dimensionality comes from the embed_size hyper-parameter,
# and the field sizes from user_size / item_size, all set earlier.
feat_inputs_embedding = trs.inputs.base.MultiIndicesEmbedding(
    embed_size, [user_size, item_size]
)

# Tell the embedding which batch fields to read; these names match the
# ones produced by the dataloaders' collate schema.
feat_inputs_embedding.set_schema(inputs=["user_id", "movie_id"])

# Initialize the inputs wrapper that is handed to the trainer as `inputs`.
# NOTE: this rebinds `schema`; the collate_fn built earlier is unaffected
# because it captured the previous dict when the partial was created.
schema = {
    "emb_inputs"  : feat_inputs_embedding
}
inputs = trs.inputs.InputsWrapper(schema=schema)



In [None]:
# Configure the trainer step by step: model, inputs, loss, optimizer,
# data loaders, target field name and the epoch / iteration limits.
trainer = (
    trs.trainer.Trainer()
    .build_model("MatrixFactorization")
    .build_sequential(inputs)
    .build_criterion("MSELoss", reduction="mean")
    .build_optimizer("SGD", lr=1e-4)
    .set_loader("train", train_dl)
    .set_loader("validate", test_dl)
    .set_targets_name("labels")
    .set_max_num_epochs(10)
    .set_max_num_iterations(10)
)

In [12]:
# Start training with the trainer configured in the previous cell; the
# step loss and the epoch average loss are reported in the cell output.
trainer.fit()

Epoch 1 / 1:
step loss : 58.7626:   0%|          | 0/89 [00:01<?, ?it/s]step avg loss at step 0 of epoch 0 : 0.1175
step loss : 55.4865: 100%|██████████| 89/89 [02:17<00:00,  1.21s/it]
epoch avg loss : 56.9743
