# Example of SasRec training/inference with Parquet Module

In [1]:
from typing import Optional

import lightning as L
import pandas as pd
import pyarrow as pa
import torch

from replay.data import (
    FeatureHint,
    FeatureSource,
    FeatureType,
)
from replay.data.nn import (
    TensorFeatureInfo,
    TensorFeatureSource,
    TensorSchema,
)
from replay.metrics import MAP, OfflineMetrics, Precision, Recall
from replay.metrics.torch_metrics_builder import metrics_to_df

# Seed the random number generators through Lightning so repeated runs are comparable
# (the call reports "Seed set to 42").
L.seed_everything(42)

import warnings

# Silence every warning to keep the cell outputs readable.
# NOTE(review): this blanket filter also hides deprecation warnings — consider narrowing it.
warnings.filterwarnings("ignore")

Seed set to 42


## Preparing data
In this example, we will be using the MovieLens dataset, namely the 1m subset. This is a simple demonstration, so only item ids will be used as model input.

---
**NOTE**

The current implementation of SasRec is able to handle item and interaction features. It does not take user features into account.

---

In [2]:
# Load the raw ratings log: a tab-separated file without a header row,
# so the column names are supplied explicitly.
interactions = pd.read_csv(
    "./data/ml1m_ratings.dat",
    sep="\t",
    names=["user_id", "item_id", "rating", "timestamp"],
)

In [None]:
# Order the events chronologically. A stable sort keeps rows that share a timestamp
# in their original file order, so the positions assigned below are well-defined
# (the default quicksort gives no guarantee about the relative order of ties).
interactions["timestamp"] = interactions["timestamp"].astype("int64")
interactions = interactions.sort_values(by="timestamp", kind="stable")
# Replace the raw timestamp with the 0-based position of each event inside its user's history.
interactions["timestamp"] = interactions.groupby("user_id").cumcount()
interactions

Unnamed: 0,user_id,item_id,rating,timestamp
1000138,6040,858,4,0
1000153,6040,2384,4,1
999873,6040,593,5,2
1000192,6040,2019,5,3
1000007,6040,1961,4,4
...,...,...,...,...
825793,4958,2399,1,446
825438,4958,1407,5,447
825731,4958,2634,3,448
825724,4958,3264,4,449


### Encode categorical data.
To ensure all categorical data is fit for training, it needs to be encoded using the `LabelEncoder` class. Create an instance of the encoder, providing a `LabelEncodingRule` for each categorical column in the dataset that will be used in the model. Note that ids of users and ids of items are always used.

In [4]:
from replay.preprocessing.label_encoder import LabelEncoder, LabelEncodingRule

# One encoding rule per id column; both rules use default_value="last".
encoder = LabelEncoder(
    [LabelEncodingRule(column, default_value="last") for column in ("user_id", "item_id")]
)
# Sort by item id (ascending) before fitting the encoder on the interactions.
interactions = interactions.sort_values("item_id")
encoded_interactions = encoder.fit_transform(interactions)
encoded_interactions

Unnamed: 0,rating,timestamp,user_id,item_id
0,4,32,0,0
1,4,10,1,0
2,5,12,2,0
3,4,339,3,0
4,4,144,4,0
...,...,...,...,...
1000204,4,281,796,3705
1000205,4,209,1297,3705
1000206,4,748,1883,3705
1000207,5,71,4449,3705


### Split interactions into the train, validation and test datasets using LastNSplitter
We use the widespread Last-One-Out splitting strategy. We filter out cold items and users for simplicity.

In [5]:
from replay.splitters import LastNSplitter

# Last-One-Out splitter: N=1 with strategy="interactions", grouped by "user_id".
# Cold users and cold items are filtered out (drop_cold_users / drop_cold_items).
splitter = LastNSplitter(
    N=1,
    divide_column="user_id",
    query_column="user_id",
    strategy="interactions",
    drop_cold_users=True,
    drop_cold_items=True
)

# First split: separate the test ground truth from the full encoded history.
test_events, test_gt = splitter.split(encoded_interactions)
# Second split: apply the same splitter to the remaining history to obtain the validation ground truth.
validation_events, validation_gt = splitter.split(test_events)
# The training data is the history that remains after both hold-outs.
train_events = validation_events

### Dataset preprocessing ("baking")
SasRec expects each user in the batch to provide their events in the form of a sequence. For this reason, the event splits must be properly processed using the `groupby_sequences` function provided by RePlay.

In [6]:
from replay.data.nn.utils import groupby_sequences


def bake_data(full_data):
    """Group an event log into per-user sequences.

    Events are grouped by "user_id" and ordered by "timestamp" via
    ``groupby_sequences``; the grouped frame is returned unchanged.
    """
    return groupby_sequences(events=full_data, groupby_col="user_id", sort_col="timestamp")


# Training sequences (train_events refers to the same history as validation_events at this point).
train_events = bake_data(train_events)

# Validation: input sequences and their held-out targets.
validation_events = bake_data(validation_events)
validation_gt = bake_data(validation_gt)

# Test: input sequences and their held-out targets.
test_events = bake_data(test_events)
test_gt = bake_data(test_gt)

# Display the grouped training frame.
train_events

Unnamed: 0,user_id,rating,timestamp,item_id
0,0,"[5, 5, 1, 3, 5, 5, 5, 5, 5, 5, 1, 1, 1, 5, 5, ...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[859, 309, 2371, 3442, 1108, 329, 367, 3279, 7..."
1,1,"[3, 3, 4, 5, 5, 5, 5, 5, 5, 4, 4, 5, 5, 4, 5, ...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[354, 2426, 253, 1371, 513, 1184, 3131, 309, 8..."
2,2,"[4, 3, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[1008, 1120, 2439, 1066, 3197, 253, 1108, 1107..."
3,3,"[4, 2, 4, 4, 5, 2, 3, 5, 3, 4, 5, 3, 4, 5, 3, ...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[346, 2190, 670, 802, 323, 661, 2480, 2501, 19..."
4,4,"[5, 4, 4, 5, 4, 5, 4, 4, 4, 5, 5, 4, 4, 4, 4, ...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[1120, 758, 2426, 1838, 2621, 3341, 3377, 3502..."
...,...,...,...,...
6035,6035,"[5, 4, 4, 4, 4, 5, 4, 5, 5, 3, 1, 4, 4, 4, 3, ...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[2426, 1279, 3321, 3151, 1178, 2501, 3301, 248..."
6036,6036,"[3, 3, 5, 2, 4, 5, 5, 4, 4, 3, 5, 4, 5, 5, 5, ...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[1592, 2302, 1633, 1813, 2879, 1482, 2651, 200..."
6037,6037,"[4, 5, 1, 2, 3, 5, 3, 5, 4, 5, 5, 4, 3, 4, 3, ...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[1971, 3500, 1666, 2077, 1399, 2748, 2958, 278..."
6038,6038,"[5, 5, 5, 3, 2, 4, 5, 4, 4, 5, 5, 5, 4, 4, 5, ...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[1486, 1485, 3384, 3512, 3302, 3126, 3650, 330..."


To ensure we don't have unknown users in ground truth, we join validation events and validation ground truth (also join test events and test ground truth correspondingly) by user ids to leave only the common ones.  

In [7]:
def add_gt_to_events(events_df, gt_df):
    """Attach ground-truth items to the event rows of users present in both frames.

    Args:
        events_df: frame with a "user_id" column (one row per user's events).
        gt_df: frame with "user_id" and "item_id" columns holding the held-out items.

    Returns:
        The rows of ``events_df`` whose user also appears in ``gt_df``, with the
        matching ``gt_df["item_id"]`` joined in as a new "ground_truth" column.
    """
    # Restrict both frames to the user ids they have in common.
    shared_events = events_df.loc[events_df["user_id"].isin(gt_df["user_id"])]
    shared_gt = gt_df.loc[gt_df["user_id"].isin(shared_events["user_id"])]

    # Only the key and the target column take part in the join.
    targets = shared_gt[["user_id", "item_id"]].rename(columns={"item_id": "ground_truth"})
    return shared_events.merge(targets, on="user_id", how="left")

# Add a "ground_truth" column to the validation and test sequences,
# keeping only the users that have both events and ground truth.
validation_events = add_gt_to_events(validation_events, validation_gt)
test_events = add_gt_to_events(test_events, test_gt)

In [8]:
from pathlib import Path

# Directory for the artifacts written by this notebook; created if it does not exist yet.
data_dir = Path("temp/data/")
data_dir.mkdir(parents=True, exist_ok=True)

# One Parquet file per data split.
TRAIN_PATH = data_dir / "train.parquet"
VAL_PATH = data_dir / "val.parquet"
TEST_PATH = data_dir / "test.parquet"

# Location where the fitted label encoder is saved.
ENCODER_PATH = data_dir / "encoder"

Next we will use the ParquetModule for batch-wise data reading. It is based on PyArrow which requires explicit matching types, so to save via pandas you need to add a PyArrow data schema.

In [9]:
# Explicit PyArrow schemas: sequence columns are lists of int64, user ids are plain int64.
train_schema = pa.schema(
    [
        ("item_id", pa.list_(pa.int64())),
        ("user_id", pa.int64()),
    ]
)

# Validation and test files additionally carry the ground-truth items.
val_test_schema = pa.schema(
    [
        ("item_id", pa.list_(pa.int64())),
        ("ground_truth", pa.list_(pa.int64())),
        ("user_id", pa.int64()),
    ]
)

# Write only the columns described by the schemas.
train_events[["user_id", "item_id"]].to_parquet(TRAIN_PATH, schema=train_schema)
validation_events[["user_id", "item_id", "ground_truth"]].to_parquet(VAL_PATH, schema=val_test_schema)
test_events[["user_id", "item_id", "ground_truth"]].to_parquet(TEST_PATH, schema=val_test_schema)

# Persist the fitted encoder next to the data.
encoder.save(ENCODER_PATH)

# Prepare to model training
### Create the tensor schema
A schema shows the correspondence of columns from the source dataset with the internal representation of tensors inside the model. It is required by the SasRec model to correctly create embeddings for every source column. Note that user_id is not required in `TensorSchema`.

In [10]:
# Embedding width for the item feature; the same constant is passed to the model constructors later.
EMBEDDING_DIM = 64

# NOTE(review): ITEM_FEATURE_NAME is not referenced in the visible cells — the literal "item_id" is used instead.
ITEM_FEATURE_NAME = "item_id"

# Reload the encoder saved earlier and count the encoded items.
# NOTE(review): `load` is called on the existing instance — presumably a classmethod; confirm.
encoder = encoder.load(ENCODER_PATH)
NUM_UNIQUE_ITEMS = len(encoder.mapping["item_id"])

# Schema with a single sequential categorical feature: the item id taken from the interactions.
tensor_schema = TensorSchema(
    [
        TensorFeatureInfo(
            name="item_id",
            is_seq=True,
            # Padding index sits right after the encoded ids
            # (assumes ids are 0..NUM_UNIQUE_ITEMS-1 — TODO confirm).
            padding_value=NUM_UNIQUE_ITEMS,
            cardinality=NUM_UNIQUE_ITEMS + 1,  # taking into account padding
            embedding_dim=EMBEDDING_DIM,
            feature_type=FeatureType.CATEGORICAL,
            feature_sources=[TensorFeatureSource(FeatureSource.INTERACTIONS, "item_id")],
            feature_hint=FeatureHint.ITEM_ID,
        )
    ]
)

### Configure ParquetModule and transformation pipelines

The `ParquetModule` class enables training of models on large datasets by reading data in a batch-wise way. This class is initialized with paths to every data split, a metadata dict containing information about the shape and padding value of every column, and a dict of transforms. `ParquetModule`'s "transform pipelines" are stage-specific modules implementing additional preprocessing to be performed at the batch level right before the forward pass.

For SasRec model, RePlay provides a function that generates a sequence of appropriate transforms for each data split.

Internally this function creates the following transforms:
1) Training:
    1. Create a target, which contains the shifted item sequence that represents the next item in the sequence (for the next item prediction task).
    2. Rename features to match the format expected by the model during training.
    3. Unsqueeze the target (*positive_labels*) and its padding mask (*target_padding_mask*) to get the shape these tensors require for loss computation.
    4. Group the input features to be embedded into the expected format.

2) Validation/Inference:
    1. Rename/group features to match the format expected by the model during validation/inference.

**Note:** One of the transforms for the training data prepares the initial sequence for the task of Next Item Prediction so it shifts the sequence of items. For the final sequence length to be correct, you need to set shape of item_id in metadata as **model sequence length + shift**. Default shift value is 1.

In [11]:
from replay.nn.transforms.templates.sasrec import make_default_sasrec_transforms

# Sequence length the model works with; also used for the per-stage metadata shapes.
MAX_SEQ_LEN = 50
# Batch size handed to the ParquetModule instances.
BATCH_SIZE = 32
# Shift applied by the training transform when it builds the next-item target (default value is 1).
SHIFT = 1

# Stage-specific transform pipelines generated for SasRec from the tensor schema.
TRANSFORMS = make_default_sasrec_transforms(tensor_schema, query_column="user_id")

def create_meta(shape: int, gt_shape: Optional[int] = None):
    """Build the column metadata dict for one data split.

    Args:
        shape: value stored as the "shape" of the "item_id" column.
        gt_shape: when given, a "ground_truth" entry with this shape and
            padding -1 is added; when None, no such entry is created.

    Returns:
        Dict with an empty "user_id" entry, an "item_id" entry (shape plus the
        padding value taken from the module-level ``tensor_schema``) and,
        optionally, the "ground_truth" entry.
    """
    item_padding = tensor_schema["item_id"].padding_value
    meta = {
        "user_id": {},
        "item_id": {"shape": shape, "padding": item_padding},
    }
    if gt_shape is None:
        return meta

    meta["ground_truth"] = {"shape": gt_shape, "padding": -1}
    return meta

# Per-stage column metadata for ParquetModule.
# The training sequence is SHIFT items longer than the model input because the
# training transform builds the next-item target by shifting the item sequence.
# Validation and test carry one ground-truth item per user; predict needs none.
METADATA = {
    "train": create_meta(shape=MAX_SEQ_LEN + SHIFT),
    "validate": create_meta(shape=MAX_SEQ_LEN, gt_shape=1),
    "test": create_meta(shape=MAX_SEQ_LEN, gt_shape=1),
    "predict": create_meta(shape=MAX_SEQ_LEN),
}

In [12]:
from replay.data.nn import ParquetModule

# Data module covering all four stages; the predict stage reads the same file as the test stage.
parquet_module = ParquetModule(
    train_path=TRAIN_PATH,
    validate_path=VAL_PATH,
    test_path=TEST_PATH,
    predict_path=TEST_PATH,
    batch_size=BATCH_SIZE,
    metadata=METADATA,
    transforms=TRANSFORMS,
)

**NOTE**: 
You can also create a module specifically for training/inference by providing only their respective datapaths.
In such cases it's possible to pass to ParquetModule either all transforms or transforms for used data splits only.

For example:

In [13]:
# Train/validation-only module: only these two paths are given, while the full
# METADATA and TRANSFORMS objects are passed unchanged.
parquet_module_train_val = ParquetModule(
    train_path=TRAIN_PATH,
    validate_path=VAL_PATH,
    batch_size=BATCH_SIZE,
    metadata=METADATA,
    transforms=TRANSFORMS
)

## Train model
### Create SasRec model instance and run the training stage using lightning
We may now train the model using the Lightning trainer class. 

RePlay's implementation of SasRec is designed in a modular, **block-based approach**. Instead of passing configuration parameters to the constructor, SasRec is now built by providing fully initialized components, which makes the model more flexible and easier to extend. SasRec consists of the body and the loss. The body consists of the following components: embedder, aggregator, encoder, mask, output_normalization.

#### Components of SasRec
* `Body` - The body component defines the full model excluding loss.
* `Loss` - The loss component defines how the training loss is computed. All available loss implementations are located in nn/loss.

#### Components of SasRecBody

* `Embedder` - The embedder is responsible for converting input features into embeddings. The default implementation is `SequenceEmbedding`, which supports the following feature types: categorical, categorical_list, numerical, numerical_list.

* `Aggregator` - The aggregator combines all embeddings produced by the embedder and adds positional embeddings.
Currently, `SasRecAggregator` is supported. It internally uses one of the following embedding aggregation strategies: `SumAggregator`, `ConcatAggregator`.

* `Encoder` - The encoder represents the core transformer block of the model. The following implementations are currently available: `SasRecTransformerLayer` (default one), `DiffAttentionLayer` (a modified version with differential attention).

* `Mask` - The mask is an object that creates attention mask by input. RePlay supports `DefaultAttentionMask` creating a lower-triangular attention mask.

* `Output Normalization` - Any suitable PyTorch normalization layer may be used as output_normalization, for example: torch.nn.LayerNorm or torch.nn.RMSNorm

In [14]:
from replay.nn import DefaultAttentionMask, SequenceEmbedding, SumAggregator
from replay.nn.loss import CE
from replay.nn.sequential import PositionAwareAggregator, SasRec, SasRecBody, SasRecTransformerLayer


def create_sasrec_model(tensor_schema: TensorSchema,
                        embedding_dim: int = 256,
                        categorical_list_feature_aggregation_method: str = "sum",
                        max_seq_len: int = 50,
                        dropout: float = 0.2,
                        num_heads: int = 2,
                        num_blocks: int = 2,
                        activation="relu"
    ):
    """Assemble a SasRec model from explicitly constructed components.

    Args:
        tensor_schema: schema given to the embedder; it also supplies the item-id
            feature name for the attention mask and the padding value for the loss.
        embedding_dim: width used by the sum aggregator, the transformer encoder
            and the output LayerNorm.
        categorical_list_feature_aggregation_method: forwarded to ``SequenceEmbedding``.
        max_seq_len: forwarded as ``max_sequence_length`` to the position-aware aggregator.
        dropout: dropout value for the aggregator and the encoder.
        num_heads: number of heads for the attention mask builder and the encoder.
        num_blocks: number of blocks in the transformer encoder.
        activation: activation passed to the transformer encoder.

    Returns:
        A ``SasRec`` instance combining the assembled body with a CE loss.
    """
    # Build each body component first, in the order they are handed to SasRecBody.
    embedder = SequenceEmbedding(
        schema=tensor_schema,
        categorical_list_feature_aggregation_method=categorical_list_feature_aggregation_method,
    )
    aggregator = PositionAwareAggregator(
        embedding_aggregator=SumAggregator(embedding_dim=embedding_dim),
        max_sequence_length=max_seq_len,
        dropout=dropout,
    )
    mask_builder = DefaultAttentionMask(
        reference_feature_name=tensor_schema.item_id_feature_name,
        num_heads=num_heads,
    )
    transformer = SasRecTransformerLayer(
        embedding_dim=embedding_dim,
        num_heads=num_heads,
        num_blocks=num_blocks,
        dropout=dropout,
        activation=activation,
    )
    final_norm = torch.nn.LayerNorm(embedding_dim)

    body = SasRecBody(
        embedder=embedder,
        embedding_aggregator=aggregator,
        attn_mask_builder=mask_builder,
        encoder=transformer,
        output_normalization=final_norm,
    )

    # The loss receives the padding value of the item-id feature as its padding index.
    item_padding = tensor_schema.item_id_features.item().padding_value
    return SasRec(body=body, loss=CE(padding_idx=item_padding))

In [15]:
# Architecture hyperparameters; the same constants are reused by the later model constructions.
NUM_BLOCKS = 2
NUM_HEADS = 2
DROPOUT = 0.3
# Build the model from explicit components; EMBEDDING_DIM matches the tensor schema's embedding_dim.
sasrec = create_sasrec_model(
    tensor_schema,
    embedding_dim=EMBEDDING_DIM,
    max_seq_len=MAX_SEQ_LEN,
    dropout=DROPOUT,
    num_heads=NUM_HEADS,
    num_blocks=NUM_BLOCKS
    )

#### Default Configuration

A default SasRec model may be created quickly via the *from_params* method. The default model instance has CE loss, original SasRec transformer layers, and embeddings aggregated via sum.

In [16]:
# Quick construction through from_params, using the same hyperparameter constants as the explicit build.
# NOTE(review): default_sasrec is not used by the later visible cells; training wraps `sasrec`.
default_sasrec = SasRec.from_params(
    schema=tensor_schema,
    embedding_dim=EMBEDDING_DIM,
    max_sequence_length=MAX_SEQ_LEN,
    num_heads=NUM_HEADS,
    num_blocks=NUM_BLOCKS,
    dropout=DROPOUT,
    excluded_features=None
    )

A universal PyTorch Lightning module is provided that can work with any RePlay NN model.

In [17]:
from replay.models.nn.optimizer_utils import FatLRSchedulerFactory, FatOptimizerFactory
from replay.nn.lightning import LightningModule

# Wrap the SasRec model in RePlay's Lightning module; the optimizer and LR scheduler
# factories are constructed with their default arguments.
model = LightningModule(
    sasrec,
    optimizer_factory=FatOptimizerFactory(),
    lr_scheduler_factory=FatLRSchedulerFactory(),
)

To facilitate training, we add the following callbacks:
1) `ModelCheckpoint` - to save the best trained model based on its Recall metric. It's a default Lightning Callback.
2) `ComputeMetricsCallback` - to display a detailed validation metric matrix after each epoch. It's a custom RePlay callback for computing recsys metrics on validation and test stages.


In [18]:
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import CSVLogger

from replay.nn.lightning.callbacks import ComputeMetricsCallback

# Keep only the single best checkpoint, ranked by the highest "recall@10" value.
checkpoint_callback = ModelCheckpoint(
    dirpath="sasrec/checkpoints",
    save_top_k=1,
    verbose=True,
    monitor="recall@10",
    mode="max",
)

# Compute MAP, NDCG and Recall at every cutoff in `ks`; "recall@10" monitored above is one of them.
validation_metrics_callback = ComputeMetricsCallback(
    metrics=["map", "ndcg", "recall"],
    ks=[1, 5, 10, 20],
    item_count=NUM_UNIQUE_ITEMS,
)

# Training logs are written as CSV under sasrec/logs/train.
csv_logger = CSVLogger(save_dir="sasrec/logs/train", name="SasRec-example")

# Train for at most 5 epochs with both callbacks attached.
trainer = L.Trainer(
    max_epochs=5,
    callbacks=[checkpoint_callback, validation_metrics_callback],
    logger=csv_logger,
)

# The data module supplies the training and validation data.
trainer.fit(model, datamodule=parquet_module)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name  | Type   | Params | Mode 
-----------------------------------------
0 | model | SasRec | 291 K  | train
-----------------------------------------
291 K     Trainable params
0         Non-trainable params
291 K     Total params
1.164     Total estimated model params size (MB)
39        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 0, global step 189: 'recall@10' reached 0.03709 (best 0.03709), saving model to '/home/replay/sasrec/checkpoints/epoch=0-step=189.ckpt' as top 1


k             1         5         10        20
map     0.002649  0.007919  0.010259  0.012502
ndcg    0.002649  0.010639  0.016405  0.024654
recall  0.002649  0.019040  0.037086  0.069868



Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 1, global step 378: 'recall@10' reached 0.09023 (best 0.09023), saving model to '/home/replay/sasrec/checkpoints/epoch=1-step=378.ckpt' as top 1


k             1         5         10        20
map     0.011424  0.024255  0.029552  0.033440
ndcg    0.011424  0.030393  0.043487  0.057861
recall  0.011424  0.049172  0.090232  0.147517



Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 2, global step 567: 'recall@10' reached 0.12649 (best 0.12649), saving model to '/home/replay/sasrec/checkpoints/epoch=2-step=567.ckpt' as top 1


k             1         5         10        20
map     0.015728  0.035381  0.042149  0.047407
ndcg    0.015728  0.045037  0.061643  0.081116
recall  0.015728  0.074669  0.126490  0.204139



Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 3, global step 756: 'recall@10' reached 0.14040 (best 0.14040), saving model to '/home/replay/sasrec/checkpoints/epoch=3-step=756.ckpt' as top 1


k             1         5         10        20
map     0.013576  0.035381  0.043214  0.048966
ndcg    0.013576  0.046365  0.065645  0.086942
recall  0.013576  0.080132  0.140397  0.225331



Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 4, global step 945: 'recall@10' reached 0.15315 (best 0.15315), saving model to '/home/replay/sasrec/checkpoints/epoch=4-step=945.ckpt' as top 1
`Trainer.fit` stopped: `max_epochs=5` reached.


k             1         5         10        20
map     0.017219  0.040811  0.048909  0.055372
ndcg    0.017219  0.053191  0.073015  0.096932
recall  0.017219  0.091391  0.153146  0.248510



Now we can get the best model path stored in the checkpoint callback.

In [19]:
# Path of the best checkpoint recorded by the checkpoint callback during training.
best_model_path = checkpoint_callback.best_model_path
best_model_path

'/home/replay/sasrec/checkpoints/epoch=4-step=945.ckpt'

## Inference&Test stages

To obtain model scores, we will load the weights from the best checkpoint. To do this, we call `LightningModule.load_from_checkpoint`, passing it the path to the checkpoint and the model instance.

In [20]:
# Re-create the model with the same hyperparameter constants that were used for training.
sasrec = create_sasrec_model(
    tensor_schema,
    embedding_dim=EMBEDDING_DIM,
    max_seq_len=MAX_SEQ_LEN,
    dropout=DROPOUT,
    num_heads=NUM_HEADS,
    num_blocks=NUM_BLOCKS
    )

# Load the best checkpoint into a LightningModule that wraps this model instance.
best_model = LightningModule.load_from_checkpoint(best_model_path, model=sasrec)

Excluding train and validation, PyTorch Lightning supports 2 different stages: **test** and **predict**. 

* Test stage is used just for metric calculation, so it requires ground truth.

* Predict stage is used for obtaining inference scores. Since we also want metrics in this example, we can compute them manually from the inference scores.

### Test stage
Here we can use the same callback as in validation.

In [21]:
# Same metric configuration as the validation callback used during training.
test_metrics_callback = ComputeMetricsCallback(
    metrics=["map", "ndcg", "recall"],
    ks=[1, 5, 10, 20],
    item_count=NUM_UNIQUE_ITEMS,
)
# A new Trainer that carries only the test metrics callback.
trainer = L.Trainer(callbacks=[test_metrics_callback], inference_mode=True)
# The trailing semicolon suppresses the display of the call's return value.
trainer.test(best_model, datamodule=parquet_module);

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Testing: |          | 0/? [00:00<?, ?it/s]

k             1         5         10        20
map     0.017224  0.040027  0.047906  0.053678
ndcg    0.017224  0.051358  0.070750  0.091997
recall  0.017224  0.086121  0.146737  0.231202

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
          map@1            0.017224246636033058
         map@10            0.047905657440423965
         map@20             0.05367831140756607
          map@5             0.04002705216407776
         ndcg@1            0.017224246636033058
         ndcg@10            0.0707496926188469
         ndcg@20            0.09199724346399307
         ndcg@5             0.05135849490761757
        recall@1           0.017224246636033058
        recall@10           0.14673733711242676
        recall@20           0.2312023788690567

### Inference stage

During inference, we can use another type of callback provided by RePlay: the `*TopItemsCallback` family. Such callbacks allow you to get scores for each user throughout the entire catalog and get recommendations in the form of ids of items with the highest score values.


Recommendations can be fetched in four formats: PySpark DataFrame, Pandas DataFrame, Polars DataFrame or raw PyTorch tensors. Each of these formats corresponds to a callback. In this example, we'll be using the `PandasTopItemsCallback`.

In [22]:
from replay.nn.lightning.callbacks import PandasTopItemsCallback

# Separate CSV logger for the prediction run.
csv_logger = CSVLogger(save_dir=".logs/test", name="SasRec-example")

# Cutoffs used for the offline metrics computed from the predictions.
TOPK = [1, 5, 10, 20]

# Collect the max(TOPK) top items per user as a pandas DataFrame
# with the columns "user_id", "item_id" and "score".
pandas_prediction_callback = PandasTopItemsCallback(
    top_k=max(TOPK),
    query_column="user_id",
    item_column="item_id",
    rating_column="score",
)

trainer = L.Trainer(callbacks=[pandas_prediction_callback], logger=csv_logger, inference_mode=True)

# return_predictions=False: the results are read from the callback rather than from the return value.
trainer.predict(best_model, datamodule=parquet_module, return_predictions=False)

pandas_res = pandas_prediction_callback.get_result()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

In [23]:
# Display the recommendations gathered by the prediction callback.
pandas_res

Unnamed: 0,user_id,item_id,score
0,0,3341,6.163676
0,0,3383,6.109334
0,0,3510,5.963237
0,0,3550,5.852643
0,0,3512,5.571964
...,...,...,...
6037,6039,2601,5.27216
6037,6039,2470,5.235597
6037,6039,2750,5.109411
6037,6039,2700,5.101314


### Calculating metrics

*test_gt* is already encoded, so we can use it for computing metrics.

In [24]:
# Evaluator for Recall, Precision and MAP at every cutoff in TOPK.
metric_suite = OfflineMetrics(
    [Recall(TOPK), Precision(TOPK), MAP(TOPK)],
    query_column="user_id",
    rating_column="score",
)
# test_gt stores each user's items as a list, so it is exploded to one row per item first.
result_metrics = metric_suite(pandas_res, test_gt.explode("item_id"))

In [25]:
# Render the computed metrics as a table (one row per metric, one column per cutoff).
metrics_to_df(result_metrics)

k,1,5,10,20
MAP,0.017224,0.040027,0.047906,0.053678
Precision,0.017224,0.017224,0.014674,0.01156
Recall,0.017224,0.086121,0.146737,0.231202
