In [7]:
import time
from copy import deepcopy

import numpy as np
import pandas as pd
import re
import rectools
from rectools import Columns
from rectools.dataset import Dataset
from rectools.dataset import Interactions
from rectools.metrics import (
    Precision,
    Recall,
    MAP,
    NDCG,
    calc_metrics,
    Serendipity,
    MeanInvUserFreq
)
from rectools.model_selection import TimeRangeSplitter
from rectools.models import RandomModel, PopularModel
from tqdm import tqdm

In [8]:
df_interactions = pd.read_csv('../data/interactions.csv')
df_interactions.rename(
    columns={
        "last_watch_dt": rectools.Columns.Datetime,
        "total_dur": rectools.Columns.Weight},
    inplace=True
)

interactions = Interactions(df_interactions)

items = pd.read_csv('../data/items.csv')

In [38]:
class RecoEvaluation:
    def __init__(self, models, metrics, splitter, k):
        self.models = models
        self.metrics = metrics
        self.splitter = splitter
        self.k = k
        self.results = pd.DataFrame(
            columns=['fold', 'model', 'time'] + list(self.metrics.keys()))

    def evaluate(self, interactions):
        n_splits = self.splitter.n_splits

        fold_iterator = self.splitter.split(interactions, collect_fold_stats=True)

        for train_ids, test_ids, fold_info in tqdm(fold_iterator, total=n_splits):
            print(f"Fold {fold_info['i_split']}")
            print(fold_info)

            df_train = interactions.df.iloc[train_ids]
            dataset = Dataset.construct(df_train)

            df_test = interactions.df.iloc[test_ids][Columns.UserItem]
            test_users = np.unique(df_test[Columns.User])

            catalog = df_train[Columns.Item].unique()

            for model_name, model in self.models.items():
                model_copy = deepcopy(model)  # Ensure a deep copy of the model
                start = time.time()
                model_copy.fit(dataset)
                end = time.time()
                recos = model_copy.recommend(
                    users=test_users,
                    dataset=dataset,
                    k=self.k,
                    filter_viewed=True,
                )

                metric_values = calc_metrics(
                    self.metrics,
                    reco=recos,
                    interactions=df_test,
                    prev_interactions=df_train,
                    catalog=catalog,
                )

                res = {"fold": fold_info["i_split"], "model": model_name, "time": end - start}
                res.update(metric_values)

                temp_df = pd.DataFrame([res])
                self.results = pd.concat([self.results, temp_df], ignore_index=True)

    def show_pivot(self, group=None):
        pivot_results = self.results.drop("fold", axis=1).groupby(["model"]).mean()

        if group == 'metrics':
            new_columns = [(re.split("@", col)[0], int(re.split("@", col)[1])) if "@" in col else (col, "")
                       for col in pivot_results.columns]
            pivot_results.columns = pd.MultiIndex.from_tuples(new_columns, names=["Metric", "Value"])

        display(
            pivot_results.style
            .highlight_min(color='lightcoral', axis=0)
            .highlight_max(color='lightgreen', axis=0)
        )

In [44]:
SEED = 42
N_SPLITS = 3
K = 10

models = {
    'RandomModel': RandomModel(random_state=SEED),
    'PopularModel': PopularModel()
}

metrics = {
    'Precision@1': Precision(k=1),
    'Precision@5': Precision(k=5),
    'Precision@10': Precision(k=10),
    'Recall@1': Recall(k=1),
    'Recall@5': Recall(k=5),
    'Recall@10': Recall(k=10),
    'MAP@1': MAP(k=1, divide_by_k=False),
    'MAP@5': MAP(k=5, divide_by_k=False),
    'MAP@10': MAP(k=10, divide_by_k=False),
    'NDCG@1': NDCG(k=1, log_base=3),
    'NDCG@5': NDCG(k=5, log_base=3),
    'NDCG@10': NDCG(k=10, log_base=3),
    'MIUF@1': MeanInvUserFreq(k=1),
    'MIUF@5': MeanInvUserFreq(k=5),
    'MIUF@10': MeanInvUserFreq(k=10),
    'Serendipity@1': Serendipity(k=1),
    'Serendipity@5': Serendipity(k=5),
    'Serendipity@10': Serendipity(k=10)
}

splitter = TimeRangeSplitter(
    "7D",
    n_splits=N_SPLITS,
    filter_already_seen=True,
    filter_cold_items=True,
    filter_cold_users=True,
)

In [45]:
evaluator = RecoEvaluation(models, metrics, splitter, K)
evaluator.evaluate(interactions)

  0%|          | 0/3 [00:00<?, ?it/s]

Fold 0
{'i_split': 0, 'start': Timestamp('2021-08-02 00:00:00', freq='7D'), 'end': Timestamp('2021-08-09 00:00:00', freq='7D'), 'train': 4266013, 'train_users': 797423, 'train_items': 15237, 'test': 263681, 'test_users': 98184, 'test_items': 6602}


 33%|███▎      | 1/3 [00:15<00:30, 15.16s/it]

Fold 1
{'i_split': 1, 'start': Timestamp('2021-08-09 00:00:00', freq='7D'), 'end': Timestamp('2021-08-16 00:00:00', freq='7D'), 'train': 4649162, 'train_users': 850489, 'train_items': 15415, 'test': 279422, 'test_users': 103511, 'test_items': 6698}


 67%|██████▋   | 2/3 [00:31<00:15, 15.93s/it]

Fold 2
{'i_split': 2, 'start': Timestamp('2021-08-16 00:00:00', freq='7D'), 'end': Timestamp('2021-08-23 00:00:00', freq='7D'), 'train': 5051815, 'train_users': 906071, 'train_items': 15577, 'test': 298878, 'test_users': 110076, 'test_items': 6679}


100%|██████████| 3/3 [00:48<00:00, 16.33s/it]


In [46]:
evaluator.show_pivot()

Unnamed: 0_level_0,time,Precision@1,Precision@5,Precision@10,Recall@1,Recall@5,Recall@10,MAP@1,MAP@5,MAP@10,NDCG@1,NDCG@5,NDCG@10,MIUF@1,MIUF@5,MIUF@10,Serendipity@1,Serendipity@5,Serendipity@10
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
PopularModel,1.34734,0.076432,0.052402,0.033903,0.04272,0.137413,0.173492,0.04272,0.078295,0.084109,0.076432,0.057932,0.043084,2.377055,3.066979,3.71339,2e-06,3e-06,2e-06
RandomModel,3.7e-05,0.000169,0.000176,0.000176,6.9e-05,0.000345,0.000687,6.9e-05,0.000155,0.0002,0.000169,0.000173,0.000175,15.616147,15.611575,15.610409,4e-06,6e-06,6e-06


In [47]:
evaluator.show_pivot(group='metrics')

Metric,time,Precision,Precision,Precision,Recall,Recall,Recall,MAP,MAP,MAP,NDCG,NDCG,NDCG,MIUF,MIUF,MIUF,Serendipity,Serendipity,Serendipity
Value,Unnamed: 1_level_1,1,5,10,1,5,10,1,5,10,1,5,10,1,5,10,1,5,10
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
PopularModel,1.34734,0.076432,0.052402,0.033903,0.04272,0.137413,0.173492,0.04272,0.078295,0.084109,0.076432,0.057932,0.043084,2.377055,3.066979,3.71339,2e-06,3e-06,2e-06
RandomModel,3.7e-05,0.000169,0.000176,0.000176,6.9e-05,0.000345,0.000687,6.9e-05,0.000155,0.0002,0.000169,0.000173,0.000175,15.616147,15.611575,15.610409,4e-06,6e-06,6e-06


In [20]:
class RecoVisualisation:
    def __init__(self, model, dataset, user_ids, item_data):
        self.model = model
        self.dataset = dataset
        self.user_ids = user_ids
        self.item_data = item_data

    def visualise(self):
        for user_id in self.user_ids:
            user_history = self.dataset.interactions.df.loc[
                self.dataset.interactions.df.user_id == user_id]
            user_recos = self.model.recommend(
                users=[user_id],
                dataset=self.dataset,
                k=K,
                filter_viewed=True
            )
            user_history = user_history.copy()
            user_recos = user_recos.copy()

            user_history['view_cnt'] = user_history['item_id'].map(
                self.dataset.interactions.df['item_id'].value_counts())

            user_recos['view_cnt'] = user_recos['item_id'].map(
                self.dataset.interactions.df['item_id'].value_counts())

            user_history['days_passed'] = (pd.Timestamp.now() - user_history['datetime']).dt.days

            columns_hist_show = list(
                set(list(self.item_data.columns) + ['item_id', 'view_cnt', 'days_passed']))

            columns_reco_show = list(
                set(list(self.item_data.columns) + ['item_id', 'view_cnt']))

            print(f"\n\n\nUser ID {user_id}")
            if not user_history.empty:
                user_history = user_history.merge(
                    self.item_data,
                    left_on='item_id',
                    right_on='item_id',
                ).sort_values(by='datetime').tail(10)
                print(f"\nUser ID {user_id} - Watched items:")
                display(user_history[columns_hist_show])

            if not user_recos.empty:
                user_recos = user_recos.merge(
                    self.item_data,
                    left_on='item_id',
                    right_on='item_id',
                )
                print(f"\nUser ID {user_id} - Recommended items:")
                display(user_recos[columns_reco_show])

In [21]:
user_ids = [666262, 672861, 955527]

model = RandomModel(random_state=42)
dataset = Dataset.construct(df_interactions)
model.fit(dataset)

reco_viz = RecoVisualisation(model, dataset, user_ids,
                             items[['item_id', 'title', 'genres']])

In [22]:
reco_viz.visualise()




User ID 666262

User ID 666262 - Watched items:


Unnamed: 0,view_cnt,item_id,genres,days_passed,title
0,68581,93,"зарубежные, криминал, детективы, ужасы",854,Дом ночных призраков



User ID 666262 - Recommended items:


Unnamed: 0,view_cnt,item_id,genres,title
0,6,7419,комедии,Ода радости
1,19,9109,"драмы, военные",Последняя битва
2,2,13917,"драмы, детективы, триллеры, вестерн",Преисподняя
3,4,13332,ужасы,Лихорадка
4,180,1331,драмы,Вечность
5,1,15448,"триллеры, криминал, детективы",Леший
6,896,1123,"драмы, мюзиклы, мелодрамы",Богема
7,3,9933,"драмы, комедии",Шедевр
8,728,3287,"единоборства, фитнес, для детей",Единоборства для детей (3-6 лет) Школа героев ...
9,3,8478,комедии,Американский пирог: Все в сборе





User ID 672861

User ID 672861 - Watched items:


Unnamed: 0,view_cnt,item_id,genres,days_passed,title
0,202457,25,"мюзиклы, мультфильм, приключения, комедии",849,Медвежонок Винни и его друзья
1,132865,32,"драмы, мюзиклы, мелодрамы",843,В ритме сердца



User ID 672861 - Recommended items:


Unnamed: 0,view_cnt,item_id,genres,title
0,6,7419,комедии,Ода радости
1,19,9109,"драмы, военные",Последняя битва
2,2,13917,"драмы, детективы, триллеры, вестерн",Преисподняя
3,4,13332,ужасы,Лихорадка
4,180,1331,драмы,Вечность
5,1,15448,"триллеры, криминал, детективы",Леший
6,896,1123,"драмы, мюзиклы, мелодрамы",Богема
7,3,9933,"драмы, комедии",Шедевр
8,728,3287,"единоборства, фитнес, для детей",Единоборства для детей (3-6 лет) Школа героев ...
9,3,8478,комедии,Американский пирог: Все в сборе





User ID 955527

User ID 955527 - Watched items:


Unnamed: 0,view_cnt,item_id,genres,days_passed,title
0,193123,21,для взрослых,855,Признание 5



User ID 955527 - Recommended items:


Unnamed: 0,view_cnt,item_id,genres,title
0,6,7419,комедии,Ода радости
1,19,9109,"драмы, военные",Последняя битва
2,2,13917,"драмы, детективы, триллеры, вестерн",Преисподняя
3,4,13332,ужасы,Лихорадка
4,180,1331,драмы,Вечность
5,1,15448,"триллеры, криминал, детективы",Леший
6,896,1123,"драмы, мюзиклы, мелодрамы",Богема
7,3,9933,"драмы, комедии",Шедевр
8,728,3287,"единоборства, фитнес, для детей",Единоборства для детей (3-6 лет) Школа героев ...
9,3,8478,комедии,Американский пирог: Все в сборе
