In [19]:
from typing import Dict, List

from copy import deepcopy
import pandas as pd
import numpy as np
from rectools import Columns
from rectools.dataset import Interactions, Dataset
from rectools.models import PopularModel, RandomModel
from rectools.model_selection import Splitter, TimeRangeSplitter
from rectools.models.base import ModelBase
from rectools.metrics.base import MetricAtK
from rectools.metrics import calc_metrics, Precision, Recall, Serendipity, MeanInvUserFreq, MAP, NDCG
import time
import os
from pprint import pprint

In [18]:

interactions_url = os.path.join(os.path.dirname(os.getcwd()), 'data/interactions.csv')
items_url = os.path.join(os.path.dirname(os.getcwd()), 'data/items.csv')


interactions_df = pd.read_csv(interactions_url)
interactions_df.rename(
        columns={
                'last_watch_dt': Columns.Datetime,
                'total_dur': Columns.Weight
        }, 
        inplace=True)


items_df = pd.read_csv(items_url)

interactions = Interactions(interactions_df)

In [14]:
def cross_val_score(
        models: Dict[str, ModelBase],
        metrics: Dict[str, MetricAtK],
        cv: Splitter,
        rec_num: int = 10
):
    """
    Calculate cross validation metrics
    """
    iterator = cv.split(interactions)
    results = []
    for train_ids, test_ids, fold_info in iterator:
        print(f"\n==================== Fold {fold_info['i_split']}")
        df_train = interactions.df.iloc[train_ids]
        dataset = Dataset.construct(df_train)
        df_test = interactions.df.iloc[test_ids][Columns.UserItem]
        test_users = np.unique(df_test[Columns.User])
        catalog = df_train[Columns.Item].unique()
        for model_name, model in models.items():
                curr_model = deepcopy(model)
                start_time = time.time()
                curr_model.fit(dataset)
                end_time = time.time()
                recos = curr_model.recommend(
                        users=test_users,
                        dataset=dataset,
                        k=rec_num,
                        filter_viewed=True,
                        )
                metric_values = calc_metrics(
                        metrics,
                        reco=recos,
                        interactions=df_test,
                        prev_interactions=df_train,
                        catalog=catalog,
                        )
                res = {"fold": fold_info["i_split"], "model": model_name, 'learning_time': end_time - start_time}
                res.update(metric_values)
                results.append(res)
    
    # Create DataFrame
    result_df = pd.DataFrame(results)
    metric_names = list(metrics.keys()) + ['learning_time']
    grouped_df = result_df.groupby('model')[metric_names].mean().reset_index()
    return grouped_df


In [24]:
n_splits = 3

models = {
    "random": RandomModel(random_state=42),
    "popular": PopularModel()
}

metrics = {
    "precision@1": Precision(k=1),
    "precision@5": Precision(k=5),
    "precision@10": Precision(k=10),
    "recall@1": Recall(k=1),
    "recall@5": Recall(k=5),
    "recall@10": Recall(k=10),
    "novelty@10": MeanInvUserFreq(k=10),
    "serendipity@10": Serendipity(k=10),
    "MAP@1": MAP(k=1),
    "MAP@5": MAP(k=5),
    "MAP@10": MAP(k=10),
    'NDCG@10': NDCG(k=10)
}

cv = TimeRangeSplitter(
    test_size="7D",
    n_splits=n_splits,
    filter_already_seen=True,
    filter_cold_items=True,
    filter_cold_users=True,
)

result = cross_val_score(models, metrics, cv)
result






Unnamed: 0,model,precision@1,precision@5,precision@10,recall@1,recall@5,recall@10,novelty@10,serendipity@10,MAP@1,MAP@5,MAP@10,NDCG@10,learning_time
0,popular,0.076432,0.052402,0.033903,0.04272,0.137413,0.173492,3.71339,2e-06,0.04272,0.078295,0.084109,0.043084,4.711097
1,random,0.000169,0.000176,0.000176,6.9e-05,0.000345,0.000687,15.610409,6e-06,6.9e-05,0.000155,0.0002,0.000175,0.000137


In [29]:
def highlight_min_max(s):
    styles = [''] * len(s)
    styles[s.idxmin()] = 'background-color: red'
    styles[s.idxmax()] = 'background-color: green'
    return styles

result.style.apply(highlight_min_max, subset=list(metrics.keys()), axis=0)

Unnamed: 0,model,precision@1,precision@5,precision@10,recall@1,recall@5,recall@10,novelty@10,serendipity@10,MAP@1,MAP@5,MAP@10,NDCG@10,learning_time
0,popular,0.076432,0.052402,0.033903,0.04272,0.137413,0.173492,3.71339,2e-06,0.04272,0.078295,0.084109,0.043084,4.711097
1,random,0.000169,0.000176,0.000176,6.9e-05,0.000345,0.000687,15.610409,6e-06,6.9e-05,0.000155,0.0002,0.000175,0.000137


In [38]:
def visual_analize(
        fitted_model: ModelBase,
        interactions_dataset: pd.DataFrame,
        user_ids: List[int],
        item_data: pd.DataFrame
):
    dataset =  Dataset.construct(interactions_dataset)
    recommendations = fitted_model.recommend(
        users=user_ids,
        dataset=dataset,
        k=10,
        filter_viewed=True,
    )
    # Calculate Items History
    item_history = interactions_dataset['item_id'].value_counts().reset_index(name='count').rename(columns={'index': 'item_id'})
    # Merge history
    history_df = interactions_dataset.merge(item_data, on='item_id').merge(item_history, on='item_id').sort_values(by='datetime')
    # Merge recommendations
    recommendations_with_items = recommendations.merge(item_data, on='item_id').merge(item_history, on='item_id')
    result_dict = {}
    for user in user_ids:
        result_dict[user] = {
            'history': history_df[history_df['user_id'] == user][['datetime', 'title', 'genres', 'count']],
            'recommendations': recommendations_with_items[recommendations_with_items['user_id'] == user][['title', 'genres', 'count', 'rank']]
        }
    return result_dict
        

In [39]:
# Using example

model = PopularModel()
dataset = Dataset.construct(interactions_df)
model.fit(dataset)

result = visual_analize(
    model,
    interactions_df,
    [666262, 672861, 955527],
    items_df
)

In [40]:
display(result[666262]['history'])
display(result[666262]['recommendations'])

Unnamed: 0,datetime,title,genres,count
4612573,2021-05-12,Последний викинг,"боевики, историческое, приключения",746
4907745,2021-05-12,Робин Гуд: Начало,"боевики, триллеры, приключения",485
1645045,2021-05-14,Томирис,"боевики, драмы, историческое, военные",10370


Unnamed: 0,title,genres,count,rank
0,Хрустальный,"триллеры, детективы",202457,1
3,Клиника счастья,"драмы, мелодрамы",193123,2
6,Гнев человеческий,"боевики, триллеры",132865,3
9,Девятаев,"драмы, военные, приключения",122119,4
12,Секреты семейной жизни,комедии,91167,5
15,Прабабушка легкого поведения,комедии,74803,6
18,Подслушано,"драмы, триллеры",68581,7
21,Афера,комедии,55043,8
24,Маша,"драмы, триллеры",45367,9
27,Дуров,документальное,40372,10
