In [1]:
%load_ext autoreload
%autoreload 2

import IPython
from pathlib import Path
import os
# Move the working directory three levels above this notebook file so that the
# relative paths used below (e.g. ".data/...") resolve.
# NOTE(review): "__vsc_ipynb_file__" is looked up in the caller's namespace and
# appears to be injected by the VS Code Jupyter extension; under another front
# end this lookup would raise KeyError -- confirm before running elsewhere.
# The result is deliberately NOT stored in a variable called `locals`, which
# would shadow the builtin `locals()` for every later cell.
caller_namespace = IPython.extract_module_locals()[1]  # type: ignore
notebook_name = caller_namespace["__vsc_ipynb_file__"]
os.chdir(Path(notebook_name).parent.parent.parent)

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch

from recsys.evaluation.metrics import map_k, precision_k, recall_k

In [5]:
# Dataset locations, relative to the working directory set in the first cell.
# NOTE(review): `base_path` is not used in the cells visible here and points at
# a different dataset directory than `intermediate_path` -- confirm it is needed.
base_path = Path(".data/hm/base")
intermediate_path = Path(".data/movielens/intermediate/1")

# Read the five parquet tables in one pass; the order of file names must match
# the order of the target names on the left-hand side.
(
    ratings,
    ratings_train,
    ratings_validation,
    user_id_map,
    movie_id_map,
) = (
    pd.read_parquet(intermediate_path / file_name)
    for file_name in (
        "ratings.parquet",
        "ratings_train.parquet",
        "ratings_validation.parquet",
        "user_id_map.parquet",
        "movie_id_map.parquet",
    )
)

In [10]:
# Number of distinct raw user / movie ids in the id-mapping tables.
# NOTE(review): later cells use these as the size of the contiguous
# session_id / item_id index spaces -- presumably the maps are one-to-one; confirm.
n_users, n_items = (
    user_id_map["userId"].nunique(),
    movie_id_map["movieId"].nunique(),
)

# Users, items, and total number of rating rows.
print(n_users, n_items, len(ratings))

162414 47396 15630129


In [11]:
# Number of candidate items recommended to every user.
N_candid = 50
# Fixed seed so the random baseline (and the metrics computed from it) is
# reproducible after a kernel restart.
RANDOM_SEED = 42

# Random baseline: for each user draw N_candid item ids uniformly from
# [0, n_items). Draws are independent, so a user's row may contain the same
# item more than once.
rng = np.random.default_rng(RANDOM_SEED)
random_candidates = rng.integers(low=0, high=n_items, size=(n_users, N_candid))

# One row per user; `session_id` is simply 0..n_users-1.
random_recommendations_df = pd.DataFrame({
    'session_id': range(n_users),
    'candidates': random_candidates.tolist()
})

In [12]:
# Popularity baseline: rank items by (number of training ratings) x (mean
# training rating) and recommend the same top-N_candid list to every user.
most_popular = (
    ratings_train
    .groupby("item_id")["rating"]
    .agg(['count', 'mean'])
    .assign(score=lambda stats: stats["count"] * stats["mean"])
    .sort_values(by="score", ascending=False)
)

# The index of `most_popular` holds the item ids, best score first.
top_item_ids = most_popular.index[:N_candid].tolist()

# Every row references the same `top_item_ids` list object.
most_popular_recommendations_df = pd.DataFrame({
    'session_id': range(n_users),
    'candidates': [top_item_ids] * n_users
})

In [13]:
# Validation interactions as a tensor with two rows: row 0 holds the
# session_id of each interaction and row 1 the item_id (the two selected
# columns, transposed).
validation_pairs = ratings_validation[["session_id", "item_id"]].values
ground_truth = torch.from_numpy(validation_pairs).T

In [14]:
# Evaluate the random baseline at several cut-offs.
# The candidate lists are stacked into one 2-D array (one row per user).
recommendations_tensor = torch.from_numpy(np.array(random_recommendations_df['candidates'].tolist()))
users_idx = torch.from_numpy(random_recommendations_df['session_id'].values)

# Keyword arguments shared by all three metric calls.
metric_kwargs = dict(users_idx=users_idx, n_users=n_users, n_items=n_items)

for k in [12, 30, 50]:
    # Named `map_score` rather than `map` so the builtin `map` is not
    # shadowed for the remainder of the kernel session.
    map_score = map_k(recommendations_tensor, ground_truth, k=k, **metric_kwargs)
    prec = precision_k(recommendations_tensor, ground_truth, k=k, **metric_kwargs)
    rec = recall_k(recommendations_tensor, ground_truth, k=k, **metric_kwargs)

    print(f"MAP@{k}: {map_score:.6f} | Precision@{k}: {prec:.6f} | Recall@{k}: {rec:.6f}")

MAP@12: 0.000879 | Precision@12: 0.000733 | Recall@12: 0.000303
MAP@30: 0.000729 | Precision@30: 0.000601 | Recall@30: 0.000462
MAP@50: 0.000670 | Precision@50: 0.000554 | Recall@50: 0.000842


In [15]:
# Evaluate the most-popular baseline at several cut-offs.
# The candidate lists are stacked into one 2-D array (one row per user).
recommendations_tensor = torch.from_numpy(np.array(most_popular_recommendations_df['candidates'].tolist()))
users_idx = torch.from_numpy(most_popular_recommendations_df['session_id'].values)

# Keyword arguments shared by all three metric calls.
metric_kwargs = dict(users_idx=users_idx, n_users=n_users, n_items=n_items)

for k in [12, 30, 50]:
    # Named `map_score` rather than `map` so the builtin `map` is not
    # shadowed for the remainder of the kernel session.
    map_score = map_k(recommendations_tensor, ground_truth, k=k, **metric_kwargs)
    prec = precision_k(recommendations_tensor, ground_truth, k=k, **metric_kwargs)
    rec = recall_k(recommendations_tensor, ground_truth, k=k, **metric_kwargs)

    print(f"MAP@{k}: {map_score:.6f} | Precision@{k}: {prec:.6f} | Recall@{k}: {rec:.6f}")

MAP@12: 0.092517 | Precision@12: 0.085166 | Recall@12: 0.025651
MAP@30: 0.081008 | Precision@30: 0.068792 | Recall@30: 0.049847
MAP@50: 0.074059 | Precision@50: 0.059613 | Recall@50: 0.071999
