In [1]:
import os
ROOT = os.path.join('..', '..')
import sys
sys.path.append(ROOT)
#
import numpy as np
import polars as pl
import altair as alt
from tqdm import tqdm

from scripts.data import (
    ml_ratings_df, ml_movies_df, ml_users_df, ml_df, ml_genres,
    bc_ratings_df, bc_books_df, bc_users_df, bc_df,
)
from src.metrics import (
    ml_precision_at_k, ml_recall_at_k, ml_f1_at_k
)
from src.models.simple import ml_popularity_based_recommendation

alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

# Train - Test Split

In [2]:
ml_ratings_df = ml_ratings_df.sort("Timestamp")
ml_ratings_df_len = ml_df.select(pl.len()).collect().item()
train_size = int(ml_ratings_df_len * 0.8)
ml_ratings_train_df = ml_df.head(train_size)
ml_ratings_test_df = ml_df.tail(ml_ratings_df_len - train_size)

# Baseline Popularity-Based Recommender

In [3]:
ml_test_user_id = 6040
n_recommendations = 100
ml_recommendation = ml_popularity_based_recommendation(ml_ratings_train_df, ml_movies_df, n_recommendations)

In [4]:
print(f"Precision@5 : {ml_precision_at_k(5, ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")
print(f"Precision@10: {ml_precision_at_k(10, ml_recommendation, ml_ratings_test_df,  ml_test_user_id)}")
print(f"Precision@15: {ml_precision_at_k(15, ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")

Precision@5 : 1.0
Precision@10: 0.9
Precision@15: 0.8666666666666667


## Recall@K

In [5]:
print(f"Recall@5: {ml_recall_at_k(5, ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")
print(f"Recall@10: {ml_recall_at_k(10, ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")
print(f"Recall@15: {ml_recall_at_k(15, ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")
print(f"Recall@100: {ml_recall_at_k(100, ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")

Recall@5: 0.01466275659824047
Recall@10: 0.026392961876832845
Recall@15: 0.03812316715542522
Recall@100: 0.18181818181818182


## F1@K

In [6]:
print(f"F1@5: {ml_f1_at_k(5,   ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")
print(f"F1@10: {ml_f1_at_k(10, ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")
print(f"F1@15: {ml_f1_at_k(15, ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")
print(f"F1@100: {ml_f1_at_k(100, ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")

F1@5: 0.028901734104017758
F1@10: 0.05128205128199593
F1@15: 0.07303370786508782
F1@100: 0.2811791383216448
