In [6]:
# --- Standard library ---------------------------------------------------------
import os
import sys

# --- Third-party --------------------------------------------------------------
import altair as alt
import numpy as np
import pandas as pd
from tqdm import tqdm

# --- Path setup ---------------------------------------------------------------
# ROOT is a relative path pointing two directories above the kernel's working
# directory. It is put on sys.path so the project packages imported below can
# be found (presumably `scripts/` and `src/` live there -- TODO confirm).
# The membership check keeps re-runs of this cell from appending the same entry
# to sys.path over and over.
ROOT = os.path.join('..', '..')
if ROOT not in sys.path:
    sys.path.append(ROOT)

# --- Project-local ------------------------------------------------------------
# These must stay below the path setup above.
from scripts.data import (
    ml_ratings_df, ml_movies_df, ml_users_df, ml_train_test_split
)
from src.constants import T_SPLIT_POINT
from src.models.simple import PopularityBasedRecommender

# --- Configuration ------------------------------------------------------------
# Turn off Altair's row-count guard so charts can be built from large frames.
alt.data_transformers.disable_max_rows()

# --- Data ---------------------------------------------------------------------
# The objects imported from scripts.data are collected and converted to pandas
# here (presumably they are lazy frames -- TODO confirm). The names are
# deliberately re-bound because later cells refer to them by these names.
ml_ratings_df = ml_ratings_df.collect().to_pandas()
ml_users_df = ml_users_df.collect().to_pandas()
ml_movies_df = ml_movies_df.collect().to_pandas()

# Train-Test Split

In [7]:
# Cut-off K used throughout the notebook: it is handed to the splitter here and
# reused as the `k` of the evaluation further down.
MAX_K = 30

# Split the ratings into train and test parts. How MAX_K and T_SPLIT_POINT
# shape the split is decided inside ml_train_test_split (not visible here).
ml_ratings_train_df, ml_ratings_test_df = ml_train_test_split(
    ml_ratings_df, MAX_K, T_SPLIT_POINT
)

# Report the size of each split as a quick sanity check.
for label, split_df in (
    ('Train shape:', ml_ratings_train_df),
    ('Test shape:', ml_ratings_test_df),
):
    print(label, split_df.shape)

Train shape: (85086, 4)
Test shape: (534709, 4)


# Baseline Popularity-Based Recommender

In [8]:
# Build the popularity baseline from the movie and user tables, then fit it on
# the training ratings only, so the test split is not seen during fitting.
recommender = PopularityBasedRecommender(ml_movies_df, ml_users_df)
recommender.fit(ml_ratings_train_df)

In [9]:
# Score the fitted recommender on the test ratings at cut-off k=MAX_K.
# The summary cell reads the keys "precision@k", "recall@k" and "f1@k" from the
# result; presumably each holds one value per evaluated user -- TODO confirm.
metrics = recommender.evaluate(ml_ratings_test_df, k=MAX_K)

100%|██████████| 3003/3003 [00:02<00:00, 1059.36it/s]


In [10]:
# Summary table: each section pairs an aggregation function with the
# (label, metric key) rows it is applied to. Sections are printed in order,
# separated by a single blank line.
report_sections = (
    (np.mean, (
        ("Average Precision@K:", "precision@k"),
        ("Average Recall@K:", "recall@k"),
        ("Average F1@K:", "f1@k"),
    )),
    (np.max, (
        ("Best Precision@K:", "precision@k"),
        ("Best Recall@K:", "recall@k"),
        ("Best F1@K:", "f1@k"),
    )),
)

for section_index, (aggregate, rows) in enumerate(report_sections):
    if section_index:
        # Blank line between the "Average" and "Best" groups.
        print()
    for label, metric_key in rows:
        # Values are rounded to four decimal places before printing.
        print(label, round(aggregate(metrics[metric_key]), 4))

Average Precision@K: 0.4238
Average Recall@K: 0.102
Average F1@K: 0.1473

Best Precision@K: 1.0
Best Recall@K: 0.439
Best F1@K: 0.507
