In [1]:
import os
# Two directory levels above the notebook's working directory (presumably the repository root).
ROOT = os.path.join('..', '..')
import sys
# Put ROOT on the import path so the `src.*` imports below resolve.
sys.path.append(ROOT)
# Third-party libraries
import numpy as np
import polars as pl
import altair as alt

from src.metrics import ml_precision_at_k, ml_recall_at_k, ml_f1_at_k

# Lift Altair's limit on the number of rows a chart dataset may have.
alt.data_transformers.disable_max_rows()

  from pandas.core import (


DataTransformerRegistry.enable('default')

# Train - Test Split

In [2]:
from src.utils import read_ml, ml_train_test_split

# read_ml() returns five frames: the complete table plus users, ratings, movies and genres
# (presumably the MovieLens dataset, judging by the `ml_` prefix — confirm in src.utils).
ml_complete_df, ml_users_df, ml_ratings_df, ml_movies_df, ml_genres_df = read_ml()

# Passed to the split as `min_user_test_samples`; presumably the largest cut-off K that the
# @K metrics are meant to support — TODO confirm against ml_train_test_split.
MAX_K = 30
ml_ratings_train_df, ml_ratings_test_df = ml_train_test_split(ml_ratings_df=ml_ratings_df, min_user_test_samples=MAX_K)

# Content Based Filtering

In [3]:
# ID of the single user used as the running example for the profile, recommendation and metric cells.
ml_test_user_id = 5245

In [4]:
def create_user_profile(user_id):
    """
    Build the genre profile of a user from the training ratings.

    The user's rows in `ml_ratings_train_df` are joined with `ml_movies_df` on
    "MovieID", and every "Is<Genre>" column is summed. The result has one
    column per genre (named after the genre) holding that sum. Every rated
    movie contributes equally; the rating value itself is not used.

    The query is returned un-collected; call `.collect()` to materialize it.
    """
    # Genre names come from the first field of each row; missing names are skipped.
    genre_names = [row[0] for row in ml_genres_df.rows() if row[0] is not None]
    genre_totals = [pl.sum("Is" + name).alias(name) for name in genre_names]

    rated_movies = (
        ml_ratings_train_df
        .filter(pl.col("UserID") == user_id)
        .join(ml_movies_df, on="MovieID")
    )
    return rated_movies.select(genre_totals)

# Materialize the example user's genre profile so the cell displays it.
create_user_profile(ml_test_user_id).collect()

In [5]:
def ml_content_based_recommendation(user_id, n_recommendations=5, exclude_rated=False):
    """
    Rank movies for a user by the cosine similarity between the user's genre
    profile and each movie's genre indicator vector.

    Parameters
    ----------
    user_id
        ID of the user to recommend for (matched against "UserID").
    n_recommendations : int
        Number of top-scoring movies to return.
    exclude_rated : bool
        If True, movies the user already rated in the training set are dropped
        before ranking so they cannot occupy recommendation slots. Defaults to
        False, which ranks every movie (the original behaviour).

    Returns
    -------
    `ml_movies_df` with an added "score" column, sorted by descending score and
    truncated to `n_recommendations` rows. The result is not collected here.

    NOTE: the similarity is computed with NumPy on collected data rather than
    with native polars expressions.
    """
    genre_names = [row[0] for row in ml_genres_df.rows() if row[0] is not None]
    movie_profiles = ml_movies_df.select(
        [pl.col("Is" + name).cast(pl.UInt32) for name in genre_names]
    )

    # Work in float64 so the dot product is not carried out in unsigned integer arithmetic.
    user_vector = create_user_profile(user_id).collect().to_numpy()[0].astype(np.float64)
    movie_matrix = movie_profiles.collect().to_numpy().astype(np.float64)

    # The small epsilon keeps the division finite when one of the norms is zero.
    norms = np.linalg.norm(user_vector) * np.linalg.norm(movie_matrix, axis=1) + 1e-12
    similarity = movie_matrix.dot(user_vector) / norms

    # Scores are attached positionally: this relies on the collected movie rows
    # being in the same order as ml_movies_df.
    scored = ml_movies_df.with_columns(pl.Series(name="score", values=similarity))
    if exclude_rated:
        # Anti-join: keep only movies with no training rating from this user.
        rated = ml_ratings_train_df.filter(pl.col("UserID") == user_id).select("MovieID")
        scored = scored.join(rated, on="MovieID", how="anti")

    return scored.sort("score", descending=True).head(n_recommendations)

# Request 20 recommendations so the @5/@10/@15 metrics below can all be computed from one list.
ml_recommendation = ml_content_based_recommendation(ml_test_user_id, 20)

In [6]:
# Precision@k for the example user, evaluated with ml_ratings_test_df.
# `{k:<2}` left-aligns k in two characters so the colons line up ("@5 :" / "@10:").
for k in (5, 10, 15):
    print(f"Precision@{k:<2}: {ml_precision_at_k(k, ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")

In [7]:
# Recall@k for the example user, evaluated with ml_ratings_test_df.
for k in (5, 10, 15):
    print(f"Recall@{k}: {ml_recall_at_k(k, ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")

In [8]:
# F1@k for the example user, evaluated with ml_ratings_test_df.
for k in (5, 10, 15):
    print(f"F1@{k}: {ml_f1_at_k(k, ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")