In [1]:
import os
ROOT = os.path.join('..', '..')
import sys
sys.path.append(ROOT)
#
import numpy as np
import polars as pl
import altair as alt
from tqdm import tqdm

from scripts.data import (
    ml_ratings_df, ml_movies_df, ml_users_df, ml_df, ml_genres,
    bc_ratings_df, bc_books_df, bc_users_df, bc_df,
)
from src.metrics import (
    ml_precision_at_k, ml_recall_at_k, ml_f1_at_k
)
from src.models.simple import ml_popularity_based_recommendation

alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

# Train - Test Split

In [2]:
ml_ratings_df = ml_ratings_df.sort("Timestamp")
ml_ratings_df_len = ml_df.select(pl.len()).collect().item()
train_size = int(ml_ratings_df_len * 0.8)
ml_ratings_train_df = ml_df.head(train_size)
ml_ratings_test_df = ml_df.tail(ml_ratings_df_len - train_size)

# Item-Item Colaborative Filtering

In [3]:
def ml_item_item_colaborative_filtering_recommendation(user_id, n_recommendations=5):
    """
    
    """
    user_movies = ml_ratings_train_df.filter(pl.col("UserID") == user_id).join(ml_movies_df, on="MovieID")
    user_movies_representation = user_movies.select([
        pl.col("Is" + genre[0]).alias(genre[0])
        for genre in ml_genres.rows() if genre[0] is not None
    ]).collect().to_numpy()
    all_movies_representation = ml_movies_df.select([
        pl.col("Is" + genre[0]).alias(genre[0])
        for genre in ml_genres.rows() if genre[0] is not None
    ]).collect().to_numpy()
    s = all_movies_representation.dot(user_movies_representation.T)
    print(user_movies_representation.shape)
    print(all_movies_representation.shape)
    print(s.shape)
    # scores = ml_movies_df.with_columns(pl.Series(name="score", values=s.flatten()))
    # return scores.head(n_recommendations)

ml_item_item_colaborative_filtering_recommendation(2, 20)

(129, 18)
(3883, 18)
(3883, 129)


In [4]:
print(f"Precision@5 : {ml_precision_at_k(5, ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")
print(f"Precision@10: {ml_precision_at_k(10, ml_recommendation, ml_ratings_test_df,  ml_test_user_id)}")
print(f"Precision@15: {ml_precision_at_k(15, ml_recommendation, ml_ratings_test_df, ml_test_user_id)}")

NameError: name 'ml_recommendation' is not defined

# User-User Colaborative Filtering

In [None]:
def ml_user_user_colaborative_filtering_recommendation(user_id, n_recommendations=5):
    # Get user
    user = ml_users_df.filter(pl.col("UserID") == user_id)
    # Get similar users
    print(user.collect())

ml_user_user_colaborative_filtering_recommendation(2, 20)

shape: (1, 5)
┌────────┬────────┬─────┬───────────────┬──────────┐
│ UserID ┆ Gender ┆ Age ┆ Occupation    ┆ Zip-code │
│ ---    ┆ ---    ┆ --- ┆ ---           ┆ ---      │
│ i32    ┆ str    ┆ i32 ┆ str           ┆ str      │
╞════════╪════════╪═════╪═══════════════╪══════════╡
│ 2      ┆ M      ┆ 56  ┆ self-employed ┆ 70072    │
└────────┴────────┴─────┴───────────────┴──────────┘


In [None]:
ml_users_df.collect()

UserID,Gender,Age,Occupation,Zip-code
i32,str,i32,str,str
1,"""F""",1,"""K-12 student""","""48067"""
2,"""M""",56,"""self-employed""","""70072"""
3,"""M""",25,"""scientist""","""55117"""
4,"""M""",45,"""executive/managerial""","""02460"""
5,"""M""",25,"""writer""","""55455"""
…,…,…,…,…
6036,"""F""",25,"""scientist""","""32603"""
6037,"""F""",45,"""academic/educator""","""76006"""
6038,"""F""",56,"""academic/educator""","""14706"""
6039,"""F""",45,"""other""","""01060"""


In [None]:
ml_users_df.collect()

UserID,Gender,Age,Occupation,Zip-code
i32,str,i32,str,str
1,"""F""",1,"""K-12 student""","""48067"""
2,"""M""",56,"""self-employed""","""70072"""
3,"""M""",25,"""scientist""","""55117"""
4,"""M""",45,"""executive/managerial""","""02460"""
5,"""M""",25,"""writer""","""55455"""
…,…,…,…,…
6036,"""F""",25,"""scientist""","""32603"""
6037,"""F""",45,"""academic/educator""","""76006"""
6038,"""F""",56,"""academic/educator""","""14706"""
6039,"""F""",45,"""other""","""01060"""


In [None]:
# d(item, item)
# user: item1, item2, item3

In [None]:
ml_movies_df.collect()

MovieID,Title,Genres,Year,IsCrime,IsMystery,IsComedy,IsWestern,IsRomance,IsWar,IsAnimation,IsSci-Fi,IsFantasy,IsChildren's,IsDocumentary,IsThriller,IsHorror,IsMusical,IsAction,IsDrama,IsAdventure,IsFilm-Noir
i32,str,list[str],i32,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool
1,"""Toy Story""","[""Animation"", ""Children's"", ""Comedy""]",1995,false,false,true,false,false,false,true,false,false,true,false,false,false,false,false,false,false,false
2,"""Jumanji""","[""Adventure"", ""Children's"", ""Fantasy""]",1995,false,false,false,false,false,false,false,false,true,true,false,false,false,false,false,false,true,false
3,"""Grumpier Old Men""","[""Comedy"", ""Romance""]",1995,false,false,true,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false
4,"""Waiting to Exhale""","[""Comedy"", ""Drama""]",1995,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false
5,"""Father of the Bride Part II""","[""Comedy""]",1995,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
3948,"""Meet the Parents""","[""Comedy""]",2000,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false
3949,"""Requiem for a Dream""","[""Drama""]",2000,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false
3950,"""Tigerland""","[""Drama""]",2000,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false
3951,"""Two Family House""","[""Drama""]",2000,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false
