In [1]:
# ===============================
# 1. Imports
# ===============================
import numpy as np
import pandas as pd
import joblib

from scipy.sparse import csr_matrix
from implicit.als import AlternatingLeastSquares


In [2]:
# ===============================
# 2. Load processed artifacts
# ===============================
interaction_matrix = joblib.load("../data/processed/interaction_matrix.pkl")
interactions = pd.read_csv("../data/processed/interactions.csv")

interaction_matrix.shape



(3125, 6034)

In [3]:
# ===============================
# 3. Train / Test Split (1 interaction per user)
# ===============================
test_interactions = (
    interactions
    .groupby("user_idx", group_keys=False)
    .apply(lambda x: x.sample(min(5,len(x)), random_state=42))
    .reset_index(drop=True)
)

train_interactions = interactions.drop(test_interactions.index)

len(train_interactions), len(test_interactions)



(544206, 30170)

In [4]:
# ===============================
# 4. Build Train Interaction Matrix (ITEM × USER)
# ===============================
n_items = interactions["movie_idx"].max() + 1
n_users = interactions["user_idx"].max() + 1

train_matrix = csr_matrix(
    (
        np.ones(len(train_interactions)),
        (train_interactions["movie_idx"], train_interactions["user_idx"])
    ),
    shape=(n_items, n_users)
)

train_matrix



<3125x6034 sparse matrix of type '<class 'numpy.float64'>'
	with 544206 stored elements in Compressed Sparse Row format>

In [5]:
# ===============================
# 5. TRANSPOSE for implicit ALS (USER × ITEM)
# ===============================
item_user_train = train_matrix.T.tocsr()
item_user_train.shape



(6034, 3125)

In [6]:
# ===============================
# 6. Train ALS Model
# ===============================
als_model = AlternatingLeastSquares(
    factors=128,
    regularization=0.01,
    iterations=40,
    random_state=42
)

als_model.fit(item_user_train * 40)


  check_blas_config()


  0%|          | 0/40 [00:00<?, ?it/s]

In [7]:
# ===============================
# 7. Filter valid users for evaluation
# ===============================
n_users_model = item_user_train.shape[0]

test_interactions_eval = test_interactions[
    test_interactions["user_idx"] < n_users_model
].copy()

len(test_interactions), len(test_interactions_eval)


(30170, 30170)

In [8]:
# ===============================
# 8. Recommendation Function (CORRECT)
# ===============================
def recommend_als(user_idx, model, user_item_matrix, k=50):
    n_users = user_item_matrix.shape[0]

    if user_idx < 0 or user_idx >= n_users:
        return []

    user_items = user_item_matrix[user_idx]

    item_ids, scores = model.recommend(
        userid=user_idx,
        user_items=user_items,
        N=k,
        filter_already_liked_items=True
    )
    return item_ids.tolist()


In [9]:
# ===============================
# 9. Evaluation Metrics
# ===============================
def hit_rate_at_k_als(model, user_item_matrix, test_interactions, k=50):
    hits = 0

    for _, row in test_interactions.iterrows():
        user = row["user_idx"]
        true_item = row["movie_idx"]

        recs = recommend_als(user, model, user_item_matrix, k)
        if true_item in recs:
            hits += 1

    return hits / len(test_interactions)


def recall_at_k_als(model, user_item_matrix, test_interactions, k=50):
    recall_sum = 0

    for _, row in test_interactions.iterrows():
        user = row["user_idx"]
        true_item = row["movie_idx"]

        recs = recommend_als(user, model, user_item_matrix, k)
        recall_sum += int(true_item in recs)

    return recall_sum / len(test_interactions)


def map_at_k_als(model, user_item_matrix, test_interactions, k=50):
    ap_sum = 0

    for _, row in test_interactions.iterrows():
        user = row["user_idx"]
        true_item = row["movie_idx"]

        recs = recommend_als(user, model, user_item_matrix, k)

        if true_item in recs:
            rank = recs.index(true_item) + 1
            ap_sum += 1 / rank

    return ap_sum / len(test_interactions)


In [10]:
# ===============================
# 10. Evaluate ALS Model
# ===============================
hit_50 = hit_rate_at_k_als(
    als_model,
    item_user_train,
    test_interactions_eval,
    k=50
)

recall_50 = recall_at_k_als(
    als_model,
    item_user_train,
    test_interactions_eval,
    k=50
)

map_50 = map_at_k_als(
    als_model,
    item_user_train,
    test_interactions_eval,
    k=50
)

hit_50, recall_50, map_50


(0.001325820351342393, 0.001325820351342393, 0.00017019201032878137)

In [11]:
# ===============================
# 11. Sample Recommendations (Human-readable)
# ===============================
movie_encoder = joblib.load("../models/movie_encoder.pkl")

sample_user = 0

recommended_item_indices = recommend_als(
    sample_user,
    als_model,
    item_user_train,
    k=10
)

recommended_movie_ids = movie_encoder.inverse_transform(
    recommended_item_indices
)

movies = pd.read_csv(
    "../data/raw/movies.dat",
    sep="::",
    engine="python",
    encoding="latin-1",
    names=["movie_id", "title", "genres"]
)

movies[movies["movie_id"].isin(recommended_movie_ids)][
    ["movie_id", "title", "genres"]
]


Unnamed: 0,movie_id,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
5,6,Heat (1995),Action|Crime|Thriller
6,7,Sabrina (1995),Comedy|Romance
7,8,Tom and Huck (1995),Adventure|Children's
8,9,Sudden Death (1995),Action
9,10,GoldenEye (1995),Action|Adventure|Thriller


In [12]:
hit_10 = hit_rate_at_k_als(
    als_model,
    item_user_train,
    test_interactions_eval,
    k=10
)

recall_10 = recall_at_k_als(
    als_model,
    item_user_train,
    test_interactions_eval,
    k=10
)

map_10 = map_at_k_als(
    als_model,
    item_user_train,
    test_interactions_eval,
    k=10
)

hit_10, recall_10, map_10


(0.00036460059661915813, 0.00036460059661915813, 7.374875704342061e-05)

In [13]:
# ===============================
# 12. Save Model
# ===============================
joblib.dump(als_model, "../models/als_implicit_model.pkl")


['../models/als_implicit_model.pkl']

In [15]:
import joblib
joblib.dump(
     test_interactions_eval,
    "../data/processed/test_interactions_eval.pkl"
)

['../data/processed/test_interactions_eval.pkl']

In [16]:
import joblib
joblib.dump(
     item_user_train,
    "../data/processed/item_user_train.pkl"
)

['../data/processed/item_user_train.pkl']