# 03 — Collaborative Filtering Baselines & Evaluation
Train and evaluate Popularity, Item-based CF, and Implicit MF using shared metrics.


In [None]:
import pandas as pd
from pathlib import Path

from src import config
from src.evaluation import build_ground_truth, evaluate_topk
from src.models.popularity import PopularityRecommender
from src.models.item_based_cf import ItemBasedCF
from src.models.matrix_factorization import ImplicitMFRecommender

# Column names are read from the project config rather than hard-coded here.
USER_COL, ITEM_COL, TIMESTAMP_COL = (
    config.USER_COL,
    config.ITEM_COL,
    config.TIMESTAMP_COL,
)

# Load the train and test interaction tables from the processed-data
# directory, in that order.
processed_dir = config.PROCESSED_DATA_DIR
train_df, test_df = (
    pd.read_parquet(processed_dir / filename)
    for filename in ("train_interactions.parquet", "test_interactions.parquet")
)


## Build ground truth and helper mappings


In [None]:
# Ground truth is derived from the test split only; its keys define the set of
# users every model below is evaluated on.
# NOTE(review): presumably a mapping of user id -> held-out items; confirm
# against src.evaluation.build_ground_truth.
ground_truth = build_ground_truth(test_df, user_col=USER_COL, item_col=ITEM_COL)
users = [*ground_truth.keys()]

# user id -> list of every item that user has in the training split
# (one list entry per training row for that user).
user_to_items_train = (
    train_df.groupby(USER_COL)[ITEM_COL]
    .apply(list)
    .to_dict()
)


## Popularity baseline


In [None]:
# Fit the popularity baseline on the training interactions.
pop_model = PopularityRecommender(item_col=ITEM_COL)
pop_model.fit(train_df)


def pop_recommend(user_id, k):
    """Return the popularity model's recommendations for one user.

    The user's training-split items are handed to the model as the second
    argument (an empty list when the user has no training rows).
    NOTE(review): presumably the model uses them to filter out already-seen
    items; confirm in PopularityRecommender.recommend.
    """
    return pop_model.recommend(user_id, user_to_items_train.get(user_id, []), k)


# Score the baseline over every user that has ground truth.
pop_results = evaluate_topk(ground_truth, pop_recommend, users)
pop_results


## Item-based CF


In [None]:
# Fit item-based collaborative filtering on the training interactions.
# NOTE(review): k=50 is presumably the neighbourhood size; confirm in ItemBasedCF.
item_cf = ItemBasedCF(user_col=USER_COL, item_col=ITEM_COL, k=50)
item_cf.fit(train_df)


def item_cf_recommend(user_id, k):
    """Return the item-based CF model's recommendations for one user.

    The user's training-split items are handed to the model as the second
    argument (an empty list when the user has no training rows).
    """
    return item_cf.recommend(user_id, user_to_items_train.get(user_id, []), k)


# Score the model over every user that has ground truth.
item_cf_results = evaluate_topk(ground_truth, item_cf_recommend, users)
item_cf_results


## Implicit MF (ALS if available, else SVD fallback)


In [None]:
# Fit the implicit matrix-factorization model on the training interactions
# with 64 latent factors and 20 iterations.
mf = ImplicitMFRecommender(
    user_col=USER_COL,
    item_col=ITEM_COL,
    factors=64,
    iterations=20,
)
mf.fit(train_df)


def mf_recommend(user_id, k):
    """Return the matrix-factorization model's recommendations for one user.

    The user's training-split items are handed to the model as the second
    argument (an empty list when the user has no training rows).
    """
    return mf.recommend(user_id, user_to_items_train.get(user_id, []), k)


# Score the model over every user that has ground truth.
mf_results = evaluate_topk(ground_truth, mf_recommend, users)
mf_results


## Compare


In [None]:
# Stack the three per-model metric tables into a single frame, tagging every
# row with the model it came from via a new "model" column.
#
# pd.concat replaces the former chained DataFrame.append calls: append was
# deprecated in pandas 1.4 and removed in pandas 2.0, so the old form raises
# AttributeError on current pandas. As with append's default, the original
# row indices of each table are kept (no re-indexing).
comparison = pd.concat(
    [
        pop_results.assign(model="popularity"),
        item_cf_results.assign(model="item_cf"),
        mf_results.assign(model="implicit_mf"),
    ]
)
comparison
