# Module 7: Learning to Rank

This notebook contains 3 learning to rank models and associated metrics.

## Import and clean the data

In [1]:
import numpy as np
import pandas as pd
import scipy.sparse
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

DATA_DIR = "data/ml-100k/"

ratings = pd.read_csv(DATA_DIR + 'u.data',
                      sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])

users = pd.read_csv(DATA_DIR + 'u.user', 
                    sep='|',
                    names=['user', 'age', 'gender', 'occupation', 'zip_code'])

movies = pd.read_csv(DATA_DIR + 'u.item', 
                     sep='|', 
                     encoding='latin-1', 
                     header = None,
                     names = ['item', 'title', 'release_date', 'video_release_date', 'IMDb_URL'] +
                           [f'genre_{i}' for i in range(19)])


ratings.timestamp = pd.to_datetime(ratings.timestamp,
                                   unit = "s")

users = users.drop(columns=['zip_code'])  # Optional
movies = movies.drop(columns=['title', 'release_date', 'video_release_date', 'IMDb_URL'])  # Keep genre booleans

data = ratings.merge(users, on='user').merge(movies, on='item')

# Create design matrix for regression model
# Create categorical variables
factor_vars = data[['gender', 'occupation']]
factors = OneHotEncoder().fit_transform(factor_vars)

age = data[['age']].values

# get the genres
genres = data[[col for col in data.columns if col.startswith('genre_')]].values

user_enc = LabelEncoder().fit_transform(data['user'])
item_enc = LabelEncoder().fit_transform(data['item'])

# Use a sparse matrix for the design matrix
X = scipy.sparse.hstack([
    factors,
    age,
    genres,
    user_enc.reshape(-1, 1),
    item_enc.reshape(-1, 1)
])

y = data['rating'].values

## Pointwise recommender 

This pointwise ranking recommender uses linear regression with L2 regularization (i.e., ridge regression).  The linear model is fit using 80% of the data with movie genre, user age, gender, and occupation as covariates.

In [2]:
from recommenders.evaluation.python_evaluation import ndcg_at_k
from sklearn.linear_model import Ridge
from sklearn.metrics import ndcg_score, root_mean_squared_error
from sklearn.model_selection import train_test_split

data_full = data[['user', 'item', 'rating']].copy()

X_train, X_test, y_train, y_test, train_data, test_data = train_test_split(X,
                                                    y,
                                                    data,
                                                    test_size = 0.2,
                                                    random_state = 1)

# Create and fit the regression model
ALPHA = 1.0
regression_model = Ridge(alpha = ALPHA)
regression_model.fit(X_train,
                     y_train)

regression_predictions = regression_model.predict(X_test)

rmse = root_mean_squared_error(y_test, regression_predictions)

print("Results of pointwise recommender")
print(f"Test RMSE: {rmse:.4f}")

#data_full = data[['user', 'item', 'rating']].copy()
test_data['prediction'] = regression_predictions

eval_ndcg = ndcg_at_k(test_data,
                      test_data,
                      col_user = 'user',
                      col_item = 'item',
                      col_rating = 'rating',
                      col_prediction = 'prediction',
                      score_type = "raw", # binary
                      k = 10)

print(f"Mean nDCG@10: {eval_ndcg:.4f}")

Results of pointwise recommender
Test RMSE: 1.0788
Mean nDCG@10: 0.8830


### Pairwise Using BPR

In this second model, we rank based on the Bayesian Personalized Ranking algorithm which uses a pairwise method and that uses a maximum a posteriori estimate (Rendle et al 2009). The code below was informed by a Jupyter notebook associated with the `recommenders` Python package (“Bayesian Personalized Ranking (BPR)” 2023).

References:
* “Bayesian Personalized Ranking (BPR).” 2023. GitHub. December 21, 2023. https://github.com/recommenders-team/recommenders/blob/main/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb.

* Rendle, Steﬀen, Christoph Freudenthaler, Zeno Gantner, and Lars Schmidt-Thieme. 2009. “BPR: Bayesian Personalized Ranking from Implicit Feedback.” Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence.

In [3]:
import cornac
from recommenders.datasets.python_splitters import python_random_split
from recommenders.models.cornac.cornac_utils import predict_ranking

K = 10
train, test = python_random_split(ratings,
                                  0.8)

bpr_train_data = cornac.data.Dataset.from_uir(train.itertuples(index = False),
                                              seed = 1)

NUM_FACTORS = 200
bpr = cornac.models.BPR(
    k = NUM_FACTORS,
    max_iter = 200,
    learning_rate = 0.01,
    lambda_reg = 0.001,
    verbose = True,
    seed = 1
)

bpr.fit(bpr_train_data)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 200/200 [00:08<00:00, 23.01it/s, correct=97.48%, skipped=9.70%] 

Optimization finished!





<cornac.models.bpr.recom_bpr.BPR at 0x22ea3998450>

In [6]:
all_predictions = predict_ranking(bpr,
                                  train,
                                  usercol = 'user',
                                  itemcol = 'item',
                                  remove_seen = True)

bpr_eval_data = pd.merge(test,
                     all_predictions,
                     how = "left",
                     on = ["user", "item"])

bpr_eval_data = bpr_eval_data[~bpr_eval_data.prediction.isna()]

k = 10
eval_ndcg = ndcg_at_k(bpr_eval_data,
                      bpr_eval_data,
                      col_user = 'user',
                      col_item = 'item',
                      col_rating = 'rating',
                      col_prediction = 'prediction',
                      score_type = "raw",
                      k = k)

rmse = root_mean_squared_error(bpr_eval_data.rating,
                               bpr_eval_data.prediction)

print("Results of pairwise (BPR) recommender")
print(f"RMSE: {rmse:.4f}")
print(f"Mean nDCG@10: {eval_ndcg:.4f}")

Results of pairwise (BPR) recommender
RMSE: 2.0438
Mean nDCG@10: 0.8846


### Listwise Model

This listwise model uses the `LightGBM` package and the LambdaRank algorithm (Burges et al 2007), and as a listwise model, the loss function considers the ordering of all items (not just pairs as with the pairwise model).

Burges, Christopher J.C., Robert Ragno, and Quoc Viet Le. 2007. “Learning to Rank with Nonsmooth Cost Functions.” In Advances in Neural Information Processing Systems 19, edited by Bernhard Schölkopf, John Platt, and Thomas Hofmann, 193–200. The MIT Press. https://doi.org/10.7551/mitpress/7503.003.0029.


In [7]:
import lightgbm as lgb

# Encode the categorical features
for col in ["gender", "occupation", "user", "item"]:
    data[col] = LabelEncoder().fit_transform(data[col])

feature_cols = ["user", "item", "age", "gender", "occupation"] + [f"genre_{i}" for i in range(19)]

X = data[feature_cols]
y = data["rating"]

X_train, X_test, y_train, y_test, data_train, data_test = train_test_split(
    X, y, data, test_size = 0.2, random_state = 1)

# Compute group sizes (items per user in training set)
group_train = data_train.groupby("user").size().values
group_test = data_test.groupby("user").size().values

# Create datasets for lightgbm
lgb_train_data = lgb.Dataset(X_train,
                             label = y_train,
                             group = group_train)
lgb_test_data = lgb.Dataset(X_test,
                            label = y_test, 
                            group = group_test)

lgb_model_params = {
    "objective": "lambdarank",
    "metric": "ndcg",
    "ndcg_eval_at": [5, 10],
    "learning_rate": 0.05,
    "num_leaves": 31,
    "min_data_in_leaf": 20
}

lgb_model = lgb.train(lgb_model_params,
                  lgb_train_data,
                  valid_sets = [lgb_test_data],
                  num_boost_round = 100)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002979 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 628
[LightGBM] [Info] Number of data points in the train set: 80000, number of used features: 23


In [8]:
lgb_model.predict(X_test)

eval_data = data_test.copy()
eval_data["prediction"] = lgb_model.predict(X_test)

rmse = root_mean_squared_error(eval_data.rating,
                               eval_data.prediction)

eval_ndcg = ndcg_at_k(eval_data,
                      eval_data,
                      col_user = 'user',
                      col_item = 'item',
                      col_rating = 'rating',
                      col_prediction = 'prediction',
                      score_type = "raw", # binary
                      k = 10)

print("Results of listwise recommender")
print(f"RMSE: {rmse:.4f}")
print(f"Mean nDCG@10: {eval_ndcg:.4f}")

Results of listwise recommender
RMSE: 4.2005
Mean nDCG@10: 0.9046
