In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from src.utils import TrainTestSplitter, read_pickles, dl_data_pipeline
from src.models import ItemItemModel, BaseModelAverage
from src.metrics import ml_metrics, predictive_metrics, rank_metrics
from tqdm import tqdm
import xgboost as xgb
from sklearn.metrics import label_ranking_average_precision_score, ndcg_score

tqdm.pandas()

In [14]:
df_movies, df_users, df_ratings = read_pickles("../../data/ml-1m-after_eda/")
df_all = dl_data_pipeline(df_movies, df_users, df_ratings)

train_data, test_data = train_test_split(df_all.reset_index(drop=True), test_size=0.2, random_state=42)

alltrain = train_data.drop(columns=["UserID", "Rating"])
alltest = test_data.drop(columns=["UserID", "Rating"])

dtrain = xgb.DMatrix(data=alltrain, label=train_data['Rating'])
dtest = xgb.DMatrix(data=alltest, label=test_data['Rating'])


# Using XGBoost for Leaning to Rank

XGBoost is considered one of the best tools for implementing learning to rank models due to several strengths it has particularly suited for ranking tasks, such as those encountered with the MovieLens dataset. Here's why XGBoost is highly effective for these tasks:

MovieLens and similar datasets often feature sparse data, where many user-item interactions are missing (i.e., most users have not rated most movies). XGBoost efficiently handles sparse data through its sparse-aware split finding algorithm, which can skip over missing values or assign them a default direction in tree splits, thereby optimizing computation and memory usage.



In [15]:
# Training models
param_pairwise = {
    'objective': 'rank:pairwise',
    'learning_rate': 0.1,
    'gamma': 1.0,
    'min_child_weight': 0.1,
    'max_depth': 6,
    'n_estimators': 100
}

bst_pairwise = xgb.train(param_pairwise, dtrain, num_boost_round=100)

param_ndcg = {
    'objective': 'rank:ndcg',
    'max_depth': 6,
    'eta': 0.1,
    'verbosity': 1
}

bst_ndcg = xgb.train(param_ndcg, dtrain, num_boost_round=100)


Parameters: { "n_estimators" } are not used.



In the context of learning to rank using models like XGBoost, pairwise and NDCG (Normalized Discounted Cumulative Gain) represent two different types of ranking strategies. Both approaches aim to optimize the order of items but do so using different methodologies and objectives. Here's a breakdown of the differences between the two:

### Pairwise Approach
*Concept*: The pairwise approach focuses on comparing pairs of items at a time during the training process. The fundamental idea is to minimize the number of inversions in ranking — that is, cases where a lower-ranked item (according to the model) should actually be ranked higher than a higher-ranked item (again, according to the model).

*Objective*: The model learns by comparing every pair of items within the same query or user session and attempts to correctly order each pair. The loss function typically penalizes the model more when it incorrectly orders a pair that is relatively close in the true order and less when the pair is far apart.

*Suitability*: This approach is useful when the relative order between items is more important than the actual rank positions or the magnitude of scores. It's effective in scenarios where the goal is to maximize the accuracy of item comparisons rather than to achieve an accurate scoring of the items' ranks.

### NDCG Approach
*Concept*: NDCG is a listwise approach that evaluates the entire list of items at once. NDCG measures the gain of each item based on its position in the result list, giving higher importance to hits at higher ranks. This approach directly optimizes the model based on how well it ranks items in the order of their relevance, taking into account the position of items in the ranked list.

*Objective*: The NDCG loss function is designed to maximize the gain from highly relevant items appearing at the top of the list. The gain is discounted at lower ranks, which reflects the reduced utility of items found lower in the list. Thus, a model optimizing for NDCG tries to place the most relevant items at the top, where their contribution to the score is maximized.

*Suitability*: NDCG is particularly effective in situations where the quality of the top-ranked results is much more important than the overall order of all items. This makes it highly relevant for search engines and recommendation systems where the top few results are critical for user satisfaction.

In [16]:
predictions_pairwise = bst_pairwise.predict(dtest)
predictions_ndcg = bst_ndcg.predict(dtest)

## Evaluate the ranking metrics

In [26]:
grouped_test_data = test_data.groupby('UserID')
map_scores = []
ndcg_scores = []

for user_id, group in grouped_test_data:
    group.reset_index(drop=True, inplace=True)
    actual = group['Rating'].values
    
    preds = predictions_pairwise[group.index]

    binary_actual = (actual >= 4).astype(int)

    map_score = label_ranking_average_precision_score([binary_actual], [preds.argsort()[::-1]])
    map_scores.append(map_score)

    if len(preds) > 1:
        ndcg_score_val = ndcg_score([binary_actual], [preds], k=len(actual))
        ndcg_scores.append(ndcg_score_val)

average_map = np.mean(map_scores)
average_ndcg = np.mean(ndcg_scores) if ndcg_scores else 0.0  # Handle cases where ndcg_scores might be empty

print(f"Mean Average Precision (MAP): {average_map}")
print(f"Normalized Discounted Cumulative Gain (NDCG): {average_ndcg}")

Mean Average Precision (MAP): 0.6884599797250325
Normalized Discounted Cumulative Gain (NDCG): 0.8362216845871187


# Using Deep Learning Approach

In [27]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from  src.models import MovieLensDataset, RankingNetwork

df = df_all.copy()
user_encoder = LabelEncoder()
movie_encoder = LabelEncoder()
df['user_id'] = user_encoder.fit_transform(df['UserID'])
df['movie_id'] = movie_encoder.fit_transform(df['MovieID'])
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)


train_dataset = MovieLensDataset(train_df)
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)

model = RankingNetwork(len(user_encoder.classes_), len(movie_encoder.classes_))
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

def train(model, data_loader, epochs):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for users, movies, ratings in data_loader:
            optimizer.zero_grad()
            outputs = model(users, movies).squeeze()
            loss = criterion(outputs, ratings)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {total_loss / len(data_loader)}')


In [28]:
train(model, train_loader, epochs=15)

user_id = 0
top_movies_df = model.predict_all_movies(user_id, num_top_movies=5)

print(top_movies_df)

Epoch 1, Loss: 1.4477880144454849
Epoch 2, Loss: 0.8809124449469383
Epoch 3, Loss: 0.8369741505029792
Epoch 4, Loss: 0.8192572370981911
Epoch 5, Loss: 0.8079927985819196
Epoch 6, Loss: 0.7996937072742313
Epoch 7, Loss: 0.7927433150896108
Epoch 8, Loss: 0.7859973247357842
Epoch 9, Loss: 0.7799501406101561
Epoch 10, Loss: 0.7740134602926209
Epoch 11, Loss: 0.7681666163397537
Epoch 12, Loss: 0.7624148112112181
Epoch 13, Loss: 0.7569196915977365
Epoch 14, Loss: 0.750835423811231
Epoch 15, Loss: 0.7450900434379919
   MovieID  PredictedRating
0     3308         5.192263
1     2309         5.122907
2      208         5.114661
3      713         5.093707
4     1041         5.076642


## Evaluation using already defined metrics + MAP + NDCG#

In [29]:
test_dataset = MovieLensDataset(test_df)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=True)

test_metrics = model.evaluate(test_loader)
print(test_metrics)

{'mae': 0.725, 'rmse': 0.919, 'precision': 0.838, 'recall': 0.428, 'f1': 0.566, 'roc_auc': 0.658, 'map': 0.5743128092004386, 'ndcg': 0.9816392953753538}
