<a href="https://colab.research.google.com/github/chandini2595/DecisionTrees_Ensemble_Methods/blob/main/GradientBoostRankingTechniques.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from xgboost import XGBRanker
import numpy as np

# Synthetic ranking dataset
X = np.random.rand(100, 5)  # 100 samples, 5 features
y = np.random.randint(1, 6, size=100)  # Target relevance scores (1-5)
group = [50, 50]  # Two groups, each with 50 queries

# Train/Test Split
X_train, X_test = X[:80], X[80:]
y_train, y_test = y[:80], y[80:]
group_train = [40, 40]
group_test = [10, 10]

# Train XGBoost Ranker
xgb_ranker = XGBRanker(
    objective="rank:pairwise",
    learning_rate=0.1,
    max_depth=3,
    n_estimators=100
)
xgb_ranker.fit(X_train, y_train, group=group_train)

# Predict and Evaluate
y_pred = xgb_ranker.predict(X_test)
print("XGBoost Ranking Predictions:", y_pred)

XGBoost Ranking Predictions: [-0.23583654 -0.6318241   0.2164886  -0.06582259  0.38929135  0.10348877
 -0.02139384 -0.21242191  0.7831328  -0.3459414   0.23294194 -0.42018208
  0.36620423  0.21293423  0.1321689   0.1454249   0.31002226  0.878228
  0.08571082 -0.507482  ]


In [4]:
import lightgbm as lgb

# Dataset and groups
lgb_train = lgb.Dataset(X_train, y_train, group=group_train)
lgb_test = lgb.Dataset(X_test, y_test, group=group_test, reference=lgb_train)

# Train LightGBM Ranker
params = {
    "objective": "lambdarank",
    "metric": "ndcg",
    "learning_rate": 0.1,
    "max_depth": 3,
    "num_leaves": 31,
}

# Use LightGBM train() without the 'verbose_eval' argument
lgb_ranker = lgb.train(
    params,
    lgb_train,
    valid_sets=[lgb_test] # Validation set
)

# Predict and Evaluate
y_pred = lgb_ranker.predict(X_test)
print("LightGBM Ranking Predictions:", y_pred)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000432 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 140
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 5
LightGBM Ranking Predictions: [-1.04312053 -0.56567296  0.39996515 -1.48423009 -0.1603884  -1.19199619
 -0.99017917 -1.5576713   0.98061432 -1.47989746 -0.24184385 -0.67312247
  0.11807397  0.39734897 -0.26341749 -0.64247743  0.84666     0.42011551
 -0.12333771 -1.40899734]


In [6]:
pip install catboost

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [7]:
from catboost import CatBoostRanker, Pool

# Pool for ranking
train_pool = Pool(X_train, y_train, group_id=[0] * 40 + [1] * 40)  # Group IDs
test_pool = Pool(X_test, y_test, group_id=[0] * 10 + [1] * 10)

# Train CatBoost Ranker
catboost_ranker = CatBoostRanker(
    iterations=100,
    learning_rate=0.1,
    depth=3,
    verbose=10
)
catboost_ranker.fit(train_pool)

# Predict and Evaluate
y_pred = catboost_ranker.predict(test_pool)
print("CatBoost Ranking Predictions:", y_pred)

0:	total: 48.4ms	remaining: 4.79s
10:	total: 65.7ms	remaining: 532ms
20:	total: 76.7ms	remaining: 288ms
30:	total: 98.4ms	remaining: 219ms
40:	total: 115ms	remaining: 166ms
50:	total: 155ms	remaining: 149ms
60:	total: 173ms	remaining: 111ms
70:	total: 192ms	remaining: 78.6ms
80:	total: 204ms	remaining: 47.8ms
90:	total: 221ms	remaining: 21.8ms
99:	total: 241ms	remaining: 0us
CatBoost Ranking Predictions: [-0.83832892 -1.92211078  0.17099447 -0.65879422  0.01718713 -0.11314589
  0.04515274 -1.15155078  1.57567545 -0.45129388 -0.05813121 -0.52333601
  1.02281038 -0.83459155 -0.16179098 -0.43435944  0.06249102  0.7165844
  0.81528979 -0.68430601]


In [8]:
# Store predictions
predictions = {}

# XGBoost
xgb_ranker.fit(X_train, y_train, group=group_train)
predictions['XGBoost'] = xgb_ranker.predict(X_test)

# LightGBM
lgb_ranker = lgb.train(params, lgb_train)
predictions['LightGBM'] = lgb_ranker.predict(X_test)

# CatBoost
catboost_ranker.fit(train_pool)
predictions['CatBoost'] = catboost_ranker.predict(test_pool)

# Print Results
for model, preds in predictions.items():
    print(f"{model} Predictions:", preds)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 140
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 5
0:	total: 723us	remaining: 71.6ms
10:	total: 9.91ms	remaining: 80.2ms
20:	total: 25.8ms	remaining: 96.9ms
30:	total: 37.2ms	remaining: 82.7ms
40:	total: 48.1ms	remaining: 69.2ms
50:	total: 63.3ms	remaining: 60.8ms
60:	total: 81.4ms	remaining: 52ms
70:	total: 101ms	remaining: 41.2ms
80:	total: 125ms	remaining: 29.3ms
90:	total: 155ms	remaining: 15.4ms
99:	total: 180ms	remaining: 0us
XGBoost Predictions: [-0.23583654 -0.6318241   0.2164886  -0.06582259  0.38929135  0.10348877
 -0.02139384 -0.21242191  0.7831328  -0.3459414   0.23294194 -0.42018208
  0.36620423  0.21293423  0.1321689   0.1454249   0.31002226  0.878228
  0.08571082 -0.507482  ]
LightGBM Predictions: [-1.04312053 -0.56567296  0.39996515 -1.48423