# Information Retrieval - Programming Assignment 4 - Training Learning-to-Rank (Using Programming Assignment 3 by Muhammad Falensi Azmi)

# Setup

In [4]:
!pip install python-terrier -q;
!pip install --upgrade git+https://github.com/terrierteam/pyterrier_t5.git -q;
!pip install datasets;
!pip install gdown;
!pip install optuna


Usage:   
  pip install [options] <requirement specifier> [package-index-options] ...
  pip install [options] -r <requirements file> [package-index-options] ...
  pip install [options] [-e] <vcs project url> ...
  pip install [options] [-e] <local project path> ...
  pip install [options] <archive url/path> ...

no such option: -;

Usage:   
  pip install [options] <requirement specifier> [package-index-options] ...
  pip install [options] -r <requirements file> [package-index-options] ...
  pip install [options] [-e] <vcs project url> ...
  pip install [options] [-e] <local project path> ...
  pip install [options] <archive url/path> ...

no such option: -;




In [1]:
import re
import pyterrier as pt
import pandas as pd
import numpy as np

from datasets import load_dataset
from pyterrier.measures import *

pt.init()  # This initializes PyTerrier

import os
import xgboost as xgb
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import ParameterGrid
import pickle

  from .autonotebook import tqdm as notebook_tqdm
Java started and loaded: pyterrier.java, pyterrier.terrier.java [version=5.10 (build: craigm 2024-08-22 17:33), helper_version=0.0.8]
java is now started automatically with default settings. To force initialisation early, run:
pt.java.init() # optional, forces java initialisation
  pt.init()  # This initializes PyTerrier


# Data Preparation

In [2]:
# Load dataset
url = "https://drive.google.com/drive/folders/12AiGXUUlduwVJJrSOYp4RvLWNS8_X_xH?usp=drive_link"
gdown.download_folder(url, output=".", quiet=True)

['.\\corpus.csv',
 '.\\dev.csv',
 '.\\queries.csv',
 '.\\test.csv',
 '.\\train.csv']

In [2]:
collections = pd.read_csv("corpus.csv")
queries = pd.read_csv("queries.csv")

train_qrels = pd.read_csv("train.csv")
dev_qrels = pd.read_csv("dev.csv")
test_qrels = pd.read_csv("test.csv")

In [3]:
collections.head()

Unnamed: 0,id,text
0,49,"Colorâurine can be a variety of colors, most..."
1,12913,Rio de Janeiro: Annual Weather Averages. Febru...
2,14964,The judiciary (also known as the judicial syst...
3,17272,Painless swelling of the feet and ankles is a ...
4,18352,"Later that day, the National Hurricane Center ..."


In [4]:
queries.head()

Unnamed: 0,id,text
0,597651,what color is amber urine
1,88585,causes of swollen ankles and feet
2,508811,symptoms of strep throat for an adult
3,412886,is ilovemakonnen ovo
4,532152,uneven chest color


In [5]:
train_qrels.head()

Unnamed: 0,query-id,corpus-id,score
0,597651,49,1
1,88585,17272,1
2,508811,28092,1
3,412886,56458,1
4,532152,73237,1


## ✋ TODO

### Preprocessing 1: Change Column Name

In [6]:
# TODO: preprocess data according to needs
collections = collections.rename(columns={'id':'docno'})
collections.head()

Unnamed: 0,docno,text
0,49,"Colorâurine can be a variety of colors, most..."
1,12913,Rio de Janeiro: Annual Weather Averages. Febru...
2,14964,The judiciary (also known as the judicial syst...
3,17272,Painless swelling of the feet and ankles is a ...
4,18352,"Later that day, the National Hurricane Center ..."


In [7]:
queries = queries.rename(columns={'id':'qid', 'text':'query'})
queries.head()

Unnamed: 0,qid,query
0,597651,what color is amber urine
1,88585,causes of swollen ankles and feet
2,508811,symptoms of strep throat for an adult
3,412886,is ilovemakonnen ovo
4,532152,uneven chest color


In [8]:
train_qrels = train_qrels.rename(columns={'query-id':'qid', 'corpus-id':'docno', 'score':'label'})
train_qrels.head()

Unnamed: 0,qid,docno,label
0,597651,49,1
1,88585,17272,1
2,508811,28092,1
3,412886,56458,1
4,532152,73237,1


In [9]:
dev_qrels = dev_qrels.rename(columns={'query-id':'qid', 'corpus-id':'docno', 'score':'label'})
dev_qrels.head()

Unnamed: 0,qid,docno,label
0,174249,7067348,1
1,87892,7069601,1
2,264827,7071066,1
3,206117,7072160,1
4,196232,7072326,1


In [10]:
test_qrels = test_qrels.rename(columns={'query-id':'qid', 'corpus-id':'docno', 'score':'label'})
test_qrels.head()

Unnamed: 0,qid,docno,label
0,19335,1720387,0
1,19335,1871222,0
2,19335,1958102,0
3,19335,2046505,1
4,19335,2186129,0


### Preprocessing 2: convert label to binary

In [11]:
train_qrels.loc[train_qrels['label'] > 1, 'label'] = 1
dev_qrels.loc[dev_qrels['label'] > 1, 'label'] = 1
test_qrels.loc[test_qrels['label'] > 1, 'label'] = 1

In [12]:
print(train_qrels['label'].value_counts())
print(dev_qrels['label'].value_counts())
print(test_qrels['label'].value_counts())

label
1    5535
Name: count, dtype: int64
label
1    1005
Name: count, dtype: int64
label
0    566
1    470
Name: count, dtype: int64


### Preprocessing 3: remove nonalphanumerics and lowercasting

In [13]:
def remove_nonalphanum(text):
  pattern = re.compile('[\W_]+')
  return pattern.sub(' ', text)

def lowercast(text):
  return text.lower()

In [14]:
collections['text'] = collections['text'].apply(remove_nonalphanum)
collections['text'] = collections['text'].apply(lowercast)
collections.head()

Unnamed: 0,docno,text
0,49,colorâ urine can be a variety of colors most o...
1,12913,rio de janeiro annual weather averages februar...
2,14964,the judiciary also known as the judicial syste...
3,17272,painless swelling of the feet and ankles is a ...
4,18352,later that day the national hurricane center n...


In [15]:
queries['query'] = queries['query'].apply(remove_nonalphanum)
queries['query'] = queries['query'].apply(lowercast)
queries.head()

Unnamed: 0,qid,query
0,597651,what color is amber urine
1,88585,causes of swollen ankles and feet
2,508811,symptoms of strep throat for an adult
3,412886,is ilovemakonnen ovo
4,532152,uneven chest color


### Preprocessing 4: Convert docno and qid to string

In [16]:
collections["docno"] = collections["docno"].astype(str)
queries["qid"] = queries["qid"].astype(str)

train_qrels["qid"] = train_qrels["qid"].astype(str)
dev_qrels["qid"] = dev_qrels["qid"].astype(str)
test_qrels["qid"] = test_qrels["qid"].astype(str)

train_qrels["docno"] = train_qrels["docno"].astype(str)
dev_qrels["docno"] = dev_qrels["docno"].astype(str)
test_qrels["docno"] = test_qrels["docno"].astype(str)

### Preprocessing 5: Stemming and Stopwords Removal (done in the `Indexing` code)

### Preprocessing 6: Limit num of queries based on certain rule.

In [17]:
print(train_qrels.shape)
print(dev_qrels.shape)
print(test_qrels.shape)
print(queries.shape)

(5535, 3)
(1005, 3)
(1036, 3)
(6577, 2)


In [18]:
# Verify overlap between queries and qrels
common_train_qids = set(queries['qid']) & set(train_qrels['qid'])
common_val_qids = set(queries['qid']) & set(dev_qrels['qid'])
common_test_qids = set(queries['qid']) & set(test_qrels['qid'])

# # Filter queries to ensure overlap
train_queries = queries[queries['qid'].isin(common_train_qids)]
val_queries = queries[queries['qid'].isin(common_val_qids)]
test_queries = queries[queries['qid'].isin(common_test_qids)]

# Further sampling if needed for efficiency (NOT USED)
train_sample = train_queries
# .sample(n=min(4000, len(train_queries)), random_state=42)
val_sample = val_queries
# .sample(n=min(1000, len(val_queries)), random_state=42)
test_sample = test_queries
# .sample(n=min(250, len(test_queries)), random_state=42)

In [19]:
print(train_queries.shape, val_queries.shape, test_queries.shape)
print(train_sample.shape, val_sample.shape, test_sample.shape)
print(train_qrels.shape, dev_qrels.shape, test_qrels.shape)

(5533, 2) (1001, 2) (43, 2)
(5533, 2) (1001, 2) (43, 2)
(5535, 3) (1005, 3) (1036, 3)


In [20]:
train_topics, dev_topics, test_topics = train_sample, val_sample, test_sample

# Indexing

## ✋ TODO

In [21]:
index_dir = os.path.join(os.getcwd(), "index")

In [22]:
# TODO: adjust indexer

pd_indexer = pt.terrier.DFIndexer(index_dir, \
                          type = pt.index.IndexingType.CLASSIC, \
                          tokeniser = pt.index.TerrierTokeniser('utf'), \
                          stemmer = pt.TerrierStemmer.porter, \
                          stopwords = pt.TerrierStopwords.terrier, \
                          blocks = True, \
                          verbose = True)

index_ref = pd_indexer.index(collections["text"], collections)

  pd_indexer = pt.terrier.DFIndexer(index_dir, \
  0%|          | 0/6992 [00:00<?, ?documents/s]

100%|██████████| 6992/6992 [00:06<00:00, 1087.71documents/s]


In [23]:
index_ref = pt.IndexFactory.of(index_dir)

# Baseline

In [25]:
# Set K=30 sebagai hasil akhir setelah re-rank (SERP)
K = 30

In [28]:
bm25 = pt.terrier.Retriever(index_ref, wmodel = "BM25")

In [29]:
experiment_raw_bm25_dir = os.path.join(os.getcwd(), "result")
result = pt.Experiment(
    [bm25 % K], # TOP K only to make it comparable
    topics=test_topics,
    qrels=test_qrels,
    eval_metrics=["P_30","map","recip_rank","ndcg"],
    names=["BM25"],
    save_dir=experiment_raw_bm25_dir,
    save_mode='overwrite',
    verbose=True,
)

pt.Experiment: 100%|██████████| 1/1 [00:00<00:00,  1.24system/s]


In [30]:
result

Unnamed: 0,name,P_30,map,recip_rank,ndcg
0,BM25,0.270543,0.542489,0.723107,0.691632


# Re-rank

## Feature

### ✋ TODO

In [31]:
# TODO: propose and implement features

# Models (other than BM25)
tfidf = pt.terrier.Retriever(index_ref, wmodel="TF_IDF")
pl2 = pt.terrier.Retriever(index_ref, wmodel="PL2")
bb2 = pt.terrier.Retriever(index_ref, wmodel="BB2")

tfidf_vectorizer = TfidfVectorizer()
tfidf_vectorizer.fit(collections["text"])

def cosine_similarity_feature(doc, query):
    doc_vector = tfidf_vectorizer.transform([doc])
    query_vector = tfidf_vectorizer.transform([query])
    return cosine_similarity(doc_vector, query_vector)[0][0]

def length_ratio_feature(doc, query):
    doc_length = len(doc.split())
    query_length = len(query.split())
    return doc_length / query_length if query_length > 0 else 0

# generate feature
def generate_features(doc, query):
    features = [
        cosine_similarity_feature(doc, query),
        length_ratio_feature(doc, query)
    ]
    return np.array(features)

features = pt.apply.doc_features(lambda row: generate_features(row["text"], row["query"]))

In [32]:
# Get "text" column using `pt.text.get_text(index_ref, "text")`
pipeline = bm25 >> pt.text.get_text(index_ref, "text") >> (features ** bm25 ** tfidf ** pl2 ** bb2)

## Learning-to-Rank

### ✋ TODO

In [35]:
# TODO: perform hyperparameter tuning on the LambdaMART

# Below hyperparameters are taken from the example on PyTerrier documentation:
# https://pyterrier.readthedocs.io/en/latest/ltr.html#gradient-boosted-trees-lambdamart

# XGBoost hyperparameters documentation:
# https://xgboost.readthedocs.io/en/stable/parameter.html

# test using base model
lmart_model = xgb.sklearn.XGBRanker(objective = 'rank:ndcg',
                                    max_depth = '3',
                                    subsample = '0.6',
                                    gamma = '1.0',
                                    verbose = 1,
                                    random_state = 42,
                                    n_jobs=-1,
                                    )

In [36]:
lmart_pipe = pipeline >> pt.ltr.apply_learned_model(lmart_model, form = "ltr")
lmart_pipe.fit(
    train_topics, 
    train_qrels, 
    dev_topics, 
    dev_qrels)

Parameters: { "verbose" } are not used.



In [41]:
with open('lmart_model.pkl', 'wb') as file:
    pickle.dump(lmart_model, file)

## Hyperparameter Tuning

In [82]:
import time

# Define the parameter grid for Grid Search
param_grid = {
    # Learning Parameters
    'subsample' : [0.4, 0.6],
    'gamma': [0.5, 1.0],
    'max_depth': [3]
}

# Initialize variables to store the best model and score
best_model = None
best_score = -1
best_params = None

now = time.time()

# Perform Grid Search
for params in ParameterGrid(param_grid):
    print(f"Testing parameters: {params}")
    lmart_model = xgb.sklearn.XGBRanker(
        objective='rank:ndcg',
        random_state=42,
        n_jobs=-1,
        verbose=2,
        device='cuda',
        **params
    )
    lmart_pipe = pipeline >> pt.ltr.apply_learned_model(lmart_model, form="ltr")
    lmart_pipe.fit(train_topics, train_qrels, dev_topics, dev_qrels)
    
    # Evaluate on validation set
    eval_results = pt.Experiment([lmart_pipe % K], dev_topics, dev_qrels, eval_metrics=["ndcg"], names=["L2R"])
    score = eval_results["ndcg"][0]
    
    # Update the best model if current is better
    if score > best_score:
        best_score = score
        best_model = lmart_pipe
        best_params = params

    print(f"Score: {score}. Best is {best_score} with params {best_params}")
    print(f"time:{int(time.time()-now)}")

Testing parameters: {'gamma': 0.5, 'max_depth': 3, 'subsample': 0.4}


Parameters: { "verbose" } are not used.



Score: 0.8955371784223587. Best is 0.8955371784223587 with params {'gamma': 0.5, 'max_depth': 3, 'subsample': 0.4}
time:6005
Testing parameters: {'gamma': 0.5, 'max_depth': 3, 'subsample': 0.6}


Parameters: { "verbose" } are not used.



Score: 0.8972169310694136. Best is 0.8972169310694136 with params {'gamma': 0.5, 'max_depth': 3, 'subsample': 0.6}
time:12074
Testing parameters: {'gamma': 1.0, 'max_depth': 3, 'subsample': 0.4}


Parameters: { "verbose" } are not used.



Score: 0.8940310131488552. Best is 0.8972169310694136 with params {'gamma': 0.5, 'max_depth': 3, 'subsample': 0.6}
time:18305
Testing parameters: {'gamma': 1.0, 'max_depth': 3, 'subsample': 0.6}


Parameters: { "verbose" } are not used.



Score: 0.8974502623816912. Best is 0.8974502623816912 with params {'gamma': 1.0, 'max_depth': 3, 'subsample': 0.6}
time:24342


In [125]:
print(best_params, best_score)

{'gamma': 1.0, 'max_depth': 3, 'subsample': 0.6} 0.8974502623816912


### Optimization using Optuna (not used, sebagai dokumentasi saja)

In [None]:
# import optuna

# now = time.time()
# def objective(trial):
#     params = {
#         'gamma': trial.suggest_float("gamma", 0.0, 0.1),
#         'max_depth': trial.suggest_int("max_depth", 3, 4),
#         'subsample': trial.suggest_float("subsample", 0.4, 0.6),
#         'colsample_bytree': trial.suggest_float("colsample_bytree", 0.6, 1.0),
#         'lambda': trial.suggest_float("lambda", 0.8, 1.2),
#         'alpha': trial.suggest_float("alpha", 0.8, 1.2),
#     }
    
#     lmart_model = xgb.sklearn.XGBRanker(
#         objective='rank:ndcg',
#         random_state=42,
#         n_jobs=-1,
#         verbose=1,
#         **params
#     )
#     lmart_pipe = pipeline >> pt.ltr.apply_learned_model(lmart_model, form="ltr")
#     lmart_pipe.fit(train_topics, train_qrels, dev_topics, dev_qrels)
    
#     eval_results = pt.Experiment([lmart_pipe % K], dev_topics, dev_qrels, eval_metrics=["ndcg"], names=["L2R"])
#     score = eval_results["ndcg"][0]
    
#     print(f"time:{int(time.time()-now)}")
#     return score

# # Create an Optuna study
# study = optuna.create_study(direction="maximize")
# study.optimize(objective, n_trials=50)

# # Get the best parameters and score
# best_params = study.best_params
# best_score = study.best_value
# print(f"Best parameters: {best_params}")
# print(f"Best score: {best_score}")

[I 2024-11-25 16:06:07,447] A new study created in memory with name: no-name-bf8aaefd-ddc4-48e8-9791-44e48c57b3bb
Parameters: { "verbose" } are not used.

[I 2024-11-25 16:11:15,950] Trial 0 finished with value: 0.889780712576313 and parameters: {'gamma': 0.07529041863109562, 'max_depth': 3, 'subsample': 0.4866770297338993, 'colsample_bytree': 0.7678007085418099, 'lambda': 1.1341187803517139, 'alpha': 0.9390619151576209}. Best is trial 0 with value: 0.889780712576313.


time:308


Parameters: { "verbose" } are not used.

[I 2024-11-25 16:16:13,960] Trial 1 finished with value: 0.8820193322165437 and parameters: {'gamma': 0.09403365053682722, 'max_depth': 4, 'subsample': 0.597264458515771, 'colsample_bytree': 0.9397230791512087, 'lambda': 0.871818856218183, 'alpha': 0.8732424380900944}. Best is trial 0 with value: 0.889780712576313.


time:606


Parameters: { "verbose" } are not used.

[I 2024-11-25 16:21:20,039] Trial 2 finished with value: 0.8935956963698062 and parameters: {'gamma': 0.049541708381048245, 'max_depth': 3, 'subsample': 0.4124214980744642, 'colsample_bytree': 0.7282303118311557, 'lambda': 0.9619513230536485, 'alpha': 0.8791045324846161}. Best is trial 2 with value: 0.8935956963698062.


time:912


Parameters: { "verbose" } are not used.

[I 2024-11-25 16:26:16,253] Trial 3 finished with value: 0.8866147578461632 and parameters: {'gamma': 0.07865365681369148, 'max_depth': 4, 'subsample': 0.44050742224277745, 'colsample_bytree': 0.9691254490550283, 'lambda': 1.1794901640220679, 'alpha': 0.9849464661371194}. Best is trial 2 with value: 0.8935956963698062.


time:1208


Parameters: { "verbose" } are not used.

[I 2024-11-25 16:31:07,169] Trial 4 finished with value: 0.8889570876603313 and parameters: {'gamma': 0.020344988143679167, 'max_depth': 4, 'subsample': 0.544947979317269, 'colsample_bytree': 0.6406241300691647, 'lambda': 1.0936129964913663, 'alpha': 0.9852546185274311}. Best is trial 2 with value: 0.8935956963698062.


time:1499


Parameters: { "verbose" } are not used.

[I 2024-11-25 16:35:59,296] Trial 5 finished with value: 0.8922252107600407 and parameters: {'gamma': 0.01632279775023727, 'max_depth': 3, 'subsample': 0.45316271628925636, 'colsample_bytree': 0.9851365080533618, 'lambda': 1.11440248742133, 'alpha': 1.0567520346145156}. Best is trial 2 with value: 0.8935956963698062.


time:1791


Parameters: { "verbose" } are not used.

[I 2024-11-25 16:40:49,866] Trial 6 finished with value: 0.8849094653907387 and parameters: {'gamma': 0.03712498287714716, 'max_depth': 4, 'subsample': 0.5962411439263138, 'colsample_bytree': 0.8347482321090973, 'lambda': 1.162505011771501, 'alpha': 0.9057435422175167}. Best is trial 2 with value: 0.8935956963698062.


time:2082


Parameters: { "verbose" } are not used.

[I 2024-11-25 16:45:42,907] Trial 7 finished with value: 0.8864452087823471 and parameters: {'gamma': 0.08581980746355304, 'max_depth': 4, 'subsample': 0.5269663338218614, 'colsample_bytree': 0.9426105394610376, 'lambda': 0.8215180390776449, 'alpha': 1.098326847327272}. Best is trial 2 with value: 0.8935956963698062.


time:2375


Parameters: { "verbose" } are not used.

[I 2024-11-25 16:50:40,820] Trial 8 finished with value: 0.8846076586185964 and parameters: {'gamma': 0.09297975112852688, 'max_depth': 4, 'subsample': 0.42276644144205816, 'colsample_bytree': 0.6120014642252265, 'lambda': 1.159947145247931, 'alpha': 1.087328920062565}. Best is trial 2 with value: 0.8935956963698062.


time:2673


Parameters: { "verbose" } are not used.

[I 2024-11-25 16:55:25,191] Trial 9 finished with value: 0.8915596650735998 and parameters: {'gamma': 0.0867554482864213, 'max_depth': 3, 'subsample': 0.4474173280899536, 'colsample_bytree': 0.7461432783245354, 'lambda': 1.1101878796084512, 'alpha': 1.1237120404337118}. Best is trial 2 with value: 0.8935956963698062.


time:2957


Parameters: { "verbose" } are not used.

[I 2024-11-25 16:59:52,005] Trial 10 finished with value: 0.8924193457912137 and parameters: {'gamma': 0.053647007255654806, 'max_depth': 3, 'subsample': 0.40607344587121635, 'colsample_bytree': 0.6947765419046783, 'lambda': 0.9576375595777615, 'alpha': 0.8068726152131747}. Best is trial 2 with value: 0.8935956963698062.


time:3224


Parameters: { "verbose" } are not used.

[I 2024-11-25 17:04:16,227] Trial 11 finished with value: 0.8869823915094998 and parameters: {'gamma': 0.05503994650460371, 'max_depth': 3, 'subsample': 0.40177623745864816, 'colsample_bytree': 0.6941540164063025, 'lambda': 0.960632383162038, 'alpha': 0.8018267786510536}. Best is trial 2 with value: 0.8935956963698062.


time:3488


Parameters: { "verbose" } are not used.

[I 2024-11-25 17:08:49,326] Trial 12 finished with value: 0.8866475998589606 and parameters: {'gamma': 0.05375022409447587, 'max_depth': 3, 'subsample': 0.4006296247341979, 'colsample_bytree': 0.8440071606044135, 'lambda': 0.9814801506540765, 'alpha': 0.8113266891769177}. Best is trial 2 with value: 0.8935956963698062.


time:3761


Parameters: { "verbose" } are not used.

[I 2024-11-25 17:13:33,280] Trial 13 finished with value: 0.8915813097582519 and parameters: {'gamma': 0.03801461836421215, 'max_depth': 3, 'subsample': 0.4762234848777273, 'colsample_bytree': 0.699323208041726, 'lambda': 0.9229534499647616, 'alpha': 1.189978885441411}. Best is trial 2 with value: 0.8935956963698062.


time:4045


Parameters: { "verbose" } are not used.

[I 2024-11-25 17:18:15,264] Trial 14 finished with value: 0.8885820132869697 and parameters: {'gamma': 0.06780554057720853, 'max_depth': 3, 'subsample': 0.423627050407238, 'colsample_bytree': 0.6851635916041655, 'lambda': 1.0462248962233252, 'alpha': 0.8489094418606199}. Best is trial 2 with value: 0.8935956963698062.


time:4327


Parameters: { "verbose" } are not used.

[I 2024-11-25 17:23:06,016] Trial 15 finished with value: 0.8887165578891251 and parameters: {'gamma': 0.035686383321057384, 'max_depth': 3, 'subsample': 0.510071407682939, 'colsample_bytree': 0.7966469467661914, 'lambda': 1.0315771887548841, 'alpha': 0.912629319850099}. Best is trial 2 with value: 0.8935956963698062.


time:4618


Parameters: { "verbose" } are not used.

[I 2024-11-25 17:27:48,549] Trial 16 finished with value: 0.8916095983388908 and parameters: {'gamma': 0.06439227948740324, 'max_depth': 3, 'subsample': 0.46894893018666, 'colsample_bytree': 0.7323335859831831, 'lambda': 0.9174714990952891, 'alpha': 0.8619932646024123}. Best is trial 2 with value: 0.8935956963698062.


time:4901


Parameters: { "verbose" } are not used.

[I 2024-11-25 17:32:27,173] Trial 17 finished with value: 0.8928033357705235 and parameters: {'gamma': 0.045489850005002164, 'max_depth': 3, 'subsample': 0.41923965722255957, 'colsample_bytree': 0.6508640260508862, 'lambda': 1.0256925044520062, 'alpha': 0.833355922128047}. Best is trial 2 with value: 0.8935956963698062.


time:5179


Parameters: { "verbose" } are not used.

[I 2024-11-25 17:37:03,031] Trial 18 finished with value: 0.8883932137877503 and parameters: {'gamma': 0.008250277833892594, 'max_depth': 3, 'subsample': 0.4274941652556027, 'colsample_bytree': 0.6407695609810831, 'lambda': 1.0278207689343961, 'alpha': 0.9565368757232138}. Best is trial 2 with value: 0.8935956963698062.


time:5455


Parameters: { "verbose" } are not used.

[I 2024-11-25 17:41:44,276] Trial 19 finished with value: 0.891414651132983 and parameters: {'gamma': 0.027476217077300035, 'max_depth': 3, 'subsample': 0.45707438896844294, 'colsample_bytree': 0.881047217670224, 'lambda': 1.0529157132586293, 'alpha': 1.0304191806139755}. Best is trial 2 with value: 0.8935956963698062.


time:5736


Parameters: { "verbose" } are not used.

[I 2024-11-25 17:46:37,840] Trial 20 finished with value: 0.8896923849676994 and parameters: {'gamma': 0.04418771115524942, 'max_depth': 3, 'subsample': 0.5105355993327091, 'colsample_bytree': 0.6087551686223582, 'lambda': 0.8803288252821716, 'alpha': 0.8939338726624969}. Best is trial 2 with value: 0.8935956963698062.


time:6030


Parameters: { "verbose" } are not used.

[I 2024-11-25 17:51:12,274] Trial 21 finished with value: 0.8902526708245767 and parameters: {'gamma': 0.05971483236657881, 'max_depth': 3, 'subsample': 0.41517723217511465, 'colsample_bytree': 0.6681744963472656, 'lambda': 0.9744367402399429, 'alpha': 0.8357209736725456}. Best is trial 2 with value: 0.8935956963698062.


time:6304


Parameters: { "verbose" } are not used.

[I 2024-11-25 17:55:59,675] Trial 22 finished with value: 0.8880677460820635 and parameters: {'gamma': 0.046799194798629096, 'max_depth': 3, 'subsample': 0.40035591481836263, 'colsample_bytree': 0.7236556124200597, 'lambda': 0.9494424629723921, 'alpha': 0.8285727598332239}. Best is trial 2 with value: 0.8935956963698062.


time:6592


Parameters: { "verbose" } are not used.

[I 2024-11-25 18:00:57,321] Trial 23 finished with value: 0.8908729896816157 and parameters: {'gamma': 0.047607499602570796, 'max_depth': 3, 'subsample': 0.4357952388178742, 'colsample_bytree': 0.7813982381966206, 'lambda': 0.9211302986466746, 'alpha': 0.8701774781057223}. Best is trial 2 with value: 0.8935956963698062.


time:6889


Parameters: { "verbose" } are not used.

[I 2024-11-25 18:05:42,885] Trial 24 finished with value: 0.8903617552471951 and parameters: {'gamma': 0.02864037546232906, 'max_depth': 3, 'subsample': 0.41722135298121604, 'colsample_bytree': 0.6543805379585622, 'lambda': 1.0186775357893412, 'alpha': 0.8001156403370072}. Best is trial 2 with value: 0.8935956963698062.


time:7175


Parameters: { "verbose" } are not used.

[I 2024-11-25 18:10:37,061] Trial 25 finished with value: 0.8911287919514832 and parameters: {'gamma': 0.06993700366943476, 'max_depth': 3, 'subsample': 0.43589533051920293, 'colsample_bytree': 0.711358947937192, 'lambda': 1.0018367924421696, 'alpha': 0.9375353293889743}. Best is trial 2 with value: 0.8935956963698062.


time:7469


Parameters: { "verbose" } are not used.

[I 2024-11-25 18:15:32,891] Trial 26 finished with value: 0.8870044727433235 and parameters: {'gamma': 0.0574855659189064, 'max_depth': 3, 'subsample': 0.5625482344344384, 'colsample_bytree': 0.7547511241679895, 'lambda': 1.073581200867327, 'alpha': 0.837610447690007}. Best is trial 2 with value: 0.8935956963698062.


time:7765


Parameters: { "verbose" } are not used.

[I 2024-11-25 18:20:05,817] Trial 27 finished with value: 0.8924886203186395 and parameters: {'gamma': 0.043200191529945725, 'max_depth': 3, 'subsample': 0.41331370148720736, 'colsample_bytree': 0.6684916384278765, 'lambda': 0.8792420847512357, 'alpha': 0.8911343753854778}. Best is trial 2 with value: 0.8935956963698062.


time:8038


Parameters: { "verbose" } are not used.

[I 2024-11-25 18:24:33,234] Trial 28 finished with value: 0.8913930347354004 and parameters: {'gamma': 0.040444352834972296, 'max_depth': 3, 'subsample': 0.4638765297759703, 'colsample_bytree': 0.6652900005484431, 'lambda': 0.8035487690346431, 'alpha': 0.8866273746534374}. Best is trial 2 with value: 0.8935956963698062.


time:8305


Parameters: { "verbose" } are not used.

[I 2024-11-25 18:29:15,752] Trial 29 finished with value: 0.8862772141912449 and parameters: {'gamma': 0.02924037402120406, 'max_depth': 3, 'subsample': 0.4725265341267726, 'colsample_bytree': 0.7712731133596313, 'lambda': 0.8640542188483418, 'alpha': 0.9355914429199966}. Best is trial 2 with value: 0.8935956963698062.


time:8588


Parameters: { "verbose" } are not used.

[I 2024-11-25 18:34:13,594] Trial 30 finished with value: 0.8917685757947525 and parameters: {'gamma': 0.00305678476282447, 'max_depth': 3, 'subsample': 0.49517114833238696, 'colsample_bytree': 0.6360247809772948, 'lambda': 0.8991234827423548, 'alpha': 0.9610717697976008}. Best is trial 2 with value: 0.8935956963698062.


time:8886


Parameters: { "verbose" } are not used.

[I 2024-11-25 18:39:02,606] Trial 31 finished with value: 0.8922308631778515 and parameters: {'gamma': 0.051034357312537706, 'max_depth': 3, 'subsample': 0.40957481401423923, 'colsample_bytree': 0.6845122971235406, 'lambda': 0.9421299462554187, 'alpha': 0.8264110799095685}. Best is trial 2 with value: 0.8935956963698062.


time:9175


Parameters: { "verbose" } are not used.

[I 2024-11-25 18:43:44,887] Trial 32 finished with value: 0.8884869044550215 and parameters: {'gamma': 0.061036540730364026, 'max_depth': 3, 'subsample': 0.43095677549260086, 'colsample_bytree': 0.7204023557669583, 'lambda': 0.8413204635627014, 'alpha': 0.8675107632882035}. Best is trial 2 with value: 0.8935956963698062.


time:9457


Parameters: { "verbose" } are not used.

[I 2024-11-25 18:48:11,721] Trial 33 finished with value: 0.8907109188182265 and parameters: {'gamma': 0.07223395825092305, 'max_depth': 3, 'subsample': 0.41331156217207476, 'colsample_bytree': 0.6243845221388199, 'lambda': 0.99281827518381, 'alpha': 0.9163125351948176}. Best is trial 2 with value: 0.8935956963698062.


time:9724


Parameters: { "verbose" } are not used.

[I 2024-11-25 18:52:41,900] Trial 34 finished with value: 0.8839981703596455 and parameters: {'gamma': 0.043669829081835254, 'max_depth': 4, 'subsample': 0.4499819161323232, 'colsample_bytree': 0.671272535306348, 'lambda': 0.8571172102125768, 'alpha': 0.8521073535086704}. Best is trial 2 with value: 0.8935956963698062.


time:9994


Parameters: { "verbose" } are not used.

[I 2024-11-25 18:57:32,474] Trial 35 finished with value: 0.8889885040364006 and parameters: {'gamma': 0.07745884753638223, 'max_depth': 3, 'subsample': 0.4402786318758176, 'colsample_bytree': 0.654076833855679, 'lambda': 0.8958218989100016, 'alpha': 0.8870346499059617}. Best is trial 2 with value: 0.8935956963698062.


time:10285


Parameters: { "verbose" } are not used.

[I 2024-11-25 19:02:05,871] Trial 36 finished with value: 0.8900058538245296 and parameters: {'gamma': 0.05074458551612035, 'max_depth': 3, 'subsample': 0.4101023274499877, 'colsample_bytree': 0.7467713658945296, 'lambda': 1.003033959722492, 'alpha': 1.0067971280503982}. Best is trial 2 with value: 0.8935956963698062.


time:10558


Parameters: { "verbose" } are not used.

[I 2024-11-25 19:06:37,271] Trial 37 finished with value: 0.8884400951330773 and parameters: {'gamma': 0.019046454899208854, 'max_depth': 4, 'subsample': 0.42451997616811055, 'colsample_bytree': 0.8247956884586731, 'lambda': 1.071084464048554, 'alpha': 0.821734409676334}. Best is trial 2 with value: 0.8935956963698062.


time:10829


Parameters: { "verbose" } are not used.

[I 2024-11-25 19:11:09,198] Trial 38 finished with value: 0.8919621296727842 and parameters: {'gamma': 0.03207641854742212, 'max_depth': 3, 'subsample': 0.4426367845000071, 'colsample_bytree': 0.6026187520793946, 'lambda': 0.9631003183860665, 'alpha': 0.9198457159362516}. Best is trial 2 with value: 0.8935956963698062.


time:11101


Parameters: { "verbose" } are not used.

[I 2024-11-25 19:15:56,743] Trial 39 finished with value: 0.8839645190000195 and parameters: {'gamma': 0.04262174859656328, 'max_depth': 4, 'subsample': 0.5806745664437769, 'colsample_bytree': 0.7043834229718762, 'lambda': 0.9362097988107502, 'alpha': 0.8782099602769482}. Best is trial 2 with value: 0.8935956963698062.


time:11389


Parameters: { "verbose" } are not used.

[I 2024-11-25 19:20:35,182] Trial 40 finished with value: 0.8892792847473551 and parameters: {'gamma': 0.023354997045986647, 'max_depth': 3, 'subsample': 0.4845202013082136, 'colsample_bytree': 0.6293539488539364, 'lambda': 0.8967519694984354, 'alpha': 0.9722328313992379}. Best is trial 2 with value: 0.8935956963698062.


time:11667


Parameters: { "verbose" } are not used.

[I 2024-11-25 19:25:16,022] Trial 41 finished with value: 0.8896890259091856 and parameters: {'gamma': 0.05360249243800933, 'max_depth': 3, 'subsample': 0.409820573506272, 'colsample_bytree': 0.6832525182156554, 'lambda': 0.9433127314971955, 'alpha': 0.8237433559915595}. Best is trial 2 with value: 0.8935956963698062.


time:11948


Parameters: { "verbose" } are not used.

[I 2024-11-25 19:29:50,375] Trial 42 finished with value: 0.8917638808355989 and parameters: {'gamma': 0.04957803668203426, 'max_depth': 3, 'subsample': 0.40822690224175323, 'colsample_bytree': 0.688918024810604, 'lambda': 0.9737103757610274, 'alpha': 0.8483777570625753}. Best is trial 2 with value: 0.8935956963698062.


time:12222


Parameters: { "verbose" } are not used.

[I 2024-11-25 19:34:29,155] Trial 43 finished with value: 0.8890569778338729 and parameters: {'gamma': 0.06428228204539986, 'max_depth': 3, 'subsample': 0.41968000790027293, 'colsample_bytree': 0.7354939349012053, 'lambda': 0.9377145579117486, 'alpha': 0.8129781795336583}. Best is trial 2 with value: 0.8935956963698062.


time:12501


Parameters: { "verbose" } are not used.

[I 2024-11-25 19:39:09,924] Trial 44 finished with value: 0.8902173170197282 and parameters: {'gamma': 0.0339963614319902, 'max_depth': 3, 'subsample': 0.4314294989169092, 'colsample_bytree': 0.6436447307965819, 'lambda': 0.84483458492262, 'alpha': 0.8418023722244468}. Best is trial 2 with value: 0.8935956963698062.


time:12782


Parameters: { "verbose" } are not used.

[I 2024-11-25 19:43:32,424] Trial 45 finished with value: 0.8905915698630429 and parameters: {'gamma': 0.052778752108015616, 'max_depth': 3, 'subsample': 0.4056394747962099, 'colsample_bytree': 0.7068369792505909, 'lambda': 0.9935074325877891, 'alpha': 0.8146557330389936}. Best is trial 2 with value: 0.8935956963698062.


time:13044


Parameters: { "verbose" } are not used.

[I 2024-11-25 19:48:05,206] Trial 46 finished with value: 0.8910928023080699 and parameters: {'gamma': 0.03866627711153659, 'max_depth': 3, 'subsample': 0.4203604190553616, 'colsample_bytree': 0.6774135002993975, 'lambda': 0.9579592654568053, 'alpha': 0.898766447612539}. Best is trial 2 with value: 0.8935956963698062.


time:13317


Parameters: { "verbose" } are not used.

[I 2024-11-25 19:52:35,450] Trial 47 finished with value: 0.8897236451159897 and parameters: {'gamma': 0.057953337053414364, 'max_depth': 3, 'subsample': 0.5384137704848473, 'colsample_bytree': 0.7591689235786043, 'lambda': 0.9125236376216808, 'alpha': 0.8577950340185779}. Best is trial 2 with value: 0.8935956963698062.


time:13588


Parameters: { "verbose" } are not used.

[I 2024-11-25 19:57:15,129] Trial 48 finished with value: 0.8858792108523451 and parameters: {'gamma': 0.047747525854816675, 'max_depth': 3, 'subsample': 0.40057850672414885, 'colsample_bytree': 0.8893331495547188, 'lambda': 1.0086463759089521, 'alpha': 0.8772117953431369}. Best is trial 2 with value: 0.8935956963698062.


time:13867


Parameters: { "verbose" } are not used.

[I 2024-11-25 20:02:01,079] Trial 49 finished with value: 0.8824619835829111 and parameters: {'gamma': 0.08397478809546507, 'max_depth': 4, 'subsample': 0.45380642565779505, 'colsample_bytree': 0.6533208035238253, 'lambda': 0.8807013227732342, 'alpha': 1.1342702853357063}. Best is trial 2 with value: 0.8935956963698062.


time:14153
Best parameters: {'gamma': 0.049541708381048245, 'max_depth': 3, 'subsample': 0.4124214980744642, 'colsample_bytree': 0.7282303118311557, 'lambda': 0.9619513230536485, 'alpha': 0.8791045324846161}
Best score: 0.8935956963698062


# Evaluation


## ✋ TODO

In [43]:
final_eval_result_dir = os.path.join(os.getcwd(), "result_final")
eval_results = pt.Experiment([bm25 % K, lmart_pipe % K],
                            topics=test_queries,
                            qrels=test_qrels,
                            eval_metrics=["P_30","map","recip_rank","ndcg"],
                            names=["BM25", "L2R"],
                            baseline=0,
                            save_dir=final_eval_result_dir,
                            save_mode="overwrite",
                            verbose=True,
                            )

pt.Experiment: 100%|██████████| 2/2 [00:28<00:00, 14.09s/system]


In [44]:
eval_results

Unnamed: 0,name,map,recip_rank,P_30,ndcg,map +,map -,map p-value,recip_rank +,recip_rank -,recip_rank p-value,P_30 +,P_30 -,P_30 p-value,ndcg +,ndcg -,ndcg p-value
0,BM25,0.542489,0.723107,0.270543,0.691632,,,,,,,,,,,,
1,L2R,0.559806,0.741711,0.27907,0.709557,23.0,15.0,0.613267,7.0,5.0,0.648245,5.0,1.0,0.124827,24.0,14.0,0.505503


In [48]:
with open('lmart_model_tp4.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

In [49]:
pipe_new = pipeline >> pt.ltr.apply_learned_model(loaded_model, form = "ltr")

In [50]:
final_eval_result_dir = os.path.join(os.getcwd(), "result_final")
eval_results = pt.Experiment([bm25 % K, pipe_new % K],
                            topics=test_queries,
                            qrels=test_qrels,
                            eval_metrics=["P_30","map","recip_rank","ndcg"],
                            names=["BM25", "L2R"],
                            baseline=0,
                            save_dir=final_eval_result_dir,
                            save_mode="overwrite",
                            verbose=True,
                            )
eval_results

pt.Experiment: 100%|██████████| 2/2 [00:30<00:00, 15.30s/system]


Unnamed: 0,name,map,recip_rank,P_30,ndcg,map +,map -,map p-value,recip_rank +,recip_rank -,recip_rank p-value,P_30 +,P_30 -,P_30 p-value,ndcg +,ndcg -,ndcg p-value
0,BM25,0.542489,0.723107,0.270543,0.691632,,,,,,,,,,,,
1,L2R,0.559806,0.741711,0.27907,0.709557,23.0,15.0,0.613267,7.0,5.0,0.648245,5.0,1.0,0.124827,24.0,14.0,0.505503
