In [1]:
import pyltr
import numpy as np
import pandas as pd

In [2]:
# input array of qids of each query-document pair
# output sorted qids 
def get_qids(Qids):
    qs = list(set(Qids))
    qs.sort()
    
    return qs

def compute_MRR(relevance_list, feature_list):
    # sort arrays by the feature value
    feature_list, relevance_list = (list(t) for t in zip(*sorted(zip(feature_list, relevance_list), reverse = True)))
    # MRR@100, only look at top 100
    if len(feature_list) > 100:
        feature_list = feature_list[0:100]
        relevance_list = relevance_list[0:100]

    idx = np.where(relevance_list)[0]#get the indexesof relevant document
    if any(idx):
        return 1/(idx[0]+1) # index starts from 0 so add 1
    else:
        return 0

In [3]:
def evaluate_BFC_NDCG(EX, Ey, Eqids, model):
    
    # get a list of qids
    eqs = get_qids(Eqids)
    metric = pyltr.metrics.NDCG(k=5)
    
    #EX_best_feature = [np.array(i[best_feature]) for i in EX]
    
    ndcg_qs = np.array([])
    # iterate over qid
    for i in eqs: 
        idxs = np.where(Eqids == i)[0]
        EX_i = [EX[idx] for idx in idxs] 
        relevance_list = [Ey[idx] for idx in idxs] # true score
        
        Epred_i = model.predict(EX_i)
        # get the evaluation score for the query
        #score = ndcg_score(np.asarray([relevance_list]), np.asarray([feature_list]), k=5)
        score = metric.evaluate_preds(i, np.asarray(relevance_list), np.asarray(Epred_i))
        ndcg_qs = np.append(ndcg_qs, [score], axis = 0)

    return ndcg_qs

def evaluate_BFC_MRR(EX, Ey, Eqids, model):
    
    # get a list of qids
    eqs = get_qids(Eqids)
    
    #EX_best_feature = [np.array(i[best_feature]) for i in EX]
    
    mrr_qs = np.array([])
    # iterate over qid
    for i in eqs: 
        idxs = np.where(Eqids == i)[0]
        EX_i = [EX[idx] for idx in idxs]  # target score
        relevance_list = [Ey[idx] > 0 for idx in idxs] # true score, convert a boolean list (relevant or non relevant)
        # get the evaluation score for the query
        
        Epred_i = model.predict(EX_i)
        
        score = compute_MRR(relevance_list, Epred_i)
        mrr_qs = np.append(mrr_qs, [score], axis = 0)

    return mrr_qs

In [4]:
direc_name = './MQ2007/Fold'
train_name = '/train.txt'
test_name = '/test.txt'

In [5]:
qids = np.array([]) # store all the qids
ndcg_qs = np.array([]) # store all ndcg scores
mrr_qs = np.array([]) # store all mrr scores


for fold_no in range(5):
    with open(direc_name + str(fold_no+1) + train_name) as trainfile, \
             open(direc_name + str(fold_no+1) + test_name) as evalfile:
        TX, Ty, Tqids, _ = pyltr.data.letor.read_dataset(trainfile)
        EX, Ey, Eqids, _ = pyltr.data.letor.read_dataset(evalfile)
    
    Tqids = np.array([int(l) for l in Tqids])
    Eqids = np.array([int(l) for l in Eqids])
    
    scorer = pyltr.metrics.NDCG(k=5)
    model = pyltr.models.LambdaMART(metric = scorer, n_estimators=500, verbose=0)
    model.fit(TX, Ty, Tqids)
    
    e_qids = np.array(get_qids(Eqids))
    
    eval_ndcg = evaluate_BFC_NDCG(EX, Ey, Eqids, model)
    eval_mrr = evaluate_BFC_MRR(EX, Ey, Eqids, model)
    
    qids = np.append(qids, e_qids)
    ndcg_qs = np.append(ndcg_qs, eval_ndcg)
    mrr_qs = np.append(mrr_qs, eval_mrr)

In [6]:
qids_df = pd.DataFrame.from_dict(qids)
qids_df.columns = ['Q_ID']
ndcg_df = pd.DataFrame.from_dict(ndcg_qs)
ndcg_df.columns = ['NDCG@5']
mrr_df = pd.DataFrame.from_dict(mrr_qs)
mrr_df.columns = ['MRR@100']

In [8]:
qids_df

Unnamed: 0,Q_ID
0,7968.0
1,7979.0
2,7993.0
3,7995.0
4,8002.0
5,8005.0
6,8013.0
7,8020.0
8,8023.0
9,8024.0


In [10]:
qids_df.to_csv('BL_QIDS.csv', index=False)
ndcg_df.to_csv('BL_NDCG.csv',index=False)
mrr_df.to_csv('BL_MRR.csv',index=False)