# Best-Feature Calibration

In [591]:
!pip install pyltr
!pip install more_itertools



In [592]:
import warnings
import pyltr
warnings.filterwarnings('ignore')
from sklearn.cluster import KMeans
import numpy as np
from more_itertools import sort_together
from sklearn.metrics import ndcg_score
from sklearn.metrics import label_ranking_average_precision_score

## Find best feature

In [593]:
# input array of qids of each query-document pair
# output sorted qids 
def get_qids(Qids):
    qs = list(set(Qids))
    qs.sort()
    
    return qs

In [594]:
def compute_MRR(relevance_list, feature_list,):
    # sort arrays by the feature value
    feature_list, relevance_list = (list(t) for t in zip(*sorted(zip(feature_list, relevance_list), reverse = True)))
    # MRR@100, only look at top 100
    if len(feature_list) > 100:
        feature_list = feature_list[0:100]
        relevance_list = relevance_list[0:100]

    idx = np.where(relevance_list)[0]#get the indexesof relevant document
    if any(idx):
        return 1/(idx[0]+1) # index starts from 0 so add 1
    else:
        return 0

In [595]:
## input training data
## output the index of best feature according to ndcg@5
def find_best_feature(TX, Ty, Tqids):
    # get a list of qids
    tqs = get_qids(Tqids)
    
    ndcg_features = np.empty((0,np.size(tqs)), float) # store ndcg@5 of each qid for each feature

    # iterate over features
    for feature_no in range(np.size(TX, 1)):
        single_feature = [np.array(i[feature_no]) for i in TX]
        ndcg_qs = np.array([])
        # iterate over qid
        for i in tqs: 
            idxs = np.where(Tqids == i)[0]
            feature_list = [single_feature[idx] for idx in idxs] # target score
            relevance_list = [Ty[idx] for idx in idxs] # true score
            # get the evaluation score for the query
            score = ndcg_score(np.asarray([relevance_list]), np.asarray([feature_list]), k=5)
            ndcg_qs = np.append(ndcg_qs, [score], axis = 0)

        ndcg_features = np.append(ndcg_features, [ndcg_qs], axis = 0)
            
    # get the average ndcg score for each feature
    feature_avg_score = np.mean(ndcg_features, axis=1)
    
    # return index of the feature with highest score
    return np.argmax(feature_avg_score)

In [596]:
## input evaluation data and index of best feature from training 
## output an array of ndcg score for each qids
def evaluate_BFC_NDCG(EX, Ey, Eqids, best_feature):
    
    # get a list of qids
    eqs = get_qids(Eqids)
    
    EX_best_feature = [np.array(i[best_feature]) for i in EX]
    ndcg_qs = np.array([])
    # iterate over qid
    for i in eqs: 
        idxs = np.where(Eqids == i)[0]
        feature_list = [EX_best_feature[idx] for idx in idxs] # target score
        relevance_list = [Ey[idx] for idx in idxs] # true score
        # get the evaluation score for the query
        score = ndcg_score(np.asarray([relevance_list]), np.asarray([feature_list]), k=5)
        ndcg_qs = np.append(ndcg_qs, [score], axis = 0)

    return ndcg_qs
    

In [597]:
## input evaluation data and index of best feature from training 
## output an array of MRR score for each qids
def evaluate_BFC_MRR(EX, Ey, Eqids, best_feature):
    
    # get a list of qids
    eqs = get_qids(Eqids)
    
    EX_best_feature = [np.array(i[best_feature]) for i in EX]
    mrr_qs = np.array([])
    # iterate over qid
    for i in eqs: 
        idxs = np.where(Eqids == i)[0]
        feature_list = [EX_best_feature[idx] for idx in idxs] # target score
        relevance_list = [Ey[idx] > 0 for idx in idxs] # true score, convert a boolean list (relevant or non relevant)
        # get the evaluation score for the query
        score = compute_MRR(relevance_list, feature_list)
        mrr_qs = np.append(mrr_qs, [score], axis = 0)

    return mrr_qs
    

In [598]:
direc_name = './MQ2007/Fold'
train_name = '/train.txt'
valid_name = '/vali.txt'
test_name = '/test.txt'

In [599]:
best_features = np.array([]) # store best features for each fold
qids = np.array([]) # store all the qids
ndcg_qs = np.array([]) # store all ndcg scores
mrr_qs = np.array([]) # store all mrr scores

for fold_no in range(5):
    with open(direc_name + str(fold_no+1) + train_name) as trainfile, \
             open(direc_name + str(fold_no+1) + valid_name) as valifile, \
             open(direc_name + str(fold_no+1) + test_name) as evalfile:
        TX, Ty, Tqids, _ = pyltr.data.letor.read_dataset(trainfile)
        VX, Vy, Vqids, _ = pyltr.data.letor.read_dataset(valifile)
        EX, Ey, Eqids, _ = pyltr.data.letor.read_dataset(evalfile)
    
    Tqids = np.array([int(i) for i in Tqids])
    Vqids = np.array([int(i) for i in Vqids])
    Eqids = np.array([int(i) for i in Eqids])
    
    e_qids = np.array(get_qids(Eqids))
    
    best_feature = find_best_feature(TX, Ty, Tqids)
    eval_ndcg = evaluate_BFC_NDCG(EX, Ey, Eqids, best_feature)
    eval_mrr = evaluate_BFC_MRR(EX, Ey, Eqids, best_feature)
    
    best_features = np.append(best_features, [best_feature])
    qids = np.append(qids, e_qids)
    ndcg_qs = np.append(ndcg_qs, eval_ndcg)
    mrr_qs = np.append(mrr_qs, eval_mrr)

In [587]:
best_features

array([38.])

In [589]:
np.mean(ndcg_qs)

0.4297203061551881

In [590]:
np.mean(mrr_qs)

0.5502663262322336