###### Assignment 3, Evaluation

This notebook can be used for evaluating an entity ranking against the ground truth.

In [1]:
import math

# Ranking files to evaluate; each one is parsed with load_rankings(), which
# expects the header line "QueryId,EntityId".
# NOTE(review): the MLM / SDM_ELR / FSDM_ELR prefixes presumably name the
# retrieval models that produced model1..model3 -- confirm against the
# assignment description.
BASE_RANKING_FILE = "data/ranking_baseline.csv"
MLM_RANKING_FILE = "data/ranking_model1.csv"
SDM_ELR_RANKING_FILE = "data/ranking_model2.csv"
FSDM_ELR_RANKING_FILE = "data/ranking_model3.csv"
# Ground-truth relevance judgments; parsed with load_qrels(), which expects
# the header line "QueryId,EntityId,Relevance".
QRELS_FILE = "data/qrels.csv"

## Loading ranking files

In [2]:
def load_rankings(ranking_file):
    """Load a ranking file into a dict of per-query entity lists.

    The file must start with the header line ``QueryId,EntityId``. Every
    following non-blank line is expected to look like
    ``<query id>,"<entity id>"``. Entities are kept in file order, so the
    position in the returned list is the rank of the entity.

    Args:
        ranking_file: Path of the ranking file to read.

    Returns:
        Dict mapping each query ID (str) to the list of entity IDs (str)
        ranked for that query, in the order they appear in the file.

    Raises:
        Exception: If the header line is not ``QueryId,EntityId``.
        ValueError: If a non-blank line does not contain the ``,"``
            separator between query ID and entity ID.
    """
    rankings = {}
    with open(ranking_file, "r") as fin:
        header = fin.readline().strip()
        if header != "QueryId,EntityId":
            raise Exception("Incorrect file format!")
        # Iterate lazily instead of materialising the file with readlines().
        for line in fin:
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            # Split on the FIRST ',"' only: the entity ID itself may
            # contain that character sequence.
            qid, quoted = line.split(",\"", 1)
            # Drop the closing quote; leave the ID intact if it is missing
            # rather than silently chopping its last character.
            docid = quoted[:-1] if quoted.endswith("\"") else quoted
            rankings.setdefault(qid, []).append(docid)
    return rankings

## Loading relevance judgments

In [3]:
def load_qrels(qrels_file):
    """Load relevance judgments into a nested dict.

    The file must start with the header line
    ``QueryId,EntityId,Relevance``. Every following non-blank line is
    expected to look like ``<query id>,"<entity id>",<relevance>`` where
    the relevance is an integer.

    Args:
        qrels_file: Path of the relevance-judgment file to read.

    Returns:
        Dict mapping each query ID (str) to a dict that maps entity ID (str)
        to its integer relevance level. If an entity is listed more than
        once for a query, the last occurrence wins.

    Raises:
        Exception: If the header line is not ``QueryId,EntityId,Relevance``.
        ValueError: If a non-blank line lacks one of the separators or the
            relevance value is not an integer.
    """
    qrels = {}
    with open(qrels_file, "r") as fin:
        header = fin.readline().strip()
        if header != "QueryId,EntityId,Relevance":
            raise Exception("Incorrect file format!")
        # Iterate lazily instead of materialising the file with readlines().
        for line in fin:
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            # The entity ID may itself contain ',"' or '",', so take the
            # query ID from the first separator and the relevance from the
            # last one; everything in between is the entity ID.
            qid, rest = line.split(",\"", 1)
            docid, rel = rest.rsplit("\",", 1)
            qrels.setdefault(qid, {})[docid] = int(rel)
    return qrels

## Computing NDCG scores

In [4]:
def dcg(rel, p):
    """Compute the discounted cumulative gain of a ranked gain list.

    Uses the formulation in which the first position is not discounted:
    ``DCG@p = rel_1 + sum_{i=2..p} rel_i / log2(i)`` (positions 1-based).

    Args:
        rel: Sequence of relevance gains in rank order.
        p: Rank cutoff; only the first ``p`` gains contribute.

    Returns:
        The DCG of the first ``min(p, len(rel))`` gains, or 0 when ``rel``
        is empty or ``p`` is not positive.
    """
    # Nothing is ranked (or nothing is inside the cutoff): no gain at all.
    if p <= 0 or len(rel) == 0:
        return 0
    score = rel[0]  # rank 1 is taken undiscounted
    for i in range(1, min(p, len(rel))):
        # i is a 0-based index, so the 1-based rank position is i + 1.
        score += rel[i] / math.log2(i + 1)
    return score

def compute_ndcg(rankings, qrels, k=100):
    """Compute the mean NDCG@k over all queries in ``rankings``.

    For each query, the gains of the top-``k`` ranked entities are looked up
    in ``qrels`` (unjudged entities count as gain 0) and their DCG is divided
    by the DCG of the ideal ordering of all judged entities for that query.

    A query contributes an NDCG of 0 when it has no judgments in ``qrels``,
    when its ideal DCG is not positive (e.g. every judgment is 0), or when
    its ranking is empty. It still counts in the denominator of the mean.

    Args:
        rankings: Dict mapping query ID to a list of entity IDs in rank order.
        qrels: Dict mapping query ID to a dict of entity ID -> relevance level.
        k: Rank cutoff.

    Returns:
        The NDCG@k averaged over the queries in ``rankings``; 0.0 when
        ``rankings`` is empty.
    """
    if not rankings:
        return 0.0  # avoid dividing by zero when there is nothing to score

    sum_ndcg = 0
    # Sorted iteration keeps the floating-point summation order deterministic.
    for qid, ranking in sorted(rankings.items()):
        gt = qrels.get(qid, {})  # a query without judgments scores 0

        # relevance levels of the ranked docs (0 for unjudged ones)
        gains = [gt.get(doc_id, 0) for doc_id in ranking[:k]]

        # relevance levels of the idealized ranking
        gain_ideal = sorted(gt.values(), reverse=True)

        # Only call dcg() on non-empty lists and never divide by a
        # non-positive ideal DCG.
        ideal_dcg = dcg(gain_ideal, k) if gain_ideal else 0
        if gains and ideal_dcg > 0:
            sum_ndcg += dcg(gains, k) / ideal_dcg

    return sum_ndcg / len(rankings)

## Results

In [5]:
# Load the ground-truth judgments once; the same qrels are reused for every
# ranking file evaluated below.
qrels = load_qrels(QRELS_FILE)

# Evaluate each ranking file and print its mean NDCG at cutoffs 10 and 100.
# Note the naming: `ranking` is the file path, `rankings` the parsed
# per-query entity lists loaded from it.
for ranking in [BASE_RANKING_FILE, MLM_RANKING_FILE, SDM_ELR_RANKING_FILE, FSDM_ELR_RANKING_FILE]:
    rankings = load_rankings(ranking)
    print(f'{ranking}')
    print(f'\tNDCG@10: {compute_ndcg(rankings, qrels, k=10):.4f}')
    # end='\n\n' leaves a blank line between the reports of consecutive files.
    print(f'\tNDCG@100: {compute_ndcg(rankings, qrels, k=100):.4f}', end='\n\n')

data/ranking_baseline.csv
	NDCG@10: 0.3791
	NDCG@100: 0.4421

data/ranking_model1.csv
	NDCG@10: 0.3410
	NDCG@100: 0.4114

data/ranking_model2.csv
	NDCG@10: 0.3190
	NDCG@100: 0.3987

data/ranking_model3.csv
	NDCG@10: 0.3424
	NDCG@100: 0.4132

