In [21]:
import json
import numpy as np
from metric import evaluate, ndcg, average_precision, precision
from collections import defaultdict
from itertools import groupby

In [7]:
def load_judge_file(filepath, scale=int, file_format='ir', reverse=False):
    """Load relevance judgements from a text file into a nested mapping.

    Args:
        filepath: Path of the judgement file to read.
        scale: Callable applied to the raw relevance string of every line
            (defaults to ``int``).
        file_format: Line layout. ``'ir'`` expects three tab-separated fields
            in the order ``query, doc, relevance``; ``'text'`` expects three
            space-separated fields in the order ``relevance, query, doc``.
        reverse: When true the result is indexed as ``[doc][query]`` instead
            of ``[query][doc]``.

    Returns:
        A ``defaultdict`` of ``defaultdict``s whose leaves are the scaled
        relevance values. Looking up a missing inner key yields (and stores)
        ``None``.

    Raises:
        ValueError: If ``file_format`` is not ``'ir'`` or ``'text'``, or if a
            non-blank line does not split into exactly three fields.
    """
    separators = {'ir': '\t', 'text': ' '}
    if file_format not in separators:
        # Fail early: otherwise no branch assigns q/d/r and the loop would
        # die with an UnboundLocalError on the first line.
        raise ValueError(
            "unknown file_format {!r}; expected one of {}".format(
                file_format, sorted(separators)))
    sep = separators[file_format]

    qd_judge = defaultdict(lambda: defaultdict(lambda: None))
    with open(filepath, 'r') as fp:
        for line in fp:
            line = line.rstrip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            if file_format == 'ir':
                q, d, r = line.split(sep)
            else:
                r, q, d = line.split(sep)
            r = scale(r)
            if reverse:
                qd_judge[d][q] = r
            else:
                qd_judge[q][d] = r
    return qd_judge

In [54]:
def rank_judge(json_filename, judgement_filename, file_format='text', reverse=False):
    """Score a ranking file against relevance judgements and print the results.

    The judgements are read with ``load_judge_file`` and every label is
    clamped into ``[0, rel_level - 1]`` (binary with ``rel_level = 2``). The
    ranking is read from a JSON file, each query's document list is
    de-duplicated keeping the first occurrence, and ``evaluate`` is run for
    precision@{1,5,10} and average_precision@{1,5,10,1000}. For each setting
    the mean over the values returned by ``evaluate`` is printed.

    Args:
        json_filename: Path to a JSON file; expected to hold an object mapping
            each query id to a list of document ids in rank order.
        judgement_filename: Path of the judgement file passed to
            ``load_judge_file``.
        file_format: Forwarded to ``load_judge_file``.
        reverse: Forwarded to ``load_judge_file``.

    Returns:
        None. Results are written to stdout.
    """
    rel_level = 2  # labels are clamped to 0 .. rel_level - 1

    def relevance_mapper(r):
        # Clamp a raw label into the supported range.
        if r < 0:
            return 0
        if r >= rel_level:
            return rel_level - 1
        return r

    def dedup(docs):
        # Drop repeated documents while preserving the first-seen order.
        unique_docs = []
        seen = set()
        for d in docs:
            if d not in seen:
                unique_docs.append(d)
                seen.add(d)
        return unique_docs

    test_qd_judge = load_judge_file(judgement_filename, file_format=file_format, reverse=reverse)
    for q in test_qd_judge:
        for d in test_qd_judge[q]:
            # Only existing keys are reassigned, so mutating during iteration is safe.
            test_qd_judge[q][d] = relevance_mapper(test_qd_judge[q][d])

    # Use a context manager so the ranking file handle is always closed.
    with open(json_filename, 'r') as fp:
        ranks = json.load(fp)
    ranks = dict((q, dedup(ranks[q])) for q in ranks)
    print('avg #docs per query: {}'.format(np.mean([len(ranks[q]) for q in ranks])))

    # Explicit name -> function table instead of eval() on the metric name.
    metric_fns = {
        'precision': precision,
        'average_precision': average_precision,
    }
    settings = [
        ('precision', 1), ('precision', 5), ('precision', 10),
        ('average_precision', 1), ('average_precision', 5),
        ('average_precision', 10), ('average_precision', 1000),
    ]
    for metric, top_k in settings:
        scores = evaluate(ranks, test_qd_judge, metric=metric_fns[metric], top_k=top_k)
        avg_score = np.mean(list(scores.values()))
        print('{} {}: {}'.format(metric, top_k, avg_score))

In [55]:
# Evaluate the exp1 ranking (ranking.30.json) against the apple_small pointwise test judgements.
rank_judge(
    json_filename='ranking/exp1/ranking.30.json',
    judgement_filename='data/apple_small/test.prep.pointwise',
    file_format='text',
    reverse=False,
)

avg #docs per query: 366.8448275862069
precision 1: 0.0603448275862069
precision 5: 0.034482758620689655
precision 10: 0.02758620689655173
average_precision 1: 0.0603448275862069
average_precision 5: 0.08994252873563219
average_precision 10: 0.09903530377668308
average_precision 1000: 0.07631416566679959


In [56]:
# Evaluate the tfidf.json ranking from the MatchZoo experiment directory against the same test judgements.
rank_judge(
    json_filename='../MatchZoo/experiment/apple_small/ranking/tfidf.json',
    judgement_filename='data/apple_small/test.prep.pointwise',
    file_format='text',
    reverse=False,
)

avg #docs per query: 366.8448275862069
precision 1: 0.0
precision 5: 0.0
precision 10: 0.0008620689655172415
average_precision 1: 0.0
average_precision 5: 0.0
average_precision 10: 0.0014367816091954023
average_precision 1000: 0.006736864853039645
