In [1]:
from collections import defaultdict
from operator import itemgetter
from random import shuffle

from res_file_reader import ResFileReader

# Number of random permutation rounds run by permutation_test; the same value
# is used there to locate the 2.5% / 97.5% positions in the sorted differences.
N_iteration = 10000

In [2]:
def create_permutation_vectors(model):
    """Build one shuffled 0/1 vector per query id found in ``model``.

    Args:
        model: mapping from query id to a sized collection (its length
            determines the length of the vector created for that query).

    Returns:
        dict mapping every query id of ``model`` to the list returned by
        ``create_permutation_vector`` for the matching length.
    """
    return {qid: create_permutation_vector(len(model[qid])) for qid in model}


def create_permutation_vector(size):
    """Return a randomly ordered list of ``size`` zeros and ones.

    Before shuffling, every index strictly below ``size/2`` holds 0 and every
    other index holds 1; the list is then shuffled in place with
    ``random.shuffle`` and returned.
    """
    half = size/2
    flags = []
    for position in range(size):
        flags.append(1 if position >= half else 0)
    shuffle(flags)
    return flags


In [3]:
def map(out, th):
    """Compute Mean Average Precision over the top ``th`` candidates per query.

    Note: this function intentionally keeps its original name, which shadows
    the built-in ``map`` for the rest of the notebook.

    Args:
        out: mapping from query id to a ranked list of labels; a candidate
            counts as relevant only when its label equals the string "true".
        th: maximum number of leading candidates inspected for each query.

    Returns:
        The mean, over all queries in ``out``, of each query's average
        precision. A query's average precision is the mean of precision@k
        taken at every rank k (within the first ``th``) that holds a relevant
        candidate, or 0 when no relevant candidate is found there.
        Returns 0.0 when ``out`` is empty.
    """
    num_queries = len(out)
    if num_queries == 0:
        # No queries: avoid dividing by zero and report a neutral score.
        return 0.0

    total = 0.0
    for qid in out:
        candidates = out[qid]
        precisions = []
        num_correct = 0
        # range() instead of xrange() so the cell runs on Python 2 and 3.
        for i in range(min(th, len(candidates))):
            if candidates[i] == "true":
                num_correct += 1
                precisions.append(num_correct / float(i + 1))

        if precisions:
            total += sum(precisions) / float(len(precisions))

    return total / float(num_queries)


In [4]:
def read_res_pred_files(res_fname, pred_fname, format, verbose=False,
                        reranking_th=0.0,
                        ignore_noanswer=False):
    """Read a result file and a prediction file in lockstep and rank labels.

    Both files are parsed line by line with ``ResFileReader(format)``; each
    parsed line yields ``(qid, aid, label, score)``. Lines are paired by
    position and must carry the same ``(qid, aid)``, and both labels must be
    ``'true'`` or ``'false'``. On a violation an error message is printed and
    ``quit()`` is called.

    Args:
        res_fname: path of the file supplying the reference labels and the
            scores used for the ``ir`` ranking.
        pred_fname: path of the file supplying predicted labels and the
            scores used for the ``svm`` ranking.
        format: passed unchanged to ``ResFileReader``.
        verbose: when true, ``pred_fname + ".analysis"`` and
            ``pred_fname + ".correctpos"`` are created (or truncated) as
            empty files; nothing is written to them here.
        reranking_th: a query's candidates are re-sorted by prediction score
            only if that query's highest prediction score is >= this value;
            otherwise they keep their file order.
        ignore_noanswer: when true, drop every query whose candidates are all
            labelled ``'false'`` in ``res_fname``.

    Returns:
        ``(ir, svm, conf_matrix)`` where ``ir`` and ``svm`` map each query id
        to the list of ``res_fname`` labels ordered by descending IR score and
        by prediction score (see ``reranking_th``) respectively, and
        ``conf_matrix[label][predicted_label]`` counts the line pairs.
    """
    lineReader = ResFileReader(format)
    lineReader_pred = ResFileReader(format)

    ir, svm = defaultdict(list), defaultdict(list)
    conf_matrix = {'true' : {'true' : 0, 'false' : 0}, 'false' : {'true' : 0, 'false' : 0}}

    # Context managers guarantee both inputs are closed, including when
    # quit() raises SystemExit below.
    # NOTE: zip() stops at the shorter file, so surplus lines in the longer
    # one are ignored without any error.
    with open(res_fname) as res_file, open(pred_fname) as pred_file:
        for lineNo, (line_res, line_pred) in enumerate(zip(res_file, pred_file), 1):
            qid, aid, relevant, ir_score = lineReader.read_line(line_res)
            pred_qid, pred_aid, pred_relevant, pred_score = lineReader_pred.read_line(line_pred)

            if (qid != pred_qid) or (aid != pred_aid):
                print('ERROR: ID mismatch on line ' + str(lineNo) + ':')
                print('in ' + res_fname + ' we have (' + qid + ',' + aid + '),')
                print ('but in ' + pred_fname + ' we have (' + pred_qid + ',' + pred_aid + ')')
                quit()

            if (relevant != 'true') and (relevant != 'false'):
                print('ERROR: wrong label on line ' + str(lineNo) + ' in ' + res_fname + ': "' + relevant + '"')
                print('Allowed values are only "true" and "false"')
                quit()

            if (pred_relevant != 'true') and (pred_relevant != 'false'):
                print('ERROR: wrong label on line ' + str(lineNo) + ' in ' + pred_fname + ': "' + pred_relevant + '"')
                print('Allowed values are only "true" and "false"')
                quit()

            # Both rankings carry the label from res_fname; only the score differs.
            ir[qid].append( (relevant, ir_score, aid) )
            svm[qid].append( (relevant, pred_score, aid) )
            conf_matrix[relevant][pred_relevant] += 1

    if verbose:
        # The original opened these two files for writing and closed them
        # without writing anything; keep that side effect (empty files) but
        # do not hold the handles open while sorting.
        for suffix in (".analysis", ".correctpos"):
            with open(pred_fname + suffix, "w"):
                pass

    # Remove questions that contain no correct answer.
    if ignore_noanswer:
        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating its live key view raises RuntimeError on Python 3.
        for qid in list(ir.keys()):
            if all(relevant == "false" for relevant, _, _ in ir[qid]):
                del ir[qid]
                del svm[qid]

    for qid in ir:
        # Sort by IR score.
        ir_sorted = sorted(ir[qid], key = itemgetter(1), reverse = True)

        # Sort by prediction score, but only when the best score reaches the
        # re-ranking threshold; otherwise keep the order found in the file.
        svm_sorted = svm[qid]
        max_score = max(score for _, score, _ in svm_sorted)
        if max_score >= reranking_th:
            svm_sorted = sorted(svm_sorted, key = itemgetter(1), reverse = True)

        ir[qid] = [rel for rel, _, _ in ir_sorted]
        svm[qid] = [rel for rel, _, _ in svm_sorted]

    return ir, svm, conf_matrix


In [5]:
def permutation_test(m_1, m_2, true=10):
    """Permutation test on the MAP difference between two ranked outputs.

    For ``N_iteration`` rounds, the labels of ``m_1`` and ``m_2`` are swapped
    position by position according to a freshly shuffled 0/1 vector per query,
    and the MAP difference of the two resulting pseudo-models is recorded.
    The observed difference is then compared with the values found at the
    2.5% and 97.5% positions of the sorted permuted differences.

    Args:
        m_1: mapping from query id to the ranked label list of the first model.
        m_2: same mapping for the second model; must contain every query id
            of ``m_1``.
        true: rank threshold passed as ``th`` to ``map`` for both the observed
            and the permuted scores.

    Returns:
        ``(significant, map_m_1, map_m_2)`` where ``significant`` is True when
        the observed difference ``map_m_1 - map_m_2`` lies outside the
        interval described above, and the other two values are the observed
        MAP scores.
    """
    vectors = create_permutation_vectors(m_1)
    # Use the caller's threshold here as well: the original hard-coded 10,
    # which made the observed statistic incomparable with the permuted ones
    # whenever ``true`` differed from 10.
    map_m_1 = map(out=m_1, th=true)
    map_m_2 = map(out=m_2, th=true)
    r_real = map_m_1 - map_m_2
    r_perm = []

    for _ in range(N_iteration):
        m_1_p = dict()
        m_2_p = dict()
        for qid in m_1.keys():
            vector = vectors[qid]
            shuffle(vector)
            labels_1 = []
            labels_2 = []
            # A 1 in the vector swaps the two models' labels at that rank;
            # a 0 leaves them where they were.
            for label_2, label_1, v in zip(m_2[qid], m_1[qid], vector):
                if v == 1:
                    labels_1.append(label_2)
                    labels_2.append(label_1)
                else:
                    labels_1.append(label_1)
                    labels_2.append(label_2)
            m_1_p[qid] = labels_1
            m_2_p[qid] = labels_2

        map_m_1_p = map(out=m_1_p, th=true)
        map_m_2_p = map(out=m_2_p, th=true)
        r_perm.append(map_m_1_p - map_m_2_p)

    bottom_2_5 = int(2.5 / 100 * N_iteration)
    top_2_5 = int((1 - 2.5 / 100) * N_iteration)
    r_perm.sort()
    lower, upper = r_perm[bottom_2_5], r_perm[top_2_5]

    significant = not (lower <= r_real <= upper)
    return significant, map_m_1, map_m_2


In [6]:
def compare_models(model_1_path, model_2_path, true_path):
    """Run the permutation test on two prediction files against one result file.

    The original parameter list read ``model_2, path`` (a typo for
    ``model_2_path``), so the body's reference to ``model_2_path`` raised
    NameError on every call; the signature now matches the body.

    Args:
        model_1_path: prediction file of the first model.
        model_2_path: prediction file of the second model.
        true_path: result file passed as ``res_fname`` for both models.

    Returns:
        Whatever ``permutation_test`` returns for the two prediction-score
        rankings, using a rank threshold of 10.
    """
    # Only the prediction-score ranking (second return value) is needed.
    _, svm1, _ = read_res_pred_files(res_fname=true_path, pred_fname=model_1_path, format='trec')
    _, svm2, _ = read_res_pred_files(res_fname=true_path, pred_fname=model_2_path, format='trec')

    return permutation_test(svm1, svm2, 10)