## Cover ID evaluation scripts

In [45]:
%matplotlib inline
from __future__ import division, print_function
import numpy as np

import SHS_data
import evaluation

### Evaluating a query

```Python
def evaluate_query(query, retrieved, correct_uris=None):
"""Evaluate retrieval results for a given query.

    Args:
        query (str): query URI
        retrieved (list): ordered list of top k retrieved documents
            (k can be anything)
        clique_uris (list): list of documents to be found. Set to None
            to look up using ID, pass a list to save i/o time.

    Returns:
        dict: dictionary of results with evaluation metrics as keys.
            currently implemented:
            - ap (average precision)
            - precision at 1
            - recall at 5
"""
...
```

More elegant average precision formulation than currently used (but not very usable):

```Python
precisions = [__precision__(ranks[:i+1], ranks[i]) for i in range(len(ranks))]
```

In [103]:
reload(evaluation)

cliques_by_name, cliques_by_uri = SHS_data.read_cliques()

n_test = 1989
test_clique = cliques_by_name.keys()[n_test]
print('CLIQUE: ' + test_clique)

test_uris = cliques_by_name[test_clique]
test_query, test_relevant = test_uris[0], test_uris[1:]
print('QUERY: ' + test_query)
print('RELEVANT: {}'.format(test_relevant))

test_noise = cliques_by_name.values()[n_test+1]
print('NOISE: {}'.format(test_noise))

test_retrieved = test_noise + test_relevant
print('RETRIEVED: {}'.format(test_retrieved))

res = evaluation.evaluate_query(test_query, test_retrieved)
print('RESULT: {}'.format(res))

CLIQUE: 4457_I've Got A Crush On You
QUERY: TRKVQBG128F427E511
RELEVANT: ['TRQUXMZ128F92D16D2', 'TRQJONS128F428D06E']
NOISE: ['TRQFPOO128F92E27A4', 'TRTTHWG128F92F6F57', 'TRSNEJR128F424E83B', 'TRCJVOO128F4274715']
RETRIEVED: ['TRQFPOO128F92E27A4', 'TRTTHWG128F92F6F57', 'TRSNEJR128F424E83B', 'TRCJVOO128F4274715', 'TRQUXMZ128F92D16D2', 'TRQJONS128F428D06E']
set(['TRKVQBG128F427E511', 'TRQUXMZ128F92D16D2', 'TRQJONS128F428D06E'])
RESULT: {'ap': 0.26666666666666666, 'p1': 0.0, 'r5': 0.33333333333333331}


In [104]:
retrieved = ['TRQFPOO128F92E27A4', 'TRTTHWG128F92F6F57',  # these two are incorrect
             'TRQUXMZ128F92D16D2', 'TRQJONS128F428D06E']  # these two are correct
res = evaluation.evaluate_query(test_query, retrieved)
print('RESULT: {}'.format(res))

set(['TRKVQBG128F427E511', 'TRQUXMZ128F92D16D2', 'TRQJONS128F428D06E'])
RESULT: {'ap': 0.41666666666666663, 'p1': 0.0, 'r5': 0.66666666666666663}


In [109]:
print(set(test_query))
set(cliques_by_name[test_clique]) - set(test_query)

set(['1', 'B', 'E', 'G', 'F', 'K', '4', 'Q', 'R', '5', 'T', '7', 'V', '2', '8'])


{'TRKVQBG128F427E511', 'TRQJONS128F428D06E', 'TRQUXMZ128F92D16D2'}