# TREC 2019 Precision Medicine

In [4]:
import json
from json2html import *
from IPython.display import HTML
import pandas

import os, sys
# Put the parent of the current working directory on the import path (once),
# ahead of the `trec_utils` import that follows.
nb_dir = os.path.dirname(os.getcwd())
already_importable = nb_dir in sys.path
if not already_importable:
    sys.path.append(nb_dir)
from trec_utils import utils, running, evaluation

In [5]:
# Load the project configuration via trec_utils and display it; the recorded
# output lists the ELASTIC, ABSTRACTS and TRIALS entries.
config = utils.load_config()
config

{'ELASTIC': 'http://your-elasticsearch-server:9200',
 'ABSTRACTS': '/abstracts',
 'TRIALS': '/trials'}

In [3]:
# Read the topics and the abstract relevance judgements (qrels) used by every
# cell below.
# NOTE(review): both files are the 2017 editions although the notebook title
# says 2019 — confirm this is the intended gold standard.
topics_all = utils.get_topics('./topics/topics2017.xml')
qrels_all = utils.get_qrels('./gold-standard/abstracts.2017.qrels')

# Sample investigation of missing documents in a topic
Note: Remember that in the 2019 collection the extra abstracts are not considered, so these experiments will give different results than in 2018.

In [4]:
# Topic number under investigation; later cells read TOPIC as well.
TOPIC = 30

# Restrict the topics frame to that topic, then fetch the judgements that
# utils.qrels_of_topics returns for the selection.
topic = topics_all[topics_all.topic == TOPIC]
qrels = utils.qrels_of_topics(qrels_all, topic)
topic

Unnamed: 0,topic,disease,gene,gene1,gene2,gene3,sex,age,age_group
30,30,Pancreatic adenocarcinoma,"RB1, TP53, KRAS",RB1,TP53,KRAS,female,57,aged


In [5]:
# Run the query template from the 'submitted' folder on the selected topic only.
run_params = dict(
    run_id='imi_mug_abst',
    query_template='submitted/imi_mug_abs1.json',
)

# running.run hands back a (run, params) pair; the second element rebinds
# run_params.
run, run_params = running.run(topic, 'ABSTRACTS', run_params)

# Evaluate only against judgements whose `relev` value is 2.
# NOTE(review): presumably 2 is the highest relevance grade — confirm.
results, aggregated = evaluation.evaluate(qrels[qrels.relev.isin([2])], run)
aggregated

RUN: imi_mug_abst TOPICS: 1


{'recall_1000': 0.5094, 'ndcg': 0.3908, 'Rprec': 0.2264, 'P_10': 0.2}

Check which relevant documents were missed, and why (here for topic 30, the value of `TOPIC`).

In [6]:
# IDs the run returned for TOPIC, and IDs judged with relev == 2.
run_set = set(run[run.TOPIC_NO == TOPIC].ID)
qrels_set = set(qrels[qrels.relev.isin([2])].doc_id)

# Judged documents that are absent from the run.
qrels_set - run_set

{'10029438',
 '10076772',
 '11102889',
 '11180876',
 '11753042',
 '12174924',
 '12175546',
 '15069678',
 '15473338',
 '15608367',
 '16170025',
 '16995472',
 '1983826',
 '21894048',
 '21945955',
 '23344532',
 '25216706',
 '25823825',
 '26590425',
 '26940582',
 '27183870',
 '27281208',
 '27461834',
 '27571409',
 '27864333',
 '7559078',
 '7642971',
 '7874757',
 '7902444',
 '8178941',
 '8283078',
 '9209954',
 '9241070',
 '9626473',
 '9658319',
 '9661918',
 '9820739',
 '9888666',
 'AACR_2012-1829',
 'AACR_2014-4455',
 'AACR_2015-4741',
 'AACR_2015-5240',
 'AACR_2016-1254',
 'AACR_2016-3018',
 'AACR_2017-1391',
 'ASCO_122516-143',
 'ASCO_170697-176',
 'ASCO_188862-199',
 'ASCO_193758-199',
 'ASCO_33977-65',
 'ASCO_54010-74',
 'ASCO_88462-115'}

# Sample runs

In [9]:
%%time
# Parameters for a run over all topics with the 'baseline_sex_age' template.
# NOTE(review): the tie_breaker / multi_match_type / boost names look like
# Elasticsearch multi_match settings filled into the template — confirm.
run_params = dict(
    run_id='DEFAULT_RUN',
    query_template='variable/baseline_sex_age.json',
    disease_tie_breaker=0.4,
    disease_multi_match_type='best_fields',
    disease_boost=1.5,
    gene_tie_breaker=0.4,
    gene_multi_match_type='cross_fields',
    gene_boost=1,
)

# Execute against the ABSTRACTS collection and score with the full qrels.
run, params = running.run(topics_all, 'ABSTRACTS', run_params)
results, aggregated = evaluation.evaluate(qrels_all, run)

aggregated

RUN: DEFAULT_RUN TOPICS: 30


{'recall_1000': 0.5366, 'ndcg': 0.4731, 'Rprec': 0.2392, 'P_10': 0.4467}

In [10]:
%%time
# Same settings as the previous run cell except for the query template
# ('baseline_sex_age2').
# NOTE(review): run_id is still 'DEFAULT_RUN', identical to the previous
# cell — confirm the two runs are not meant to be told apart by id.
run_params = dict(
    run_id='DEFAULT_RUN',
    query_template='variable/baseline_sex_age2.json',
    disease_tie_breaker=0.4,
    disease_multi_match_type='best_fields',
    disease_boost=1.5,
    gene_tie_breaker=0.4,
    gene_multi_match_type='cross_fields',
    gene_boost=1,
)

# Execute against the ABSTRACTS collection and score with the full qrels.
run, params = running.run(topics_all, 'ABSTRACTS', run_params)
results, aggregated = evaluation.evaluate(qrels_all, run)

aggregated

RUN: DEFAULT_RUN TOPICS: 30
CPU times: user 7.19 s, sys: 161 ms, total: 7.35 s
Wall time: 1min 16s


## Experiment and do grid search

In [5]:
# Parameter grid: every key maps to a list of candidate values. Only
# gene_tie_breaker has more than one value here, and the recorded output
# reports 2 runs for this grid.
experiment_params_grid = dict(
    query_template=['variable/baseline_sex_age_all_fields.json'],
    disease_tie_breaker=[0.4],
    disease_multi_match_type=['best_fields'],
    disease_boost=[1.5],
    gene_tie_breaker=[0.4, 0.5],
    gene_multi_match_type=['cross_fields'],
    gene_boost=[1],
)

# Run the experiment over all topics; the result is displayed as a table
# with one row per run.
results = running.experiment(topics_all, qrels_all, 'ABSTRACTS', experiment_params_grid)
results

EXPERIMENT BEGIN: 2018-08-05 17:25:43.713796
RUNS: 2
1
RUN: variable/baseline_sex_age_all_fields.json-0.4-best_fields-1.5-0.4-cross_fields-1 TOPICS: 30
('variable/baseline_sex_age_all_fields.json', 0.5409, 0.5033, 0.2688, '0.4', 'best_fields', '1.5', '0.4', 'cross_fields', '1')
2
RUN: variable/baseline_sex_age_all_fields.json-0.4-best_fields-1.5-0.5-cross_fields-1 TOPICS: 30
('variable/baseline_sex_age_all_fields.json', 0.5375, 0.4833, 0.2703, '0.4', 'best_fields', '1.5', '0.5', 'cross_fields', '1')
EXPERIMENT END: 2018-08-05 17:26:23.565487


Unnamed: 0,template,ndcg,P_10,Rprec,dis_tb,dis_mm_type,dis_b,gene_tb,gene_mm_type,gene_b
0,variable/baseline_sex_age_all_fields.json,0.5409,0.5033,0.2688,0.4,best_fields,1.5,0.4,cross_fields,1
1,variable/baseline_sex_age_all_fields.json,0.5375,0.4833,0.2703,0.4,best_fields,1.5,0.5,cross_fields,1


## Load and split TOPICS and GOLD STANDARD from last year (30 topics)
Also, split them into training, test, and development sets.

In [6]:
# Partition the topics into train / test / dev frames, then partition the
# qrels using those same three topic frames.
# NOTE(review): if split_topics samples randomly, confirm it is seeded so the
# split is reproducible across kernel restarts.
topics_train, topics_test, topics_dev = utils.split_topics(topics_all)
qrels_train, qrels_test, qrels_dev = utils.split_qrels(qrels_all, topics_train, topics_test, topics_dev)

In [7]:
# Sanity check: for each split, the topic ids in the topics frame must be
# exactly the topic ids in the matching qrels frame.
# set() consumes the column directly, so no list comprehension is needed and
# the notebook-level name `topic` is no longer reused as a loop variable.
# Each assertion carries a message so a failure names the offending split.
assert set(topics_train['topic']) == set(qrels_train['topic']), \
    'train split: topics and qrels cover different topic ids'
assert set(topics_test['topic']) == set(qrels_test['topic']), \
    'test split: topics and qrels cover different topic ids'
assert set(topics_dev['topic']) == set(qrels_dev['topic']), \
    'dev split: topics and qrels cover different topic ids'

## FIXME: Test run with default_params

In [8]:
# Run the 'submitted/baseline_sex_age' template on the training topics only.
run_params = dict(
    run_id='mugctbase1',
    query_template='submitted/baseline_sex_age.json',
)

# Keep the returned parameters so a later run can reuse them.
training_run_df, training_run_params = running.run(topics_train, 'ABSTRACTS', run_params)

RUN: mugctbase1 TOPICS: 12


In [9]:
# Score the training run against the training qrels.
training_results, training_aggregated = evaluation.evaluate(qrels_train, training_run_df)

# Wrap the aggregated metrics in a Series named after the split and its
# number of topics, e.g. 'training (12 topics)'.
training_label = 'training ({} topics)'.format(len(topics_train))
training_score = pandas.Series(training_aggregated, name=training_label)

In [10]:
# Query the test topics with the parameters returned by the training run
# (training_run_params), so both splits are run with the same configuration.
test_run_df, test_run_params = running.run(topics_test, 'ABSTRACTS', training_run_params)

RUN: mugctbase1 TOPICS: 9


In [11]:
# Display the aggregated metrics of the training run.
training_score

P_10           0.4417
Rprec          0.2416
ndcg           0.5008
recall_1000    0.5857
Name: training (12 topics), dtype: float64

In [12]:
# Score the test run against the test qrels.
test_results, test_aggregated = evaluation.evaluate(qrels_test, test_run_df)

# Wrap the aggregated metrics in a Series named after the split and its
# number of topics, e.g. 'test (9 topics)'.
test_label = 'test ({} topics)'.format(len(topics_test))
test_score = pandas.Series(test_aggregated, name=test_label)

In [13]:
# Put both score Series side by side in one table: one row per split (row
# labels are the Series names), one column per metric.
pandas.DataFrame([training_score, test_score])

Unnamed: 0,P_10,Rprec,ndcg,recall_1000
training (12 topics),0.4417,0.2416,0.5008,0.5857
test (9 topics),0.5556,0.3077,0.5622,0.5921
