### Get info using biospecimen (BS) ID

In [4]:
def query_dataservice_bs_id(url, bs_id, bs_attrs, pt_attrs, dx_attrs):
    """Look up one biospecimen and return its fields as a list of strings.

    Args:
        url: Base URL of the dataservice, without a trailing slash.
        bs_id: Biospecimen ID; a trailing newline (as read from a file) is
            stripped before use.
        bs_attrs: Keys to read from the biospecimen's ``results`` object.
        pt_attrs: Keys to read from the linked participant's ``results``
            object.
        dx_attrs: Keys to read from each linked diagnosis. When empty, the
            diagnoses endpoint is not queried at all.

    Returns:
        A ``(bs_id, fields)`` tuple. ``fields`` holds the participant
        ``kf_id`` followed by the requested biospecimen, participant and
        diagnosis values, in that order. If the service reports status code
        404, ``fields`` contains only the service's status message.

    Raises:
        KeyError: If a requested attribute or expected link is missing from
            a response.
    """
    bs_id = bs_id.rstrip('\n')
    # Parse each response once instead of calling .json() on every access.
    bs_json = request('GET', url + '/biospecimens/' + bs_id).json()
    if bs_json['_status']['code'] == 404:
        sys.stderr.write(bs_id + ' not found!\n')
        return bs_id, [bs_json['_status']['message']]

    links = bs_json['_links']
    bs_fields = bs_json['results']
    pt_fields = request('GET', url + links['participant']).json()['results']

    result = [pt_fields['kf_id']]
    # Every value is stringified so the caller can '\t'.join() the row even
    # when the service returns numbers or booleans; None becomes 'NULL'.
    for fields, attrs in ((bs_fields, bs_attrs), (pt_fields, pt_attrs)):
        for attr in attrs:
            value = fields[attr]
            result.append('NULL' if value is None else str(value))

    if dx_attrs:
        dx_results = request('GET', url + links['diagnoses']).json()['results']
        # A biospecimen may have several diagnoses: each attribute becomes
        # one comma-separated column.
        for attr in dx_attrs:
            result.append(','.join(str(dx[attr]) for dx in dx_results))

    return bs_id, result
    

### Query dataservice by pt ID

In [5]:
def query_dataservice_pt_id(url, pt_id, bs_attrs, pt_attrs, dx_attrs, out_fh):
    """Write one tab-separated row per biospecimen of a participant.

    Args:
        url: Base URL of the dataservice, without a trailing slash.
        pt_id: Participant ID, already stripped of whitespace.
        bs_attrs: Keys to read from each biospecimen record.
        pt_attrs: Keys to read from the participant's ``results`` object.
        dx_attrs: Keys to read from each diagnosis linked to a biospecimen.
            When empty, the diagnoses endpoint is not queried.
        out_fh: Writable text file object that receives the rows.

    Each row is: biospecimen ``kf_id``, participant ``kf_id``, the requested
    biospecimen values, the requested participant values, then one
    comma-joined column per diagnosis attribute.

    Raises:
        KeyError: If a requested attribute or expected link is missing from
            a response.
    """
    pt_json = request('GET', url + '/participants/' + pt_id).json()
    # Mirror the not-found handling of query_dataservice_bs_id instead of
    # failing later on a missing '_links' key.
    if pt_json.get('_status', {}).get('code') == 404:
        sys.stderr.write(pt_id + ' not found!\n')
        return

    pt_fields = pt_json['results']
    # Participant columns are identical for every biospecimen: build once.
    pt_values = [
        'NULL' if pt_fields[attr] is None else str(pt_fields[attr])
        for attr in pt_attrs
    ]

    bs_list = request('GET', url + pt_json['_links']['biospecimens']).json()
    for bs_info in bs_list['results']:
        row = [bs_info['kf_id'], pt_fields['kf_id']]
        # str() is required here: non-string values would otherwise make
        # '\t'.join() raise TypeError.
        row.extend(
            'NULL' if bs_info[attr] is None else str(bs_info[attr])
            for attr in bs_attrs
        )
        row.extend(pt_values)
        if dx_attrs:
            dx_url = url + bs_info['_links']['diagnoses']
            dx_results = request('GET', dx_url).json()['results']
            for attr in dx_attrs:
                row.append(','.join(str(dx[attr]) for dx in dx_results))
        out_fh.write('\t'.join(row) + '\n')
                                     
    

## Set up imports, query attributes and output file

In [12]:
import sys
from requests import request
import concurrent.futures

# Input file with one ID per line (or tab-separated pairs for the cram cell).
fname = '/Users/brownm28/Documents/2018-Oct-23_peddy_run/project_summaries/investigator_reports/schiffman/bs_ids_to_search.txt'
file_list = fname

# Attributes to pull from each record type; they become output columns in
# this order.
bs_attrs = ['external_aliquot_id', 'analyte_type', 'source_text_tissue_type', 'composition', 'external_sample_id','age_at_event_days']
pt_attrs = ['external_id', 'gender', 'ethnicity', 'race']
# Leave empty to skip diagnosis lookups; swap in the line below to add them.
dx_attrs = []
# dx_attrs = ['source_text_diagnosis', 'source_text_tumor_location']
url = 'https://kf-api-dataservice.kidsfirstdrc.org'

# Left open on purpose: the "get by bs id" cell writes the rows and closes it.
out_fh = open('/Users/brownm28/Documents/2018-Oct-23_peddy_run/project_summaries/investigator_reports/schiffman/bs_info.txt', 'w')

# The whole header goes to the output file. Previously the diagnosis column
# names were sent to stdout, leaving the file header short.
header_cols = ['BS_ID', 'PT_ID'] + bs_attrs + pt_attrs + dx_attrs
out_fh.write('\t'.join(header_cols) + '\n')

    

1

## Get by biospecimen (BS) ID

In [13]:

# Query every biospecimen ID in `file_list` concurrently and write one row per
# ID to `out_fh`. Rows are written in completion order, not input order.
progress_every = 10  # emit a progress line after this many rows
max_threads = 40     # worker threads issuing requests

# Read the IDs up front so the input file is closed promptly; blank lines are
# skipped so they are not submitted as empty IDs.
with open(file_list) as id_fh:
    bs_ids = [line.rstrip('\n') for line in id_fh if line.strip()]

try:
    with concurrent.futures.ThreadPoolExecutor(max_threads) as bs_exec:
        bs_results = [
            bs_exec.submit(query_dataservice_bs_id, url, bs_id, bs_attrs, pt_attrs, dx_attrs)
            for bs_id in bs_ids
        ]
        completed = concurrent.futures.as_completed(bs_results)
        for n_done, bs_result in enumerate(completed, start=1):
            # .result() re-raises any exception raised inside the worker.
            (cur_bs_id, bs_info) = bs_result.result()
            out_fh.write(cur_bs_id + '\t' + '\t'.join(bs_info) + '\n')
            # Report only after the row is actually written.
            if n_done % progress_every == 0:
                sys.stderr.write('Processed ' + str(n_done) + ' bs ids\n')
finally:
    # Close the output file even if a query failed part-way through.
    out_fh.close()
sys.stderr.write('Done!')

Processed 10 bs ids
Processed 20 bs ids
Processed 30 bs ids
Done!

## Get by pt id

In [6]:
# Query each participant ID listed in `file_list` sequentially and write its
# biospecimen rows to the output file. No header line is written here.
# The `with` statement closes both files even if a query raises.
with open('/Users/brownm28/Documents/2018-Aug-15_cbttc_launch/portal_metadata/check_and_balances/audit/PHGG_PATCH/pt_to_patch.txt', 'w') as out_fh, \
        open(file_list) as id_fh:
    for pt_id in id_fh:
        pt_id = pt_id.strip()
        if not pt_id:
            # A blank line would otherwise be queried as an empty ID.
            continue
        query_dataservice_pt_id(url, pt_id, bs_attrs, pt_attrs, dx_attrs, out_fh)
sys.stderr.write('Completed')


## Get info using cram / biospecimen (BS) ID pairs

In [None]:
# For each "<cram>\t<bs_id>" line in `file_list`, look up the biospecimen and
# print its fields to stdout with the cram name appended as the last column.
with open(file_list) as pair_fh:
    for cram_id_pair in pair_fh:
        cram_id_pair = cram_id_pair.rstrip('\n')
        if not cram_id_pair:
            # A blank line cannot be unpacked into a (cram, bs_id) pair.
            continue
        (cram, bs_id) = cram_id_pair.split('\t')
        sys.stderr.write(cram + '\t' + bs_id + '\n')
        # NOTE(review): the original called `query_dataservice`, which is not
        # defined in this notebook; `query_dataservice_bs_id` is assumed to be
        # the intended lookup. It returns (bs_id, fields) and also needs
        # dx_attrs. Confirm against the notebook history.
        (_, bs_info) = query_dataservice_bs_id(url, bs_id, bs_attrs, pt_attrs, dx_attrs)
        sys.stdout.write(bs_id + '\t' + '\t'.join(bs_info) + '\t' + cram + '\n')