In [16]:
import requests
import logging

# Configure the root logger at INFO level so the progress messages emitted
# during the export are shown.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after the running module.
logger = logging.getLogger(__name__)


# Solr "select" endpoint that every export request is sent to.
SOLR_URL = 'https://solr.monarchinitiative.org/solr/golr/select'

# Output file stem -> list of Solr filter queries ("fq") restricting the
# subject and object categories of the exported associations.
file_filter_map = {
    '{}-{}'.format(subject_kind, object_kind): [
        'subject_category:"{}"'.format(subject_kind),
        'object_category:"{}"'.format(object_kind),
    ]
    for subject_kind, object_kind in (
        ('gene', 'disease'),
        ('gene', 'phenotype'),
        ('disease', 'phenotype'),
    )
}

# Document fields requested from Solr; also the column order of each TSV.
fields = [
    'subject',
    'subject_label',
    'subject_taxon',
    'subject_taxon_label',
    'object',
    'object_label',
    'relation',
    'relation_label',
    'evidence',
    'evidence_label',
]

# Base query parameters shared by every request: match all documents, return
# JSON, fetch only the listed fields, 1000 documents per page.  The paging
# offset ('start') and filters ('fq') are filled in by the export loop.
solr_params = dict(
    wt='json',
    q='*:*',
    fl=fields,
    rows=1000,
)

def _format_row(doc):
    """Render one Solr document as a tab-separated line (no trailing newline).

    Columns follow the order of the module-level ``fields`` list.  A field
    missing from ``doc`` becomes an empty cell, and a list value is flattened
    into a single cell with its items joined by ``|``.

    Args:
        doc: One entry of the ``docs`` list in a Solr JSON response.

    Returns:
        The row text, with exactly ``len(fields)`` tab-separated cells.
    """
    cells = []
    for field in fields:
        value = doc.get(field, "")
        if isinstance(value, list):
            # str() guards against non-string list members, which would make
            # a plain "|".join(value) raise TypeError.
            cells.append("|".join(str(item) for item in value))
        else:
            cells.append(str(value))
    # Joining (rather than appending a tab after every cell) keeps the data
    # rows at the same column count as the header line.
    return "\t".join(cells)


# Export each association type to its own "<name>.tsv" in the current
# directory, paging through the Solr result set 'rows' documents at a time.
for file_name, filt in file_filter_map.items():
    solr_params['start'] = 0
    solr_params['fq'] = filt
    # Seed the total with one page's worth so the first request is always
    # issued; the real total is read from every response below.
    result_count = solr_params['rows']

    # The context manager closes the file even if a request or JSON parse
    # fails part-way through the export.
    with open("./{}.tsv".format(file_name), 'w', encoding='utf-8') as out_file:
        out_file.write("\t".join(fields) + "\n")

        while solr_params['start'] < result_count:
            solr_request = requests.get(SOLR_URL, params=solr_params)
            # Fail loudly on an HTTP error instead of trying to parse an
            # error page as a result set.
            solr_request.raise_for_status()
            response = solr_request.json()
            result_count = response['response']['numFound']

            out_file.writelines(
                _format_row(doc) + "\n" for doc in response['response']['docs']
            )

            # Progress message every 100000 documents; the number reported is
            # the offset of the page that was just written.
            if solr_params['start'] % 100000 == 0:
                logger.info("Processed %s %s documents",
                            solr_params['start'], file_name)

            solr_params['start'] += solr_params['rows']



INFO:__main__:Processed 0 gene-phenotype documents
INFO:__main__:Processed 100000 gene-phenotype documents
INFO:__main__:Processed 200000 gene-phenotype documents
INFO:__main__:Processed 300000 gene-phenotype documents
INFO:__main__:Processed 400000 gene-phenotype documents
INFO:__main__:Processed 500000 gene-phenotype documents
INFO:__main__:Processed 600000 gene-phenotype documents
INFO:__main__:Processed 700000 gene-phenotype documents
INFO:__main__:Processed 0 gene-disease documents
INFO:__main__:Processed 0 disease-phenotype documents
INFO:__main__:Processed 100000 disease-phenotype documents
INFO:__main__:Processed 200000 disease-phenotype documents
