In [5]:
import requests
import pandas as pd

# Get percent of phenotype category annotated
#
# Solr (GOlr) select endpoint that the facet query below is sent to.
# NOTE(review): this points at the *dev* host — confirm that is intended
# before relying on the counts.
solr_url = 'https://solr-dev.monarchinitiative.org/solr/golr/select'

# Closure term ID -> human-readable column label.
# Keys are compared against the `object_closure` facet values returned by
# Solr; values become the column headers of the species-by-category table.
categories = {
    'UPHENO:0001001': 'Phenotype',
    'UBERON:0001016PHENOTYPE': 'Nervous System Phenotype',
    'UBERON:0000949PHENOTYPE': 'Endocrine System Phenotype',
    'UBERON:0001009PHENOTYPE': 'Circulatory System Phenotype',
    'UBERON:0001007PHENOTYPE': 'Digestive System Phenotype',
    'UBERON:0002405PHENOTYPE': 'Immune System Phenotype',
    'UBERON:0002416PHENOTYPE': 'Integumental System Phenotype',
    'UBERON:0002204PHENOTYPE': 'Musculoskeletal System Phenotype',
    'UBERON:0000990PHENOTYPE': 'Reproductive System Phenotype',
    'UBERON:0001004PHENOTYPE': 'Respiratory System Phenotype',
    'UBERON:0000915PHENOTYPE': 'Thoracic Segment of Trunk Phenotype',
}

# Behavior-related closure term IDs (not referenced by the cells shown here).
# The original list contained 'NBO:0000313PHENOTYPE' twice; the duplicate is
# dropped so each term is counted at most once.
# NOTE(review): the repeated entry may have been meant to be a different
# term ID — confirm against the ontology.
behavior_categories = ['GO:0007610PHENOTYPE', 'NBO:0000313PHENOTYPE']

# Query-string parameters for the Solr select request.
# No documents are fetched (rows=0); only facet counts are requested,
# pivoted on phenotype closure term and then on subject taxon label.
params = {
    "q": "*:*",                      # match every document
    "facet.limit": "3000",
    "indent": "on",
    "facet.pivot": "object_closure,subject_taxon_label",
    "facet.method": "enum",
    # Restrict to gene -> phenotype associations.
    "fq": [
        "subject_category:gene",
        "object_category:phenotype",
    ],
    "facet.mincount": "1",           # omit buckets with a zero count
    "rows": "0",
    "facet": "true",
    "wt": "json",                    # response is parsed as JSON below
    "facet.sort": "count",
}

# Takes a minute.
# A generous timeout is set so a stalled connection cannot hang the kernel
# forever while still leaving room for the slow facet query.
solr_request = requests.get(solr_url, params=params, timeout=600)

# Fail here, with the HTTP status in the message, instead of later with a
# JSON decode error or a KeyError on the parsed body.
solr_request.raise_for_status()
response = solr_request.json()

# Count of g2p assocs
response['response']['numFound']

738306

In [12]:
# Row labels for the summary table: the taxon labels nested under the first
# bucket of the object_closure -> subject_taxon_label pivot.
# NOTE(review): this presumes the first bucket covers every taxon of
# interest (buckets are requested sorted by count) — confirm.
taxon_pivot = response['facet_counts']['facet_pivot']['object_closure,subject_taxon_label']
rows = [bucket['value'] for bucket in taxon_pivot[0]['pivot']]

rows

['Drosophila melanogaster',
 'Mus musculus',
 'Homo sapiens',
 'Danio rerio',
 'Caenorhabditis elegans',
 'Bos taurus',
 'Rattus norvegicus',
 'Sus scrofa',
 'Canis lupus familiaris',
 'Gallus gallus',
 'Felis catus',
 'Equus caballus',
 'Ovis aries',
 'Saccharomyces cerevisiae',
 'Capra hircus',
 'Phoenicopterus ruber',
 'Mustela putorius furo',
 'Oryctolagus cuniculus',
 'Numida meleagris',
 'Macaca mulatta',
 'Neovison vison',
 'Oryzias latipes',
 'Meleagris gallopavo',
 'Mesocricetus auratus',
 'Bubalus bubalis',
 'Vulpes vulpes',
 'Acinonyx jubatus',
 'Anser caerulescens caerulescens',
 'Aspidoscelis inornata',
 'Bos grunniens',
 'Chaetodipus intermedius',
 'Coereba flaveola',
 'Coturnix japonica',
 'Escherichia coli',
 'Herpailurus yaguarondi',
 'Holbrookia maculata',
 'Mammuthus primigenius',
 'NCBITaxon:2387',
 'Panthera onca',
 'Panthera tigris tigris',
 'Peromyscus polionotus',
 'Sciurus carolinensis',
 'Ursus americanus',
 'Vulpes lagopus']

In [23]:
# Species-by-category table of association counts: one row per taxon label,
# one column per category label. Cells never assigned below stay empty (NaN).
facet_table = pd.DataFrame(index=rows, columns=list(categories.values()))

for field in response['facet_counts']['facet_pivot']['object_closure,subject_taxon_label']:
    # Skip closure terms that are not one of the tracked categories.
    if field['value'] not in categories:
        continue
    column_label = categories[field['value']]
    # Each nested pivot entry carries a taxon label and its association count.
    for species in field['pivot']:
        facet_table.at[species['value'], column_label] = species['count']

facet_table

Unnamed: 0,Circulatory System Phenotype,Immune System Phenotype,Thoracic Segment of Trunk Phenotype,Reproductive System Phenotype,Respiratory System Phenotype,Phenotype,Digestive System Phenotype,Nervous System Phenotype,Integumental System Phenotype,Musculoskeletal System Phenotype,Endocrine System Phenotype
Drosophila melanogaster,1916.0,,,10118.0,1752.0,215543,2911.0,31617.0,23680.0,3494.0,186.0
Mus musculus,39245.0,30071.0,12338.0,12617.0,5285.0,205662,13594.0,35215.0,9273.0,35508.0,10519.0
Homo sapiens,27804.0,21892.0,14178.0,8694.0,11426.0,195166,22580.0,51286.0,15906.0,51584.0,12369.0
Danio rerio,8569.0,2534.0,3361.0,249.0,2482.0,52925,4797.0,12757.0,5121.0,7872.0,1604.0
Caenorhabditis elegans,,,,10537.0,,52629,282.0,641.0,34.0,939.0,
Bos taurus,11.0,,2.0,3.0,,141,9.0,33.0,14.0,33.0,3.0
Rattus norvegicus,177.0,40.0,26.0,21.0,7.0,1687,33.0,130.0,18.0,56.0,37.0
Sus scrofa,5.0,6.0,,2.0,,25,,8.0,2.0,5.0,1.0
Canis lupus familiaris,49.0,39.0,13.0,13.0,9.0,487,37.0,195.0,23.0,78.0,12.0
Gallus gallus,1.0,4.0,,2.0,,18,,1.0,3.0,3.0,1.0


In [None]:
# Save the species-by-category count table as a tab-separated file in the
# current working directory.
facet_table.to_csv("./facet_counts.tsv", sep="\t")