In [2]:
import requests
import pandas as pd

# Percentage of gene-to-phenotype annotations that fall under each phenotype
# category, per species, derived from Monarch Solr (golr) facet counts.
solr_url = 'https://solr.monarchinitiative.org/solr/golr/select'

# object_closure term ID -> column label used in the tables built from the response.
# NOTE(review): the 'MP:0005378' label ends with a trailing space; it is kept
# as-is here because it becomes a column name downstream.
categories = {
   'UPHENO:0001001': 'Phenotype',
   'UBERON:0001016PHENOTYPE': 'Nervous System Phenotype',
   'UBERON:0000949PHENOTYPE': 'Endocrine System Phenotype',
   'UBERON:0001009PHENOTYPE': 'Circulatory System Phenotype', 
   'UBERON:0001007PHENOTYPE': 'Digestive System Phenotype',
   'UBERON:0002405PHENOTYPE': 'Immune System Phenotype',
   'UBERON:0002416PHENOTYPE': 'Integumental System Phenotype' ,
   'UBERON:0002204PHENOTYPE': 'Musculoskeletal System Phenotype',
   'UBERON:0000990PHENOTYPE': 'Reproductive System Phenotype',
   'UBERON:0001004PHENOTYPE': 'Respiratory System Phenotype',
   'UBERON:0000915PHENOTYPE': 'Thoracic Segment of Trunk Phenotype',
   'MP:0005376': 'Homeostasis/Metabolism Phenotype',
   'MP:0005378': 'Growth/Size/Body Region Phenotype '
}
# NOTE(review): 'NBO:0000313PHENOTYPE' appears twice and this list is not used
# in the cells visible here -- confirm the intended third ID before relying on it.
behavior_categories = ['GO:0007610PHENOTYPE', 'NBO:0000313PHENOTYPE', 'NBO:0000313PHENOTYPE',]

# rows=0: no documents are fetched, only the facet pivot of
# object_closure x subject_taxon_label, filtered to gene -> phenotype associations.
params = {
    'q': '*:*',
    'facet.limit': '3000',
    'indent': 'on',
    'facet.pivot': 'object_closure,subject_taxon_label',
    'facet.method': 'enum',
    'fq': ['subject_category:gene',
           'object_category:phenotype'],
    'facet.mincount': '1',
    'rows': '0',
    'facet': 'true',
    'wt': 'json',
    'facet.sort': 'count'
}

# Takes a minute, so use a generous read timeout (10 s to connect, 300 s to read)
# rather than no timeout at all, which would hang the kernel on a stalled connection.
solr_request = requests.get(solr_url, params=params, timeout=(10, 300))
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
solr_request.raise_for_status()
response = solr_request.json()

# Count of g2p assocs
response['response']['numFound']

735814

In [3]:
# Species labels (used as the row index of the tables below), read from the
# first object_closure entry of the facet pivot in the Solr response.
pivot_key = 'object_closure,subject_taxon_label'
first_closure = response['facet_counts']['facet_pivot'][pivot_key][0]
rows = [entry['value'] for entry in first_closure['pivot']]

rows

['Drosophila melanogaster',
 'Mus musculus',
 'Homo sapiens',
 'Caenorhabditis elegans',
 'Danio rerio',
 'Bos taurus',
 'Rattus norvegicus',
 'Sus scrofa',
 'Canis lupus familiaris',
 'Gallus gallus',
 'Felis catus',
 'Equus caballus',
 'Ovis aries',
 'Saccharomyces cerevisiae',
 'Capra hircus',
 'Phoenicopterus ruber',
 'Mustela putorius furo',
 'Oryctolagus cuniculus',
 'Numida meleagris',
 'Macaca mulatta',
 'Neovison vison',
 'Oryzias latipes',
 'Meleagris gallopavo',
 'Mesocricetus auratus',
 'Bubalus bubalis',
 'Vulpes vulpes',
 'Acinonyx jubatus',
 'Anser caerulescens caerulescens',
 'Aspidoscelis inornata',
 'Bos grunniens',
 'Chaetodipus intermedius',
 'Coereba flaveola',
 'Coturnix japonica',
 'Escherichia coli',
 'Herpailurus yaguarondi',
 'Holbrookia maculata',
 'Mammuthus primigenius',
 'NCBITaxon:2387',
 'Panthera onca',
 'Panthera tigris tigris',
 'Peromyscus polionotus',
 'Sciurus carolinensis',
 'Ursus americanus',
 'Vulpes lagopus']

In [4]:
# Species x category table of association counts. Cells that are never
# assigned below remain NaN.
category_labels = list(categories.values())
facet_table = pd.DataFrame(columns=category_labels, index=rows)

pivot_entries = response['facet_counts']['facet_pivot']['object_closure,subject_taxon_label']
for closure in pivot_entries:
    label = categories.get(closure['value'])
    if label is None:
        # Not one of the categories of interest.
        continue
    for taxon in closure['pivot']:
        facet_table.at[taxon['value'], label] = taxon['count']

facet_table

Unnamed: 0,Respiratory System Phenotype,Homeostasis/Metabolism Phenotype,Circulatory System Phenotype,Digestive System Phenotype,Phenotype,Growth/Size/Body Region Phenotype,Integumental System Phenotype,Endocrine System Phenotype,Nervous System Phenotype,Musculoskeletal System Phenotype,Immune System Phenotype,Thoracic Segment of Trunk Phenotype,Reproductive System Phenotype
Drosophila melanogaster,1752.0,,1916.0,2911.0,215543,,23680.0,186.0,31608.0,3494.0,,,10118.0
Mus musculus,5273.0,40398.0,39104.0,13578.0,205323,28168.0,9266.0,10508.0,35186.0,35303.0,30049.0,12237.0,12605.0
Homo sapiens,11299.0,20791.0,27424.0,22462.0,193732,51135.0,15815.0,12292.0,50498.0,51144.0,21759.0,14009.0,8608.0
Caenorhabditis elegans,,9469.0,,282.0,52629,2784.0,34.0,,641.0,939.0,,,10537.0
Danio rerio,2424.0,4537.0,8359.0,4707.0,52207,13403.0,5039.0,1598.0,12541.0,7732.0,2487.0,3277.0,252.0
Bos taurus,,13.0,11.0,9.0,141,33.0,14.0,3.0,33.0,33.0,,2.0,3.0
Rattus norvegicus,7.0,118.0,177.0,33.0,1687,43.0,18.0,37.0,130.0,55.0,40.0,26.0,21.0
Sus scrofa,,5.0,5.0,,25,2.0,2.0,1.0,8.0,5.0,6.0,,2.0
Canis lupus familiaris,9.0,80.0,49.0,37.0,487,96.0,23.0,12.0,195.0,78.0,39.0,13.0,13.0
Gallus gallus,,3.0,1.0,,18,2.0,3.0,1.0,1.0,3.0,4.0,,2.0


In [10]:
# Fraction of each species' phenotype annotations that fall under each category:
# every count is divided by the same row's value in the 'Phenotype' column.
# Computed as a single vectorized division instead of a per-cell Python loop.
# Casting to float first gives the result a numeric dtype; missing counts stay NaN.
category_counts = facet_table[list(categories.values())].astype(float)
ratio_table = category_counts.div(category_counts['Phenotype'], axis=0)

ratio_table

Unnamed: 0,Respiratory System Phenotype,Homeostasis/Metabolism Phenotype,Circulatory System Phenotype,Digestive System Phenotype,Phenotype,Growth/Size/Body Region Phenotype,Integumental System Phenotype,Endocrine System Phenotype,Nervous System Phenotype,Musculoskeletal System Phenotype,Immune System Phenotype,Thoracic Segment of Trunk Phenotype,Reproductive System Phenotype
Drosophila melanogaster,0.00812831,,0.00888918,0.0135054,1,,0.109862,0.000862937,0.146644,0.0162102,,,0.0469419
Mus musculus,0.0256815,0.196753,0.190451,0.06613,1,0.137189,0.0451289,0.0511779,0.171369,0.171939,0.14635,0.0595988,0.0613911
Homo sapiens,0.0583228,0.107318,0.141556,0.115944,1,0.263947,0.0816334,0.0634485,0.260659,0.263994,0.112315,0.0723112,0.0444325
Caenorhabditis elegans,,0.17992,,0.00535826,1,0.0528986,0.000646032,,0.0121796,0.0178419,,,0.200213
Danio rerio,0.0464306,0.0869041,0.160113,0.0901603,1,0.256728,0.0965196,0.0306089,0.240217,0.148103,0.0476373,0.0627694,0.00482694
Bos taurus,,0.0921986,0.0780142,0.0638298,1,0.234043,0.0992908,0.0212766,0.234043,0.234043,,0.0141844,0.0212766
Rattus norvegicus,0.00414938,0.0699467,0.10492,0.0195614,1,0.025489,0.0106698,0.0219324,0.0770599,0.0326023,0.0237107,0.015412,0.0124481
Sus scrofa,,0.2,0.2,,1,0.08,0.08,0.04,0.32,0.2,0.24,,0.08
Canis lupus familiaris,0.0184805,0.164271,0.100616,0.0759754,1,0.197125,0.0472279,0.0246407,0.400411,0.160164,0.0800821,0.026694,0.026694
Gallus gallus,,0.166667,0.0555556,,1,0.111111,0.166667,0.0555556,0.0555556,0.166667,0.222222,,0.111111


In [11]:
# Write the per-species ratio table as a tab-separated file in the working directory.
ratio_table.to_csv(path_or_buf="./facet_counts.tsv", sep='\t')