# Workflow 2, Module 1A

## Phenotypic similarity

Currently, we don't have gene-phenotype associations, so we will instead look for diseases that are phenotypically similar to Fanconi anemia (FA), and then get the genes enriched for those diseases.

### Set up functions

In [7]:
# Host name interpolated into every ROBOKOP API URL built by the helpers below.
robokop_server = "robokop.renci.org"

In [13]:
import requests
import pandas as pd
from IPython.core.display import display, HTML


In [9]:
def enrichment(type1,identlist,type2,threshhold=None,maxresults=None,numtype1=None,include_descendants=None,rebuild=None):
    """POST a list of identifiers to the ROBOKOP ``enriched`` endpoint.

    The URL is built from ``robokop_server``, ``type1`` and ``type2``; the
    identifiers and any optional settings are sent as a JSON body.  Options
    left as ``None`` are omitted from the body entirely.

    Parameters
    ----------
    type1, type2 : str
        Path segments of the endpoint URL (this notebook uses ``'disease'``
        and ``'gene'``).
    identlist : list
        Sent under the ``identifiers`` key.
    threshhold, maxresults, numtype1, include_descendants, rebuild : optional
        Forwarded under the keys ``threshhold``, ``maxresults``,
        ``num_type1``, ``include_descendants`` and ``rebuild``.  The
        ``threshhold`` spelling is what is sent to the server.

    Returns
    -------
    The decoded JSON body when the HTTP status is 200, otherwise ``[]``.
    The status code is printed in either case.
    """
    endpoint = f'http://{robokop_server}/api/simple/enriched/{type1}/{type2}'
    candidate_fields = (
        ('threshhold', threshhold),
        ('maxresults', maxresults),
        ('num_type1', numtype1),
        ('identifiers', identlist),
        ('include_descendants', include_descendants),
        ('rebuild', rebuild),
    )
    # Only send the options the caller actually supplied.
    payload = {}
    for field, value in candidate_fields:
        if value is not None:
            payload[field] = value
    reply = requests.post(endpoint, json=payload)
    print( f'Return Status: {reply.status_code}' )
    if reply.status_code != 200:
        return []
    return reply.json()

In [10]:
def similarity(type1,ident,type2,by_type,threshhold=None,maxresults=None,rebuild=None):
    """GET the ROBOKOP ``similarity`` endpoint for a single identifier.

    The URL is built from ``robokop_server`` and the four positional
    arguments; optional settings are sent as query parameters, with any
    left as ``None`` omitted.

    Parameters
    ----------
    type1, type2, by_type : str
        Path segments of the endpoint URL (this notebook uses ``'disease'``,
        ``'disease'`` and ``'phenotypic_feature'``).
    ident : str
        Identifier placed in the URL path, e.g. ``'MONDO:0019391'``.
    threshhold, maxresults, rebuild : optional
        Forwarded as query parameters under those exact names.  The
        ``threshhold`` spelling is what is sent to the server.

    Returns
    -------
    The decoded JSON body when the HTTP status is 200, otherwise ``[]``.
    The status code is printed in either case.
    """
    url=f'http://{robokop_server}/api/simple/similarity/{type1}/{ident}/{type2}/{by_type}'
    params = { 'threshhold': threshhold, 'maxresults': maxresults, 'rebuild': rebuild }
    params = { k:v for k,v in params.items() if v is not None }
    response=requests.get(url, params = params)
    print( 'Return code:',response.status_code )
    # Mirror enrichment(): only decode the body on success, so an error page
    # or error payload is never handed to the caller as if it were results.
    if response.status_code == 200:
        return response.json()
    return []

### Phenotypic similarity to FA

In [6]:
# MONDO identifier used for FA throughout this notebook.
FA = 'MONDO:0019391'
similar_diseases = pd.DataFrame(
    similarity(type1='disease', ident=FA, type2='disease', by_type='phenotypic_feature')
)
similar_diseases

Return code: 200


Unnamed: 0,id,name,similarity
0,MONDO:0001713,inherited aplastic anemia,0.741036
1,MONDO:0012197,idiopathic aplastic anemia,0.490765
2,MONDO:0015610,acquired aplastic anemia,0.455882
3,MONDO:0019747,hematological disorder with renal involvement,0.444976


## Genes enriched for those diseases

In [7]:
# Use the ids of the similar diseases as the input set for gene enrichment.
diseases = [similar_id for similar_id in similar_diseases['id']]
generes = enrichment(
    type1='disease',
    identlist=diseases,
    type2='gene',
    include_descendants=True,
)
genes = pd.DataFrame(generes)
genes

Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:11824,TINF2,5.513774e-09
1,HGNC:4824,HBA2,6.675760e-07
2,HGNC:4823,HBA1,6.675760e-07
3,HGNC:4827,HBB,1.209357e-06
4,HGNC:11730,TERT,1.183441e-05
5,HGNC:26054,SLC25A38,3.104781e-05
6,HGNC:8609,PARN,1.042034e-04
7,HGNC:14377,NHP2,1.422655e-04
8,HGNC:48,ABCB7,1.883451e-04
9,HGNC:25070,ACD,1.883451e-04


# Table 4 examples

In [21]:
def execute_example(disease_id,disease_name,nn="n3"):
    """Run the similarity-then-enrichment workflow for one disease and display it.

    Renders ``disease_name`` as an HTML heading, looks up diseases similar to
    ``disease_id`` by phenotypic feature, and, if any are found, displays the
    genes enriched for those diseases with pandas row/column limits lifted.

    Parameters
    ----------
    disease_id : str
        Identifier passed to ``similarity`` (e.g. ``'MONDO:0004979'``).
    disease_name : str
        Label shown in the heading; not sent to the server.
    nn : str, optional
        Unused by this function; kept so existing callers keep working.

    Returns
    -------
    None.  Prints ``No Similar diseases found`` and returns early when the
    similarity result has no ``id`` column or no rows.
    """
    display(HTML(f'<h3>{disease_name}</h3>'))
    similar_diseases = pd.DataFrame(similarity('disease',disease_id,'disease','phenotypic_feature'))
    # An empty result yields a frame without an 'id' column.  Check for that
    # explicitly instead of catching every exception, so real failures
    # (and KeyboardInterrupt) are not silently reported as "no results".
    if 'id' not in similar_diseases.columns or similar_diseases.empty:
        print("No Similar diseases found")
        return
    diseases = list(similar_diseases['id'])
    generes = enrichment('disease',diseases,'gene',include_descendants=True)
    genes=pd.DataFrame(generes)
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        display(genes)

In [14]:
execute_example(disease_id="MONDO:0004979", disease_name="Asthma")

Return code: 200
Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:1884,CFTR,5.237932e-09
1,HGNC:2950,DNAH5,1.560091e-08
2,HGNC:10602,SCNN1G,2.164233e-07
3,HGNC:10600,SCNN1B,2.164233e-07
4,HGNC:1301,CFAP298,2.555163e-07
5,HGNC:16725,LRRC6,2.555163e-07
6,HGNC:9198,POLR2K,2.555163e-07
7,HGNC:19412,ZMYND10,3.238517e-07
8,HGNC:10599,SCNN1A,3.418649e-07
9,HGNC:29937,CCDC65,3.627174e-07


In [15]:
execute_example(disease_id="MONDO:0005148", disease_name="type 2 diabetes mellitus")

Return code: 200
Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:8056,NUDT9,5.690289e-08
1,HGNC:3681,FGF3,1.996731e-06
2,HGNC:5466,IGF2,7.789932e-06
3,HGNC:12762,WFS1,1.331252e-05
4,HGNC:9602,PTGIR,1.343874e-05
5,NCBIGene:105375733,LOC105375733,3.639424e-05
6,HGNC:8086,OAS1,4.982505e-05
7,NCBIGene:105377142,LOC105377142,6.056239e-05
8,HGNC:24212,CISD2,6.483535e-05
9,HGNC:610,APOC3,7.389454e-05


In [22]:
execute_example(disease_id="MONDO:0005180", disease_name="parkinson disease")

Return code: 200
No Similar diseases found


In [23]:
execute_example(disease_id="MONDO:0005311", disease_name="atherosclerosis")

Return code: 200
Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:9602,PTGIR,5.566768e-07
1,NCBIGene:105375733,LOC105375733,4.61134e-06
2,NCBIGene:105377142,LOC105377142,7.68171e-06
3,HGNC:6667,LPA,8.603177e-06
4,HGNC:10721,SELP,1.177282e-05
5,HGNC:7876,NOS3,1.260743e-05
6,HGNC:1869,CETP,1.48014e-05
7,HGNC:6710,LTA4H,2.147642e-05
8,HGNC:30046,PINX1,2.147642e-05
9,HGNC:6547,LDLR,2.567468e-05


In [24]:
execute_example(disease_id="MONDO:0004975", disease_name="Alzheimers disease")

Return code: 200
Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:37,ABCA7,6.238273e-11
1,HGNC:11185,SORL1,3.420273e-10
2,HGNC:9508,PSEN1,3.795295e-10
3,HGNC:1059,BLMH,3.861069e-10
4,HGNC:8624,PAXIP1,3.861069e-10
5,HGNC:9509,PSEN2,1.318731e-09
6,HGNC:582,APBB2,3.401177e-09
7,HGNC:17158,PLD3,4.854269e-09
8,HGNC:620,APP,1.199998e-08
9,HGNC:188,ADAM10,4.695861e-08


In [25]:
execute_example(disease_id="MONDO:0019600", disease_name="xeroderma pigmentosum")

Return code: 200
Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:25458,MFSD11,2.054688e-13
1,HGNC:4657,GTF2H3,2.054688e-13
2,HGNC:3434,ERCC2,4.327184e-09
3,HGNC:3436,ERCC4,1.045346e-08
4,HGNC:1594,CCNH,6.429222e-07
5,HGNC:12830,XRCC3,8.650137e-07
6,HGNC:12828,XRCC1,1.020027e-06
7,HGNC:29814,MUS81,1.398077e-06
8,HGNC:21157,GTF2H5,1.593423e-06
9,HGNC:8125,OGG1,2.645064e-06


In [26]:
execute_example(disease_id="MONDO:0019391", disease_name="fanconi anemia")

Return code: 200
No Similar diseases found


In [27]:
execute_example(disease_id="MONDO:0008978", disease_name="Chordoma")

Return code: 200
Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:24059,TMPRSS11D,0.002089
1,HGNC:28449,C11orf95,0.004175
2,HGNC:12474,UBE2D1,0.004592
3,HGNC:17856,HACL1,0.007089
4,HGNC:7627,NAB2,0.009167
5,HGNC:11340,SS18,0.011241
6,HGNC:11368,STAT6,0.014138
7,HGNC:9277,PPM1D,0.017029
8,HGNC:23720,CREB3L2,0.027298
9,HGNC:11998,TP53,0.029747


In [28]:
execute_example(disease_id="MONDO:0010041", disease_name="ARSACS")

Return code: 200
Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:16877,MFN2,0.001278
1,HGNC:1237,UBQLN4,0.001755
2,HGNC:16944,RBM17,0.00234
3,HGNC:9828,RAE1,0.00234
4,HGNC:33279,ATXN1L,0.00234
5,HGNC:23156,U2AF2,0.002924
6,HGNC:12477,UBE2E1,0.007587
7,HGNC:4376,GMPR,0.00875
8,HGNC:24501,PLEKHG4,0.01339
9,HGNC:14214,CIC,0.015125


In [29]:
execute_example(disease_id="MONDO:0008263", disease_name="polycystic kidney disease 1")

Return code: 200
Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:17793,VTI1B,0.000251
1,HGNC:14068,HDAC5,0.000836
2,HGNC:4138,GANAB,0.001422
3,HGNC:30579,SSH1,0.001923
4,HGNC:17293,PRKD2,0.006021


In [30]:
execute_example(disease_id="MONDO:0016575", disease_name="primary ciliary dyskinesia")

Return code: 200
Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:32700,CCDC103,3e-06
1,HGNC:23247,DNAL1,3e-06
2,HGNC:18744,DNAI2,4e-06
3,HGNC:24245,DRC1,4e-06
4,HGNC:26560,CCDC114,4e-06
5,HGNC:28303,CCDC151,5e-06
6,HGNC:21057,RSPH9,5e-06
7,HGNC:20188,DNAAF2,5e-06
8,HGNC:26090,CCDC40,5e-06
9,HGNC:1301,CFAP298,5e-06


In [31]:
execute_example(disease_id="MONDO:0018911", disease_name="MODY")

Return code: 200
No Similar diseases found


In [32]:
execute_example(disease_id="MONDO:0002508", disease_name="gingivitis")

Return code: 200
Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:11369,STATH,3.008871e-07
1,HGNC:26956,INO80,4.507926e-07
2,HGNC:2528,CTSC,7.473105e-07
3,HGNC:6397,KPNA4,1.527537e-06
4,HGNC:16203,BPIFA2,4.33238e-06
5,NCBIGene:107986777,LOC107986777,9.346858e-06
6,HGNC:10874,SIGLEC5,3.117043e-05
7,HGNC:437,ALPI,4.825189e-05
8,HGNC:2755,DEDD,4.825189e-05
9,HGNC:3309,ELANE,8.316447e-05


In [33]:
execute_example(disease_id="MONDO:0004609", disease_name="HSV")

Return code: 200
Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:11935,CD40LG,3.955089e-07
1,HGNC:3380,EPB41L3,7.200823e-07
2,HGNC:30076,POLR3C,4.402961e-06
3,HGNC:1698,CD79A,5.089832e-06
4,HGNC:12017,TPR,7.028549e-06
5,HGNC:33742,CAVIN4,8.799539e-06
6,HGNC:10614,CCL16,2.196697e-05
7,HGNC:19288,ACMSD,5.260617e-05
8,HGNC:10903,SLAMF1,9.623512e-05
9,HGNC:3518,EXTL3,0.0003659717


In [34]:
execute_example(disease_id="MONDO:0004619", disease_name="Measles")

Return code: 200
Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:1698,CD79A,2.856692e-09
1,HGNC:11935,CD40LG,1.006285e-08
2,HGNC:6118,IRF3,3.414835e-06
3,HGNC:11849,TLR3,8.141739e-06
4,HGNC:3011,DPT,2.8454e-05
5,HGNC:5419,IFNA13,3.710342e-05
6,HGNC:465,AMHR2,6.359778e-05
7,HGNC:24711,MFSD6,8.986948e-05
8,HGNC:13328,IFIT5,0.0001154177
9,HGNC:5417,IFNA1,0.0001192925


In [35]:
execute_example(disease_id="MONDO:0002026", disease_name="Candidiasis")

Return code: 200
Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:14558,CLEC7A,1.753455e-16
1,HGNC:16391,CARD9,2.601346e-09
2,HGNC:13608,FBXW2,1.257788e-07
3,HGNC:18358,IL17RC,4.061354e-07
4,HGNC:16404,IL17F,2.098957e-06
5,HGNC:1343,TRAF3IP2,3.550553e-06
6,HGNC:14556,CLEC6A,4.357161e-06
7,HGNC:2649,CYP51A1,4.357161e-06
8,HGNC:11362,STAT1,6.678471e-06
9,HGNC:5985,IL17RA,1.637477e-05


In [36]:
execute_example(disease_id="MONDO:0018019", disease_name="Lead Poisoning")

Return code: 200
No Similar diseases found


In [37]:
execute_example(disease_id="MONDO:0001441", disease_name="Pica")

Return code: 200
No Similar diseases found


In [38]:
execute_example(disease_id="MONDO:0006689", disease_name="carcinoid syndrome ")

Return code: 200
Return Status: 200


Unnamed: 0,id,name,p
0,HGNC:819,ATP4A,0.005268
