In [17]:
from pprint import pprint
import os
import sys
import pandas as pd
from BioLink.biolink_client import BioLinkWrapper

In [18]:
# Put the directory two levels above the current working directory on the
# import path (only once), so packages located there can be imported.
# NOTE(review): the `BioLink` import in the preceding cell executes before
# this path tweak on a fresh kernel -- confirm Restart & Run All still works.
module_path = os.path.abspath('../..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [8]:
# MVP1 workflow: start from one disease and collect its associated genes.
input_disease = 'MONDO:0019391'  # CURIE for Fanconi Anemia

# BioLink client; later cells reuse this same instance for their lookups.
blw = BioLinkWrapper()

# Ask BioLink for the genes associated with the input disease, then keep the
# gene CURIEs found under the 'objects' key of the response.
fa_gene_associations = blw.disease2genes(disease_curie=input_disease)
fa_gene_curies = fa_gene_associations['objects']

# Mod1A Human Functional Similarity Analysis

In [39]:
from Modules.Mod1A_functional_sim import FunctionalSim

In [40]:
# Human functional-similarity analysis (Mod1A) seeded with the FA gene set.
fsim = FunctionalSim()
fsim.load_gene_set(gene_set=fa_gene_curies)
fsim.load_associations(group='human')

# Compute the similarity hits and tabulate them for display; the frame is the
# cell's final expression so it renders as the cell output.
human_fsim_genes = fsim.compute_similarity()
human_fsim_genes_df = pd.DataFrame(data=human_fsim_genes)
human_fsim_genes_df

loading ontology -- this can take a while


Unnamed: 0,input_gene,input_gene_curie,sim_gene_name,sim_hit_curie,sim_score
0,SLX4,HGNC:23845,SLX1A,UniProtKB:Q9BQ83,0.760638
1,FANCA,HGNC:3582,FANCD2,UniProtKB:Q9BXW9,0.700461
2,FANCB,HGNC:3583,FAAP100,UniProtKB:Q0VG06,0.810345
3,FANCB,HGNC:3583,FAAP24,UniProtKB:Q9BTP7,0.903846
4,FANCB,HGNC:3583,FANCE,UniProtKB:Q9HB96,0.957447
5,FANCB,HGNC:3583,INIP,UniProtKB:Q9NRY2,0.862745
6,FANCD2,HGNC:3585,FANCA,UniProtKB:O15360,0.700461
7,FANCE,HGNC:3586,FAAP100,UniProtKB:Q0VG06,0.775862
8,FANCE,HGNC:3586,FANCB,UniProtKB:Q8NB91,0.957447
9,FANCE,HGNC:3586,FAAP24,UniProtKB:Q9BTP7,0.865385


# Mod1B Phenotype Similarity Analysis

In [31]:
from Modules.Mod1B_phenotype_similarity import PhenotypeSim

# Fetch the phenotypes BioLink associates with the input disease (FA) and
# keep the phenotype CURIEs found under the 'objects' key of the response.
fa_disease_associations = blw.disease2phenotypes(disease_curie=input_disease)
fa_phenotype_curies = fa_disease_associations['objects']

In [32]:
# Display the phenotype CURIEs retrieved for the input disease.
fa_phenotype_curies

['HP:0000010',
 'HP:0000027',
 'HP:0000028',
 'HP:0000047',
 'HP:0000054',
 'HP:0000072',
 'HP:0000077',
 'HP:0000081',
 'HP:0000083',
 'HP:0000085',
 'HP:0000086',
 'HP:0000089',
 'HP:0000104',
 'HP:0000107',
 'HP:0000125',
 'HP:0000126',
 'HP:0000135',
 'HP:0000164',
 'HP:0000175',
 'HP:0000218',
 'HP:0000238',
 'HP:0000252',
 'HP:0000268',
 'HP:0000286',
 'HP:0000297',
 'HP:0000316',
 'HP:0000324',
 'HP:0000340',
 'HP:0000347',
 'HP:0000365',
 'HP:0000369',
 'HP:0000414',
 'HP:0000431',
 'HP:0000453',
 'HP:0000463',
 'HP:0000470',
 'HP:0000483',
 'HP:0000486',
 'HP:0000505',
 'HP:0000508',
 'HP:0000518',
 'HP:0000520',
 'HP:0000568',
 'HP:0000581',
 'HP:0000582',
 'HP:0000601',
 'HP:0000639',
 'HP:0000789',
 'HP:0000813',
 'HP:0000815',
 'HP:0000824',
 'HP:0000864',
 'HP:0000953',
 'HP:0000957',
 'HP:0000978',
 'HP:0001000',
 'HP:0001009',
 'HP:0001017',
 'HP:0001045',
 'HP:0001053',
 'HP:0001172',
 'HP:0001199',
 'HP:0001249',
 'HP:0001251',
 'HP:0001263',
 'HP:0001347',
 'HP:00013

In [33]:
# Seed the Mod1B phenotype-similarity module with the FA phenotype CURIEs and
# run its search, requesting disease-level matches (return_type='disease').
phenosim = PhenotypeSim(phenotype_set=fa_phenotype_curies)
phenosim_disease_associations = phenosim.phenotype_simsearch(return_type='disease')

In [37]:
# Attach the associated gene CURIEs to every disease match, then tabulate.
# Each match record is annotated in place, so the entries held in
# phenosim_disease_associations['data'] also gain the 'genes' key.
phenosim_genes = []
for disease in phenosim_disease_associations['data']:
    # One BioLink lookup per matched disease, keyed by its 'matchId'.
    disease_genes = blw.disease2genes(disease_curie=disease['matchId'])
    disease.update(genes=disease_genes['objects'])
    phenosim_genes += [disease]
phenosim_genes_df = pd.DataFrame(phenosim_genes)

In [38]:
# Display the phenotypically similar diseases together with their gene lists.
phenosim_genes_df

Unnamed: 0,genes,matchId,matchLabel,percentageScore,rank,rawScore,score,significance
0,"[HGNC:1100, HGNC:1101, HGNC:11892, HGNC:12829,...",DOID:13636,Fanconi anemia,7719,12,77.192996,7719.299615,
1,"[HGNC:1100, HGNC:3582]",OMIM:227650,"Fanconi Anemia, Complementation Group a",5328,16,53.281557,5328.155713,
2,[HGNC:3584],OMIM:227645,"Fanconi anemia, complementation group C",5328,16,53.281557,5328.155713,
3,"[HGNC:28623, HGNC:3585]",OMIM:227646,"Fanconi Anemia, Complementation Group D2",5328,16,53.281557,5328.155713,
4,[HGNC:3586],OMIM:600901,"Fanconi Anemia, Complementation Group E",5328,16,53.281557,5328.155713,
5,[],OMIM:606895,"Symphalangism, Distal, With Microdontia, Denta...",5094,19,50.936417,5093.64165,
6,"[HGNC:16466, HGNC:30497, HGNC:3811, HGNC:4319]",OMIM:200990,acrocallosal syndrome,5042,21,50.417038,5041.703753,
7,"[HGNC:10301, HGNC:10306, HGNC:10310, HGNC:1032...",DOID:12449,aplastic anemia,4637,25,46.370548,4637.054834,
8,[HGNC:25009],OMIM:616435,"Fanconi anemia, complementation group T",4453,34,44.531556,4453.155616,
9,[HGNC:27230],OMIM:268300,Roberts syndrome,4452,35,44.519949,4451.994932,
