In [3]:
import phizz
import pandas as pd

In [5]:
# Load the genotype table and pull out its OMIM column for the disease lookups.
spg_genotypes = pd.read_csv('hpo_term_analysis/omim_codes.csv')
omim_codes = spg_genotypes['OMIM']

In [21]:
# Collect every HPO term (and its description) that phizz returns for the
# OMIM diseases listed in `omim_codes`.
hpo_terms = {
    'hpo_term': [],
    'description': []
}

prepend = 'OMIM:'

for omim in omim_codes:
    omim_number = str(omim).strip()
    # Rows without an OMIM number show up as NaN (rendered 'nan'); there is
    # nothing to look up for them.
    if pd.isna(omim) or omim_number == 'nan':
        continue
    # A numeric column that contains NaN is loaded as float, so 182600 is
    # rendered as '182600.0'. Remove exactly that suffix. Do NOT use
    # str.strip('.0'): it treats its argument as a set of characters and
    # would also eat zeros that belong to the number ('182600.0' -> '1826').
    if omim_number.endswith('.0'):
        omim_number = omim_number[:-2]
    omim = prepend + omim_number

    query_dict = phizz.query_disease([omim])
    for result in query_dict:
        hpo_terms['hpo_term'].append(result['hpo_term'])
        hpo_terms['description'].append(result['description'])

# The same term can be returned for several diseases; keep one row per term.
hpo_terms_df = pd.DataFrame(data=hpo_terms).drop_duplicates('hpo_term')
hpo_terms_df.head(10)

Unnamed: 0,hpo_term,description
0,HP:0003487,Babinski sign
1,HP:0003743,Genetic anticipation
2,HP:0001347,Hyperreflexia
3,HP:0003419,Low back pain
4,HP:0002166,Impaired vibration sensation in the lower limbs
5,HP:0000639,Nystagmus
6,HP:0003587,Insidious onset
7,HP:0010550,Paraplegia
8,HP:0000726,Dementia
9,HP:0003676,Progressive


In [27]:
# Read the PanelApp gene panel: one gene symbol per line, newline removed.
with open('gene_lists/panelapp_gene_list.txt', 'r') as f:
    gene_list = [line.strip('\n') for line in f.readlines()]

gene_list[:10]

['SPART',
 'WASHC5',
 'ADAR',
 'AFG3L2',
 'AIMP1',
 'ALDH18A1',
 'ALS2',
 'AP4B1',
 'AP4E1',
 'AP4M1']

In [42]:
# Empty gene x HPO-term grid: one row per gene in `gene_list`, one column per
# HPO term in `hpo_terms_df`. Cells start out unset and are filled in later.
hpo_term_columns = list(hpo_terms_df['hpo_term'])
gene_phenotype_matrix = pd.DataFrame(index=gene_list, columns=hpo_term_columns)
gene_phenotype_matrix

Unnamed: 0,HP:0003487,HP:0003743,HP:0001347,HP:0003419,HP:0002166,HP:0000639,HP:0003587,HP:0010550,HP:0000726,HP:0003676,...,HP:0000763,HP:0007083,HP:0000519,HP:0002500,HP:0003552,HP:0001760,HP:0003701,HP:0001123,HP:0002411,HP:0002486
SPART,,,,,,,,,,,...,,,,,,,,,,
WASHC5,,,,,,,,,,,...,,,,,,,,,,
ADAR,,,,,,,,,,,...,,,,,,,,,,
AFG3L2,,,,,,,,,,,...,,,,,,,,,,
AIMP1,,,,,,,,,,,...,,,,,,,,,,
ALDH18A1,,,,,,,,,,,...,,,,,,,,,,
ALS2,,,,,,,,,,,...,,,,,,,,,,
AP4B1,,,,,,,,,,,...,,,,,,,,,,
AP4E1,,,,,,,,,,,...,,,,,,,,,,
AP4M1,,,,,,,,,,,...,,,,,,,,,,


In [44]:
# Set up the HumanMine (InterMine) web-service client and fetch the saved
# 'Gene_Disease_HPO' template query that later cells run once per gene.
# NOTE(review): presumably both calls need network access to humanmine.org — confirm.
from intermine.webservice import Service
service = Service("http://www.humanmine.org/humanmine/service")
template = service.get_template('Gene_Disease_HPO')

In [None]:
# For every gene, run the template query with the gene as the LOOKUP value and
# record the second-to-last field of each result row.
# NOTE(review): presumably that field is the HPO term identifier — confirm
# against the template's column order.
present_hpo_terms = {}

for gene in gene_list:
    lookup_constraint = {"op": "LOOKUP", "value": gene, "extra_value": ""}
    present_hpo_terms[gene] = [
        list(row.itervalues())[-2]
        for row in template.rows(A=lookup_constraint)
    ]

"""
of type: 
{'gene_name': ['list', 'of', 'hpo_terms'],}
"""

In [90]:
# Fill the gene x HPO-term matrix: 1 where the term was reported for the gene
# in `present_hpo_terms`, 0 otherwise.
#
# Rows are written back through `.iloc` instead of assigning into the Series
# yielded by `iterrows()`: pandas documents that the yielded row may be a copy,
# in which case those writes never reach the DataFrame.
column_names = list(gene_phenotype_matrix.columns)

for row_position, gene_name in enumerate(gene_phenotype_matrix.index):
    # A gene missing from `present_hpo_terms` raises KeyError, as before.
    # A set makes each membership test constant-time.
    present_terms = set(present_hpo_terms[gene_name])
    gene_phenotype_matrix.iloc[row_position] = [
        1 if column_name in present_terms else 0
        for column_name in column_names
    ]

In [92]:
gene_phenotype_matrix

Unnamed: 0,HP:0003487,HP:0003743,HP:0001347,HP:0003419,HP:0002166,HP:0000639,HP:0003587,HP:0010550,HP:0000726,HP:0003676,...,HP:0000763,HP:0007083,HP:0000519,HP:0002500,HP:0003552,HP:0001760,HP:0003701,HP:0001123,HP:0002411,HP:0002486
SPART,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
WASHC5,1,0,1,0,1,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
ADAR,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AFG3L2,1,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
AIMP1,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
ALDH18A1,1,1,1,0,0,1,0,0,0,1,...,0,0,1,0,0,0,0,0,0,0
ALS2,1,0,1,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
AP4B1,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AP4E1,1,0,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AP4M1,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
