# BTE -- Question #2 -- Use Case Workflow

In [1]:
import pandas as pd
from biothings_explorer.user_query_dispatcher import FindConnection
from biothings_explorer.hint import Hint
ht = Hint()

## 1. Get Disease Symptoms and Symptom Information 

### 1.1 Get Disease

In [2]:
disease_name = 'xeroderma pigmentosum'
disease = ht.query('MONDO:0019600')['Disease'][0]
print(disease)

{'MONDO': 'MONDO:0019600', 'DOID': 'DOID:0050427', 'UMLS': 'C0043346', 'name': 'xeroderma pigmentosum', 'MESH': 'D014983', 'ORPHANET': '910', 'primary': {'identifier': 'MONDO', 'cls': 'Disease', 'value': 'MONDO:0019600'}, 'display': 'MONDO(MONDO:0019600) DOID(DOID:0050427) ORPHANET(910) UMLS(C0043346) MESH(D014983) name(xeroderma pigmentosum)', 'type': 'Disease'}


### 1.2 Get 'PhenotypicFeatures' Related to Disease

In [3]:
fc = FindConnection(input_obj=disease, output_obj='PhenotypicFeature', intermediate_nodes=None)
fc.connect(verbose=False)
disease_to_phenotypicFeature = fc.display_table_view()

Note: all equivalent names for the disease input are as follows: 

In [4]:
for name in fc.fc.display_node_info(disease_name)['equivalent_ids']['name']: print(name)

ANGIOMA PIGMENTOSUM ATROPHICUM
ATROPHODERMA PIGMENTOSUM
KAPOSI DERMATOSIS
KAPOSI DISEASE
MELANOSIS LENTICULARIS PROGRESSIVA
PIGMENTED EPITHELIOMATOSIS
XERODERMA OF KAPOSI
XERODERMA PIGMENTOSA
XERODERMA PIGMENTOSUM
XERODERMA PIGMENTOSUM SYNDROME
XP


In [5]:
symptom_dict = {}
for index, row in disease_to_phenotypicFeature.iterrows(): 
    output_name = row["output_name"]
    freq = [_item['info']['frequency'] for _item in fc.fc.G[disease_name][output_name].values() if "frequency" in _item["info"]][0][0]
    freq_value = ht.query(freq)['PhenotypicFeature'][0]['name']
    symptom_dict[fc.fc.display_node_info(output_name)['equivalent_ids']['HP'][0]] = {
        "names": fc.fc.display_node_info(output_name)['equivalent_ids']['name'],
        "frequency": freq_value,
    }
# print(symptom_dict)

In [6]:
all_node_types = ['Gene', 'SequenceVariant', 'ChemicalSubstance', 'Disease', 
                'MolecularActivity', 'BiologicalProcess', 'CellularComponent', 
                'Pathway', 'AnatomicalEntity', 'PhenotypicFeature']

for key in symptom_dict:
    print(str(key) + ' is running')
    edges_out_count = 0
    UMLS = ''
    # a sign or symptom may show up as a phenotypic feature, a disease, or biological process
    for y in ['PhenotypicFeature','Disease','BiologicalProcess']:
        if y == 'PhenotypicFeature':
            a = ht.query(key)[y]
            if len(a) > 0: 
                b = a[0]
                if 'UMLS' in b: 
                    UMLS = b['UMLS']
                for node_intermediate in all_node_types:
                    try: 
                        fc = FindConnection(input_obj=b, output_obj=node_intermediate, intermediate_nodes=None)
                        fc.connect(verbose=False)
                        df = fc.display_table_view()
                        if(df.shape[0] > 0):
                            df = df[df["output_name"] != disease_name]
                            edges_out_count = edges_out_count + df.shape[0]
                    except: 
                         print(str(y) + " input to " + node_intermediate + " output for " + str(key) + ' failed')
        if(y =='Disease') | (y == 'BiologicalProcess'):
            for z in symptom_dict[key]["names"]:
                if((y == 'Disease') & (len(UMLS) > 0)): 
                    try: 
                        a = ht.query(UMLS)[y]
                    except: 
                        a = []
                else:
                    try: 
                        a = ht.query(z)[y]
                    except: 
                        a = []
                for b in a: 
                    if b['name'].lower() == z.lower():
                        for node_intermediate in all_node_types:
                            try: 
                                fc = FindConnection(input_obj=b, output_obj=node_intermediate, intermediate_nodes=None)
                                fc.connect(verbose=False)
                                df = fc.display_table_view()
                                if(df.shape[0] > 0):
                                    df = df[df["output_name"] != disease_name]
                                    edges_out_count = edges_out_count + df.shape[0]

                            except: 
                                print(str(y) + " input to " + node_intermediate + " output for " + str(key) + ' failed')
    symptom_dict[key]["edges_out_count"] = edges_out_count

HP:0012733 is running
API 3.3 semmed_phenotype failed
API 3.6 semmed_phenotype failed
API 3.1 semmed_phenotype failed
API 3.7 semmed_phenotype failed
API 3.10 semmed_phenotype failed
API 3.2 semmed_phenotype failed
API 3.4 semmed_phenotype failed
API 3.8 semmed_phenotype failed
API 3.11 semmed_phenotype failed
API 3.9 semmed_phenotype failed
API 3.12 semmed_phenotype failed
API 3.5 semmed_phenotype failed
API 3.13 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.9 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.10 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.13 semmed_phenotype failed
API 1.12 semmed_phenotype failed
API 2.5 semmed_phenotype failed
API 2.1 semmed_phenotype failed
API 2.6 semmed_phenotype failed
API 2.2 semmed_phenotype failed
API 2.8 se

API 1.3 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.2 semmed_phenotype failed
HP:0009830 is running
API 3.8 semmed_phenotype failed
API 3.3 semmed_phenotype failed
API 3.2 semmed_phenotype failed
API 3.1 semmed_phenotype failed
API 3.6 semmed_phenotype failed
API 3.9 semmed_phenotype failed
API 3.4 semmed_phenotype failed
API 3.5 semmed_phenotype failed
API 3.7 semmed_phenotype failed
API 3.12 semmed_phenotype failed
API 3.11 semmed_phenotype failed
API 3.13 semmed_phenotype failed
API 3.10 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.2 semme

API 1.1 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.9 semmed_phenotype failed
API 1.10 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.1 semmed_phenotype failed
HP:0002750 is running
API 3.2 semmed_phenotype failed
API 3.1 semmed_phenotype failed
API 3.6 semmed_phenotype failed
API 3.3 semmed_phenotype failed
API 3.4 semmed_phenotype failed
API 3.8 semmed_phenotype failed
API 3.7 semmed_phenotype failed
API 3.10 semmed_

API 2.4 semmed_phenotype failed
API 2.6 semmed_phenotype failed
API 2.7 semmed_phenotype failed
API 2.2 semmed_phenotype failed
API 2.5 semmed_phenotype failed
API 2.1 semmed_phenotype failed
API 2.3 semmed_phenotype failed
API 2.9 semmed_phenotype failed
API 2.8 semmed_phenotype failed
API 2.12 semmed_phenotype failed
API 2.14 semmed_phenotype failed
API 2.11 semmed_phenotype failed
API 2.10 semmed_phenotype failed
API 2.16 semmed_phenotype failed
API 2.18 semmed_phenotype failed
API 2.13 semmed_phenotype failed
API 2.17 semmed_phenotype failed
API 2.15 semmed_phenotype failed
API 2.19 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.9 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.10 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.4 semmed_phenotype fai

API 1.2 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.13 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.9 semmed_phenotype failed
API 1.10 semmed_phenotype failed
API 1.12 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 2.2 semmed_phenotype failed
API 2.5 semmed_phenotype failed
API 2.1 semmed_phenotype failed
API 2.3 semmed_phenotype failed
API 2.6 semmed_phenotype failed
API 2.8 semmed_phenotype failed
API 2.4 semmed_phenotype failed
API 2.13 semmed_phenotype failed
API 2.7 semmed_phenotype failed
API 2.11 semmed_phenotype failed
API 2.10 semmed_phenotype failed
API 2.12 semmed_phenotype failed
API 2.15 semmed_phenotype failed
API 2.14 semmed_phenotype failed
API 2.17 semmed_phenotype failed
API 2.19 semmed_phenotype failed
API 2.9 semmed_phenotype failed
API 2.16 semmed_phenotype fa

API 1.2 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.1 semmed_phenotype failed
HP:0002071 is running
API 3.1 semmed_phenotype failed
API 3.3 semmed_phenotype failed
API 3.5 semmed_phenotype failed
API 3.2 semmed_phenotype failed
API 3.13 semmed_phenotype failed
API 3.8 semmed_phenotype failed
API 3.11 semmed_phenotype failed
API 3.9 semmed_phenotype failed
API 3.4 semmed_phenotype failed
API 3.7 semmed_phenotype failed
API 3.6 semmed_phenotype failed
API 3.10 semmed_phenotype failed
API 3.12 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.12 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.13 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.9 semmed_phenotype failed
API 1.8 sem

API 1.2 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.10 semmed_phenotype failed
API 1.9 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 5.1 pharos failed
Disease input to Disease output for HP:0000491 failed
HP:0000486 is running
API 3.2 semmed_phenotype failed
API 3.1 semmed_phenotype failed
API 3.3 semmed_phenotype failed
API 3.7 semmed_phenotype failed
API 3.4 semmed_phenotype failed
API 

API 2.2 semmed_phenotype failed
API 2.7 semmed_phenotype failed
API 2.10 semmed_phenotype failed
API 2.1 semmed_phenotype failed
API 2.4 semmed_phenotype failed
API 2.5 semmed_phenotype failed
API 2.6 semmed_phenotype failed
API 2.8 semmed_phenotype failed
API 2.11 semmed_phenotype failed
API 2.3 semmed_phenotype failed
API 2.13 semmed_phenotype failed
API 2.9 semmed_phenotype failed
API 2.14 semmed_phenotype failed
API 2.12 semmed_phenotype failed
API 2.15 semmed_phenotype failed
API 2.16 semmed_phenotype failed
API 2.17 semmed_phenotype failed
API 2.18 semmed_phenotype failed
API 2.19 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.9 semmed_phenotype failed
API 1.10 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.4 semmed_phenotype fai

API 1.4 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.9 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.10 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.13 semmed_phenotype failed
API 1.12 semmed_phenotype failed
API 2.1 semmed_phenotype failed
API 2.2 semmed_phenotype failed
API 2.4 semmed_phenotype failed
API 2.5 semmed_phenotype failed
API 2.8 semmed_phenotype failed
API 2.3 semmed_phenotype failed
API 2.14 semmed_phenotype failed
API 2.13 semmed_phenotype failed
API 2.10 semmed_phenotype failed
API 2.11 semmed_phenotype failed
API 2.12 semmed_phenotype failed
API 2.9 semmed_phenotype failed
API 2.19 semmed_phenotype failed
API 2.6 semmed_phenotype failed
API 2.17 semmed_phenotype failed
API 2.7 semmed_phenotype failed
API 2.16 semmed_phenotype failed
API 2.15 semmed_phenotype fa

API 1.1 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.1 semmed_phenotype failed
HP:0008734 is running
API 3.1 semmed_phenotype failed
API 3.4 semmed_phenotype failed
API 3.5 semmed_phenotype failed
API 3.3 semmed_phenotype failed
API 3.8 semmed_phenotype failed
API 3.2 semmed_phenotype failed
API 3.7 semmed_phenotype failed
API 3.12 semmed_phenotype failed
API 3.11 semmed_phenotype failed
API 3.6 semmed_phenotype failed
API 3.10 semmed_phenotype failed
API 3.13 semmed_phenotype failed
API 3.9 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.10 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.9 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.13 sem

API 1.2 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.10 semmed_phenotype failed
API 1.9 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 5.1 pharos failed
Disease input to Disease output for HP:0001596 failed
HP:0001508 is running
API 3.2 semmed_phenotype failed
API 3.3 semmed_phenotype failed
API 3.1 semmed_phenotype failed
API 3.7 semmed_phenotype failed
API 3.13 semmed_phenotype failed
API

API 2.3 semmed_phenotype failed
API 2.6 semmed_phenotype failed
API 2.1 semmed_phenotype failed
API 2.2 semmed_phenotype failed
API 2.8 semmed_phenotype failed
API 2.10 semmed_phenotype failed
API 2.5 semmed_phenotype failed
API 2.12 semmed_phenotype failed
API 2.7 semmed_phenotype failed
API 2.16 semmed_phenotype failed
API 2.4 semmed_phenotype failed
API 2.13 semmed_phenotype failed
API 2.14 semmed_phenotype failed
API 2.9 semmed_phenotype failed
API 2.11 semmed_phenotype failed
API 2.17 semmed_phenotype failed
API 2.18 semmed_phenotype failed
API 2.19 semmed_phenotype failed
API 2.15 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.10 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.9 semmed_phenotype failed
API 1.4 semmed_phenotype fai

API 1.1 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.12 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.10 semmed_phenotype failed
API 1.13 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.9 semmed_phenotype failed
API 2.3 semmed_phenotype failed
API 2.10 semmed_phenotype failed
API 2.6 semmed_phenotype failed
API 2.5 semmed_phenotype failed
API 2.2 semmed_phenotype failed
API 2.1 semmed_phenotype failed
API 2.8 semmed_phenotype failed
API 2.4 semmed_phenotype failed
API 2.9 semmed_phenotype failed
API 2.14 semmed_phenotype failed
API 2.13 semmed_phenotype failed
API 2.7 semmed_phenotype failed
API 2.11 semmed_phenotype failed
API 2.18 semmed_phenotype failed
API 2.12 semmed_phenotype failed
API 2.15 semmed_phenotype failed
API 2.17 semmed_phenotype failed
API 2.19 semmed_phenotype fa

API 1.4 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 5.1 pharos failed
HP:0001053 is running
API 3.1 semmed_phenotype failed
API 3.3 semmed_phenotype failed
API 3.4 semmed_phenotype failed
API 3.2 semmed_phenotype failed
API 3.7 semmed_phenotype failed
API 3.6 semmed_phenotype failed
API 3.9 semmed_phenotype failed
API 3.5 semmed_phenotype failed
API 3.10 semmed_phenotype failed
API 3.13 semmed_phenotype failed
API 3.8 semmed_phenotype failed
API 3.11 semmed_phenotype failed
API 3.12 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.10 semmed_phenotype failed
API 1.13 semmed_phenotype failed
API 1.12 semmed_phenot

API 1.1 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.9 semmed_phenotype failed
API 1.10 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.2 semmed_phenotype failed
HP:0000518 is running
API 3.1 semmed_phenotype failed
API 3.2 semmed_phenotype failed
API 3.12 semmed_phenotype failed
API 3.5 semmed_phenotype failed
API 3.3 semmed_phenotype failed
API 3.11 semmed_phenotype failed
API 3.6 semmed_phenotype failed
API 3.13 semme

API 2.3 semmed_phenotype failed
API 2.1 semmed_phenotype failed
API 2.2 semmed_phenotype failed
API 2.7 semmed_phenotype failed
API 2.12 semmed_phenotype failed
API 2.10 semmed_phenotype failed
API 2.6 semmed_phenotype failed
API 2.8 semmed_phenotype failed
API 2.17 semmed_phenotype failed
API 2.5 semmed_phenotype failed
API 2.4 semmed_phenotype failed
API 2.19 semmed_phenotype failed
API 2.13 semmed_phenotype failed
API 2.18 semmed_phenotype failed
API 2.11 semmed_phenotype failed
API 2.9 semmed_phenotype failed
API 2.14 semmed_phenotype failed
API 2.16 semmed_phenotype failed
API 2.15 semmed_phenotype failed
API 1.1 semmed_phenotype failed
API 1.2 semmed_phenotype failed
API 1.6 semmed_phenotype failed
API 1.4 semmed_phenotype failed
API 1.5 semmed_phenotype failed
API 1.3 semmed_phenotype failed
API 1.8 semmed_phenotype failed
API 1.7 semmed_phenotype failed
API 1.9 semmed_phenotype failed
API 1.10 semmed_phenotype failed
API 1.11 semmed_phenotype failed
API 1.3 semmed_phenotype fai

In [7]:
disease_symptom_df = pd.DataFrame.from_dict(symptom_dict, orient='index').sort_values(by=['edges_out_count'])
disease_symptom_df = pd.concat([disease_symptom_df[disease_symptom_df["frequency"] == "Very frequent"],
               disease_symptom_df[disease_symptom_df["frequency"] == "Frequent"],
               disease_symptom_df[disease_symptom_df["frequency"] == "Occasional"],
               disease_symptom_df[disease_symptom_df["frequency"] == "Rare"],
               disease_symptom_df[disease_symptom_df["frequency"] == "Unknown"]
              ])
disease_symptom_df

Unnamed: 0,names,frequency,edges_out_count
HP:0000524,"[CONJUNCTIVAL TELANGIECTASES, CONJUNCTIVAL TEL...",Very frequent,52
HP:0001029,[POIKILODERMA],Very frequent,59
HP:0001480,[FRECKLING],Very frequent,78
HP:0100585,"[TELANGIECTASIA OF THE SKIN, TELEANGIECTASIA O...",Very frequent,103
HP:0001009,"[CUTANEOUS TELANGIECTASIA, SPIDER VEINS, TELAN...",Very frequent,136
HP:0001072,"[DIFFUSELY THICKENED SKIN, PACHYDERMIA, THICK ...",Very frequent,166
HP:0000992,"[CUTANEOUS PHOTOSENSITIVITY, PHOTOSENSITIVE SK...",Very frequent,221
HP:0006887,"[INTELLECTUAL DISABILITY, PROGRESSIVE, MENTAL ...",Very frequent,235
HP:0000963,[THIN SKIN],Very frequent,420
HP:0000958,"[DRY SKIN, XEROSIS]",Very frequent,531


## 2. Get Genes Directly Related to Disease

In [51]:
def get_disease_to_gene_results(disease_input):

    # keep track of number of occurrences from direct disease -> gene connection
    print("running disease -> gene")
    disease_to_gene_results = {}
    #directly related
    fc = FindConnection(input_obj=disease_input, output_obj='Gene', intermediate_nodes=None)
    fc.connect(verbose=False)
    disease_to_genes = fc.display_table_view()
    print(disease_to_genes.shape)

    disease_to_genes = disease_to_genes[~disease_to_genes['output_id'].str.contains('UMLS')]


    i = list(disease_to_genes["output_name"])
    d = {x:i.count(x) for x in i}
    sorted_disease_to_genes = {k: v for k, v in sorted(d.items(), key=lambda item: item[1])}
    disease_to_gene_results["sorted_disease_to_genes"] = sorted_disease_to_genes
    # print("occurences of genes directly related to genes")
    # print(disease_to_gene_results["sorted_disease_to_genes"])

    one_step_genes_pub_counts = {}
    for index, row in disease_to_genes.iterrows():
        current_pubcount = 0
        if(row["pred1_pubmed"] != None):
            current_pubcount = current_pubcount + row["pred1_pubmed"].count(",") + 1
        if row["output_name"] in one_step_genes_pub_counts:
            one_step_genes_pub_counts[row["output_name"]] = one_step_genes_pub_counts[row["output_name"]] + current_pubcount
        else: 
            one_step_genes_pub_counts[row["output_name"]] = current_pubcount
    disease_to_gene_results["one_step_genes_pub_counts"] =  one_step_genes_pub_counts

    disease_to_genes_list = list(reversed(list(sorted_disease_to_genes.keys())))
    disease_to_gene_results["disease_to_genes_list"] = disease_to_genes_list

    return(disease_to_gene_results)

In [52]:
disease_to_gene_results = get_disease_to_gene_results(disease)
disease_to_gene_results

running disease -> gene
(2045, 9)


{'sorted_disease_to_genes': {'MGI:6258233': 1,
  'MGI:6258236': 1,
  'MGI:1195972': 1,
  '18S_rRNA': 1,
  '28S_rRNA': 1,
  '45S_rRNA': 1,
  '7SK': 1,
  'CASC15': 1,
  'CYP26B1': 1,
  'CFTR': 1,
  'USP28': 1,
  'CRY1': 1,
  'CD4': 1,
  'UBR7': 1,
  'CDH17': 1,
  'GABARAPL2': 1,
  'RFC2': 1,
  'KIAA2022': 1,
  'GNA11': 1,
  'AQR': 1,
  'ST6GALNAC1': 1,
  'NOX3': 1,
  'TNFRSF1A': 1,
  'PIGB': 1,
  'TRIP13': 1,
  'KRT14': 1,
  'FGF4': 1,
  'KEAP1': 1,
  'TNS1': 1,
  'RRN3': 1,
  'PPP1R15A': 1,
  'PPP1R13B': 1,
  'SIRT4': 1,
  'OTUB2': 1,
  'OSGEP': 1,
  'ADH1A': 1,
  'CDC6': 1,
  'CBX5': 1,
  'VCL': 1,
  'SIRT1': 1,
  'CDC34': 1,
  'GADD45B': 1,
  'MAPK1': 1,
  'DMC1': 1,
  'HSCB': 1,
  'XBP1': 1,
  'HMOX1': 1,
  'MCM5': 1,
  'TXN2': 1,
  'RBX1': 1,
  'ACO2': 1,
  'BRD1': 1,
  'SUPT16H': 1,
  'POLE2': 1,
  'ALKBH1': 1,
  'MTHFD1': 1,
  'APEX1': 1,
  'EFS': 1,
  'NFKBIA': 1,
  'CYP24A1': 1,
  'AURKA': 1,
  'EEF1A2': 1,
  'CSNK2A1': 1,
  'FERMT1': 1,
  'TBL1X': 1,
  'USP11': 1,
  'TIMP1': 1,

## 3. Get Top Genes Related to Disease through 1 Intermediate Node

In [10]:
# get results using intermediates 
node_type_list = (['Gene', 'SequenceVariant', 'ChemicalSubstance', 'Disease', 
                'MolecularActivity', 'BiologicalProcess', 'CellularComponent', 
                'Pathway', 'AnatomicalEntity', 'PhenotypicFeature'])

In [11]:
def predict_many(input_object_list, output_type_list, intermediate_node_list):
    df_list = []
    for input_object in input_object_list: 
        for output_type in output_type_list: 
            for inter in intermediate_node_list:
                try: 
                    print("Intermediate Node type running:")
                    print(inter)
                    fc = FindConnection(input_obj=input_object, output_obj=output_type, intermediate_nodes=[inter])
                    fc.connect(verbose=False)
                    df = fc.display_table_view()
                    rows = df.shape[0]
                    if(rows > 0):
                        df_list.append(df)
                except:
                    print("FAILED")
    if(len(df_list) > 0):
        return pd.concat(df_list)
    else:
        return None

In [12]:
disease_to_all_nodes_to_genes = predict_many([disease],['Gene'],node_type_list)
# Save an object to a file
%store disease_to_all_nodes_to_genes

Intermediate Node type running:
Gene
API 3.3 semmed_gene failed
API 3.2 semmed_gene failed
API 3.6 semmed_gene failed
API 3.5 semmed_gene failed
API 4.1 cord_gene failed
API 3.4 semmed_gene failed
API 3.1 semmed_gene failed
API 3.8 semmed_gene failed
API 3.7 semmed_gene failed
API 1.1 mygene failed
API 3.9 semmed_gene failed
API 3.10 semmed_gene failed
Intermediate Node type running:
SequenceVariant
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human')
Unable to fetch results from mydisease.info
Intermediate Node type running:
ChemicalSubstance
0, message='Attempt to decode JSON with unexpected mimetype

In [13]:
%store -r disease_to_all_nodes_to_genes

In [14]:
def get_disease_to_nodes_to_gene_results(disease_all_nodes_genes,max_two_step_gene_count,symptom_list,symptoms_hpids):
    disease_to_node_to_gene_results = {}
    print("finding intermediate nodes that are symptoms")
    indices_with_symptom_as_intermediate = []
    go_dict = {}
    ## remove all rows with symptoms as intermediates -- first get indices, then remove
    for index, row in enumerate(disease_all_nodes_genes.iterrows()):
        row = disease_all_nodes_genes.iloc[index]
        if row["node1_type"] == 'Disease':
            if row["node1_name"].upper() in symptom_list:
                indices_with_symptom_as_intermediate.append(index)
        elif row["node1_type"] == 'BiologicalProcess':
            if row["node1_name"].upper() in symptom_list:
                indices_with_symptom_as_intermediate.append(index)
        elif row["node1_type"] == 'PhenotypicFeature':
            if((row["node1_name"].upper() in symptom_list) or (row["node1_name"] in symptoms_hpids)):
                indices_with_symptom_as_intermediate.append(index)

    # print("indices")
    # print(indices_with_symptom_as_intermediate)
    print("removing symptom intermediates")
    disease_all_nodes_genes = disease_all_nodes_genes.drop(disease_all_nodes_genes.index[indices_with_symptom_as_intermediate])

    print("getting gene counts from " + str(len(list(disease_all_nodes_genes["output_name"]))) + " gene entries" )
    i = list(disease_all_nodes_genes["output_name"])
    # d = {x:i.count(x) for x in i}
    d = {}
    for x in i: 
        if x in d: 
            d[x] = d[x] + 1
        else:
            d[x] = 1
    print("sorting counts dictionary")
    sorted_disease_to_all_nodes_to_genes = {k: v for k, v in sorted(d.items(), key=lambda item: item[1])}

    print("top genes occurrence counts: ")
    for x in list(reversed(list(sorted_disease_to_all_nodes_to_genes)))[0:max_two_step_gene_count]:
        print(str(x) + ": " + str(sorted_disease_to_all_nodes_to_genes[x]))
    
    top_related_genes_to_disease = list(reversed(list(sorted_disease_to_all_nodes_to_genes)))[0:max_two_step_gene_count]

    disease_to_node_to_gene_results["top_related_genes_to_disease"] = top_related_genes_to_disease
    disease_to_node_to_gene_results["sorted_disease_to_all_nodes_to_genes"] = sorted_disease_to_all_nodes_to_genes

    # keep track of pubication counts for genes in two-step disease -> intermediate node -> gene
    print("getting publicaiton counts")
    top_two_step_genes_pub_counts = {}
    for index, row in disease_all_nodes_genes.iterrows():
        if row["output_name"] in top_related_genes_to_disease:
            current_pubcount = 0
            if(row["pred1_pubmed"] != None):
                current_pubcount = current_pubcount + str(row["pred1_pubmed"]).count(",") + 1
            if(row["pred2_pubmed"] != None):
                current_pubcount = current_pubcount + str(row["pred2_pubmed"]).count(",") + 1
            if row["output_name"] in top_two_step_genes_pub_counts:
                top_two_step_genes_pub_counts[row["output_name"]] = top_two_step_genes_pub_counts[row["output_name"]] + current_pubcount
            else: 
                top_two_step_genes_pub_counts[row["output_name"]] = current_pubcount

    disease_to_node_to_gene_results["top_two_step_genes_pub_counts"] =  top_two_step_genes_pub_counts
    
    return(disease_to_node_to_gene_results)

In [15]:
disease_symptom_hpids = list(symptom_dict.keys())
disease_symptoms = []
for entry in list(symptom_dict.values()): disease_symptoms = disease_symptoms + entry['names']
# print(disease_symptoms)

In [16]:
max_2_step_genes = 200
disease_symptoms
disease_symptom_hpids
disease_to_all_nodes_to_genes_results = get_disease_to_nodes_to_gene_results(disease_to_all_nodes_to_genes, max_2_step_genes, disease_symptoms, disease_symptom_hpids)
%store disease_to_all_nodes_to_genes_results

finding intermediate nodes that are symptoms
removing symptom intermediates
getting gene counts from 186109 gene entries
sorting counts dictionary
top genes occurrence counts: 
TP53: 471
MYC: 414
BRCA1: 397
PLEKHA4: 360
VIRMA: 339
NTRK1: 338
UBC: 338
H4C1: 329
EP300: 326
TRIM25: 285
PCNA: 275
ESR2: 274
RPA1: 273
RECQL4: 268
EGFR: 262
RPA2: 259
APP: 252
TRIM28: 247
POLR2A: 240
CUL3: 234
RPA3: 233
RNF4: 231
CDK2: 227
RBX1: 226
HSP90AA1: 218
COPS5: 217
FANCD2: 216
CTNNB1: 210
ESR1: 209
UBB: 205
ELAVL1: 203
BIRC3: 202
CUL1: 201
RFC4: 198
UBE2I: 198
UBA52: 197
NR2C2: 196
EFTUD2: 195
CUL4B: 195
RPS27A: 195
POLR2E: 195
KRAS: 192
ATM: 190
XRCC6: 190
POLD1: 190
XPO1: 189
CREBBP: 188
CUL7: 183
HSPA8: 183
CDK9: 181
SNW1: 181
EGLN3: 181
PHB: 181
CHEK1: 180
CUL4A: 180
H2AX: 179
DDB1: 179
POLR2B: 178
PRKDC: 178
AKT1: 177
HDAC1: 175
NXF1: 175
RFC3: 175
POLR2C: 173
TP53BP1: 173
VCP: 173
EZH2: 172
RFC2: 172
APEX1: 170
RFC5: 169
POLR2F: 165
POLR2H: 164
H3C1: 164
OBSL1: 163
WWP2: 163
POLR2L: 163
MDM2: 16

In [None]:
%store -r disease_to_all_nodes_to_genes_results

## 4. Get Disease Symptoms related to Genes
Genes -> Symptoms, then filter based on disease symptoms

In [17]:
disease_top_genes_list = disease_to_gene_results["disease_to_genes_list"] + disease_to_all_nodes_to_genes_results["top_related_genes_to_disease"]
disease_top_genes_list = list(dict.fromkeys(disease_top_genes_list))
disease_top_genes_list

['ERCC3',
 'ERCC2',
 'XPA',
 'POLH',
 'ERCC5',
 'ERCC4',
 'XPC',
 'DDB2',
 'EDNRA',
 'HFM1',
 'GTF2H4',
 'ERCC1',
 'MYC',
 'APAF1',
 'DDX11',
 'ENDOV',
 'MMS19',
 'BBC3',
 'ZC3H12D',
 'CDKN1A',
 'BCL2',
 'POLM',
 'MAPK8',
 'PCNA',
 'FXN',
 'HSPA1A',
 'GADD45A',
 'CETN2',
 'NRAS',
 'ERCC6',
 'FAS',
 'BLM',
 'SFN',
 'PTCH1',
 'PMAIP1',
 'DCTPP1',
 'WDR48',
 'FOS',
 'HSPA9',
 'BTG2',
 'IFNA1',
 'TNFRSF10B',
 'EGFR',
 'TP53',
 'GTF2H1',
 'ERCC8',
 'POLQ',
 'CCNH',
 'TERF2',
 'RPA1',
 'NR1H2',
 'RRM2B',
 'AAGAB',
 'WDR77',
 'PSMC6',
 'LRRC59',
 'CSH1',
 'CPD',
 'ERVK-11',
 'ERVK-19',
 'ERVK-9',
 'FASTK',
 'REXO2',
 'PERP',
 'FASN',
 'TRIAP1',
 'SUSD6',
 'AEN',
 'BAX',
 'hsa_circ_000559',
 'MIR890',
 'MIR744',
 'MIR6771',
 'MIR6732',
 'MIR373',
 'MIR346',
 'MIR21',
 'U2',
 'U12',
 'U11',
 'TERC',
 'TARID',
 'SCAANT1',
 'RNR3',
 'RN5S1@',
 'Pinc',
 'MEG8',
 'LINC01197',
 'LINC00302',
 'HCG14',
 'FAM230A',
 'FAM157B',
 'MUC13',
 'HIST1H3B',
 'MUC1',
 'HIST1H4E',
 'HIST1H3C',
 'HIST1H3G',
 'LIG

In [18]:
def determined_genes_to_symptoms(gene_list, symptom_list, symptom_dict):

    # gene -> phenotypic feature nodes
    print("Genes -> PhenotypicFeatures")
    df_list = []
    for x in gene_list: 
        try: 
            gene = ht.query(x)["Gene"][0]
            fc = FindConnection(input_obj=gene, output_obj='PhenotypicFeature', intermediate_nodes=None)
            fc.connect(verbose=False)
            df = fc.display_table_view()
            rows = df.shape[0]
            if(rows > 0):
                df_list.append(df)
        except:
            print(str(x) + " FAILED")
    if(len(df_list) > 0):
        top_gene_to_phenotypicFeature = pd.concat(df_list)

    # top_gene_to_phenotypicFeature = top_gene_to_phenotypicFeature
    # top_gene_to_phenotypicFeature
    for index in range(top_gene_to_phenotypicFeature.shape[0]):
        if(top_gene_to_phenotypicFeature.iloc[index]["output_name"] in symptom_dict):
            top_gene_to_phenotypicFeature.iloc[index]["output_name"] = symptom_dict[top_gene_to_phenotypicFeature.iloc[index]["output_name"]]['names'][0]

    # gene -> bioprocess
    print("Genes -> Bioprocesses")
    df_list = []
    for x in gene_list: 
        try: 
            gene = ht.query(x)["Gene"][0]
            fc = FindConnection(input_obj=gene, output_obj='BiologicalProcess', intermediate_nodes=None)
            fc.connect(verbose=False)
            df = fc.display_table_view()
            rows = df.shape[0]
            if(rows > 0):
                df_list.append(df)
        except:
            print(str(x) + " FAILED")
    if(len(df_list) > 0):
        top_gene_to_bioprocesses = pd.concat(df_list)

    # bioprocess_top = top_gene_to_bioprocesses
    # Genes -> disease type "symptoms"
    print("Genes -> Diseases")
    df_list = []
    for x in gene_list: 
        try: 
            gene = ht.query(x)["Gene"][0]
            fc = FindConnection(input_obj=gene, output_obj='Disease', intermediate_nodes=None)
            fc.connect(verbose=False)
            df = fc.display_table_view()
            rows = df.shape[0]
            if(rows > 0):
                df_list.append(df)
        except:
            print(str(x) + " FAILED")
    if(len(df_list) > 0):
        top_gene_to_diseases = pd.concat(df_list)

    all_gene_connections = pd.concat([top_gene_to_bioprocesses,top_gene_to_phenotypicFeature,top_gene_to_diseases])
    all_gene_connections["output_name"] = all_gene_connections["output_name"].str.upper()
    return(all_gene_connections)

In [36]:
def filter_genes_to_symptoms(genes_to_symptoms_df, disease_symptoms):
    indices_to_keep = []
    for index, row in enumerate(genes_to_symptoms_df.iterrows()):
        row = genes_to_symptoms_df.iloc[index]
        if(row["output_name"].upper() in disease_symptoms):
            indices_to_keep.append(index)
    genes_to_symptoms_df = genes_to_symptoms_df.iloc[indices_to_keep]
    return(genes_to_symptoms_df)

In [20]:
genes_to_symptoms_df = determined_genes_to_symptoms(disease_top_genes_list, disease_symptoms, symptom_dict)

Genes -> PhenotypicFeatures
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results fr

0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message

0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message

0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message

0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
hsa_circ_0

0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
Pinc FAILED
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.inf

0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message

0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message

0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message

XRCC6BP1 FAILED
KIAA0101 FAILED
FAM96A FAILED
WAPAL FAILED
H2AFZ FAILED
APP FAILED
MUT FAILED
ACPT FAILED
SGOL1 FAILED
UFD1L FAILED
TCEB2 FAILED
CCBL2 FAILED
MTL5 FAILED
H3F3B FAILED
NARFL FAILED
HIST1H4F FAILED
RARS FAILED
PAPD7 FAILED
SKIV2L2 FAILED
OBFC1 FAILED
KIAA2022 FAILED
45S_rRNA FAILED
28S_rRNA FAILED
18S_rRNA FAILED
MGI:1195972 FAILED
MGI:6258236 FAILED
MGI:6258233 FAILED
Genes -> Diseases
hsa_circ_000559 FAILED
RN5S1@ FAILED
Pinc FAILED
HIST1H3B FAILED
HIST1H4E FAILED
HIST1H3C FAILED
HIST1H3G FAILED
HIST1H3F FAILED
HIST1H3I FAILED
HIST1H3E FAILED
HIST1H4I FAILED
HIST1H4L FAILED
HIST1H3A FAILED
WBSCR16 FAILED
HIST1H4K FAILED
HIST1H4D FAILED
HIST1H4A FAILED
LEPREL2 FAILED
HIST2H3PS2 FAILED
HIST2H2AA3 FAILED
MUM1 FAILED
C19orf40 FAILED
CD3EAP FAILED
HIST2H4A FAILED
HIST2H4B FAILED
C16orf52 FAILED
H326 FAILED
SMEK1 FAILED
HIST4H4 FAILED
Response payload is not completed
Unable to fetch results from mydisease.info
ID resolution steps failed:
HMGA2 FAILED
H2AFX FAILED
C11orf30 FA

In [21]:
%store genes_to_symptoms_df

Stored 'genes_to_symptoms_df' (DataFrame)


In [22]:
%store -r genes_to_symptoms_df

In [37]:
relevant_genes_to_symptoms_df = filter_genes_to_symptoms(genes_to_symptoms_df,disease_symptoms)

In [71]:
def get_symptom_results(relevant_genes_to_symptoms_df):
    symptoms_results = {}
    relevant_top_genes_list = list(dict.fromkeys(list(relevant_genes_to_symptoms_df["input"])))
    for x in relevant_top_genes_list:
        symptoms_results[x] = {
            "related_symptoms" : [],
            "publication_count": 0
        }
    for index, row in relevant_genes_to_symptoms_df.iterrows():
        symptoms_results[row["input"]]["related_symptoms"].append(row["output_name"])
        if(row["pred1_pubmed"]):
            symptoms_results[row["input"]]["publication_count"] = symptoms_results[row["input"]]["publication_count"] + row["pred1_pubmed"].count(',') + 1
#     for x in list(symptoms_results.keys()):
#         if(len(symptoms_results[x]["related_symptoms"]) == 0):
#             print("MEHHH")
#             del symptoms_results[x]
    ### need to remove if related to symptoms length = 0 
    return(symptoms_results)

In [72]:
relevant_genes_to_symptoms_results = get_symptom_results(relevant_genes_to_symptoms_df )
relevant_top_genes_list = list(relevant_genes_to_symptoms_results.keys())
print(relevant_genes_to_symptoms_results)

{'ERCC3': {'related_symptoms': ['DECREASED ACTIVITY OF GONADS', 'DECREASED ACTIVITY OF GONADS', 'DECREASED ACTIVITY OF GONADS', 'ABNORMALLY SMALL CRANIUM', 'ABNORMALLY SMALL CRANIUM', 'ABNORMALLY SMALL CRANIUM', 'HEARING LOSS, SENSORINEURAL', 'HEARING LOSS, SENSORINEURAL', 'CATARACT', 'CATARACT', 'CATARACT', 'OPTIC ATROPHY', 'OPTIC ATROPHY', 'CUTANEOUS PHOTOSENSITIVITY', 'CUTANEOUS PHOTOSENSITIVITY', 'CUTANEOUS PHOTOSENSITIVITY', 'ATAXIA', 'ATAXIA', 'FRECKLING', 'FRECKLING', 'DECREASED BODY HEIGHT', 'DECREASED BODY HEIGHT', 'DECREASED BODY HEIGHT', 'ATROPHIC SKIN', 'ATROPHIC SKIN', 'ABNORMAL TISSUE MASS', 'DRY SKIN', 'INVOLUNTARY MUSCLE STIFFNESS, CONTRACTION, OR SPASM', 'NEUROPATHY', 'CANCER OF SKIN PIGMENT CELLS', 'DELAYED BONE AGE', 'CEREBRAL CORTEX ATROPHY', 'CROSS-EYED', 'ABNORMALITY OF COGNITION', 'CRYPTORCHIDISM', 'CONGENITAL DEAFNESS', 'POIKILODERMA', 'EXTREME SENSITIVITY OF THE EYES TO LIGHT', 'ECTROPION', 'FLAT, DISCOLORED AREA OF SKIN', 'CRANIOFACIAL HYPEROSTOSIS', 'ADHESION

## 5. Get Genes Edges Out Count

In [45]:
## node type list is list of all nodes
def get_gene_edges_out_count(gene_list, node_type_list):
    # dictionary that keeps track of all connections from a gene to any node type 
    connection_dict = {}
    for gene_symbol in gene_list:
        gene_found = False
        gene_query = ht.query(gene_symbol)['Gene']
        for i in gene_query:
            if(i['SYMBOL'].lower() == gene_symbol.lower()):
                gene = i
                gene_found = True
        if(gene_found == True):
            count = 0
            input_object = gene
            for x in node_type_list:
                try: 
                    ## only look at direct connections
                    fc = FindConnection(input_obj=input_object, output_obj=x, intermediate_nodes=None)
                    fc.connect(verbose=False)
                    df = fc.display_table_view()
                    rows = df.shape[0]
                    count = count + rows
                except: 
                    print("gene " + str(gene_symbol) + " for node intermediate " + str(x) + " failed")
            connection_dict[gene_symbol]  = count
        else:
            print(gene_symbol + ' could not be found')
            connection_dict[gene_symbol] = 'Unknown'
    return(connection_dict)

In [46]:
## for example purposes, include ASLD5. - not a real gene as far as I know
gene_edges_out = get_gene_edges_out_count(relevant_top_genes_list, all_node_types)
gene_edges_out

0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('https://mydisease.info/v1/query?fields=disgenet.xrefs.umls&size=250')
Unable to fetch results from mydisease
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.me

0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('https://mydisease.info/v1/query?fields=disgenet.xrefs.umls&size=250')
Unable to fetch results from mydisease
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human')
Unable to fetch results from mydisease.info
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,dise

0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human')
Unable to fetch results from mydisease.info
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human')
Unable to fetch results from mydisease.info
0, message='Attempt to d

0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.mesh,disease_ontology.xrefs.mesh,ctd.mesh,mondo.xrefs.omim,hpo.omim,hpo.orphanet,mondo.xrefs.orphanet&dotfield=true&species=human&size=5')
Unable to fetch results from mydisease.info
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('https://mydisease.info/v1/query?fields=disgenet.xrefs.umls&size=250')
Unable to fetch results from mydisease
0, message='Attempt to decode JSON with unexpected mimetype: text/html', url=URL('http://mydisease.info/v1/query?fields=mondo.mondo,mondo.xrefs.doid,mondo.xrefs.umls,disgenet.xrefs.umls,mondo.label,disgenet.xrefs.disease_name,mondo.synonym.exact,mondo.synonym.related,disease_ontology.synonyms.exact,mondo.xrefs.me

{'ERCC3': 1097,
 'ERCC2': 1270,
 'XPA': 748,
 'POLH': 565,
 'ERCC5': 697,
 'ERCC4': 934,
 'XPC': 570,
 'DDB2': 628,
 'EDNRA': 1430,
 'ERCC1': 1117,
 'DDX11': 459,
 'PCNA': 2169,
 'FXN': 1991,
 'NRAS': 1533,
 'ERCC6': 876,
 'FAS': 5882,
 'BLM': 1570,
 'PTCH1': 1566,
 'FOS': 7066,
 'TP53': 14024,
 'ERCC8': 2260,
 'RRM2B': 755,
 'PERP': 390,
 'TERC': 252,
 'MUC1': 1175,
 'LIG4': 815,
 'IKBKG': 1412,
 'CHD1': 526,
 'HNRNPDL': 642,
 'ACD': 897,
 'C9orf72': 709,
 'RECQL4': 928,
 'PCGF2': 556,
 'EPHX1': 969,
 'CIB1': 447,
 'KIZ': 205,
 'RAG2': 595,
 'CYP1B1': 811,
 'ATXN3': 1008,
 'SRD5A2': 1161,
 'FRRS1L': 295,
 'GTF2H5': 652,
 'BIVM-ERCC5': 273,
 'TTN': 846,
 'RAD51D': 539,
 'GALK1': 403,
 'TMC6': 438,
 'RPL26': 627,
 'RBM8A': 754,
 'ACTG1': 770,
 'WWOX': 1135,
 'ISCA2': 247,
 'AKT1': 16400,
 'MC1R': 821,
 'BRF1': 347,
 'VDR': 1115,
 'CYFIP2': 515,
 'TET2': 669,
 'TOP3A': 542,
 'PHC1': 614,
 'ATXN7': 565,
 'CUL7': 688,
 'CD19': 1732,
 'FAM111A': 243,
 'PUF60': 690,
 'NTRK1': 2291,
 'IKBKB':

In [49]:
len(relevant_top_genes_list)

1630

In [47]:
%store gene_edges_out

Stored 'gene_edges_out' (dict)


In [48]:
%store -r gene_edges_out

## 6. Assemble Genes related to both Disease and Disease Symptoms

In [53]:
# disease_to_gene_results 
# disease_to_all_nodes_to_genes_results


# relevant_genes_to_symptoms_df 




final_dict = {}

relevant_top_genes_list[1]

for x in relevant_top_genes_list
    final_dict[x] = {
        "disease_to_gene_occurrences" = disease_to_gene_results['sorted_disease_to_genes'][x] if x in disease_to_gene_results['sorted_disease_to_genes'] else 0,
        "disease_to_gene_pub_counts" = disease_to_gene_results['one_step_genes_pub_counts'][x] if x in disease_to_gene_results['one_step_genes_pub_counts'] else 0,
        "disease_to_int_to_gen_occurrences" = disease_to_all_nodes_to_genes_results['sorted_disease_to_all_nodes_to_genes'][x] if x in disease_to_all_nodes_to_genes_results['sorted_disease_to_all_nodes_to_genes'] else 0,
        "disease_to_int_to_gene_pubs" = disease_to_all_nodes_to_genes_results['top_two_step_genes_pub_counts'][x] if x in disease_to_all_nodes_to_genes_results['top_two_step_genes_pub_counts'] else 0,
        "disease_symtpoms_gene_related_to" = 
    }


'ERCC2'

In [57]:
disease_to_all_nodes_to_genes_results.keys()

dict_keys(['top_related_genes_to_disease', 'sorted_disease_to_all_nodes_to_genes', 'top_two_step_genes_pub_counts'])

In [25]:
disease_symptoms

['FLAT, DISCOLORED AREA OF SKIN',
 'MACULE',
 'CRANIOFACIAL HYPEROSTOSIS',
 'ENLARGEMENT OF CRANIOFACIAL BONES',
 'EXCESSIVE BONE GROWTH OF THE SKULL AND FACE',
 'EXCESSIVE GROWTH OF CRANIOFACIAL BONES',
 'HYPEROSTOSIS OF CRANIOFACIAL BONES',
 'HYPERTROPHY OF CRANIOFACIAL BONES',
 'INCREASED OSSIFICATION OF CRANIOFACIAL BONES',
 'OVERGROWTH OF CRANIOFACIAL BONES',
 'THICK CRANIOFACIAL BONES',
 'ATROPHIC SKIN',
 'DERMAL ATROPHY',
 'SKIN ATROPHY',
 'SKIN DEGENERATION',
 'DECREASED BODY HEIGHT',
 'HEIGHT LESS THAN 3RD PERCENTILE',
 'SHORT STATURE',
 'SMALL STATURE',
 'STATURE BELOW 3RD PERCENTILE',
 'NEUROPATHY',
 'PERIPHERAL NERVE DAMAGE',
 'PERIPHERAL NEURITIS',
 'PERIPHERAL NEUROPATHY',
 'ADHESION OF EYELIDS',
 'ANKYLOBLEPHARON',
 'ANKYLOBLEPHARON FILIFORME ADNATUM',
 'EYELID SYNECHIAE',
 'EYELIDS STUCK TOGETHER',
 'CANCER OF SKIN PIGMENT CELLS',
 'MALIGNANT MELANOMA',
 'MELANOMA',
 'SKIN CANCER (MELANOMA)',
 'ARTHRALGIA',
 'ARTHRALGIAS',
 'ARTHRITIC PAIN',
 'JOINT PAIN',
 'JOINT PAINS

In [30]:
genes_to_symptoms_df.iloc[[1,3,4]]

Unnamed: 0,input,input_type,pred1,pred1_source,pred1_api,pred1_pubmed,output_type,output_name,output_id
1,ERCC3,Gene,affects,SEMMED,SEMMED Gene API,10467415,BiologicalProcess,C0007587,UMLS:C0007587
3,ERCC3,Gene,affects,SEMMED,SEMMED Gene API,9637787,BiologicalProcess,C0011065,UMLS:C0011065
4,ERCC3,Gene,affects,SEMMED,SEMMED Gene API,"10214907,11335038,11412842,12221129,16427011,2...",BiologicalProcess,C0012899,UMLS:C0012899


In [35]:
relevant_genes_to_symptoms_df

In [41]:
relevant_top_genes_list

['ERCC3',
 'ERCC2',
 'XPA',
 'POLH',
 'ERCC5',
 'ERCC4',
 'XPC',
 'DDB2',
 'EDNRA',
 'ERCC1',
 'DDX11',
 'PCNA',
 'FXN',
 'NRAS',
 'ERCC6',
 'FAS',
 'BLM',
 'PTCH1',
 'FOS',
 'TP53',
 'ERCC8',
 'RRM2B',
 'PERP',
 'TERC',
 'MUC1',
 'LIG4',
 'IKBKG',
 'CHD1',
 'HNRNPDL',
 'ACD',
 'C9orf72',
 'RECQL4',
 'PCGF2',
 'EPHX1',
 'CIB1',
 'KIZ',
 'RAG2',
 'CYP1B1',
 'ATXN3',
 'SRD5A2',
 'FRRS1L',
 'GTF2H5',
 'BIVM-ERCC5',
 'TTN',
 'RAD51D',
 'GALK1',
 'TMC6',
 'RPL26',
 'RBM8A',
 'ACTG1',
 'WWOX',
 'ISCA2',
 'AKT1',
 'MC1R',
 'BRF1',
 'VDR',
 'CYFIP2',
 'TET2',
 'TOP3A',
 'PHC1',
 'ATXN7',
 'CUL7',
 'CD19',
 'FAM111A',
 'PUF60',
 'NTRK1',
 'IKBKB',
 'PROM1',
 'RNASET2',
 'XRCC4',
 'POR',
 'BRCA1',
 'CDKN2A',
 'NDUFA6',
 'FTO',
 'FDXR',
 'SLC2A1',
 'SLC35A2',
 'RPS24',
 'BDNF',
 'CDKN1C',
 'FBP1',
 'HRAS',
 'TAF1',
 'SLC13A5',
 'POLD1',
 'CDC45',
 'CLMP',
 'ESR1',
 'APTX',
 'PNPT1',
 'RHOA',
 'TAF6',
 'DIABLO',
 'DHFR',
 'SMARCA4',
 'MECP2',
 'FGFR1',
 'CHD7',
 'IGF2',
 'TKT',
 'GLI2',
 'IDH1',
 

In [58]:
symptoms_dict

NameError: name 'symptoms_dict' is not defined

In [70]:
len(relevant_top_genes_list)

# for x in relevant_genes_to_symptoms_results.keys():
#     if 

1630

In [64]:
list(relevant_genes_to_symptoms_results.keys())

['ERCC3',
 'ERCC2',
 'XPA',
 'POLH',
 'ERCC5',
 'ERCC4',
 'XPC',
 'DDB2',
 'EDNRA',
 'ERCC1',
 'DDX11',
 'PCNA',
 'FXN',
 'NRAS',
 'ERCC6',
 'FAS',
 'BLM',
 'PTCH1',
 'FOS',
 'TP53',
 'ERCC8',
 'RRM2B',
 'PERP',
 'TERC',
 'MUC1',
 'LIG4',
 'IKBKG',
 'CHD1',
 'HNRNPDL',
 'ACD',
 'C9orf72',
 'RECQL4',
 'PCGF2',
 'EPHX1',
 'CIB1',
 'KIZ',
 'RAG2',
 'CYP1B1',
 'ATXN3',
 'SRD5A2',
 'FRRS1L',
 'GTF2H5',
 'BIVM-ERCC5',
 'TTN',
 'RAD51D',
 'GALK1',
 'TMC6',
 'RPL26',
 'RBM8A',
 'ACTG1',
 'WWOX',
 'ISCA2',
 'AKT1',
 'MC1R',
 'BRF1',
 'VDR',
 'CYFIP2',
 'TET2',
 'TOP3A',
 'PHC1',
 'ATXN7',
 'CUL7',
 'CD19',
 'FAM111A',
 'PUF60',
 'NTRK1',
 'IKBKB',
 'PROM1',
 'RNASET2',
 'XRCC4',
 'POR',
 'BRCA1',
 'CDKN2A',
 'NDUFA6',
 'FTO',
 'FDXR',
 'SLC2A1',
 'SLC35A2',
 'RPS24',
 'BDNF',
 'CDKN1C',
 'FBP1',
 'HRAS',
 'TAF1',
 'SLC13A5',
 'POLD1',
 'CDC45',
 'CLMP',
 'ESR1',
 'APTX',
 'PNPT1',
 'RHOA',
 'TAF6',
 'DIABLO',
 'DHFR',
 'SMARCA4',
 'MECP2',
 'FGFR1',
 'CHD7',
 'IGF2',
 'TKT',
 'GLI2',
 'IDH1',
 