# Drug/Chemical Reactome Pathway Exploration



In [1]:
import importlib
import os

import pandas as pd

import neo4j_functions.driver as neo4j_driver

## Importing curated lists of drugs and chemicals

In [2]:
# Load the two curated spreadsheets (exported as CSV) from the local `lib/` folder:
# the oxidative-stress molecule summary and the overview drug list.
chemical_list_df = pd.read_csv(
    'lib/Oxidative Stress Text Mining Targets 4.1 - Summary of Oxidative Stress.csv'
)
drug_list_df = pd.read_csv(
    'lib/Drug list total 04.05.19   - Overview Drug list.csv'
)

In [3]:
# Preview the first five rows of the curated chemical list.
chemical_list_df.head(n=5)

Unnamed: 0,Biological Events of Oxidative Stress,Molecular and Functional Categories,Molecule/Enzyme/Protein,MeSH Heading,MeSH Supplementary,MeSH tree numbers,Chemical Formula,Examples,Pharm Actions,Tree Numbers,References
0,Initiation of Oxidative 1,Reactive Oxygen Species (ROS),Superoxide (anion radical),Superoxides,,D01.248.497.158.685.750.850; D01.339.431.374.8...,O2-,"Superoxide, Hydrogen Peroxide",Oxidants,"D27.720.642,\nD27.888.569.540",PMID: 25547488
1,2,,Hydrogen Peroxide,Hydrogen Peroxide,,D01.248.497.158.685.750.424; D01.339.431.374.4...,H2O2,,"Anti-Infective Agents, Local",D27.505.954.122.187,
2,,,,,,,,,Oxidants,"D27.720.642,\nD27.888.569.540",
3,3,,Hydroxyl (radical),Hydroxyl Radical,,D01.339.431.249; D01.248.497.158.459.300; D01....,HO,,Oxidants,"D27.720.642,\nD27.888.569.540",
4,4,,alpha oxygen,None listed,,,,,,,


In [4]:
# Preview the first five rows of the curated drug list.
drug_list_df.head(n=5)

Unnamed: 0,Drug Category,#,Name,Synonyms,MeSH Descriptor,MeSH tree(s),Common adverse effects,Dosage (freq/amount/time/delivery),Duration (time),Pham Action
0,Anticoagulants,1,heparin,"['Calciparine', 'Eparina', 'heparina', 'Hepari...",heparin,D09.698.373.400,"Thrombocytopenia, Cerebral haemorrhage, Haemog...",1/18U/kg/iv,2 days,"Anticoagulants, \nFibrinolytic Agents"
1,,2,warfarin,"['4-Hydroxy-3-(3-oxo-1-phenylbutyl)coumarin', ...",warfarin,D03.383.663.283.446.520.914\nD03.633.100.150.4...,"Haemorrhage, Haematoma, anaemia, Epistaxis, hy...",1/2-10mg/day/po,As needed,"Anticoagulants, \nRodenticides"
2,Thrombolytics,3,streptokinase,['Streptokinase C precursor'],streptokinase,D08.811.277.656.300.775\nD12.776.124.125.662.537,"blurred vision, confusion, dizziness, fever, s...","1/1,500,000 IU/iv",60min,Fibrinolytic Agents
3,,4,urokinase,"['U-plasminogen activator', 'uPA', 'Urokinase-...",Urokinase-Type Plasminogen Activator,D08.811.277.656.300.760.910\nD08.811.277.656.9...,"bleeding gums, coughing up blood, dizziness, h...","1/4,000,000U/iv",10min,
4,,5,tpa,"['Alteplasa', 'Alteplase (genetical recombinat...",Tissue Plasminogen Activator,D08.811.277.656.300.760.875\nD08.811.277.656.9...,,1/0.9mg/kg/iv,60min,Fibrinolytic Agents


## Looking for reactome ids of drugs in curated list
For each drug in the curated list, looks for a case-insensitive match between the drug's name and any of the names under which a drug is listed in Reactome.  

 - Outputs pandas data frame under variable `drugs_in_reactome`
 - 58/155 drugs found to have counterparts in Reactome


In [5]:
# Reload the local driver module so that edits to it are picked up without
# restarting the kernel.
importlib.reload(neo4j_driver)

# Connection settings are read from the environment so that credentials are not
# stored in the notebook. URI and user fall back to the local defaults;
# NEO4J_PASSWORD has no fallback and raises KeyError when it is not set.
driver = neo4j_driver.driver(
    uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    user=os.environ.get("NEO4J_USER", "neo4j"),
    password=os.environ["NEO4J_PASSWORD"],
)

### Test to see if I can find any drug in the reactome data set

In [76]:
# Smoke test: look for a single brand name among the `name` arrays of Drug nodes.
# The item carries its own double quotes because it is placed into the Cypher
# text as a string literal (see the printed query); verbose=True prints that query.
driver.search_item_in_array(
    class_type='Drug',
    array_field="name",
    item='"herceptin"',
    info_cols=['displayName', 'stId', 'isInDisease'],
    verbose=True,
)

Query: 
 MATCH (a:Drug) WHERE toLower("herceptin") IN [x in a.name | toLower(x)] RETURN a.displayName, a.stId, a.isInDisease 


Unnamed: 0,displayName,stId,isInDisease
0,trastuzumab [extracellular region],R-ALL-9634466,True


### Querying reactome for each drug in curated list

In [7]:
# Query Reactome once per curated drug name and gather the matches.
#
# Per-drug results are collected in a list and concatenated once at the end:
# `DataFrame.append` was removed in pandas 2.0, and growing a frame inside a
# loop copies all accumulated rows on every iteration.
reactome_info_cols = ['displayName', 'stId', 'isInDisease']

drug_matches = []
# dropna(): a missing name would otherwise be sent to the database as "nan".
for drug_name in drug_list_df.Name.dropna().unique():
    result = driver.search_item_in_array(
        array_field="name",
        # The name is wrapped in double quotes because it is placed into the
        # Cypher text as a string literal.
        # NOTE(review): a name containing a double quote would break the query.
        item='"%s"' % drug_name,
        class_type='Drug',
        info_cols=reactome_info_cols,
    )
    if result.empty:
        # No Reactome counterpart for this drug; nothing to record.
        continue
    # Keep the curated name next to each hit so results can be joined back
    # to `drug_list_df`.
    drug_matches.append(result.assign(Name=drug_name))

if drug_matches:
    # Index is left as returned by each query, as in the original cell.
    drugs_in_reactome = pd.concat(drug_matches)
else:
    # pd.concat raises on an empty list; fall back to an empty frame that
    # still has the columns later cells expect.
    drugs_in_reactome = pd.DataFrame(columns=reactome_info_cols + ['Name'])

drugs_in_reactome.head()

Unnamed: 0,displayName,stId,isInDisease,Name
0,warfarin [cytosol],R-ALL-9014945,True,warfarin
0,dabigatran [extracellular region],R-ALL-9015346,True,dabigatran
0,rivaroxaban [extracellular region],R-ALL-9015055,True,rivaroxaban
0,apixaban [extracellular region],R-ALL-9038732,True,apixaban
0,edoxaban [extracellular region],R-ALL-9038742,True,edoxaban


In [8]:
# Compare the number of distinct curated drug names with the number of those
# names that had at least one match in Reactome.
print('Initial Drug List Size: ', drug_list_df['Name'].nunique())
print('Drugs found in Reacome: ', drugs_in_reactome['Name'].nunique())


Initial Drug List Size:  155
Drugs found in Reacome:  58


## Looking for top level pathways associated with each drug found in the Reactome database

- For each drug, looks for up to 25 of the "nearest" pathways labelled `TopLevelPathway`, i.e. those reachable within 1-3 relationships (hops) of the drug
- Outputs to `pathways` variable as a pandas dataframe

In [75]:

# Re-initialize the driver so that changes made to the driver module take effect.
importlib.reload(neo4j_driver)
# Connection settings are read from the environment so that credentials are not
# stored in the notebook. URI and user fall back to the local defaults;
# NEO4J_PASSWORD has no fallback and raises KeyError when it is not set.
driver = neo4j_driver.driver(
    uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    user=os.environ.get("NEO4J_USER", "neo4j"),
    password=os.environ["NEO4J_PASSWORD"],
)

# Friendlier column names for the values returned by the query.
pathway_column_names = {
    'a.displayName': 'Drug',
    'b.displayName': 'Pathway',
    'b.speciesName': 'Species',
}

# For each Reactome drug ID, request top-level pathways connected to the drug by
# a variable-length path of 1 to 3 relationships (n=25 per drug).
# Results are collected in a list and concatenated once: `DataFrame.append` was
# removed in pandas 2.0 and is quadratic when called inside a loop.
pathway_frames = []
# groupby is used only for its keys: the distinct, sorted, non-null stId values.
for reactome_id, _ in drugs_in_reactome.groupby('stId'):
    result = driver.get_n_relations(
        class_1='Drug',
        # The ID is wrapped in single quotes because it is passed as query text.
        id_1="'%s'" % reactome_id,
        id_class='stId',
        class_2='TopLevelPathway',
        info_cols=['a.displayName', 'b.displayName', 'b.speciesName'],
        edge_type='*1..3',
        where_clause=" WHERE b.speciesName = 'Homo sapiens'",
        verbose=False,
        n=25,
    )
    pathway_frames.append(result.rename(columns=pathway_column_names))

# pd.concat raises on an empty list, so keep the original empty-frame result
# when there were no drugs to query.
pathways = (
    pd.concat(pathway_frames, ignore_index=True)
    if pathway_frames
    else pd.DataFrame()
)

pathways.head()

Failed to write data to connection Address(host='localhost', port=7687) (Address(host='127.0.0.1', port=7687)); ("0; 'Underlying socket connection gone (_ssl.c:1961)'")
Failed to write data to connection Address(host='localhost', port=7687) (Address(host='127.0.0.1', port=7687)); ("0; 'Underlying socket connection gone (_ssl.c:1961)'")


Unnamed: 0,Drug,Pathway,Species,edgeLength
0,warfarin [cytosol],DNA Replication,Homo sapiens,2.0
1,warfarin [cytosol],Circadian Clock,Homo sapiens,2.0
2,warfarin [cytosol],Muscle contraction,Homo sapiens,2.0
3,warfarin [cytosol],DNA Replication,Homo sapiens,3.0
4,warfarin [cytosol],Metabolism of RNA,Homo sapiens,3.0


In [69]:
# Count the distinct drugs linked to each pathway, separated by edge length.
drugs_per_pathway = pathways.groupby(['Pathway', 'edgeLength'])['Drug'].nunique()
drugs_per_pathway.to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Drug
Pathway,edgeLength,Unnamed: 2_level_1
Autophagy,3.0,5
Cell Cycle,3.0,3
Cell-Cell communication,3.0,3
Cellular responses to external stimuli,3.0,3
Chromatin organization,3.0,3
Circadian Clock,2.0,3
Circadian Clock,3.0,58
DNA Repair,3.0,3
DNA Replication,2.0,3
DNA Replication,3.0,5


In [74]:
# Count the distinct pathways linked to each drug, separated by edge length.
# Rows are ordered by shortest edge length first and, within one edge length,
# by the largest pathway count first.
(
    pathways
    .groupby(['Drug', 'edgeLength'])['Pathway']
    .nunique()
    .reset_index()
    .sort_values(by=['edgeLength', 'Pathway'], ascending=[True, False])
)

Unnamed: 0,Drug,edgeLength,Pathway
19,enoximone [cytosol],2.0,3
35,milrinone [cytosol],2.0,3
59,warfarin [cytosol],2.0,3
15,dobutamine [extracellular region],3.0,25
27,isoprenaline [extracellular region],3.0,25
...,...,...,...
18,enalapril [endoplasmic reticulum lumen],3.0,2
24,fosinopril [endoplasmic reticulum lumen],3.0,2
42,perindopril [endoplasmic reticulum lumen],3.0,2
48,quinapril [endoplasmic reticulum lumen],3.0,2


## Test Cypher Queries in custom neo4j driver implementation

In [59]:
# Fetch selected properties of a single Drug node, identified by its stId.
driver.get_node_info(
    class_type='Drug',
    id_field="stId",
    id_val="R-ALL-9634956",
    info_cols=['displayName', 'name', 'isInDisease'],
)

Query: 
 MATCH (a:Drug {stId: $idnum}) RETURN a.displayName, a.name, a.isInDisease 
pyridostigmine [extracellular region]
['pyridostigmine']
True


Unnamed: 0,displayName,name,isInDisease
0,pyridostigmine [extracellular region],[pyridostigmine],True


In [131]:
# List selected properties for ten Drug nodes (the printed query uses LIMIT 10).
driver.get_n_nodes_info(
    n=10,
    class_type='Drug',
    info_cols=['displayName', 'name', 'isInDisease'],
)

Query: 
 MATCH (a:Drug) RETURN a.displayName, a.name, a.isInDisease LIMIT 10


Unnamed: 0,displayName,name,isInDisease
0,trastuzumab [extracellular region],"[trastuzumab, herceptin, D5v8, R-597]",True
0,CP-724714 [cytosol],[CP-724714],True
0,Afatinib [cytosol],"[Afatinib, BIBW2992, Irreversible TKI inhibito...",True
0,AZ5104 [cytosol],[AZ5104],True
0,Sapitinib [cytosol],[Sapitinib],True
0,Tesevatinib [cytosol],"[Tesevatinib, EXEL-7647, XL-647, KD-019, XL647]",True
0,Lapatinib [cytosol],"[Lapatinib, GW572016, Tykerb, Reversible tyros...",True
0,Neratinib [cytosol],"[Neratinib, HKI-272, afatinib, Irreversible ty...",True
0,AEE788 [cytosol],[AEE788],True
0,Canertinib [cytosol],"[Canertinib, CI-1033, Irreversible TKI inhibit...",True
