## Neo4j and Reactome

In [2]:
import pandas as pd
import json
from neo4j import GraphDatabase

In [3]:
driver = GraphDatabase.driver(uri = "bolt://localhost:7687", auth = ("neo4j","neo4j"))

#### 1. How many nodes of Pathways, Drugs and Diseases

In [4]:
query = 'MATCH (d:Disease)\
        RETURN d.name, d.definition, d.dbId, d.synonym'
Result = []
with driver.session() as session:
    info = session.run(query)
    for item in info:
        Result.append({'disease': item.values()[0][0],\
                      'defn': item.values()[1],\
                      'dbid': item.values()[2],\
                      'synonym':item.values()[3]})
                      
df = pd.DataFrame(Result)
print(df.shape)
df = df.set_index('disease')
df.head()

(387, 4)


Unnamed: 0_level_0,dbid,defn,synonym
disease,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cancer,1500689,A disease of cellular proliferation that is ma...,"[malignant tumor, malignant neoplasm, primary ..."
lung squamous cell carcinoma,1981182,,"[squamous cell carcinoma of lung (disorder), E..."
head and neck squamous cell carcinoma,1248679,,"[carcinoma of the head and neck, squamous cell..."
breast cancer,1500576,A thoracic cancer that originates in the mamma...,"[malignant tumor of the breast, mammary cancer..."
ovarian cancer,1500575,,"[ovarian neoplasm, malignant tumour of ovary, ..."


In [51]:
query = 'MATCH (p:Pathway)\
        RETURN p.displayName, p.schemaClass, p.stId, p.speciesName'
Result = []
with driver.session() as session:
    info = session.run(query)
    for item in info:
        Result.append({'name': item.values()[0],\
                      'class': item.values()[1],\
                      'ID': item.values()[2],\
                      'species':item.values()[3]})
                      
df = pd.DataFrame(Result)
print(df.shape)
df = df.set_index('name')
df.head(10)

(20959, 4)


Unnamed: 0_level_0,ID,class,species
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Autophagy,R-HSA-9612973,TopLevelPathway,Homo sapiens
Chaperone Mediated Autophagy,R-HSA-9613829,Pathway,Homo sapiens
Lipophagy,R-HSA-9613354,Pathway,Homo sapiens
Lipophagy,R-MMU-9613354,Pathway,Mus musculus
Lipophagy,R-RNO-9613354,Pathway,Rattus norvegicus
Lipophagy,R-CFA-9613354,Pathway,Canis familiaris
Lipophagy,R-BTA-9613354,Pathway,Bos taurus
Lipophagy,R-SSC-9613354,Pathway,Sus scrofa
Lipophagy,R-DRE-9613354,Pathway,Danio rerio
Lipophagy,R-XTR-9613354,Pathway,Xenopus tropicalis


In [54]:
query = 'MATCH (d:Drug)\
        RETURN d.displayName, d.name, d.stId, d.dbId'
Result = []
with driver.session() as session:
    info = session.run(query)
    for item in info:
        Result.append({'dispname': item.values()[0],\
                      'name': item.values()[1],\
                      'ID': item.values()[2],\
                      'species':item.values()[3]})
                      
df = pd.DataFrame(Result)
print(df.shape)
df = df.set_index('name')
df.head(10)

(222, 4)


Unnamed: 0_level_0,ID,dispname,species
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"[trastuzumab, herceptin, D5v8, R-597]",R-ALL-9634466,trastuzumab [extracellular region],9634466
[CP-724714],R-ALL-9649889,CP-724714 [cytosol],9649889
"[Afatinib, BIBW2992, Irreversible TKI inhibitor afatinib generic inhibits EGFR and ERBB2 (HER2)]",R-ALL-1220577,Afatinib [cytosol],1220577
[AZ5104],R-ALL-9649879,AZ5104 [cytosol],9649879
[Sapitinib],R-ALL-9649894,Sapitinib [cytosol],9649894
"[Tesevatinib, EXEL-7647, XL-647, KD-019, XL647]",R-ALL-9646810,Tesevatinib [cytosol],9646810
"[Lapatinib, GW572016, Tykerb, Reversible tyrosine kinase inhibitor lapatinib generic inhibits EGFR and ERBB2 (HER2)]",R-ALL-1216521,Lapatinib [cytosol],1216521
"[Neratinib, HKI-272, afatinib, Irreversible tyrosine kinase inhibitor HKI-272 generic inhibits EGFR and ERBB2 (HER2)]",R-ALL-1216527,Neratinib [cytosol],1216527
[AEE788],R-ALL-9652364,AEE788 [cytosol],9652364
"[Canertinib, CI-1033, Irreversible TKI inhibitor canertinib generic pan-ERBB]",R-ALL-1227677,Canertinib [cytosol],1227677


#### 2. How many connected nodes for 'Drug to Disease'

In [40]:
query = 'MATCH (a:Drug)--(d:Disease)\
              RETURN a.name,d.name'

Result = []
with driver.session() as session:
    info = session.run(query)
    for item in info:
        Result.append({'drug': item.values()[0],\
                      'disease': item.values()[1]})
                      
df = pd.DataFrame(Result)
print(df.shape)
df.head()

(349, 2)


Unnamed: 0,disease,drug
0,[cancer],"[trastuzumab, herceptin, D5v8, R-597]"
1,[cancer],[CP-724714]
2,[cancer],"[Afatinib, BIBW2992, Irreversible TKI inhibito..."
3,[cancer],[AZ5104]
4,[cancer],[Sapitinib]


#### 3. How many connected nodes for 'Disease to Pathway'

In [41]:
query = 'MATCH (d:Disease)--(p:Pathway)\
              RETURN d.name,p.name LIMIT 10'

Result = []
with driver.session() as session:
    info = session.run(query)
    for item in info:
        Result.append({'disease': item.values()[0],\
                      'pathway': item.values()[1]})
                      
df = pd.DataFrame(Result)
print(df.shape)
df.head()

(10, 2)


Unnamed: 0,disease,pathway
0,[cancer],[Diseases of Base Excision Repair]
1,[cancer],[Defective Base Excision Repair Associated wit...
2,[cancer],[Defective NTHL1 substrate processing]
3,[cancer],[Defective NTHL1 substrate binding]
4,[cancer],[Defective Base Excision Repair Associated wit...


#### 4. Counting Pathways per Disease

In [42]:
query = 'MATCH (d:Disease)--(p:Pathway)\
        RETURN d.name,count(p.name),collect(p.name)'

Result = []
with driver.session() as session:
    info = session.run(query)
    for item in info:
        Result.append({'disease': item.values()[0][0],\
                      'pw-count': item.values()[1],\
                      'pw-collect':item.values()[2]})
                      
df = pd.DataFrame(Result)
print(df.shape)
df = df.set_index('disease')
df = df.sort_values(by='pw-count', ascending=False)
df.head(20)

(199, 3)


Unnamed: 0_level_0,pw-collect,pw-count
disease,Unnamed: 1_level_1,Unnamed: 2_level_1
cancer,"[[Diseases of Base Excision Repair], [Defectiv...",105
Human immunodeficiency virus infectious disease,"[[HIV Infection], [Host Interactions of HIV fa...",49
influenza,"[[Influenza Infection], [Host Interactions wit...",29
congenital disorder of glycosylation type I,"[[Defective MPDU1 causes MPDU1-CDG (CDG-1f)], ...",19
primary immunodeficiency disease,"[[Diseases of Immune System], [Diseases associ...",12
bone development disease,"[[Signaling by FGFR in disease], [Signaling by...",10
inherited metabolic disorder,[[Defective SLC16A1 causes symptomatic deficie...,9
botulism,"[[Neurotoxicity of clostridium toxins, Clostri...",8
congenital disorder of glycosylation type II,[[Defective SLC35A1 causes congenital disorder...,8
carbohydrate metabolic disorder,"[[Diseases of carbohydrate metabolism], [Pento...",8


#### 5. Counting Reaction per Pathway

In [57]:
query = 'MATCH (pw:Pathway)-[:hasEvent]->(rx:Reaction)\
         RETURN pw.name,count(rx.name),collect(rx.name)'

Result = []
with driver.session() as session:
    info = session.run(query)
    for item in info:
        Result.append({'pathway': item.values()[0][0],\
                      'rx-count': item.values()[1],\
                      'rx-collect':item.values()[2]})
                      
df = pd.DataFrame(Result)
print(df.shape)
df = df.set_index('pathway')
df = df.sort_values(by='rx-count', ascending=False)
df.head(20)

(1720, 3)


Unnamed: 0_level_0,rx-collect,rx-count
pathway,Unnamed: 1_level_1,Unnamed: 2_level_1
Interconversion of nucleotide di- and triphosphates,"[[AMP + ATP <=> ADP + ADP [AK2]], [ADP + ADP <...",421
Macroautophagy,[[ULK1 phosphorylates AMBRA1:Beclin-1 complex]...,365
Amino acid transport across the plasma membrane,[[SLC1A5-mediated exchange of glutamine and al...,363
VEGFA-VEGFR2 Pathway,"[[RHOA:GTP:Mg2+ binds ROCK1,ROCK2], [p-AXL bin...",360
Recruitment and ATM-mediated phosphorylation of repair and signaling proteins at DNA double strand breaks,"[[Formation of BRCA1-A complex at DNA DSBs], [...",356
Synthesis of PIPs at the plasma membrane,"[[MTMR8 binds MTMR9], [p-Y281,292-RUFY1 binds ...",355
Metabolism of folate and pterines,[[ALDH1L2 dehydrogenates 10-formyl-THFPG to TH...,352
Sphingolipid de novo biosynthesis,[[ceramide + H2O <=> stearate + sphingosine [e...,351
Ub-specific processing proteases,"[[USP18 deubiquitinates TAK1:TAB1], [USP8 deub...",351
Gluconeogenesis,"[[PXLP-K259-GOT1 dimer deaminates L-Asp, aspar...",323


#### Preceding Event Path

In [7]:
query = 'MATCH path = (pw:Pathway)-[:hasEvent]->(rx1:Reaction)\
        -[:precedingEvent*1..2]-(rx2:Reaction)\
        RETURN path LIMIT 1'

with driver.session() as session:
    info = session.run(query)
    for item in info:
        print(item)

<Record path=<Path start=<Node id=2 labels=frozenset({'Event', 'Pathway', 'DatabaseObject'}) properties={'schemaClass': 'Pathway', 'isInDisease': False, 'releaseDate': '2019-06-12', 'displayName': 'Chaperone Mediated Autophagy', 'stId': 'R-HSA-9613829', 'speciesName': 'Homo sapiens', 'diagramHeight': 1817, 'stIdVersion': 'R-HSA-9613829.2', 'dbId': 9613829, 'name': ['Chaperone Mediated Autophagy'], 'hasDiagram': True, 'isInferred': False, 'doi': '10.3180/R-HSA-9613829.1', 'diagramWidth': 2185}> end=<Node id=2468 labels=frozenset({'ReactionLikeEvent', 'Event', 'Reaction', 'DatabaseObject'}) properties={'schemaClass': 'Reaction', 'speciesName': 'Homo sapiens', 'isInDisease': False, 'releaseDate': '2019-06-12', 'displayName': 'p-GFAP:GFAP dissociates from LAMP2a multimer', 'stIdVersion': 'R-HSA-9626242.1', 'dbId': 9626242, 'name': ['p-GFAP:GFAP dissociates from LAMP2a multimer'], 'isChimeric': False, 'stId': 'R-HSA-9626242', 'category': 'dissociation', 'isInferred': False}> size=2>>


In [None]:
MATCH path = (pw:Pathway)-[:hasEvent]->(:Reaction)
        -[:precedingEvent*1..5]-(:Reaction)
        RETURN path LIMIT 10