### Requirements

In [None]:
# Install necessary libraries
!pip install py2neo
!pip install py2neo pandas
!pip install neo4jupyter
!pip install neo4j-driver
!pip install neo4j
!pip install apoc

In [2]:
# import necessary libraries
import getpass
import json
import os

import pandas as pd
from neo4j import GraphDatabase
from py2neo import Graph, Node, Relationship, Subgraph, NodeMatcher

# Import JSON data

In [7]:
# Significant Adverse Drug Reactions
# Each line of the file is parsed as its own JSON document.
with open('/content/drive/MyDrive/Colab Notebooks/Gradvek/significantAdverseDrugReactions.json', 'r') as sadr_file:
    sadr_data = list(map(json.loads, sadr_file))

# Build a DataFrame from the parsed records and display it as the cell output.
sadr_df = pd.DataFrame(sadr_data)
sadr_df

Unnamed: 0,chembl_id,event,llr
0,CHEMBL3,palpitations,42.828752
1,CHEMBL3,loss of consciousness,39.344885
2,CHEMBL3,premature baby,311.242716
3,CHEMBL3,application site erythema,150.585750
4,CHEMBL3,nicotine dependence,1107.305764
...,...,...,...
112923,CHEMBL4297852,eye swelling,78.732787
112924,CHEMBL4297879,lung infection,55.851689
112925,CHEMBL4297953,haemorrhage,71.678253
112926,CHEMBL4650379,mental status changes,32.001566


In [8]:
# Known Drugs Aggregated
# Each line of the file is parsed as its own JSON document.
with open('/content/drive/MyDrive/Colab Notebooks/Gradvek/knownDrugsAggregated.json', 'r') as kda_file:
    kda_data = list(map(json.loads, kda_file))

# Build a DataFrame from the parsed records and display it as the cell output.
kda_df = pd.DataFrame(kda_data)
kda_df

Unnamed: 0,drugId,targetId,targetName
0,CHEMBL409,ENSG00000169083,Androgen Receptor
1,CHEMBL409,ENSG00000169083,Androgen Receptor
2,CHEMBL409,ENSG00000169083,Androgen Receptor
3,CHEMBL409,ENSG00000169083,Androgen Receptor
4,CHEMBL409,ENSG00000169083,Androgen Receptor
...,...,...,...
259596,CHEMBL2107358,ENSG00000258643,Apoptosis regulator Bcl-W
259597,CHEMBL2107358,ENSG00000258643,Apoptosis regulator Bcl-W
259598,CHEMBL2107358,ENSG00000258643,Apoptosis regulator Bcl-W
259599,CHEMBL2107358,ENSG00000258643,Apoptosis regulator Bcl-W


In [9]:
# Mechanism Of Action
# Each line of the file is parsed as its own JSON document.
with open('/content/drive/MyDrive/Colab Notebooks/Gradvek/mechanismOfAction.json', 'r') as moa_file:
    moa_data = list(map(json.loads, moa_file))

# Build a DataFrame from the parsed records and display it as the cell output.
moa_df = pd.DataFrame(moa_data)
moa_df

Unnamed: 0,mechanismOfAction,chemblIds,references,actionType,targetName,targetType,targets
0,Unknown,{'list': [{'element': 'CHEMBL125'}]},"{'list': [{'element': {'source': 'PubMed', 'id...",,,,
1,Unknown,{'list': [{'element': 'CHEMBL2110816'}]},"{'list': [{'element': {'source': 'PubMed', 'id...",,,,
2,Unknown,"{'list': [{'element': 'CHEMBL975'}, {'element'...","{'list': [{'element': {'source': 'FDA', 'ids':...",,,,
3,Unknown,{'list': [{'element': 'CHEMBL1200922'}]},"{'list': [{'element': {'source': 'PubMed', 'id...",,,,
4,Unknown,{'list': [{'element': 'CHEMBL2106915'}]},"{'list': [{'element': {'source': 'PubMed', 'id...",,,,
...,...,...,...,...,...,...,...
6605,GABA A receptor alpha-1/beta-1/gamma-2 positiv...,{'list': [{'element': 'CHEMBL1521'}]},"{'list': [{'element': {'source': 'Wikipedia', ...",POSITIVE ALLOSTERIC MODULATOR,GABA A receptor alpha-1/beta-1/gamma-2,protein complex,"{'list': [{'element': 'ENSG00000022355'}, {'el..."
6606,GABA A receptor alpha-1/beta-1/gamma-2 positiv...,"{'list': [{'element': 'CHEMBL3989820'}, {'elem...","{'list': [{'element': {'source': 'FDA', 'ids':...",POSITIVE ALLOSTERIC MODULATOR,GABA A receptor alpha-1/beta-1/gamma-2,protein complex,"{'list': [{'element': 'ENSG00000022355'}, {'el..."
6607,"Carbamoyl-phosphate synthase [ammonia], mitoch...",{'list': [{'element': 'CHEMBL1201780'}]},"{'list': [{'element': {'source': 'DailyMed', '...",POSITIVE ALLOSTERIC MODULATOR,"Carbamoyl-phosphate synthase [ammonia], mitoch...",single protein,{'list': [{'element': 'ENSG00000021826'}]}
6608,Neuronal acetylcholine receptor protein alpha-...,{'list': [{'element': 'CHEMBL429317'}]},"{'list': [{'element': {'source': 'Other', 'ids...",POSITIVE ALLOSTERIC MODULATOR,Neuronal acetylcholine receptor protein alpha-...,single protein,{'list': [{'element': 'ENSG00000175344'}]}


In [10]:
# Targets
# Each line of the file is parsed as its own JSON document.
with open('/content/drive/MyDrive/Colab Notebooks/Gradvek/targets.json', 'r') as targets_file:
    target_data = list(map(json.loads, targets_file))

# Build a DataFrame from the parsed records and display it as the cell output.
target_df = pd.DataFrame(target_data)
target_df

Unnamed: 0,id,approvedSymbol,pathways
0,ENSG00000279061,ENSG00000279061,
1,ENSG00000279002,ENSG00000279002,
2,ENSG00000280177,ENSG00000280177,
3,ENSG00000278875,ENSG00000278875,
4,ENSG00000279326,ENSG00000279326,
...,...,...,...
62673,ENSG00000283453,PRIM2BP,
62674,ENSG00000214329,SLC9B1P2,
62675,ENSG00000285760,ENSG00000285760,
62676,ENSG00000170827,CELP,


# Neo4J connection

In [12]:
# Go to Neo4J Sandbox https://neo4j.com/sandbox/, click "Launch the Free Sandbox",
# login (create account or login using google),
# select "Blank Sandbox", click "Create"
# once created click on the sandbox, go to connection details, and supply the
# uri, user and password through the environment variables read below.

# Set up Neo4j graph database
# The password is never stored in the notebook: it is taken from NEO4J_PASSWORD
# or, when that is unset or empty, prompted for without echoing it.
# NEO4J_URI and NEO4J_USER override the defaults below; sandbox addresses
# change per sandbox, so set NEO4J_URI to the one shown in the connection details.
uri = os.environ.get("NEO4J_URI", "bolt://3.237.252.156:7687")
user = os.environ.get("NEO4J_USER", "neo4j")
password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass(f"Neo4j password for {user}@{uri}: ")

# Two clients are opened against the same database: a py2neo Graph and the
# official driver, whose sessions are used to run the Cypher load queries.
graph = Graph(uri, auth=(user, password))
driver = GraphDatabase.driver(uri, auth=(user, password))

# Using Cypher Queries

### Creating the nodes and edges for the four files above takes 3-4 hours with plain Cypher queries.
### For Gradvek, we will use the APOC library for Cypher queries and indexing, which should take 5-10 minutes to create the nodes and edges.


### Creating nodes and relationships in Neo4J using Cypher queries

In [None]:
# Cypher statements that load each dataset into the graph. Every statement
# takes its rows as a single list parameter and UNWINDs it, so one call loads
# a whole batch. Nodes and relationships are created with MERGE, so loading
# the same rows again does not duplicate them.

# significantAdverseDrugReactions.json -> (:Drug)-[:CAUSES]->(:Event)
adverse_events_query = '''
UNWIND $adverse_events AS ae
MERGE (drug:Drug {chembl_id: ae.chembl_id})
MERGE (event:Event {name: ae.event})
MERGE (drug)-[:CAUSES]->(event)
'''

# knownDrugsAggregated.json -> (:Drug)-[:TARGETS]->(:Target)
# Targets are merged on `id`, the same key the other two Target-producing
# queries use; merging on targetName created a second, id-less node for the
# same target. The name is still stored as a property.
known_drugs_query = '''
UNWIND $known_drugs AS kd
MERGE (drug:Drug {chembl_id: kd.drugId})
MERGE (target:Target {id: kd.targetId})
SET target.targetName = kd.targetName
MERGE (drug)-[:TARGETS]->(target)
'''

# mechanismOfAction.json -> (:Compound)-[:ACTS_ON]->(:Target)
#                           (:Compound)-[:HAS_MECHANISM]->(:Action)
# * Rows without `targets` produce nothing (UNWIND of null yields no rows).
# * The target is MERGEd rather than MATCHed so the statement does not depend
#   on targets.json having been loaded first.
# * MERGE rejects null property values, so the Action part is wrapped in a
#   FOREACH over a zero-or-one element list to skip rows without actionType.
# NOTE(review): these nodes are labelled Compound while the other queries use
# Drug for the same ChEMBL ids - confirm whether the labels should be unified.
mechanisms_of_action_query = '''
UNWIND $mechanisms_of_action AS moa
UNWIND moa.targets.list AS target
WITH moa, target
WHERE target.element IS NOT NULL
MERGE (t:Target {id: target.element})
FOREACH (chembl_id IN moa.chemblIds.list |
  MERGE (compound:Compound {chembl_id: chembl_id.element})
  MERGE (compound)-[:ACTS_ON]->(t)
  FOREACH (action_type IN CASE WHEN moa.actionType IS NULL THEN [] ELSE [moa.actionType] END |
    MERGE (action:Action {type: action_type})
    MERGE (compound)-[:HAS_MECHANISM]->(action)
  )
)
'''

# targets.json -> (:Pathway)-[:INVOLVES]->(:Target)
#                 (:Pathway)-[:TOP_LEVEL_TERM]->(:TopLevelTerm)
# The TopLevelTerm node is MERGEd on its own before the relationship: MERGE on
# a pattern containing an unbound node creates that node again whenever the
# whole pattern is missing, which produced one duplicate term per pathway.
targets_query = '''
UNWIND $targets AS t
MERGE (target:Target {id: t.id})
SET target.approvedSymbol = t.approvedSymbol
FOREACH (p IN t.pathways.list |
  MERGE (pathway:Pathway {pathwayId: p.element.pathwayId})
  MERGE (tlt:TopLevelTerm {name: p.element.topLevelTerm})
  MERGE (pathway)-[:INVOLVES]->(target)
  MERGE (pathway)-[:TOP_LEVEL_TERM]->(tlt)
)
'''

In [None]:
# Running the Cypher queries to insert data into the Neo4j database
# (the original un-indexed, single-transaction version took 3 hrs and 30 mins).

# Rows sent per transaction; keeps each transaction and its parameter payload bounded.
LOAD_BATCH_SIZE = 10_000

# One index per (label, property) that the load queries MERGE or MATCH on.
# Without them every MERGE has to scan all nodes carrying the label.
index_statements = [
    "CREATE INDEX IF NOT EXISTS FOR (n:Drug) ON (n.chembl_id)",
    "CREATE INDEX IF NOT EXISTS FOR (n:Compound) ON (n.chembl_id)",
    "CREATE INDEX IF NOT EXISTS FOR (n:Event) ON (n.name)",
    "CREATE INDEX IF NOT EXISTS FOR (n:Target) ON (n.id)",
    "CREATE INDEX IF NOT EXISTS FOR (n:Target) ON (n.targetName)",
    "CREATE INDEX IF NOT EXISTS FOR (n:Action) ON (n.type)",
    "CREATE INDEX IF NOT EXISTS FOR (n:Pathway) ON (n.pathwayId)",
    "CREATE INDEX IF NOT EXISTS FOR (n:TopLevelTerm) ON (n.name)",
]

# (query, name of its list parameter, rows). Targets are loaded first so that
# Target nodes keyed by id already exist when the mechanism-of-action rows,
# which look targets up by id, are loaded.
load_jobs = [
    (targets_query, "targets", target_data),
    (adverse_events_query, "adverse_events", sadr_data),
    (known_drugs_query, "known_drugs", kda_data),
    (mechanisms_of_action_query, "mechanisms_of_action", moa_data),
]

with driver.session() as session:
    for statement in index_statements:
        session.run(statement).consume()
    # Wait until the new indexes are online before the MERGE-heavy load starts.
    session.run("CALL db.awaitIndexes()").consume()

    for query, param_name, rows in load_jobs:
        for start in range(0, len(rows), LOAD_BATCH_SIZE):
            batch = rows[start:start + LOAD_BATCH_SIZE]
            # consume() waits for the batch to finish so errors surface here.
            session.run(query, {param_name: batch}).consume()


In [None]:
# Closing the connection to Neo4j database
# Run this only after all load cells have finished; re-run the connection cell
# to create a new `driver` before executing further queries.
driver.close()