In [1]:
from neo4j import GraphDatabase
import os
import pandas as pd
import time
import json
print("Import successful")

Import successful


In [2]:
URI = os.environ["NEO4J_URI"]
USER=os.environ["NEO4J_USER_NAME"]
PASSWORD=os.environ["NEO4J_PASSWD"]
AUTH = (os.environ["NEO4J_USER_NAME"], os.environ["NEO4J_PASSWD"])

In [3]:
#Neo4J connect and Query Boilerplate

class Neo4jConnection:
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
            
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
    def query(self, query, parameters=None, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            #response = (session.run(query, parameters))
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        
        #return pd.DataFrame([r.values() for r in response], columns=response.keys())
        return response
        
    def multi_query(self, multi_line_query, parameters=None, db=None):
        for li in multi_line_query.splitlines():
                print(li)
                result=self.query(li, parameters=None, db=None)
                print(result)

In [4]:
#Make a default connection and it should return `[<Record count(n)=0>]`
conn = Neo4jConnection(uri=URI, 
                       user=USER,
                       pwd=PASSWORD)

#if db is empty, then seed with init values 
res=conn.query('MATCH (n) RETURN count(n)')
print(res)

[<Record count(n)=13774>]


In [None]:
# NOTE: should be executed only once per database; Alternatively, you can also copy paste this query into neo4j browser
constraint = [
"CREATE CONSTRAINT car_id FOR (carcinogen:Carcinogen) REQUIRE carcinogen.itemID IS UNIQUE;",
"CALL db.awaitIndexes();"]

for cquery in constraint:
    try:
        res = conn.query(cquery)
        print(f"Executed successfully: {cquery}")
    except Exception as e:
        print(f"Error executing query: {cquery}\n{e}")

In [None]:
# ----------------- PAH Relationships ---------------------
load_relationships1 = """CALL apoc.periodic.iterate(
    "CALL apoc.load.csv('/CarcinogenCausal/rel_pah_mayProduce.csv') YIELD map AS row",
    "MATCH (ing:Ingredient {itemID: row.ing_id})
     MATCH (ca:CookingMethod {itemID: row.cooking_method_id})
     MATCH (car:Carcinogen {itemID: row.carcinogen_id})
     MERGE (ing)-[:ifUndergoes]->(ca)
     MERGE (ca)-[:mayProduce]->(car)",
    {batchSize:10000, iterateList:true, parallel:true}
)"""

load_relationships2 = """CALL apoc.periodic.iterate(
    "CALL apoc.load.csv('/CarcinogenCausal/rel_pah_mayProduced.csv') YIELD map AS row",
    "MATCH (ing:Ingredient {itemID: row.ing_id})
     MATCH (ca:CookingMethod {itemID: row.cooking_method_id})
     MATCH (car:Carcinogen {itemID: row.carcinogen_id})
     MERGE (ing)-[:hasUndergone]->(ca)
     MERGE (ca)-[:mayProduced]->(car)",
    {batchSize:10000, iterateList:true, parallel:true}
)"""

In [6]:
result = conn.query(load_relationships1)
print("Relationships mayProduce loaded. Time taken:" + str(result[0][2]) + ' seconds. Committed Operations: ' + str(result[0][3]) + '. Failed Operations:' + str(result[0][4]))

result = conn.query(load_relationships1)
print("Relationships mayProduced loaded. Time taken:" + str(result[0][2]) + ' seconds. Committed Operations: ' + str(result[0][3]) + '. Failed Operations:' + str(result[0][4]))


Relationships mayProduce loaded. Time taken:1 seconds. Committed Operations: 35720. Failed Operations:0
Relationships mayProduced loaded. Time taken:0 seconds. Committed Operations: 35720. Failed Operations:0


In [None]:
# ----------------- HCA Relationships ---------------------
load_relationships1 = """CALL apoc.periodic.iterate(
    "CALL apoc.load.csv('/CarcinogenCausal/rel_hca_mayProduce.csv') YIELD map AS row",
    "MATCH (ing:Ingredient {itemID: row.ing_id})
     MATCH (ca:CookingMethod {itemID: row.cooking_method_id})
     MATCH (car:Carcinogen {itemID: row.carcinogen_id})
     MERGE (ing)-[:ifUndergoes]->(ca)
     MERGE (ca)-[:mayProduce]->(car)",
    {batchSize:10000, iterateList:true, parallel:true}
)"""

load_relationships2 = """CALL apoc.periodic.iterate(
    "CALL apoc.load.csv('/CarcinogenCausal/rel_hca_mayProduced.csv') YIELD map AS row",
    "MATCH (ing:Ingredient {itemID: row.ing_id})
     MATCH (ca:CookingMethod {itemID: row.cooking_method_id})
     MATCH (car:Carcinogen {itemID: row.carcinogen_id})
     MERGE (ing)-[:hasUndergone]->(ca)
     MERGE (ca)-[:mayProduced]->(car)",
    {batchSize:10000, iterateList:true, parallel:true}
)"""

In [9]:
result = conn.query(load_relationships1)
print("Relationships mayProduce loaded. Time taken:" + str(result[0][2]) + ' seconds. Committed Operations: ' + str(result[0][3]) + '. Failed Operations:' + str(result[0][4]))

result = conn.query(load_relationships1)
print("Relationships mayProduced loaded. Time taken:" + str(result[0][2]) + ' seconds. Committed Operations: ' + str(result[0][3]) + '. Failed Operations:' + str(result[0][4]))


Relationships mayProduce loaded. Time taken:0 seconds. Committed Operations: 46436. Failed Operations:0
Relationships mayProduced loaded. Time taken:0 seconds. Committed Operations: 46436. Failed Operations:0
