In [1]:
from neo4j import GraphDatabase
import pandas as pd

import time

In [14]:
df_nodes = pd.read_csv("reuters_nodes - Sheet1.csv")
df_nodes['node_NER'] = df_nodes['node_NER'].apply(lambda node_NER: [node_NER])

In [3]:
df_nodes

Unnamed: 0,node_id,node_name,node_NER
0,1,Japan,[LOCATION]
1,2,Sumita,[PERSON]
2,3,Paris,[LOCATION]
3,4,USA,[LOCATION]
4,5,Tokyo,[LOCATION]


In [4]:
df_rel = pd.read_csv("reuters_rel - Sheet1.csv")
df_rel

Unnamed: 0,Source,Target,Relationship
0,2,1,WAS_BORN
1,2,5,WAS_BORN
2,2,3,LIVED_IN
3,2,4,STUDIED_IN
4,5,1,IS_CAPITAL


In [5]:
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, parameters=None, db=None):
        
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response

In [6]:
conn = Neo4jConnection(uri="bolt://localhost:7687", user="neo4j", pwd="NER123")

In [7]:
conn.query('CREATE CONSTRAINT UniqueEntityId ON (e:Entity) ASSERT e.id IS UNIQUE')

[]

In [8]:
def add_nodes(rows):

    query = ''' UNWIND $rows AS row
                MERGE (:Node {name: row.node_name, id: row.node_id, type: row.node_NER})
                RETURN count(*) as total
    '''
    return insert_data(query, rows)


def add_edges(rows):
    
    
    query = """ UNWIND $rows AS row
                MATCH (src:Node {id: row.Source}), (tar:Node {id: row.Target})
                CREATE (src)-[rel:%s]->(tar)
    """ % relationship
    
    return insert_data(query, rows)

def insert_data(query, rows):

    total = 0
    start = time.time()
    result = None

    res = conn.query(query, parameters={'rows': rows.to_dict('records')})
    try:
        total = res[0]['total']
    except:
        total = 0
    result = {"total":total, "time":time.time()-start}
    print(result)

    return result

In [9]:
add_nodes(df_nodes)

{'total': 5, 'time': 0.13599920272827148}


{'total': 5, 'time': 0.13599920272827148}

In [10]:
for relationship in df_rel['Relationship'].unique():
    print(relationship)
    y = df_rel[df_rel['Relationship'] == relationship]
    #print(y.shape)
    add_edges(y)

WAS_BORN
{'total': 0, 'time': 0.09499979019165039}
LIVED_IN
{'total': 0, 'time': 0.050998687744140625}
STUDIED_IN
{'total': 0, 'time': 0.04900169372558594}
IS_CAPITAL
{'total': 0, 'time': 0.05299997329711914}


In [12]:
# Update node labels based on node_NER list - run the the neo4j  terminal
'''
MATCH (n:Node) 
CALL apoc.create.addLabels(n, n.type) 
YIELD node 
RETURN node
'''

'\nMATCH (n:Node) \nCALL apoc.create.addLabels(n, n.type) \nYIELD node \nRETURN node\n'