In [2]:
import getpass
import os
import time

import pandas as pd
from neo4j import GraphDatabase

In [9]:
# Load the entity nodes from entities.csv and wrap every label in a
# one-element list, so the label column holds lists instead of bare values.
df_nodes = pd.read_csv("entities.csv").assign(
    label=lambda frame: frame['label'].map(lambda value: [value])
)

In [10]:
# Preview the first rows; the label column should now show one-element lists.
df_nodes.head()

Unnamed: 0.1,Unnamed: 0,id,name,label,mentions
0,0,1,iran-iraq war,[EVENT],5
1,1,2,the gulf war,[EVENT],8
2,2,3,gulf war,[EVENT],2
3,3,4,yom kippur war,[EVENT],1
4,4,5,september m3,[EVENT],2


In [None]:
# Create a dataframe with relationships, df_rel.
# The edge-loading code in this notebook reads its Source, Target and
# Relationship columns.
RELATIONSHIPS_CSV = ""  # TODO: set to the path of the relationships CSV
if not RELATIONSHIPS_CSV:
    # Fail with an actionable message instead of read_csv's error about ''.
    raise ValueError("RELATIONSHIPS_CSV is empty: set it to the relationships CSV path")
df_rel = pd.read_csv(RELATIONSHIPS_CSV)
# Show a preview only; the full frame would flood the cell output.
df_rel.head()

In [11]:
class Neo4jConnection:
    """Thin convenience wrapper around a Neo4j driver.

    The driver is opened when the object is constructed. If that fails, the
    error is printed and the instance is left without a driver.
    """

    def __init__(self, uri, user, pwd):
        """Remember the connection settings and try to open a driver."""
        self.__uri, self.__user, self.__pwd = uri, user, pwd
        self.__driver = None

        try:
            credentials = (self.__user, self.__pwd)
            self.__driver = GraphDatabase.driver(self.__uri, auth=credentials)
        except Exception as err:
            print("Failed to create the driver:", err)

    def close(self):
        """Close the driver if one was opened; otherwise do nothing."""
        if self.__driver is None:
            return
        self.__driver.close()

    def query(self, query, parameters=None, db=None):
        """Run ``query`` in a fresh session and return its records as a list.

        ``parameters`` is handed to ``session.run`` unchanged. ``db`` selects
        the database for the session; when it is None the driver's default
        session is used. Any exception raised while opening the session or
        running the query is printed and None is returned instead. The
        session is closed in every case.
        """
        assert self.__driver is not None, "Driver not initialized!"

        session = None
        records = None
        try:
            if db is None:
                session = self.__driver.session()
            else:
                session = self.__driver.session(database=db)
            records = list(session.run(query, parameters))
        except Exception as err:
            print("Query failed:", err)
        finally:
            if session is not None:
                session.close()
        return records

In [12]:
# Connection settings come from the environment so that the password is not
# stored in the notebook. NEO4J_URI and NEO4J_USER fall back to the local
# defaults; the password is prompted for when NEO4J_PASSWORD is not set.
conn = Neo4jConnection(
    uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    user=os.environ.get("NEO4J_USER", "neo4j"),
    pwd=os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: "),
)

In [13]:
# Uniqueness constraint on node ids. It must target the :Node label, because
# that is the label add_nodes writes and add_edges matches on; a constraint on
# a different label would never apply to the loaded data.
# NOTE(review): `ON ... ASSERT` is the Neo4j 4.x constraint syntax; Neo4j 5
# expects `FOR ... REQUIRE` - confirm against the server version in use.
conn.query('CREATE CONSTRAINT UniqueEntityId ON (n:Node) ASSERT n.id IS UNIQUE')

[]

In [14]:
def add_nodes(rows):
    """Upsert one :Node per row of ``rows``.

    ``rows`` is passed to ``insert_data``, which converts it to a list of
    records; each record must provide ``id``, ``name`` and ``label``.

    Nodes are matched on ``id`` alone and ``name``/``type`` are then set, so
    running the load again updates existing nodes. Matching on every property
    would create a second node with the same id whenever a name or label
    changed between runs.

    Returns whatever ``insert_data`` returns for the query.
    """
    query = ''' UNWIND $rows AS row
                MERGE (n:Node {id: row.id})
                SET n.name = row.name, n.type = row.label
                RETURN count(*) as total
    '''
    return insert_data(query, rows)


def add_edges(rows, relationship=None):
    """Create one relationship per row of ``rows`` between existing :Node nodes.

    Parameters
    ----------
    rows :
        Table of edges, passed on to ``insert_data``. Each record must provide
        ``Source`` and ``Target``, which are matched against ``Node.id``.
    relationship : optional
        Relationship type to create. When omitted it is taken from the
        ``Relationship`` column of ``rows``, which must then hold exactly one
        distinct value.

    Returns
    -------
    The summary returned by ``insert_data``; its total is the number of
    relationships created.

    Raises
    ------
    ValueError
        If ``relationship`` is omitted and ``rows`` mixes several types.
    """
    if relationship is None:
        # Derive the type from the data instead of relying on a global
        # variable that happens to be set by the calling loop.
        rel_types = rows['Relationship'].unique()
        if len(rel_types) == 0:
            # Nothing to insert (e.g. an empty slice): skip the round trip.
            result = {"total": 0, "time": 0.0}
            print(result)
            return result
        if len(rel_types) > 1:
            raise ValueError(
                "rows mixes several relationship types; pass relationship= "
                "or filter rows to a single type first"
            )
        relationship = rel_types[0]

    # A relationship type cannot be supplied as a Cypher parameter, so it has
    # to be written into the query text. Backtick-quote it (doubling embedded
    # backticks) so names with spaces or punctuation stay a single identifier.
    rel_type = "`%s`" % str(relationship).replace("`", "``")

    query = """ UNWIND $rows AS row
                MATCH (src:Node {id: row.Source}), (tar:Node {id: row.Target})
                CREATE (src)-[rel:%s]->(tar)
                RETURN count(*) AS total
    """ % rel_type

    return insert_data(query, rows)

def insert_data(query, rows):
    """Run ``query`` with ``rows`` as the ``$rows`` parameter and report the outcome.

    ``rows`` is converted with ``rows.to_dict('records')`` and sent through
    the module-level ``conn``. The ``total`` field of the first returned
    record is used as the row count; it falls back to 0 when the query
    failed (``conn.query`` returned None), returned no records, or the first
    record has no ``total`` field.

    Prints and returns ``{"total": <count>, "time": <elapsed seconds>}``.
    """
    start = time.time()

    res = conn.query(query, parameters={'rows': rows.to_dict('records')})
    try:
        total = res[0]['total']
    except (TypeError, IndexError, KeyError):
        # TypeError: res is None; IndexError: no records; KeyError: no 'total'.
        # Only these are swallowed, so interrupts and real bugs still surface.
        total = 0

    result = {"total": total, "time": time.time() - start}
    print(result)

    return result

In [15]:
# Load all rows of df_nodes into Neo4j. The summary dict appears twice below:
# once printed by insert_data and once as the cell's return value.
add_nodes(df_nodes)

{'total': 24305, 'time': 74.41509103775024}


{'total': 24305, 'time': 74.41509103775024}

In [None]:
# Insert the edges one relationship type at a time: each call to add_edges
# builds a Cypher statement for a single relationship type.
for relationship in df_rel['Relationship'].unique():
    print(relationship)
    same_type = df_rel['Relationship'] == relationship
    add_edges(df_rel[same_type])

In [16]:
# Add the entries of each node's `type` list as Neo4j labels (the node_NER
# labels). Run this in the Neo4j terminal; it is kept here as a string only
# for reference and is not executed by the notebook.
# NOTE(review): apoc.create.addLabels presumably needs the APOC plugin
# installed on the server - confirm.
'''
MATCH (n:Node) 
CALL apoc.create.addLabels(n, n.type) 
YIELD node 
RETURN node
'''

'\nMATCH (n:Node) \nCALL apoc.create.addLabels(n, n.type) \nYIELD node \nRETURN node\n'