In [1]:
from neo4j import GraphDatabase
import pandas as pd
import time

In [2]:
# Create a dataframe with nodes from entities.csv df_nodes:
df_nodes = pd.read_csv("../database/entities.csv")
# Change label column into a list:
df_nodes['label'] = df_nodes['label'].apply(lambda label: [label])

In [3]:
df_nodes.head()

Unnamed: 0.1,Unnamed: 0,id,name,label,mentions
0,0,1,iran-iraq war,[EVENT],5
1,1,2,the gulf war,[EVENT],8
2,2,3,gulf war,[EVENT],2
3,3,4,yom kippur war,[EVENT],1
4,4,5,september m3,[EVENT],2


In [4]:
# Create a dataframe with relationships df_rel:
df_rel = pd.read_csv("../database/relationships.csv")
df_rel

Unnamed: 0.1,Unnamed: 0,subj,subj_label,verb,obj,obj_label,subj_id,obj_id
0,1,Indonesia,GPE,make,Malaysia,GPE,98,90
1,5,Komatsu,PERSON,tell,Reuters,ORG,10142,10130
2,9,Bond,ORG,say,Cebu,GPE,11062,109
3,11,Corp,ORG,consider,Philippines,GPE,14236,108
4,12,US,GPE,demand,Japan,GPE,271,85
...,...,...,...,...,...,...,...,...
1603,4479,MULFORD,ORG,disappointed,MULFORD,ORG,14018,14018
1604,4480,Germany,GPE,stress,US,GPE,244,271
1605,4481,Inc,ORG,share,USAir,ORG,11764,11118
1606,4482,Piedmont,GPE,constitute,USAir,ORG,1121,11118


In [5]:
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, parameters=None, db=None):
        
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response

In [6]:
conn = Neo4jConnection(uri="bolt://localhost:7687", user="neo4j", pwd="NER123")

In [7]:
conn.query('CREATE CONSTRAINT UniqueEntityId ON (n:Node) ASSERT n.id IS UNIQUE')

Query failed: {code: Neo.ClientError.Schema.ConstraintWithNameAlreadyExists} {message: There already exists a constraint called 'UniqueEntityId'.}


In [8]:
def add_nodes(rows):

    query = ''' UNWIND $rows AS row
                MERGE (:Node {name: row.name, id: row.id, type: row.label})
                RETURN count(*) as total
    '''
    return insert_data(query, rows)


def add_edges(rows):
    
    
    query = """ UNWIND $rows AS row
                MATCH (src:Node {id: row.subj_id}), (tar:Node {id: row.obj_id})
                CREATE (src)-[rel:%s]->(tar)
    """ % relationship
    
    return insert_data(query, rows)

def insert_data(query, rows):

    total = 0
    start = time.time()
    result = None

    res = conn.query(query, parameters={'rows': rows.to_dict('records')})
    try:
        total = res[0]['total']
    except:
        total = 0
    result = {"total":total, "time":time.time()-start}
    print(result)

    return result

In [9]:
add_nodes(df_nodes)

{'total': 24305, 'time': 80.69682288169861}


{'total': 24305, 'time': 80.69682288169861}

In [10]:
for relationship in df_rel['relationship'].unique():
    print(relationship)
    y = df_rel[df_rel['relationship'] == relationship]
    #print(y.shape)
    add_edges(y)

make
{'total': 0, 'time': 0.267000675201416}
tell
{'total': 0, 'time': 0.6590006351470947}
say
{'total': 0, 'time': 2.38309383392334}
consider
{'total': 0, 'time': 0.05600142478942871}
demand
{'total': 0, 'time': 0.025998830795288086}
remain
{'total': 0, 'time': 0.0749976634979248}
drop
{'total': 0, 'time': 0.048002004623413086}
ask
{'total': 0, 'time': 0.26999640464782715}
revalue
{'total': 0, 'time': 0.03399801254272461}
want
{'total': 0, 'time': 0.03599858283996582}
cut
{'total': 0, 'time': 0.12900090217590332}
convince
{'total': 0, 'time': 0.06599950790405273}
persuade
{'total': 0, 'time': 0.07600855827331543}
need
{'total': 0, 'time': 0.037000417709350586}
plan
{'total': 0, 'time': 0.03600001335144043}
leave
{'total': 0, 'time': 0.09399962425231934}
lay
{'total': 0, 'time': 0.029000282287597656}
intervene
{'total': 0, 'time': 0.13501739501953125}
opening
{'total': 0, 'time': 0.036983489990234375}
meetingan
{'total': 0, 'time': 0.02599954605102539}
dispose
{'total': 0, 'time': 0.02

{'total': 0, 'time': 0.07100319862365723}
arrive
{'total': 0, 'time': 0.04501032829284668}
ltTWA
{'total': 0, 'time': 0.04499959945678711}
broadcast
{'total': 0, 'time': 0.05199885368347168}
pursue
{'total': 0, 'time': 0.07500100135803223}
consume
{'total': 0, 'time': 0.05900979042053223}
oppose
{'total': 0, 'time': 0.08256649971008301}
eliminate
{'total': 0, 'time': 0.04399991035461426}
sayif
{'total': 0, 'time': 0.04601573944091797}
win
{'total': 0, 'time': 0.08000302314758301}
agree
{'total': 0, 'time': 0.12999439239501953}
pass
{'total': 0, 'time': 0.0690007209777832}
target
{'total': 0, 'time': 0.04600095748901367}
s
{'total': 0, 'time': 0.10799932479858398}
fly
{'total': 0, 'time': 0.07399940490722656}
do
{'total': 0, 'time': 0.08701395988464355}
shell
{'total': 0, 'time': 0.04701995849609375}
resolve
{'total': 0, 'time': 0.09600138664245605}
require
{'total': 0, 'time': 0.15199756622314453}
comply
{'total': 0, 'time': 0.07400083541870117}
inform
{'total': 0, 'time': 0.0669996738

{'total': 0, 'time': 0.04803061485290527}
REPEAL
{'total': 0, 'time': 0.0420229434967041}
get
{'total': 0, 'time': 0.08099842071533203}
ltHBJ
{'total': 0, 'time': 0.05196380615234375}
head
{'total': 0, 'time': 0.0429987907409668}
put
{'total': 0, 'time': 0.04413032531738281}
cooperate
{'total': 0, 'time': 0.04300093650817871}
attack
{'total': 0, 'time': 0.044065237045288086}
ltBCFsue
{'total': 0, 'time': 0.04602980613708496}
resume
{'total': 0, 'time': 0.04898476600646973}
sayIraq
{'total': 0, 'time': 0.04701709747314453}
place
{'total': 0, 'time': 0.04498910903930664}
ltAG
{'total': 0, 'time': 0.045995473861694336}
pump
{'total': 0, 'time': 0.045003414154052734}
earning
{'total': 0, 'time': 0.043031930923461914}
instal
{'total': 0, 'time': 0.04796266555786133}
respond
{'total': 0, 'time': 0.08700084686279297}
choke
{'total': 0, 'time': 0.05601906776428223}
approach
{'total': 0, 'time': 0.044985294342041016}
seem
{'total': 0, 'time': 0.0480036735534668}
complete
{'total': 0, 'time': 0.

In [11]:
# Update node labels based on node_NER list - run the the neo4j  terminal
'''
MATCH (n:Node) 
CALL apoc.create.addLabels(n, n.type) 
YIELD node 
RETURN node
'''

'\nMATCH (n:Node) \nCALL apoc.create.addLabels(n, n.type) \nYIELD node \nRETURN node\n'