In [None]:
import xml.etree.ElementTree as ET

# Parse the XML file
tree = ET.parse('docs/76437_Markets_dragged_down_by_credit_crisis.xml')

# Get the root element
root = tree.getroot()

# Print the tag and attributes of the root element
print(root.tag)
print(root.attrib)

# Iterate over the child elements of the root element
for child in root:
    print(child.tag, child.attrib, child.text)


In [1]:
from neo4j import GraphDatabase


In [2]:
uri = "neo4j+s://14c06ac2.databases.neo4j.io"
pwd = "ySqXDlOHgSJpamhJtvyFNXwgQxSzNhyz1CxdD2MeXR4"

driver = GraphDatabase.driver(uri, auth=("neo4j", pwd))

In [3]:
query = "MATCH (n) RETURN COUNT(n)"

In [4]:
with driver.session() as session:
    result = session.run(query)
    print(result.single()["COUNT(n)"])

0


In [5]:
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, parameters=None, db=None):
        
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response

In [6]:
uri = "neo4j+s://14c06ac2.databases.neo4j.io"
pwd = "ySqXDlOHgSJpamhJtvyFNXwgQxSzNhyz1CxdD2MeXR4"
user = 'neo4j'
conn = Neo4jConnection(uri, user, pwd)

In [7]:
result = conn.query('MATCH (n) RETURN COUNT(n) AS ct')
print(result)

[<Record ct=0>]


In [10]:
conn.query('CREATE CONSTRAINT documents IF NOT EXISTS FOR (u:Document) REQUIRE u.id IS UNIQUE')
conn.query('CREATE CONSTRAINT tokens IF NOT EXISTS FOR (t:Token) REQUIRE t.tid IS UNIQUE')
conn.query('CREATE CONSTRAINT entityMentions IF NOT EXISTS FOR (t:EntityMention) REQUIRE t.mid IS UNIQUE')
conn.query('CREATE CONSTRAINT eventMentions IF NOT EXISTS FOR (t:EventMention) REQUIRE t.mid IS UNIQUE')
conn.query('CREATE CONSTRAINT events IF NOT EXISTS FOR (t:Event) REQUIRE t.mid IS UNIQUE')
conn.query('CREATE CONSTRAINT entities IF NOT EXISTS FOR (t:Entity) REQUIRE t.mid IS UNIQUE')
conn.query('CREATE CONSTRAINT values IF NOT EXISTS FOR (t:Value) REQUIRE t.mid IS UNIQUE')
conn.query('CREATE CONSTRAINT timexes IF NOT EXISTS FOR (t:Timex) REQUIRE t.mid IS UNIQUE')
conn.query('CREATE CONSTRAINT participants IF NOT EXISTS FOR (t:Participant) REQUIRE t.id IS UNIQUE')

[]

In [11]:
result = conn.query('MATCH (n) RETURN n')
print(result)

[]


In [12]:
for record in result:
    print(record['doc_name'])

In [13]:
def replace_none_with(d, replacement=0):
    retval = {}
    for key, val in d.items():
        if val is None or val is '':
            retval[key] = replacement
        elif isinstance(val, dict):
            retval[key] = replace_none_with(val, replacement)
        else:
            retval[key] = val
    return retval

  if val is None or val is '':


In [15]:
from curses import pair_content
from webbrowser import get
import xml.etree.ElementTree as ET

# Parse the XML file
#tree = ET.parse('docs/76437_Markets_dragged_down_by_credit_crisis.xml')
tree = ET.parse('docs/180695_US_stocks_see_9%_drop_before_making_recovery.xml')

# Get the root element
root = tree.getroot()

# Print the tag and attributes of the root element
print(root.tag)
print(root.attrib)
print(root.attrib['doc_id'])

params = { "doc_id": root.attrib['doc_id'],
"doc_name": root.attrib['doc_name'],
"lang":  root.attrib['lang'],
"url": root.attrib['url']}

# store document node
query = """ 
        merge (:Document {id: $doc_id, doc_name: $doc_name
        , lang: $lang, url: $url})
        return count(*) as total
        """
print (conn.query(query, params))

queryForToken = """ 
                merge (:Token {tid: $id, number: $number, 
                sentence: $sentence, value: $value})
                return count(*) as total
                """

queryForToken2 = """ 
                unwind $tokens as token
                merge (:Token {tid: token.tid, number: token.number, 
                sentence: token.sentence, value: token.value})
                return count(*) as total
                """



queryForEntityMention = """ 
                unwind $entityMentions as entityMention
                merge (e:EntityMention {mid: entityMention.id, head: entityMention.head,
                syntacticType: entityMention.syntacticType})
                with entityMention.anchors as anchors, entityMention.id as id, e
                unwind anchors as anchor
                match(t:Token) where t.tid=anchor
                merge (t)-[:PARTICIPATES_IN]->(e)   
                """



queryForEventMention = """ 
                unwind $eventMentions as eventMention
                merge (e:EventMention {mid: eventMention.id, aspect: eventMention.aspect,
                certainty: eventMention.certainty, comment: eventMention.comment, 
                modality: eventMention.modality, polarity: eventMention.polarity,
                pos: eventMention.pos, pred: eventMention.pred, specialCases: eventMention.specialCases,
                tense: eventMention.tense, time: eventMention.time})
                with eventMention.anchors as anchors, eventMention.id as id, e
                unwind anchors as anchor
                match(t:Token) where t.tid=anchor
                merge (t)-[:PARTICIPATES_IN]->(e)   
                """
# Iterate over the child elements of the root element


queryForSignal = """ 
                unwind $signals as signal
                merge (e:Signal {mid: signal.id, comment: signal.comment,
                })
                with signal.anchors as anchors, signal.id as id, e
                unwind anchors as anchor 
                match(t:Token) where t.tid=anchor
                merge(t)-[:PARTICIPATES_IN]->(e)

                """


queryForTimex = """ 
                unwind $timexes as timex
                merge (e:Timex {mid: timex.id, comment: timex.comment, 
                anchorTimeID: timex.anchorTimeID, beginPoint: timex.beginPoint, 
                endPoint: timex.endPoint, functionInDocument: timex.functionInDocument,
                type: timex.type, value: timex.value
                })
                with timex.anchors as anchors, timex.id as id, e
                unwind anchors as anchor 
                match(t:Token) where t.tid=anchor
                merge(t)-[:PARTICIPATES_IN]->(e)

                """

queryForValue = """ 
                unwind $values as value
                merge (e:Value {mid: value.id, comment: value.comment, 
                type: value.type
                })
                with value.anchors as anchors, value.id as id, e
                unwind anchors as anchor 
                match(t:Token) where t.tid=anchor
                merge(t)-[:PARTICIPATES_IN]->(e)

                """

queryForEvent = """ 
                unwind $events as event
                merge (e:Event {mid: event.id, comment: event.comment, 
                tagDescriptor: event.tagDescriptor, class: event.class, 
                externalRef: event.externalRef, instanceId: event.instanceId
                })
                """


queryForEntity = """ 
                unwind $entities as entity
                merge (e:Entity {mid: entity.id, comment: entity.comment, 
                tagDescriptor: entity.tagDescriptor, entityType: entity.entityType, 
                externalRef: entity.externalRef, instanceId: entity.instanceId
                })
                """


queryForTlinks = """ 
                unwind $relations as relation
                match (s where s.mid = relation.source)
                match (t where t.mid = relation.target)
                merge (s)-[:TLINK {comment: relation.comment, 
                rid: relation.id, relType: relation.relType, 
                signalID: relation.signalID}]->(t)
                """

queryForClinks = """ 
                unwind $relations as relation
                match (s where s.mid = relation.source)
                match (t where t.mid = relation.target)
                merge (s)-[:CLINK { 
                rid: relation.id}]->(t)
                """

queryForGlinks = """ 
                unwind $relations as relation
                match (s where s.mid = relation.source)
                match (t where t.mid = relation.target)
                merge (s)-[:GLINK { 
                rid: relation.id}]->(t)
                """



queryForHasParticipants = """ 
                unwind $hasParticipantRelations as relation
                match (s where s.mid = relation.source)
                match (t where t.mid = relation.target)
                merge (s)-[:HAS_PARTICIPANT {comment: relation.comment, 
                rid: relation.id, semRole: relation.semRole,
                semRoleFramework: relation.semRoleFramework 
                }]->(t)
                """


queryForRefersTo = """ 
                unwind $refersToRelations as relation
                unwind relation.source as source
                unwind relation.target as target
                match (s where s.mid = source)
                match (t where t.mid = target)
                merge (s)-[:REFERS_TO { 
                rid: relation.id}]->(t)
                """

tokens = []
entities = []
events = []
signals = []
timexes = []
values = []
entityMentions = []
eventMentions = []
tlinks = []
clinks = []
glinks = []
hasParticipantRelations = []
refersToRelations = []

for child in root:
    print(child.tag, child.attrib)

    if child.tag == 'token':
        
        t_id = child.attrib["t_id"]
        number = child.attrib["number"]
        sentence = child.attrib["sentence"]
        value = child.text

        token = { "tid": t_id, "number": number, "sentence": sentence,"value": value }

        #print(conn.query(queryForToken, token))
        tokens.append(token)

    

    if child.tag == 'Relations':
        for relation in child:
                if relation.tag == 'TLINK':
                        comment = relation.attrib.get("comment")
                        id = relation.attrib.get("r_id")
                        relType = relation.attrib.get("reltype")
                        signalID = relation.attrib.get("signalID")
                        source = ""
                        target = ""

                        for participant in relation:
                                if participant.tag == 'source':
                                        source = participant.attrib.get("m_id")
                                if participant.tag == 'target':
                                        target = participant.attrib.get("m_id")

                        relation = {"comment": comment, "id": id, "relType": relType, 
                        "signalID": signalID, "source": source, "target": target}
                        relation = replace_none_with(relation, "None")
                        tlinks.append(relation)

                elif relation.tag == 'CLINK':
                        id = relation.attrib.get("r_id")
                        source = ""
                        target = ""

                        for participant in relation:
                                if participant.tag == 'source':
                                        source = participant.attrib.get("m_id")
                                if participant.tag == 'target':
                                        target = participant.attrib.get("m_id")

                        relation = {"id": id, "source": source, "target": target}
                        relation = replace_none_with(relation, "None")
                        clinks.append(relation)

                
                elif relation.tag == 'GLINK':
                        id = relation.attrib.get("r_id")
                        source = ""
                        target = ""

                        for participant in relation:
                                if participant.tag == 'source':
                                        source = participant.attrib.get("m_id")
                                if participant.tag == 'target':
                                        target = participant.attrib.get("m_id")

                        relation = {"id": id, "source": source, "target": target}
                        relation = replace_none_with(relation, "None")
                        glinks.append(relation)

                elif relation.tag == 'HAS_PARTICIPANT':
                        comment = relation.attrib.get("comment")
                        id = relation.attrib.get("r_id")
                        semRole = relation.attrib.get("sem_role")
                        semRoleFramework = relation.attrib.get("sem_role_framework")
                        source = ""
                        target = ""

                        for participant in relation:
                                if participant.tag == 'source':
                                        source = participant.attrib.get("m_id")
                                if participant.tag == 'target':
                                        target = participant.attrib.get("m_id")

                        hasParticipantRelation = {"comment": comment, "id": id, "semRole": semRole, 
                        "semRoleFramework": semRoleFramework, "source": source, "target": target}
                        hasParticipantRelation = replace_none_with(hasParticipantRelation, "None")
                        hasParticipantRelations.append(hasParticipantRelation)


                elif relation.tag == 'REFERS_TO':
                        id = relation.attrib.get("r_id")

                        source = []
                        target = []

                        for participant in relation:
                                if participant.tag == 'source':
                                        source.append(participant.attrib.get("m_id"))
                                if participant.tag == 'target':
                                        target.append(participant.attrib.get("m_id"))

                        refersToRelation = { "id": id, "source": source, "target": target}
                        refersToRelation = replace_none_with(refersToRelation, "None")
                        refersToRelations.append(refersToRelation)


    if child.tag == 'Markables':
        for markable in child:
                if markable.tag == 'ENTITY_MENTION':
                        #comment = markable.attrib["comment"]
                        head = markable.attrib.get("head")
                        id = markable.attrib.get("m_id")
                        syntacticType = markable.attrib.get("syntactic_type")
                        tas = []
                        for ta in markable:
                                tas.append(ta.attrib["t_id"])
                        entityMention = { "head": head, "id": id, "syntacticType": syntacticType, "anchors": tas}
                        entityMention = replace_none_with(entityMention, "None")
                        entityMentions.append(entityMention)


                elif markable.tag == 'EVENT_MENTION':
                        aspect = markable.attrib.get("aspect")
                        certainty = markable.attrib.get("certainty")
                        comment = markable.attrib.get("comment")
                        id = markable.attrib.get("m_id")
                        modality = markable.attrib.get("modality")
                        polarity = markable.attrib.get("polarity")
                        pos = markable.attrib.get("pos")
                        pred = markable.attrib.get("pred")
                        specialCases = markable.attrib.get("special_cases")
                        tense = markable.attrib.get("tense")
                        time = markable.attrib.get("time")
                        tas = []
                        for ta in markable:
                                tas.append(ta.attrib["t_id"])
                        eventMention = {"comment": comment, "aspect": aspect, "certainty": certainty,  "id": id,
                         "modality": modality, "polarity": polarity, "pos": pos, "pred": pred, "specialCases":specialCases,
                         "tense": tense, "time": time, "anchors": tas}
                        eventMention = replace_none_with(eventMention, "None")
                        eventMentions.append(eventMention)

                elif markable.tag == 'SIGNAL':
                        comment = markable.attrib.get("comment")
                        id = markable.attrib.get("m_id")
                        tas = []
                        for ta in markable:
                                tas.append(ta.attrib["t_id"])
                        signal = {"comment": comment, "id": id, "anchors":tas}
                        signal = replace_none_with(signal, "None")
                        signals.append(signal)

                elif markable.tag == 'TIMEX3':
                        comment = markable.attrib.get("comment")
                        anchorTimeID = markable.attrib.get("anchorTimeID")
                        beginPoint = markable.attrib.get("beginPoint")
                        endPoint = markable.attrib.get("endPoint")
                        functionInDocument = markable.attrib.get("functionInDocument")
                        id = markable.attrib.get("m_id")
                        type = markable.attrib.get("type")
                        value = markable.attrib.get("value")
                        tas = []
                        for ta in markable:
                                tas.append(ta.attrib["t_id"])
                        timex = {"comment": comment, "id": id, "anchorTimeID": anchorTimeID, "beginPoint": beginPoint,
                         "anchors":tas, "endPoint": endPoint, "functionInDocument": functionInDocument, "type": type,
                         "value": value}
                        timex = replace_none_with(timex, "None")
                        timexes.append(timex)

                elif markable.tag == 'VALUE':
                        comment = markable.attrib.get("comment")
                        id = markable.attrib.get("m_id")
                        type = markable.attrib.get("type")
                        tas = []
                        for ta in markable:
                                tas.append(ta.attrib["t_id"])
                        value = {"comment": comment, "id": id, "type": type, "anchors":tas}
                        value = replace_none_with(value, "None")
                        values.append(value)

                
                elif markable.tag == 'EVENT':
                        comment = markable.attrib.get("comment")
                        id = markable.attrib.get("m_id")
                        tagDescriptor = markable.attrib.get("TAG_DESCRIPTOR")
                        # here attribute name is class , but its reserved word in python
                        # so we are calling it here 'type'
                        type = markable.attrib.get("class")
                        externalRef = markable.attrib.get("external_ref")
                        instanceId = markable.attrib.get("instance_id")
                        tas = []
                        event = {"comment": comment, "id": id, "class": type, "tagDescriptor": tagDescriptor,
                        "externalRef": externalRef, "instanceId": instanceId}
                        event = replace_none_with(event, "None")
                        events.append(event)


                elif markable.tag == 'ENTITY':
                        comment = markable.attrib.get("comment")
                        id = markable.attrib.get("m_id")
                        tagDescriptor = markable.attrib.get("TAG_DESCRIPTOR")
                        entityType = markable.attrib.get("ent_type")
                        externalRef = markable.attrib.get("external_ref")
                        instanceId = markable.attrib.get("instance_id")
                        entity = {"comment": comment, "id": id, "entityType": entityType, "tagDescriptor": tagDescriptor,
                        "externalRef": externalRef, "instanceId": instanceId}
                        entity = replace_none_with(entity, "None")
                        entities.append(entity)




                


        
print(conn.query(queryForToken2,parameters={'tokens': tokens}))
print(conn.query(queryForEntityMention,parameters={'entityMentions': entityMentions}))
print(conn.query(queryForEventMention,parameters={'eventMentions': eventMentions}))
print(conn.query(queryForEntity,parameters={'entities': entities}))
print(conn.query(queryForEvent,parameters={'events': events}))
print(conn.query(queryForTimex,parameters={'timexes': timexes}))
print(conn.query(queryForValue,parameters={'values': values}))
print(conn.query(queryForSignal,parameters={'signals': signals}))
print(conn.query(queryForTlinks,parameters={'relations': tlinks}))
print(conn.query(queryForGlinks,parameters={'relations': glinks}))
print(conn.query(queryForClinks,parameters={'relations': clinks}))
print(conn.query(queryForHasParticipants,parameters={'hasParticipantRelations': hasParticipantRelations}))
print(conn.query(queryForRefersTo,parameters={'refersToRelations': refersToRelations}))
# print(tokens)
# print(events)
# print(entities)





FileNotFoundError: [Errno 2] No such file or directory: 'docs/76437_Markets_dragged_down_by_credit_crisis.xml'