In [101]:
import os
import random
import sys
import time
import traceback
from collections import deque

# Make the parent directory importable before loading the project-local `utils` package.
sys.path.append(os.path.pardir)

import babelnet as bn
import neo4j
from babelnet import BabelSynsetID, Language
from babelnet.data.relation import BabelPointer
from neo4j import GraphDatabase
from zerorpc import TimeoutExpired, LostRemote

from utils import utils

In [18]:
# Neo4j connection settings.
# Each value can be overridden through the environment (NEO4J_URI, NEO4J_USER,
# NEO4J_PASSWORD) so that real credentials do not have to be stored in the
# notebook; the fallbacks are the previous local-development values.
URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
AUTH = (
    os.environ.get("NEO4J_USER", "giovanni"),
    os.environ.get("NEO4J_PASSWORD", "BabeldistGraph"),
)

In [19]:
def run_no_exception(session: "neo4j.Session", query: str):
    """Run ``query`` on ``session`` on a best-effort basis.

    Any exception raised by ``session.run`` is caught so the caller can keep
    going (e.g. when creating a constraint that may already exist), but the
    failure is reported on stderr instead of being discarded silently.

    Args:
        session: object exposing ``run(query)``; annotated as a Neo4j session.
        query: query text passed unchanged to ``session.run``.

    Returns:
        None, whether or not the query succeeded.
    """
    try:
        session.run(query)
    except Exception as e:
        # Still non-fatal, but leave a trace so real problems
        # (bad credentials, syntax errors, ...) are not hidden.
        print(f'Query skipped after error: {query!r} ({e!r})', file=sys.stderr)

In [127]:
# --- Cypher statements used by the export cell ---

# Number of :Synset nodes currently stored.
count_nodes_query = """
MATCH (s:Synset)
RETURN count(s) """

# Number of :IS_A relationships currently stored.
count_edges_query = """
MATCH ()-[r:IS_A]->()
RETURN count(r) """

# Upsert two synsets by ID and, unless both IDs are the same,
# upsert the edge (hyponym)-[:IS_A]->(s).
# Parameters: $synsetID_1 (the synset), $synsetID_2 (its hyponym).
merge_graph_query = """
MERGE (s:Synset {synsetID: $synsetID_1})
MERGE (hyponym:Synset {synsetID: $synsetID_2})
WITH s, hyponym
WHERE s.synsetID <> hyponym.synsetID
MERGE (s)<-[:IS_A]-(hyponym) """

In [93]:
# EXPORTING BABELNET TO NEO4J - ONLY SYNSET IDs, NO LEMMA OR OTHER PROPERTIES
#
# Breadth-first walk over BabelNet hyponym edges, starting at `start_synset_id`
# and visiting at most `max_visits` synsets. Each (synset, hyponym) pair is
# merged into Neo4j with `merge_graph_query`, and a short summary of the run
# is written to the log file `fname`.

fname = utils.get_current_logfile_number('exporting_neo4j', extension='.log')

max_visits, n = 500, 0
commit_every = 1000  # number of visits between intermediate commits
start_synset_id = 'bn:00062164n'
start_id = BabelSynsetID(start_synset_id)

# FIFO frontier of synset IDs still to expand (deque gives O(1) pops from the left).
q = deque([start_id])
# Every ID that has ever been enqueued. Seeding it with the start ID means a
# cycle back to the start cannot enqueue it a second time, and makes a scan
# of the queue itself unnecessary.
visited = {start_id}

# Creating a driver is not lightweight, but doing it once per export run is fine.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    with driver.session() as session:
        # Best effort: the constraint may already exist from a previous run.
        run_no_exception(session, 'CREATE CONSTRAINT FOR (s:Synset) REQUIRE s.synsetID IS UNIQUE')
        tx = session.begin_transaction()

        # Node/edge counts before the export, used to report what this run added.
        start_n = tx.run(count_nodes_query).values()[0][0]
        start_r = tx.run(count_edges_query).values()[0][0]

        with open(fname, 'w') as logfname:
            start_t = time.time()
            while q and n < max_visits:
                pb = utils.get_progress_bar(int((n / max_visits) * 100))
                print(pb, end='\r')
                n += 1

                try:
                    synset = q.popleft().to_synset()
                    hyponym_edges = synset.outgoing_edges(BabelPointer.ANY_HYPONYM)
                except (TimeoutExpired, LostRemote):
                    # RPC failure: report it and skip this synset instead of aborting the run.
                    traceback.print_exc()
                    hyponym_edges = []

                for edge in hyponym_edges:
                    try:
                        tx.run(merge_graph_query, {
                            'synsetID_1': str(synset.id),
                            'synsetID_2': str(edge.id_target)})
                    except Exception:
                        # NOTE(review): a failed query may leave `tx` unusable for
                        # later statements - confirm against the driver's behavior.
                        traceback.print_exc()

                    if edge.id_target not in visited:
                        visited.add(edge.id_target)
                        q.append(edge.id_target)

                if n % commit_every == 0:
                    # Commit periodically and continue in a fresh transaction.
                    tx.commit()
                    tx = session.begin_transaction()

            end_n = tx.run(count_nodes_query).values()[0][0]
            end_r = tx.run(count_edges_query).values()[0][0]
            tx.commit()
            print(f'Added {end_n - start_n} nodes, added {end_r - start_r} edges.')

            logfname.write(f'start_node={start_synset_id}\n')
            logfname.write(f'max_visits={max_visits}\n')
            if not q: logfname.write('Queue empy\n')
            if n == max_visits: logfname.write('Reached max visits\n')
            logfname.write(f'Added {end_n - start_n} nodes, added {end_r - start_r} edges.\n')
            end_t = time.time()
            # Named `minutes`/`seconds` so the builtin `min` is not shadowed in later cells.
            minutes, seconds = divmod(end_t - start_t, 60)
            logfname.write(f'total_time,{int(minutes)}m,{int(seconds)}s\n')

0 

Added 5092 nodes, added 5251 edges.
