In [39]:
import os
import sys
sys.path.append(os.path.pardir)
from utils import utils
import time
import pprint
import random
import neo4j
from neo4j import GraphDatabase
import babelnet as bn
from babelnet import BabelSynsetID, Language
from babelnet.data.relation import BabelPointer
from zerorpc import TimeoutExpired, LostRemote

In [2]:
URI = "bolt://localhost:7687"
AUTH = ("giovanni", "BabeldistGraph")

In [34]:
random_node_query = """
MATCH (a:Synset)
RETURN a.synsetID, rand() as r 
ORDER BY r LIMIT 1"""

count_nodes_query = """
MATCH (s:Synset)
RETURN count(s) """

count_edges_query = """
MATCH ()-[r:IS_A]->()
RETURN count(r) """

shortestPath_query = """
MATCH (s1:Synset {synsetID: $synsetID_1})
MATCH (s2:Synset {synsetID: $synsetID_2})
MATCH p = shortestPath((s1)-[:IS_A*..12]-(s2))
RETURN p """

first_common_node_query = """
MATCH (s1:Synset {synsetID: $synsetID_1})
MATCH (s2:Synset {synsetID: $synsetID_2})
MATCH p = (s1)-[:IS_A*..5]->(common_node:Synset)<-[:IS_A*..5]-(s2) 
RETURN p, common_node """

In [55]:
root_node_id_str = 'bn:00062164n' # physical entity

In [4]:
driver = GraphDatabase.driver(URI, auth=AUTH)

In [46]:
id1_str = driver.execute_query(random_node_query, result_transformer_=neo4j.Result.data)
id2_str = driver.execute_query(random_node_query, result_transformer_=neo4j.Result.data)
id1_str, id2_str = id1_str[0]['a.synsetID'], id2_str[0]['a.synsetID']
id1_str, id2_str

('bn:18162455n', 'bn:14976259n')

In [48]:
id1, id2 = BabelSynsetID(id1_str), BabelSynsetID(id2_str)
s1, s2 = id1.to_synset(), id2.to_synset()
s1, s2

(236848116__WIKI:NL:Chiweta-formatie, 462268501__WIKIDATA:EN:Cupido_Formation)

In [70]:
# SHORTEST PATH
result = driver.execute_query(
    shortestPath_query, 
    {'synsetID_1': id1_str,
     'synsetID_2': id2_str},
    database_='neo4j',
    result_transformer_=neo4j.Result.data)
result

[{'p': [{'synsetID': 'bn:18162455n'},
   'IS_A',
   {'synsetID': 'bn:00035942n'},
   'IS_A',
   {'synsetID': 'bn:14976259n'}]}]

In [71]:
result = result[0]['p']

In [72]:
for item in result:
    if type(item) is dict and 'synsetID' in item.keys():
        try:
            print(BabelSynsetID(item['synsetID']).to_synset().main_sense().full_lemma)
        except Exception as e:
            print(e.args[0])

Chiweta-formatie
geological_formation
Cupido_Formation


In [54]:
# LEAST COMMON SUBSUMER
result = driver.execute_query(
    first_common_node_query,
    {'synsetID_1': id1_str,
     'synsetID_2': id2_str},
    database_='neo4j',
    result_transformer_=neo4j.Result.data)
result

[{'p': [{'synsetID': 'bn:18162455n'},
   'IS_A',
   {'synsetID': 'bn:00035942n'},
   'IS_A',
   {'synsetID': 'bn:14976259n'}],
  'common_node': {'synsetID': 'bn:00035942n'}}]

In [56]:
# TO-DO handle multiple paths
result = result[0]

In [57]:
result

{'p': [{'synsetID': 'bn:18162455n'},
  'IS_A',
  {'synsetID': 'bn:00035942n'},
  'IS_A',
  {'synsetID': 'bn:14976259n'}],
 'common_node': {'synsetID': 'bn:00035942n'}}

In [58]:
path, common_node = [s['synsetID'] for s in result['p'] if type(s) is dict], result['common_node']
path, common_node

(['bn:18162455n', 'bn:00035942n', 'bn:14976259n'],
 {'synsetID': 'bn:00035942n'})

In [59]:
from_s1 = path[0:path.index(common_node['synsetID']) + 1]
from_s2 = path[path.index(common_node['synsetID']):]
from_s2.reverse()
from_s1, from_s2

(['bn:18162455n', 'bn:00035942n'], ['bn:14976259n', 'bn:00035942n'])

In [61]:
lcs_depth = max([len(from_s1)-1, len(from_s2)-1])
lcs_depth

1

In [66]:
# DISTANCE FROM ROOT NODE (PHYSICAL ENTITY)
(dist_s1_root) = driver.execute_query(
    shortestPath_query,
    {'synsetID_1': id1_str,
     'synsetID_2': root_node_id_str},
    database_='neo4j',
    result_transformer_=neo4j.Result.data)[0]['p'].count('IS_A')
(dist_s1_root)

3

In [67]:
# DISTANCE FROM ROOT NODE (PHYSICAL ENTITY)
dist_s2_root = driver.execute_query(
    shortestPath_query,
    {'synsetID_1': id2_str,
     'synsetID_2': root_node_id_str},
    database_='neo4j',
    result_transformer_=neo4j.Result.data)[0]['p'].count('IS_A')
dist_s2_root

3

In [68]:
wup_similarity = lcs_depth / (dist_s1_root + dist_s2_root) 
wup_similarity

0.16666666666666666

With synsets 'Cupido Formation' and 'Chiweta-formatie', both geological formations, this result is good, because the wup similarity measures is lower if the two synsets are more similar.

In [None]:
driver.close()