In [22]:
import babelnet as bn
from babelnet import BabelSynsetID, Language
from babelnet.data.relation import BabelPointer

In [23]:
from  zerorpc import TimeoutExpired, LostRemote

In [24]:
bn.version()

5.0

Al seguente link si trova la documentazione della libreria per interfacciarsi con babelnet: https://babelnet.org/guide.

In [6]:
# basic BFS 
visited = set()

max_visits, n = 20000, 0
root = bn.get_synset(BabelSynsetID('bn:00015267n'))
q = [root]

with open('no_hypernyms.txt', 'w') as nohypfile:
    while q and n <= max_visits:
        synset = q.pop(0)
        n += 1
        hypernym_edges = synset.outgoing_edges(BabelPointer.ANY_HYPERNYM)
        if hypernym_edges == []:
            print(f'Possible root node: {synset.id}')
            nohypfile.write(f'{str(synset.id)},{synset.main_sense().full_lemma}\n')
        for edge in hypernym_edges:
            if edge.id_target not in visited:
                q.append(bn.get_synset(edge.id_target))
                visited.add(edge.id_target)

Possible root node: bn:00076248n




Possible root node: bn:00044576n
Possible root node: bn:00064608n




Possible root node: bn:14481752n
Possible root node: bn:00248578n
Possible root node: bn:04751152n
Possible root node: bn:16932101n
Possible root node: bn:00031027n
Possible root node: bn:21705963n
Possible root node: bn:04889559n
Possible root node: bn:14685380n
Possible root node: bn:03245758n
Possible root node: bn:01027542n
Possible root node: bn:02636022n
Possible root node: bn:00894769n
Possible root node: bn:15921369n
Possible root node: bn:04591277n
Possible root node: bn:06171637n
Possible root node: bn:03758703n
Possible root node: bn:01404775n
Possible root node: bn:06162070n
Possible root node: bn:00234465n
Possible root node: bn:15090031n
Possible root node: bn:07669899n
Possible root node: bn:00218497n
Possible root node: bn:02936056n
Possible root node: bn:02638689n
Possible root node: bn:06429342n
Possible root node: bn:07129841n
Possible root node: bn:03843033n
Possible root node: bn:17690782n
Possible root node: bn:05749505n
Possible root node: bn:06760533n
Possible r

In [16]:
for id_lemma in open('no_hypernyms.txt', 'r').readlines()[2:]:
    id, lemma = id_lemma.strip().split(',')
    synset = bn.get_synset(BabelSynsetID(id))
    print(f'{id},{lemma},{len(synset.outgoing_edges())},{len(synset.outgoing_edges(BabelPointer.ANY_HYPERNYM))}')

bn:00064608n,profession,5697,5
bn:14481752n,field_of_study,77,0
bn:00248578n,Trade_literature,22,0
bn:04751152n,production_center,7,0
bn:16932101n,routine,140,0
bn:00031027n,entity,18,0
bn:21705963n,Religious_calling,46,0
bn:04889559n,flow_of_matter_and_energy,22,0
bn:14685380n,resource,200,0
bn:03245758n,engineering_process,512,0
bn:01027542n,source_of_information,164,0
bn:02636022n,Thermodynamic_process,427,0
bn:00894769n,task_(computing),40,0
bn:15921369n,adaptation,14,0
bn:04591277n,Work_(human_activity),224,0
bn:06171637n,signaling_system,66,0
bn:03758703n,Mutual_intelligibility,571,0
bn:01404775n,Subatomic_particle,843,0
bn:06162070n,format,24,0
bn:00234465n,Unordered_pair,58,0
bn:15090031n,posture,424,0
bn:07669899n,suprasegmental,75,0
bn:00218497n,Identity_formation,77,0
bn:02936056n,Cellular_component,365,0
bn:02638689n,Course_(medicine),169,0
bn:06429342n,Dokument_normatywny,5,0
bn:07129841n,classification_of_languages,261,0
bn:03843033n,Challenge_(TV_channel),513,0
bn:176907

In [15]:
profession = bn.get_synset(BabelSynsetID('bn:00064608n'))
profession.outgoing_edges(BabelPointer.ANY_HYPERNYM)

[EN_wn2020@_bn:00014138n,
 MUL_wdp279_bn:00014138n,
 EN_@_bn:00014138n,
 EN_@w_bn:21705963n,
 SQ_@w_bn:00064608n]

In [17]:
taxgroup = bn.get_synset(BabelSynsetID('bn:00076248n'))
print(len(taxgroup.outgoing_edges(BabelPointer.ANY_HYPERNYM)))

0


In [18]:
homo = bn.get_synset(BabelSynsetID('bn:00044576n'))
print(len(homo.outgoing_edges(BabelPointer.ANY_HYPERNYM)))



0


In [5]:
### find distance between synsets A and B in the graph (which is not a tree...)

s1 = bn.get_synset(BabelSynsetID('bn:00012605n')) # watchband
s2 = bn.get_synset(BabelSynsetID('bn:00019887n')) # clock

In [6]:
s1.main_sense().full_lemma, s2.main_sense().full_lemma

('watchband', 'clock')

In [7]:
s1.main_gloss(), s2.main_gloss()

(A band of cloth or leather or metal links attached to a wristwatch and wrapped around the wrist,
 A timepiece that shows the time of day)

In [25]:
### BFS fashion
import time
start_t = time.time()
max_length = 100
n = 0
visited = set()
target = s2.id
q = [(s1,n)]

while q and n <= max_length:
    synset, n = q.pop(0)
    if n % 10 == 0 and n > 0:
        end_t = time.time()
        min, sec = (end_t - start_t) // 60, (end_t - start_t) % 60
        print(f'{n} - min: {min}, sec: {sec}')
    
    try:
        edges = synset.outgoing_edges(BabelPointer.ANY_HYPERNYM, BabelPointer.ANY_HOLONYM)
    except LostRemote as lr:
        print(f'LostRemote error with synset {synset.id}')

    for edge in edges:
        if edge.id_target == target:
            print(f'Reached target at depth {n}')
            end_t = time.time()
            min, sec = (end_t - start_t) // 60, (end_t - start_t) % 60
            print(f'Total time: {min}m,{sec}s')
            n = max_length+1
            break
        if edge.id_target not in visited:
            try:
                visited.add(edge.id_target)
                q.append((bn.get_synset(edge.id_target), n+1))
            except TimeoutExpired as te:
                print(f'TimeoutExpired error with synset: {edge.id_target}')
                # raise TimeoutExpired('eccheccacchio')
            except LostRemote as lr:
                print(f'LostRemote error with synset {synset.id}')
    
    if n == max_length-1: print('Reached max length.')
if q == []: print('Queue empty')



LostRemote error with synset bn:15125301n
LostRemote error with synset bn:15125301n
LostRemote error with synset bn:01236190n
LostRemote error with synset bn:01236190n
LostRemote error with synset bn:00048027n
LostRemote error with synset bn:00048027n
LostRemote error with synset bn:00071103n
LostRemote error with synset bn:00071103n
LostRemote error with synset bn:00028604n
LostRemote error with synset bn:00028604n
LostRemote error with synset bn:00069679n
Reached target at depth 6


In [28]:
t1 = bn.get_synset(BabelSynsetID('bn:02617074n')) # console (videogiochi)
t2 = bn.get_synset(BabelSynsetID('bn:15625318n')) # Playstation 4

In [30]:
### BFS fashion
import time
start_t = time.time()
timestamp_n, timestamp_step = 2, 2
max_length = 15
n = 0
visited = set()
target = t2.id
q = [(t1,n)]
logfile = open('.log_search.txt', 'w')

while q and n <= max_length:
    synset, n = q.pop(0)
    if n == timestamp_n:
        timestamp_n += timestamp_step
        end_t = time.time()
        min, sec = int((end_t - start_t) // 60), int((end_t - start_t) % 60)
        print(f'{n}: {min}m,{sec}s')
    
    try:
        edges = synset.outgoing_edges(BabelPointer.ANY_HYPERNYM, BabelPointer.ANY_HOLONYM)
    except LostRemote as lr:
        logfile.write(f'LostRemote,{synset.id}')
        edges = []
        continue

    for edge in edges:
        if edge.id_target == target:
            print(f'Reached target at depth {n}')
            end_t = time.time()
            min, sec = int((end_t - start_t) // 60), int((end_t - start_t) % 60)
            print(f'Total time: {min}m,{sec}s')
            n = max_length+1
            break

        if edge.id_target not in visited:
            try:
                visited.add(edge.id_target)
                q.append((bn.get_synset(edge.id_target), n+1))
            except TimeoutExpired as te:
                logfile.write(f'TimeoutExpired,{synset.id}')
            except LostRemote as lr:
                logfile.write(f'LostRemote,{synset.id}')
    
    if n == max_length-1: print('Reached max length.')

if q == []: print('Queue empty')

2: 0m,1s
4: 0m,8s




6: 4m,23s
