In [17]:
# !pip install ruwordnet
# !ruwordnet download

In [18]:
from ruwordnet import RuWordNet
from collections import deque
import numpy as np

In [19]:
# Initialize RuWordNet
ruwordnet = RuWordNet()

In [20]:
# Function to get all hypernyms for a given synset
def get_hypernyms(synset):
    return synset.hypernyms

In [21]:
# Bidirectional BFS to find shortest path between two synsets
def bidirectional_bfs(synset1, synset2):
    if synset1 == synset2:
        return 0
    
    queue1 = deque([(synset1, 0)])
    queue2 = deque([(synset2, 0)])
    
    visited1 = {synset1: 0}
    visited2 = {synset2: 0}
    
    while queue1 and queue2:
        if queue1:
            current_synset, distance = queue1.popleft()
            for hypernym in get_hypernyms(current_synset):
                if hypernym in visited2:
                    return distance + 1 + visited2[hypernym]
                if hypernym not in visited1:
                    visited1[hypernym] = distance + 1
                    queue1.append((hypernym, distance + 1))
        
        if queue2:
            current_synset, distance = queue2.popleft()
            for hypernym in get_hypernyms(current_synset):
                if hypernym in visited1:
                    return distance + 1 + visited1[hypernym]
                if hypernym not in visited2:
                    visited2[hypernym] = distance + 1
                    queue2.append((hypernym, distance + 1))
    
    return float('inf')

In [47]:
# Function to calculate the minimal distance between two labels
def calculate_distance(label1, label2):
    synsets_label1 = ruwordnet.get_synsets(label1)
    synsets_label2 = ruwordnet.get_synsets(label2)

    min_distance = float('inf')
    for synset1 in synsets_label1:
        for synset2 in synsets_label2:
            distance = bidirectional_bfs(synset1, synset2)
            if distance < min_distance:
                min_distance = distance
                
    if min_distance == float('inf'):
        print(f"can't calculate distance for {label1} and {label2}")
    
    return min_distance if min_distance != float('inf') else None

In [48]:
# Function to calculate the average distance between labels and ideal_label
def calculate(labels, ideal_label):
    # Calculate distances for correct and wrong labels
    distances = [calculate_distance(ideal_label, label) for label in labels]
    # Filter out None values
    distances = [dist for dist in distances if dist is not None]
    # Compute average distances, treating no distances as infinity
    average_distance = np.mean(distances) if distances else float('inf')
    return average_distance

In [49]:
label1 = "человек"
label2 = "женщина"
distance = calculate_distance(label1, label2)
print(f"distance between label1: {label1} and label2: {label2} is: {distance}")

distance between label1: человек and label2: женщина is: 1


In [50]:
labels = ["женщина", "рабочий"]
ideal_label = "человек"
average_distance = calculate(labels, ideal_label)
print(f"average distance between labels: {labels} and ideal label: {ideal_label} is: {average_distance}")

can't calculate distance for человек and рабочий
average distance between labels: ['женщина', 'рабочий'] and ideal label: человек is: 1.0
