In [1]:
from dictionary_graph import DictionaryGraph
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

In [2]:
def similarity_limit(A, B, iterations=10):
    """Iteratively approximate the vertex-similarity matrix between B and A.

    Starting from an all-ones matrix, the update
    ``Z <- B @ Z @ A.T + B.T @ Z @ A`` is applied and the result is rescaled
    to unit Frobenius norm after every step.

    Parameters
    ----------
    A : square adjacency matrix of the first graph, shape (n_A, n_A).
    B : square adjacency matrix of the second graph, shape (n_B, n_B).
    iterations : int
        Half the number of update steps; ``2 * iterations`` steps are run so
        that the total is always even.

    Returns
    -------
    Matrix of shape (n_B, n_A); entry ``[i, j]`` scores vertex ``i`` of B
    against vertex ``j`` of A. It has unit Frobenius norm, except that the
    all-ones start matrix is returned when ``iterations`` is 0 and an
    all-zero matrix is returned when the update collapses to zero (for
    example when one of the graphs has no edges).
    """
    # Float start matrix: an integer one would make the normalisation below
    # fail (or truncate) whenever A and B are integer adjacency matrices.
    Z = np.ones((B.shape[0], A.shape[0]), dtype=np.float64)
    # Even number of iterations.
    # NOTE(review): presumably because the even and odd iterates of this
    # scheme can approach different limits -- confirm against the reference.
    for _ in range(2 * iterations):
        Z = B @ Z @ A.T + B.T @ Z @ A
        norm = np.linalg.norm(Z, 'fro')
        if norm == 0:
            # Nothing left to normalise; dividing would only produce NaNs.
            break
        Z = Z / norm
    return Z

In [3]:
# Module-level dictionary graph; get_synonyms reads it as a global.
# NOTE(review): presumably 'OPTED' names the dictionary data set and 'a-z'
# the range of letters to load -- confirm against DictionaryGraph.
graph = DictionaryGraph('OPTED', 'a-z')

def get_synonyms(word, n=10, **kwargs):
    """Return the ``n`` best-scoring synonym candidates for ``word``.

    The vertices and adjacency matrix that the global ``graph`` yields for
    ``word`` are compared with the three-vertex path graph 1 -> 2 -> 3 via
    ``similarity_limit``; candidates are ranked by their similarity to the
    path's middle vertex.

    Parameters
    ----------
    word : the word to look up; passed to ``graph.adjacency_matrix``.
    n : int
        How many top-scoring entries to return.
    **kwargs
        Forwarded unchanged to ``graph.adjacency_matrix``.

    Returns
    -------
    pandas.Series indexed by the returned vertices, holding the ``n``
    largest scores (the Series name is the column label ``1``).
    """
    # Directed path on three vertices: 1 -> 2 -> 3.
    path_graph = np.array([
        [0, 1, 0],
        [0, 0, 1],
        [0, 0, 0],
    ], dtype=np.float64)

    labels, neighbourhood = graph.adjacency_matrix(word, **kwargs)
    similarity = similarity_limit(path_graph, neighbourhood)

    # One row per vertex, one column per path vertex; column 1 is the
    # middle vertex of the path.
    score_table = pd.DataFrame(similarity, index=labels)
    middle_scores = score_table[1]
    return middle_scores.nlargest(n)

In [4]:
# Compare the graph-construction variants on the word 'disappear':
#   method 0         - use all links between words
#   method 1         - remove from the induced graph the words that occur
#                      in definitions more than 1000 times
#   method 2         - adjacency_matrix[i][j] from method 0 divided by the
#                      total number of occurrences of word j in definitions
#   method 2 with f  - adjacency_matrix[i][j] from method 0 divided by
#                      f(total number of occurrences of word j in definitions)
for method, f in (
    (0, None),
    (1, None),
    (2, None),
    (2, np.sqrt),
    (2, lambda x: np.log(1+x)),
):
    # Print the configuration first so each result table is labelled.
    print((method, f))
    print(get_synonyms('disappear', method=method, f=f))

(0, None)
value
to           0.191060
as           0.188393
a            0.180136
or           0.178049
of           0.173065
the          0.160774
disappear    0.142423
be           0.131523
from         0.126350
at           0.109615
Name: 1, dtype: float64
(1, None)
value
disappear    0.372840
vanish       0.196909
pass         0.150394
die          0.123490
fade         0.111597
faint        0.105173
wear         0.091636
gradually    0.087202
light        0.066422
evanid       0.061815
Name: 1, dtype: float64
(2, None)
value
evanid         0.579632
vanish         0.016232
disappear      0.015307
faint          0.007629
fade           0.001704
die            0.001570
pass           0.001428
dissipate      0.001419
evanesce       0.001418
disappeared    0.000820
Name: 1, dtype: float64
(2, <ufunc 'sqrt'>)
value
evanid       0.410771
disappear    0.269405
vanish       0.182102
faint        0.107490
fade         0.102513
die          0.074299
pass         0.064375
eliminate    0.05799