In [16]:
from gensim.models.poincare import PoincareModel, PoincareKeyedVectors, PoincareRelations
from  gensim.viz.poincare import poincare_2d_visualization
from gensim.test.utils import datapath


###  Steps :
1. generate word and adjective syncset closure from wordnet 
2. use poincare embedding from gensim to create embedding 
3. convert poincare embedding into wordwvec format
4. use word2vec format of poincare embedding to train model 



#### create closure 

In [17]:
import re
import pandas
from nltk.corpus import wordnet as wn
from tqdm import tqdm
try:
    wn.all_synsets
except LookupError as e:
    import nltk
    nltk.download('wordnet')

# make sure each edge is included only once
edges = set()
poss = ['a', 'n']
for p in poss :
    for synset in tqdm(wn.all_synsets(pos=p)):
        # write the transitive closure of all hypernyms of a synset to file
        for hyper in synset.closure(lambda s: s.hypernyms()):
            edges.add((synset.name(), hyper.name()))

        # also write transitive closure for all instances of a synset
        for instance in synset.instance_hyponyms():
            for hyper in instance.closure(lambda s: s.instance_hypernyms()):
                edges.add((instance.name(), hyper.name()))
                for h in hyper.closure(lambda s: s.hypernyms()):
                    edges.add((instance.name(), h.name()))

nouns_and_adjectives  = pandas.DataFrame(list(edges), columns=['id1', 'id2'])
#nouns_and_adjectives['weight'] = 1
nouns_and_adjectives.to_csv('nouns_and_adjectives_closure.csv', index=False)


18156it [00:00, 36111.22it/s]
82115it [00:06, 13599.35it/s]


### create poincare embedding

qwwq

In [18]:
wordnet_mamal_file_path = 'nouns_and_adjectives_closure.csv'
relations = PoincareRelations(wordnet_mamal_file_path, delimiter=',')

In [19]:
model = PoincareModel(train_data=relations, size= 16 , burn_in=0)
model.train(epochs = 20 , print_every = 500)

In [20]:
model.kv.distance('mammal.n.01', 'placental.n.01')

0.2119915773641999

In [21]:
#poincare_2d_visualization(model , relations , 'aaa', show_node_labels= model.kv.vocab.keys())

In [22]:
model.kv.difference_in_hierarchy('mammal.n.01', 'dog.n.01')

0.060347634840669184

In [23]:
model.kv.most_similar('kangaroo.n.01', topn=2)

[('marsupial.n.01', 0.6141399952214143),
 ('metatherian.n.01', 0.6246329255434071)]

In [24]:
model.kv.norm('mammal.n.01')

0.7160803476417321

###  create word2wec form embedding from poincare embedding

In [25]:
model.kv.save_word2vec_format('word2_wec_poinc', fvocab=None, binary=False, total_vec=None)

In [26]:
#### create model poincare embedding 

In [27]:
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [28]:
with open('word2_wec_poinc', 'r') as f:
    emb= f.readlines()
    

In [29]:
#E = np.loadtxt('word2_wec_poinc.tsv', delimiter='\t')
embedding_layer = Embedding(100, 10, input_length=100, weights = emb)
embedding_layer.trainable = True

In [30]:
emb

['82117 16\n',
 'entity.n.01 -0.016929681574857018 0.002219559639064995 -0.00693505242781044 0.0025617760256091103 -0.0011763589475189008 -0.003947770725207639 -0.0007836818837137598 0.006944324833555251 -0.0005630910063697814 -0.006748390587641161 0.0065021898950999215 -0.004186623014456919 -0.005657757455932061 -0.02863590298320856 0.00896444906843662 -0.015280087820851905\n',
 'physical_entity.n.01 0.027260428918393597 0.13617005080303263 -0.0036866141491980666 0.03736965003455551 -0.03120204899383839 0.016334789341041545 -0.09602380823445517 -0.022120625946377655 -0.10407523706434411 0.029389025311083127 -0.03243959128754709 0.045330939203181694 0.01981677868222828 0.05484953276031446 -0.06830526005676922 -0.0017551680882208608\n',
 'abstraction.n.06 -0.04949261631447142 -0.2254542285498637 0.001659358764727736 -0.028955433024967153 0.09109353354779268 -0.0059084623486370365 0.16174162351748778 0.06440634625827521 0.23821900557652226 -0.09132026183825409 0.06126315735400902 -0.0998