## ライブラリ

In [1]:
!pip install gensim
!pip install plotly
!pip install pandas



In [2]:
import pandas as pd
from gensim.models.poincare import PoincareModel
from gensim.viz.poincare import poincare_2d_visualization
from IPython import display

## Story Name

In [6]:
# Base file names of the stories to embed; each is used as the `file_base`
# prefix from which the input and output file names are derived.
story_names = [
    "ACaseOfIdentity_formatted_S2",
    "CrookedMan_formatted_S2",
    "DancingMen_formatted_S2",
    "DevilsFoot_formatted_S2",
    "SpeckledBand_formatted_S2",
]

In [8]:
# Training settings shared by every story:
#   vector_size - embedding dimensionality (5 was an alternative setting)
#   negative    - value forwarded to PoincareModel(negative=...)
#   epochs      - value forwarded to model.train(epochs=...)
vector_size, negative, epochs = 3, 20, 100

## TEST

# Smoke test: train a tiny 2-D Poincaré embedding on a handful of hand-written
# relation pairs and plot it.
train_data = [
    ('Software Engineer', 'Engineer'),
    ('Senior Software Engineer', 'Software Engineer'),
    ('Web Programmer', 'Programmer'),
    ('UI Designer', 'Designer'),
    ('エンジニア', 'Engineer'),
    ('Engineer', 'エンジニア'),
    ('Engineer', 'Programmer'),
]

# size=2 because the visualization helper below is a 2-D plot.
model = PoincareModel(train_data, size=2, negative=2)
model.train(epochs=500)

tree = set(train_data)
figure_title = ''
# `show_node_labels` expects individual node names, not relation tuples, so
# flatten the pairs into a de-duplicated, deterministically ordered list.
node_labels = sorted({node for relation in train_data for node in relation})
poincare_2d_visualization(model, tree, figure_title, num_nodes=50, show_node_labels=node_labels)

## Poincaré Embedding

In [9]:
class PoincareEmbeding:
    """Train a Poincaré embedding for one story and export its vectors as TSV.

    Constructing an instance does all of the work:

    1. read relation pairs from ``<file_base>-pair.txt`` (tab-separated,
       two columns, no header),
    2. train a ``PoincareModel`` on them and save it to
       ``<file_base>-poincare-<vector_size>d.model``,
    3. read node labels from ``<file_base>-poincare-label.tsv`` (one label
       per line, no header) and write the vector of each label, in the same
       order, to ``<file_base>-poincare-<vector_size>d-vector.tsv``.
    """

    def __init__(self, file_base: str, vector_size: int, negative: int, epochs: int) -> None:
        """Train, save and export the embedding for ``file_base``.

        Args:
            file_base: Path prefix shared by the input and output files.
            vector_size: Embedding dimensionality (passed as ``size``).
            negative: Forwarded to ``PoincareModel(negative=...)``.
            epochs: Forwarded to ``model.train(epochs=...)``.

        Raises:
            ValueError: If the pair file does not have exactly two columns or
                the label file does not have exactly one column.
        """
        self.vector_size = vector_size
        self.epochs = epochs
        self.negative = negative

        self.data_file_name = file_base + "-pair.txt"
        self.tsv_label_file_name = file_base + "-poincare-label.tsv"

        output_base = file_base + "-poincare-" + str(self.vector_size)
        self.model_file_name = output_base + "d.model"
        self.tsv_vector_file_name = output_base + "d-vector.tsv"

        print(self.data_file_name)
        print(self.tsv_label_file_name)
        print(self.model_file_name)
        print(self.tsv_vector_file_name)

        pairs = self._read_tsv(self.data_file_name)
        if pairs.shape[1] != 2:
            raise ValueError(
                "%s must contain exactly 2 tab-separated columns, found %d"
                % (self.data_file_name, pairs.shape[1])
            )
        self.train_data = [(u, v) for u, v in pairs.values]

        model = PoincareModel(self.train_data,
                              size=self.vector_size,
                              negative=self.negative)
        model.train(epochs=self.epochs)
        model.save(self.model_file_name)
        self._makeVector(model)

    @staticmethod
    def _read_tsv(path: str) -> pd.DataFrame:
        """Read a header-less TSV file with every field kept as literal text.

        ``dtype=str`` and ``keep_default_na=False`` stop pandas from turning
        tokens such as "NA", "null" or "123" into NaN or numbers, so node
        names stay identical to what is written in the file.
        """
        return pd.read_csv(path,
                           header=None,
                           delimiter='\t',
                           dtype=str,
                           keep_default_na=False)

    def _makeVector(self, model) -> None:
        """Write one tab-separated vector line per label in the label file.

        Args:
            model: Trained model; ``model.kv.word_vec(label)`` must return
                the vector components for each label.

        Raises:
            ValueError: If the label file has more than one column.
        """
        labels = self._read_tsv(self.tsv_label_file_name)
        if labels.shape[1] != 1:
            raise ValueError(
                "%s must contain exactly 1 column, found %d"
                % (self.tsv_label_file_name, labels.shape[1])
            )
        node_labels = labels[0].tolist()

        with open(self.tsv_vector_file_name, 'w', encoding='utf-8') as f:
            for node in node_labels:
                components = model.kv.word_vec(node)
                f.write('\t'.join(str(value) for value in components))
                f.write('\n')


In [10]:
# Train, save and export one embedding per story; all of the work happens in
# the PoincareEmbeding constructor, so the instance itself is not kept.
for story_name in story_names:
    print("====> " + story_name)
    PoincareEmbeding(
        story_name,
        vector_size=vector_size,
        negative=negative,
        epochs=epochs,
    )

====> DevilsFoot_formatted_S2
DevilsFoot_formatted_S2-pair.txt
DevilsFoot_formatted_S2-poincare-label.tsv
DevilsFoot_formatted_S2-poincare-3d.model
DevilsFoot_formatted_S2-poincare-3d-vector.tsv
====> SpeckledBand_formatted_S2
SpeckledBand_formatted_S2-pair.txt
SpeckledBand_formatted_S2-poincare-label.tsv
SpeckledBand_formatted_S2-poincare-3d.model
SpeckledBand_formatted_S2-poincare-3d-vector.tsv
