# TensorBoardのEmbedding Projectorの動作確認

In [None]:
import codecs
import numpy as np
import os
import tensorflow as tf
import time
from bert_serving.server.helper import get_args_parser
from bert_serving.server.helper import get_shutdown_parser
from bert_serving.server import BertServer
from bert_serving.client import BertClient
from tensorflow.contrib.tensorboard.plugins import projector

In [None]:
# Directory holding the input files text.txt and label.txt.
DATA_DIR = '/app/data'
# Directory that receives metadata.tsv, the embeddings checkpoint and the
# projector config read by TensorBoard.
LOGS_DIR = '/app/logs'
# Scratch directory (same path as the temp locations given to the BERT server).
TMP_DIR = '/app/tmp'
# Number of columns in the document-vector matrix (one row per text).
NUM_FEATURES = 768

## データを入力

In [None]:
# Read the input sentences, one per line. Surrounding whitespace is removed
# and any remaining full-width space (U+3000) becomes an ASCII space.
text_path = os.path.join(DATA_DIR, 'text.txt')
with codecs.open(text_path, 'r', 'utf-8') as text_file:
    texts = [line.strip().replace('\u3000', ' ') for line in text_file]

# Read the labels, one per line, with surrounding whitespace removed.
# The i-th label is later written as the metadata row for the i-th text.
label_path = os.path.join(DATA_DIR, 'label.txt')
with codecs.open(label_path, 'r', 'utf-8') as label_file:
    labels = [line.strip() for line in label_file]

## 文書ベクトルを取得

### サーバを起動

In [None]:
# Start a bert-as-service server from inside the notebook.
# Both the ZeroMQ socket directory and the graph temp directory use the shared
# TMP_DIR constant so the scratch location is configured in a single place.
os.environ['ZEROMQ_SOCK_TMP_DIR'] = TMP_DIR
args = get_args_parser().parse_args([
    '-model_dir', '/app/model',
    '-ckpt_name', 'model.ckpt-1400000',
    '-config_name', 'bert-wiki-ja_config.json',
    '-graph_tmp_dir', TMP_DIR,
    # Default client/server ports; BertClient() below connects without arguments.
    '-port', '5555',
    '-port_out', '5556',
    '-max_seq_len', 'NONE',
    '-num_worker', '1',
    '-cpu',
    # Required so that the client may request tokens with show_tokens=True.
    '-show_tokens_to_client',
])
server = BertServer(args)
server.start()

# Give the server a fixed 20 seconds to come up before the next cell connects.
print('wait until server is ready...')
time.sleep(20)

### 文書ベクトルを取得

In [None]:
# Encode every text into one row of a (len(texts), NUM_FEATURES) float32 matrix.
vectors = np.zeros((len(texts), NUM_FEATURES), dtype="float32")
# The context manager closes the client even when encode() raises, so the
# client connection is not left open after a failed run.
with BertClient() as bc:
    for idx, text in enumerate(texts):
        # With show_tokens=True the result is a (vectors, tokens) pair;
        # [0][0] picks the vector of the single sentence that was sent.
        result = bc.encode([text], show_tokens=True)
        vectors[idx] = result[0][0]
        # Progress indicator: index of the text just encoded.
        print(idx)

In [None]:
# Wrap the document vectors in a TensorFlow variable named 'embeddings' so
# they can be initialized and saved to a checkpoint in a later cell.
embeddings = tf.Variable(vectors, name='embeddings')

### サーバを停止

In [None]:
# Shut down the BertServer instance that was started earlier in this notebook.
server.close()

## ラベル・ファイルを生成

In [None]:
# Write the labels to metadata.tsv, one label per line, in the same order
# as they were read; this path is later attached to the projector config.
metadata = os.path.join(LOGS_DIR, 'metadata.tsv')
with codecs.open(metadata, 'w', 'utf-8') as tsv:
    tsv.writelines(map('{}\n'.format, labels))

## データを出力

In [None]:
# Save the embeddings variable as a checkpoint and write the projector config
# that tells TensorBoard which tensor to show and where its labels live.
with tf.Session() as sess:
    saver = tf.train.Saver([embeddings])

    # Only this one variable needs initializing before it can be saved.
    sess.run(embeddings.initializer)
    saver.save(sess, os.path.join(LOGS_DIR, 'embeddings.ckpt'))

    config = projector.ProjectorConfig()
    # One can add multiple embeddings.
    embedding = config.embeddings.add()
    embedding.tensor_name = embeddings.name
    # Link this tensor to its metadata file (e.g. labels).
    embedding.metadata_path = metadata

    # Saves a config file that TensorBoard will read during startup.
    # The writer is bound to a name and closed explicitly so its event file
    # is flushed and released instead of being left open.
    writer = tf.summary.FileWriter(LOGS_DIR)
    try:
        projector.visualize_embeddings(writer, config)
    finally:
        writer.close()

## 以降の手順

### File -> New -> Terminal で新しいターミナルを起動して下記コマンドを実行

```bash
/app/run_tensorboard.sh
```

### "TensorBoard 1.14.0 at … (Press CTRL+C to quit)"というメッセージが表示されたらTensorBoardを開く

http://localhost:6006/#projector&run=.