In [1]:
%load_ext tensorboard


import os
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorboard.plugins import projector

In [2]:
# Delete ./logs/embeddings/ (recursively, ignoring a missing directory) so that
# files left by a previous run do not mix with the ones written further down.
!rm -rf ./logs/embeddings/

In [3]:
# Load the train and test splits of the subword-encoded IMDB reviews dataset,
# together with the dataset metadata that carries the text encoder.
imdb_splits, info = tfds.load(
    "imdb_reviews/subwords8k",
    split=(tfds.Split.TRAIN, tfds.Split.TEST),
    with_info=True,
    as_supervised=True,
)
train_data, test_data = imdb_splits
encoder = info.features["text"].encoder


def _shuffled_padded_batches(dataset):
    """Shuffle `dataset` with a 1000-element buffer and emit padded batches of 10.

    The first component of every element is padded along its single
    variable-length axis; the second component is a scalar and is left as is.
    """
    shuffled = dataset.shuffle(1000)
    return shuffled.padded_batch(10, padded_shapes=((None,), ()))


# Same shuffle-and-pad pipeline for both splits.
train_batches = _shuffled_padded_batches(train_data)
test_batches = _shuffled_padded_batches(test_data)

# Pull one batch out of the training pipeline for inspection.
first_train_element = next(iter(train_batches))
train_batch, train_labels = first_train_element


In [4]:
# Width of each learned subword vector.
embedding_dim = 32

# The embedding layer is created on its own so it stays reachable by name; it
# has one row per id in the encoder's vocabulary.
embedding = tf.keras.layers.Embedding(encoder.vocab_size, embedding_dim)

# Small classifier: embed -> average over the sequence axis -> hidden layer ->
# single output unit with no activation (i.e. the model emits a raw logit).
model = tf.keras.Sequential()
model.add(embedding)
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(tf.keras.layers.Dense(16, activation="relu"))
model.add(tf.keras.layers.Dense(1))

model.compile(
    optimizer="adam",
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    # The model outputs logits, so the decision boundary is 0.0, not the 0.5
    # probability threshold that the plain "accuracy" string would apply.
    # The metric keeps the name "accuracy" so history/log keys are unchanged.
    metrics=[tf.keras.metrics.BinaryAccuracy(name="accuracy", threshold=0.0)],
)

# One pass over the training batches; validation uses only the first 20
# batches of the test pipeline.
history = model.fit(
    train_batches, epochs=1, validation_data=test_batches, validation_steps=20
)



In [5]:
# Directory that receives everything the embedding projector needs:
# the label file, the checkpoint holding the vectors, and the projector config.
log_dir='./logs/embeddings/'
# exist_ok avoids the check-then-create race and makes the cell safe to re-run.
os.makedirs(log_dir, exist_ok=True)

# Write one label per line. Subwords can contain non-ASCII characters, so the
# encoding is pinned to UTF-8 instead of relying on the platform default.
with open(os.path.join(log_dir, 'metadata.tsv'), "w", encoding="utf-8") as metadata_file:
    for subwords in encoder.subwords:
        metadata_file.write("{}\n".format(subwords))

    # Fill the remaining ids with placeholder labels. Together with the loop
    # above this yields vocab_size - 1 lines, matching the rows saved below.
    for unknown in range(1, encoder.vocab_size - len(encoder.subwords)):
        metadata_file.write("unknown #{}\n".format(unknown))


# Copy the trained embedding matrix without row 0, which has no label in the
# metadata file (presumably the padding id - TODO confirm against the encoder).
weights = tf.Variable(model.layers[0].get_weights()[0][1:])


# Save the vectors under the checkpoint key "embedding".
checkpoint = tf.train.Checkpoint(embedding=weights)
checkpoint.save(os.path.join(log_dir, "embedding.ckpt"))


# Describe the saved tensor to the projector plugin.
# NOTE: this rebinds the notebook-level name `embedding`, which until now
# referred to the Keras Embedding layer.
config = projector.ProjectorConfig()
embedding = config.embeddings.add()


# The tensor name is the checkpoint key followed by the suffix under which
# object-based checkpoints store a variable's value.
embedding.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"
# Bare file name; the file itself was written into log_dir above.
embedding.metadata_path = 'metadata.tsv'
projector.visualize_embeddings(log_dir, config)

In [12]:
# Start TensorBoard through the notebook magic, reading from the directory that
# the metadata file, checkpoint and projector config were written to.
%tensorboard --logdir ./logs/embeddings


In [11]:
# NOTE(review): 69235 is a hardcoded process id - presumably the TensorBoard
# server of an earlier session (TODO confirm). Check the pid before running:
# on a fresh kernel it may not exist or may belong to an unrelated process.
!kill 69235