# Example notebook 1: visualize word embeddings

In [1]:
import os

import numpy as np
from zeugma import EmbeddingTransformer
 
# Build the transformer with its default arguments; later cells use it through
# `.transform([...])` to turn words into vectors.
# NOTE(review): which pretrained embedding the defaults select is decided
# inside zeugma and is not visible here — confirm before relying on it.
embedding_transformer = EmbeddingTransformer()

Using TensorFlow backend.


## Create the embeddings matrix for the words present in a given corpus

In [2]:
from zeugma import TextsToSequences

# Define a corpus to get a list of relevant words
corpus = ['developer', 'programmer']

# Fit zeugma's text-to-sequence transformer on the corpus, then read the
# `word_index` attribute from the fitted object. Later code iterates it with
# `.items()` as (word, row index) pairs and writes its keys as point labels.
tts = TextsToSequences()
tts.fit(corpus)
word_index = tts.word_index

def create_embedding_matrix(word_index, embedding_transformer):
    """Build a matrix whose row ``i`` holds the embedding of the word indexed ``i``.

    Args:
        word_index: Mapping from word (str) to a non-negative integer row index.
        embedding_transformer: Object exposing ``transform(list_of_str)`` that
            returns a 2-D array with one row per input string.

    Returns:
        A 2-D ``numpy`` array of floats. It has at least ``len(word_index) + 1``
        rows and always enough rows to hold the largest index in ``word_index``.
        Rows whose index is not present in ``word_index`` are left as zeros.
    """
    # Probe the transformer with a throwaway word to discover the vector size.
    embedding_dim = embedding_transformer.transform(['the']).shape[1]
    # Keep the historical "number of words + 1" size, but also grow to cover
    # the largest index so non-contiguous indices cannot overflow the matrix.
    highest_index = max(word_index.values(), default=0)
    n_rows = max(len(word_index), highest_index) + 1
    embedding_matrix = np.zeros((n_rows, embedding_dim))
    for word, i in word_index.items():
        # transform() returns one row per input text; take the single row.
        embedding_matrix[i] = embedding_transformer.transform([word])[0]
    return embedding_matrix

# Build the matrix for the corpus vocabulary: one row per index in `word_index`,
# filled with the vector returned by `embedding_transformer` for that word.
embedding_matrix = create_embedding_matrix(word_index, embedding_transformer)

## Set up TensorBoard configuration

In [3]:
import tensorflow as tf
from tensorboard.plugins import projector

# Create the log directory.
# Resolve it against the kernel's working directory instead of
# os.environ['PWD']: PWD may be unset or may point somewhere other than the
# working directory, while the relative 'logs' path given to TensorBoard is
# resolved against the working directory.
LOG_DIR = os.path.join(os.getcwd(), 'logs')
os.makedirs(LOG_DIR, exist_ok=True)

# Write word names in a file for tensorboard display.
# NOTE(review): the file has a single column, and TensorBoard's projector
# documentation says single-column metadata is read without a header row, so
# the leading 'Word' line presumably labels the first embedding row — confirm.
metadata_path = os.path.join(LOG_DIR, 'metadata.tsv')
with open(metadata_path, 'w', encoding='utf-8') as f:
    f.write('Word\n' + '\n'.join(word_index.keys()))

# Create a tensorflow variable from embedding matrix
embedding_var = tf.Variable(embedding_matrix)

# Save embeddings in model.ckpt, inside LOG_DIR so the checkpoint sits next to
# the metadata file and the projector config written below.
# No session is passed (None) — assumes eager execution; TODO confirm.
saver = tf.compat.v1.train.Saver([embedding_var])
saver.save(None, os.path.join(LOG_DIR, "model.ckpt"))

# Add metadata (i.e. words here) to the embeddings points
config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = embedding_var.name
embedding.metadata_path = metadata_path

# Write the projector configuration into the log directory
projector.visualize_embeddings(LOG_DIR, config)



## Embed TensorBoard in the notebook

In [6]:
%load_ext tensorboard
%tensorboard --logdir logs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


## Run tensorboard locally

In [4]:
# Run tensorboard to visualize the embeddings in your
# browser on port 6006: http://localhost:6006
# Navigate to the 'Projector' tab to visualize the embeddings
# The command keeps running, and this cell stays busy, until it is
# interrupted (CTRL+C in a terminal, or interrupting the kernel here).
!tensorboard --logdir logs/ --host 127.0.0.1

TensorBoard 2.0.1 at http://127.0.0.1:6006/ (Press CTRL+C to quit)
^C
