##### Copyright 2020 The TensorFlow Authors.

In [5]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Visualizing Data using the Embedding Projector in TensorBoard

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/tensorboard/tensorboard_projector_plugin"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/tensorboard/blob/master/docs/tensorboard_projector_plugin.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/tensorboard/blob/master/docs/tensorboard_projector_plugin.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>

## Overview

Using the **TensorBoard Embedding Projector**, you can graphically represent high dimensional embeddings. This can be helpful in visualizing, examining, and understanding your embedding layers.

<img src="https://github.com/tensorflow/docs/blob/master/site/en/tutorials/text/images/embedding.jpg?raw=\" alt="Screenshot of the embedding projector" width="400"/>

In this tutorial, you will learn how visualize this type of trained layer.

## Setup

For this tutorial, we will be using TensorBoard to visualize an embedding layer generated for classifying movie review data.

In [1]:
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

%load_ext tensorboard

In [23]:
import os
import datetime
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorboard.plugins import projector
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import InputLayer
from tensorflow import keras


In [24]:
param_epochs=200
param_learningrate=0.01

In [25]:
#load data from a CSV
csv_original = pd.read_csv('../datasets/animals.csv', header=0, sep=';')
csv = csv_original.copy()
print(csv)
csv=csv.drop('name',1)
csv_train, csv_test = train_test_split(csv, test_size=0.2)

#print('\nEntire dataset:\n ',csv)
#print('\nTrain dataset:\n',csv_train)
#print('\nTest dataset:\n',csv_test)

#get the N samples of 4 features == array N x 4  (filter columns  1 to the end)
train_data = csv_train.values[:,[0,1,2,3]]
#get the targets for each sample == array 1 x N (#filter the first column)
train_target = csv_train.values[:,4]

test_data=csv_test.values[:,[0,1,2,3]]
test_target=csv_test.values[:,4]

print('\nTrain:\n',csv_train)
print('\nTrain Data:  ',train_data)
print('\nTrain target:',train_target)

print('\nTest:\n',csv_test)
print('\nTest Data:  ',test_data)
print('\nTest target:',test_target)

         name  Has legs  Can swim  Has gills  breathes  lives in the water
0         Dog         1         1          0         1                   0
1         Cat         1         1          0         1                   0
2       Mouse         1         1          0         1                   0
3      Turtle         1         1          1         1                   0
4        Fish         0         1          1         0                   1
5         Man         1         1          0         1                   0
6      Lizard         1         1          0         1                   0
7       Whale         0         1          0         1                   1
8     Dolphin         0         1          0         1                   1
9         Ape         1         1          0         1                   0
10        Bat         1         0          0         1                   0
11  Crocodile         1         1          0         1                   1
12  Elephant          1  

# Keras Embedding Layer

A [Keras Embedding Layer](https://keras.io/layers/embeddings/) can be used to train an embedding for each word in your volcabulary. Each word (or sub-word in this case) will be associated with a 16-dimensional vector (or embedding) that will be trained by the model.

See [this tutorial](https://www.tensorflow.org/tutorials/text/word_embeddings?hl=en) to learn more about word embeddings.

In [134]:
# Create an embedding layer
embedding_dim = 4
embedding = tf.keras.layers.Embedding(16,input_length=4,output_dim=1)

# Train this embedding as part of a keras model
model = tf.keras.Sequential(
    [
        embedding, # The embedding layer should be the first layer in a model.
        tf.keras.layers.Dense(1,activation="sigmoid"),
    ]
)

# define the keras model
model = Sequential()
model.add(embedding)
#model.add(InputLayer(4))
model.add(Dense(4, activation='sigmoid'))
print(model.summary())

Model: "sequential_84"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_64 (Embedding)     (None, 4, 4)              64        
_________________________________________________________________
dense_102 (Dense)            (None, 4, 4)              20        
Total params: 84
Trainable params: 84
Non-trainable params: 0
_________________________________________________________________
None


In [135]:
# Compile model
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=["accuracy"],
)



In [136]:
# Train model
#history = model.fit(test_data, epochs=1, validation_data=test_batches, validation_steps=20)

log_dir = "/tmp/logs/aaa/" + datetime.datetime.now().strftime("%Y%m%d-%Hh%M.%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# fit the keras model on the dataset
history = model.fit(train_data,train_target,epochs=param_epochs, batch_size=10, verbose=1, validation_data=(test_data, test_target),callbacks=[tensorboard_callback])


Epoch 1/200


ValueError: in user code:

    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:756 train_step
        y, y_pred, sample_weight, regularization_losses=self.losses)
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/keras/losses.py:152 __call__
        losses = call_fn(y_true, y_pred)
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/keras/losses.py:256 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/keras/losses.py:1608 binary_crossentropy
        K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1)
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/keras/backend.py:4979 binary_crossentropy
        return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /home/rhp/PycharmProjects/venv_3_6/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py:174 sigmoid_cross_entropy_with_logits
        (logits.get_shape(), labels.get_shape()))

    ValueError: logits and labels must have the same shape ((None, 4, 4) vs (None, 1))


## Saving data for TensorBoard

TensorBoard reads tensors and metadata from your tensorflow projects from the logs in the specified `log_dir` directory. For this tutorial, we will be using `/logs/imdb-example/`.

In order to visualize this data, we will be saving a checkpoint to that directory, along with metadata to understand which layer to visualize.

In [125]:
# Set up a logs directory, so Tensorboard knows where to look for files
log_dir='/tmp/logs/aaa/'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

# Save Labels separately on a line-by-line manner.
with open(os.path.join(log_dir, 'metadata.tsv'), "w") as f:
  for subwords in encoder.subwords:
    f.write("{}\n".format(subwords))
  # Fill in the rest of the labels with "unknown"
  for unknown in range(1, encoder.vocab_size - len(encoder.subwords)):
    f.write("unknown #{}\n".format(unknown))


# Save the weights we want to analyse as a variable. Note that the first
# value represents any unknown word, which is not in the metadata, so
# we will remove that value.
weights = tf.Variable(model.layers[0].get_weights()[0][1:])
# Create a checkpoint from embedding, the filename and key are
# name of the tensor.
checkpoint = tf.train.Checkpoint(embedding=weights)
checkpoint.save(os.path.join(log_dir, "embedding.ckpt"))

# Set up config
config = projector.ProjectorConfig()
embedding = config.embeddings.add()
# The name of the tensor will be suffixed by `/.ATTRIBUTES/VARIABLE_VALUE`
embedding.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"
embedding.metadata_path = 'metadata.tsv'
projector.visualize_embeddings(log_dir, config)

In [9]:
%/tensorboard --logdir /tmp/logs/imdb-example/

ERROR: Could not find `tensorboard`. Please ensure that your PATH
contains an executable `tensorboard` program, or explicitly specify
the path to a TensorBoard binary by setting the `TENSORBOARD_BINARY`
environment variable.

<!-- <img class="tfo-display-only-on-site" src="images/embedding_projector.png?raw=1"/> -->

## Analysis
The TensorBoard Projector is a great tool for analyzing your data and seeing embedding values relative to each other. The dashboard allows searching for specific terms, and highlights words that are nearby in the embedding space. From this example we can see that Wes **Anderson** and Alfred **Hitchcock** are both rather neutral terms, but that they are referenced in different contexts.

<!-- <img class="tfo-display-only-on-site" src="images/embedding_projector_hitchcock.png?raw=1"/> -->

Hitchcock is closer associated to words like `nightmare`, which likely relates to his work in horror movies. While Anderson is closer to the word `heart`, reflecting his heartwarming style.

<!-- <img class="tfo-display-only-on-site" src="images/embedding_projector_anderson.png?raw=1"/> -->