# TensorBoard Embedding

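Also known as dimensionality reduction: this notebook exports the training data as embeddings for the TensorBoard Embedding Projector.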

https://www.tensorflow.org/get_started/embedding_viz

After running all cells, start TensorBoard on the log directory:

    tensorboard --logdir=/data/podondra/embedding

In [1]:
import h5py
import numpy as np
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

In [2]:
# Open the HDF5 file with all data.
# The mode is given explicitly as read-only: the notebook never writes to the
# file, and a missing or mistyped path should raise immediately rather than
# risk creating an empty file.
f = h5py.File('data.hdf5', 'r')
# TODO change dtype of X_train to np.float32
# Until then the training matrix is converted to float32 while loading.
X = np.array(f['X_train'], dtype=np.float32)
# `[...]` reads the whole label dataset into memory as a NumPy array.
y = f['y_train'][...]

In [3]:
# Log directory for data serialization (event file, metadata and checkpoint).
# The trailing slash matters: other cells build paths with plain string
# concatenation, e.g. LOG_DIR + 'metadata.tsv'.
LOG_DIR = '/data/podondra/embedding/'

In [4]:
# Start a TensorFlow interactive session.
# It is kept in `sess` because later cells use it to initialise the variables
# and to save the checkpoint; it is closed explicitly in the last cell.
sess = tf.InteractiveSession()

In [5]:
# Start from an empty log directory: remove whatever a previous run left
# behind, then recreate the directory.
if tf.gfile.Exists(LOG_DIR):
    tf.gfile.DeleteRecursively(LOG_DIR)
tf.gfile.MakeDirs(LOG_DIR)

# Instantiate the TF file writer bound to the log directory and hand it the
# graph of the current session.
summary_writer = tf.summary.FileWriter(logdir=LOG_DIR, graph=sess.graph)

In [6]:
# Save the labels to metadata.tsv (Tab-Separated Values), one integer label
# per row. With a single column no header row is written; if there were more
# than one column, the first row would have to hold the column names.
np.savetxt(
    fname=LOG_DIR + 'metadata.tsv',
    X=y.reshape(-1, 1),
    fmt='%d',
)

In [7]:
# Start creating the projector config. It begins empty; add_embedding()
# appends one entry to `config.embeddings` per embedding.
config = projector.ProjectorConfig()

In [8]:
# add embeddings
def add_embedding(X, name):
    """Register the array `X` as an embedding in the projector config.

    A TensorFlow variable initialised from `X` is created under `name`, and a
    new entry pointing at that variable and at the shared metadata file is
    appended to the module-level `config`.

    Parameters
    ----------
    X : array-like
        Values of the embedding; passed to `tf.constant`.
    name : str
        Name given to the TensorFlow variable.

    Returns
    -------
    None
        The function works through side effects on the default graph and on
        `config`.
    """
    variable = tf.Variable(tf.constant(X), name=name)
    # The entry stores the variable's own `.name` and links the tensor to the
    # metadata file inside LOG_DIR.
    config.embeddings.add(
        tensor_name=variable.name,
        metadata_path=LOG_DIR + 'metadata.tsv',
    )
    
# First embedding: the data as loaded.
add_embedding(X, 'normalized')
# Second embedding: shift by one, then divide by the largest absolute value
# of the shifted data. The shifted array is computed once and reused instead
# of evaluating `X - 1` twice over the whole data set.
# NOTE(review): if every entry of X equals 1 the divisor is zero - confirm
# that this cannot happen for the data at hand.
X_shifted = X - 1
add_embedding(X_shifted / np.max(np.abs(X_shifted)), 'scaled')

In [9]:
# Run the initialiser so that every variable created so far holds its value.
sess.run(tf.global_variables_initializer())
# Checkpoint the session under LOG_DIR with global step 0. The call is left as
# the last expression so the cell displays the returned checkpoint path.
saver = tf.train.Saver()
saver.save(sess, save_path=LOG_DIR + 'model.ckpt', global_step=0)

'/data/podondra/embedding/model.ckpt-0'

In [10]:
# Write the projector config: hand the finished `config` to the projector
# plugin together with the writer that was created on LOG_DIR.
projector.visualize_embeddings(summary_writer, config)

In [11]:
# Release every resource opened by the notebook.
# The summary writer is closed first so that any events it still buffers are
# flushed to the event file in LOG_DIR before the session goes away.
summary_writer.close()
sess.close()
# X and y were copied into memory when loaded, so the HDF5 file can be closed.
f.close()