Plotting t-SNE figures of ShapeWorld vocabulary

In [None]:
# Source vocabulary as exposed by the ShapeWorld API (kept for reference).
SIMPLE_SRC_VOCAB = [
    '', '.', 'a', 'blue', 'circle', 'cross', 'cyan', 'ellipse', 'gray',
    'green', 'is', 'magenta', 'pentagon', 'rectangle', 'red', 'semicircle',
    'shape', 'square', 'there', 'triangle', 'yellow', '[UNKNOWN]',
]

# --- Word groups --------------------------------------------------------
# Concrete shape names
SHAPES = [
    'circle', 'cross', 'ellipse', 'pentagon',
    'rectangle', 'semicircle', 'square', 'triangle',
]
# Generic (abstract) word for any shape
SHAPES_AUX = ['shape']
# Colour names
COLORS = ['blue', 'cyan', 'gray', 'green', 'magenta', 'red', 'yellow']
# Stop words
STOPS = ['a', 'an', 'there', 'is', '.']
# Auxiliary tokens (padding, unknown, sentence start/end) placed ahead of the real words
AUX_VOCAB = ['', '[UNKNOWN]', '<S>', '</S>']

# --- Composite vocabularies ---------------------------------------------
# List order matters: position i in a vocabulary labels embedding row i.
SHAPE_VOCAB = AUX_VOCAB + SHAPES
COLOR_VOCAB = AUX_VOCAB + COLORS
SHAPE_COLOR_VOCAB = SHAPE_VOCAB + COLORS
STANDARD_VOCAB = SHAPE_COLOR_VOCAB + ['there', 'is', 'a']

AGREEMENT_ONESHAPE_VOCAB = SHAPE_VOCAB + SHAPES_AUX + COLORS + STOPS

In [None]:
# __future__ imports must be the first statements of the cell, otherwise
# Python raises "SyntaxError: from __future__ imports must occur at the beginning of the file".
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import csv
import math
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.manifold import TSNE

In [None]:
# Restore the trained model from its latest checkpoint and pull the word
# embedding matrix out of the graph as a NumPy array (`v_`).
tf.reset_default_graph()
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

# Point to dir where the checkpoint is
loc = './models/final/oneshape/train'
# Where to save .eps figs
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
output_root = "/Users/tom/thesis-acs/thesis/figs/"

# Name the output .eps
output_fname = output_root + 'c5-oneshape-vocab-space2.eps'

# Choose vocab to label each embedding index
vocab = AGREEMENT_ONESHAPE_VOCAB

# Get TF checkpoint. latest_checkpoint() returns None when the directory holds
# no checkpoint, so fail here with a clear message instead of a TypeError on
# the string concatenation below.
ckpt = tf.train.latest_checkpoint(loc)
if ckpt is None:
    raise IOError("No TensorFlow checkpoint found in %r" % loc)

# Import meta graph (avoids having to do any model.build() calls) and restore from ckpt
new_saver = tf.train.import_meta_graph(ckpt + ".meta")
new_saver.restore(sess, ckpt)

# Get the sequence embeddings from the graph (assumed name is seq_embeddings/seq_map)
embedding_var_name = 'seq_embeddings/seq_map:0'
matching_vars = [v for v in tf.global_variables() if v.name == embedding_var_name]
if not matching_vars:
    # Report what is actually in the graph so the expected name can be corrected.
    raise LookupError(
        "Variable %r not found in restored graph; available variables: %s"
        % (embedding_var_name, [v.name for v in tf.global_variables()]))
var = matching_vars[0]

# Print Tensor REPR
print(var)

# Get Numpy version of the tensor
v_ = sess.run(var)

# Print Numpy embedding obj
print(v_)

In [None]:
# Must have as many words as embeddings (one vocab entry per embedding row).
# The message reports both sizes so a mismatch can be diagnosed immediately.
n_embeddings = np.shape(v_)[0]
assert n_embeddings == len(vocab), (
    "Embedding matrix has %d rows but vocab has %d words"
    % (n_embeddings, len(vocab)))

In [None]:
# Run t-SNE to collapse the embeddings to 2D. Edit perplexity for your data.
# t-SNE is stochastic: random_state is fixed so that re-running the notebook
# reproduces the same layout (and therefore the same saved figure).
tsne = TSNE(n_components=2, perplexity=8, random_state=0)
v_tsne = tsne.fit_transform(v_)

In [None]:
# ShapeWorld-specific colour coding for the scatter plot, one entry per vocab word:
# shape words -> blue, colour words -> red, everything else -> green.
colors = [
    'blue' if word in SHAPES else ('red' if word in COLORS else 'green')
    for word in vocab
]

In [None]:
# Generate a scatter plot of the 2D t-SNE embeddings, annotate every point
# with its vocabulary word, and save the figure to `output_fname`.

hfont = {'fontname':'Helvetica'}

plt.style.use('ggplot')
fig, ax = plt.subplots()
fig.set_size_inches(8.24, 6)

# The original passed `label=labels`, but `labels` is not defined in any cell
# (NameError on a fresh kernel) and no legend is drawn, so the kwarg is dropped.
ax.scatter(v_tsne[:,0], v_tsne[:,1], c=colors)

# One text annotation per word, anchored at that word's 2D coordinates.
ann = []
for i, txt in enumerate(vocab):
    ann.append(ax.annotate(txt, (v_tsne[i,0], v_tsne[i,1]),fontsize=12,**hfont))

# NOTE(review): `mask` is not used by any cell visible here; kept in case
# later cells rely on it.
mask = np.zeros(fig.canvas.get_width_height(), bool)

# Save through the explicit figure handle rather than pyplot's "current figure".
fig.savefig(output_fname,dpi=300)

fig.canvas.draw()
