In [1]:
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

import cv2

import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

import glob
import os

%matplotlib inline

  from ._conv import register_converters as _register_converters


In [2]:
files = glob.glob('./data/**/*.npy')
im_files = glob.glob('./data/**/*.png')
labels = [f.split('/')[2] for f in files]

u_labels = set(labels)
label_dict = dict(zip(u_labels, range(len(u_labels))))
num_labels = [label_dict[l] for l in labels]

num_labels = np.array(num_labels)
labels = np.array(labels)
len_data = len(labels)

In [3]:
images = np.zeros((len_data, 80, 80, 3), dtype=np.int32)
for i, f in enumerate(im_files):
    images[i] = cv2.resize(cv2.imread(f), (80, 80))

Label imbalance:

In [4]:
top10 = Counter(labels).most_common()[:10]
top10

[('George_W_Bush', 530),
 ('Colin_Powell', 236),
 ('Tony_Blair', 144),
 ('Donald_Rumsfeld', 121),
 ('Gerhard_Schroeder', 109),
 ('Ariel_Sharon', 77),
 ('Hugo_Chavez', 71),
 ('Junichiro_Koizumi', 60),
 ('Jean_Chretien', 55),
 ('John_Ashcroft', 53)]

In [5]:
embeds = []
for f in files:
    embeds.append(np.load(f))
embeds = np.array(embeds)
embeds.shape

(4324, 128)

In [6]:
def get_name_indices(names, num=20):
    idx = np.arange(len(labels))
    indices = []
    for name in names:
        indices.append(idx[labels==name][:num])
        
    return np.hstack(indices)

subset = get_name_indices([name_count[0] for name_count in top10])

In [7]:
!mkdir ./logs/
!mkdir ./logs/1
PATH = os.getcwd()
LOG_DIR = PATH+'/logs/1/'

mkdir: ./logs/: File exists
mkdir: ./logs/1: File exists


In [8]:
with open(os.path.join(LOG_DIR, 'metadata.tsv'), 'w') as f:
    f.write('Class\tName\n')
    for num, name in zip(num_labels[subset], labels[subset]):
        f.write('{}\t{}\n'.format(num,name))

In [9]:
def images_to_sprite(data):
    """Creates the sprite image along with any necessary padding
    Args:
      data: NxHxW[x3] tensor containing the images.
    Returns:
      data: Properly shaped HxWx3 image with any necessary padding.
    """
    if len(data.shape) == 3:
        data = np.tile(data[...,np.newaxis], (1,1,1,3))
    data = data.astype(np.float32)
    min = np.min(data.reshape((data.shape[0], -1)), axis=1)
    data = (data.transpose(1,2,3,0) - min).transpose(3,0,1,2)
    max = np.max(data.reshape((data.shape[0], -1)), axis=1)
    data = (data.transpose(1,2,3,0) / max).transpose(3,0,1,2)
    # Inverting the colors seems to look better for MNIST
    #data = 1 - data

    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = ((0, n ** 2 - data.shape[0]), (0, 0),
            (0, 0)) + ((0, 0),) * (data.ndim - 3)
    data = np.pad(data, padding, mode='constant',
            constant_values=0)
    # Tile the individual thumbnails into an image.
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3)
            + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    data = (data * 255).astype(np.uint8)
    return data
#%%
sprite = images_to_sprite(images[subset])
cv2.imwrite(os.path.join(LOG_DIR, 'sprite_4_classes.png'), sprite)

True

In [10]:
features = tf.Variable(embeds[subset], name='features')

with tf.Session() as sess:
    saver = tf.train.Saver([features])

    sess.run(features.initializer)
    saver.save(sess, os.path.join(LOG_DIR, 'images_4_classes.ckpt'))
    
    config = projector.ProjectorConfig()
    # One can add multiple embeddings.
    embedding = config.embeddings.add()
    embedding.tensor_name = features.name
    # Link this tensor to its metadata file (e.g. labels).
    embedding.metadata_path = os.path.join(LOG_DIR, 'metadata.tsv')
    # Comment out if you don't want sprites
    embedding.sprite.image_path = os.path.join(LOG_DIR, 'sprite_4_classes.png')
    embedding.sprite.single_image_dim.extend([images.shape[1], images.shape[1]])
    # Saves a config file that TensorBoard will read during startup.
    projector.visualize_embeddings(tf.summary.FileWriter(LOG_DIR), config)


In [11]:
!tensorboard --logdir=./logs/1/

  from ._conv import register_converters as _register_converters
TensorBoard 1.5.1 at http://Sachinthakas-MacBook-Pro.local:6006 (Press CTRL+C to quit)
^C


[click here](http://localhost:6006/) to open tensorboard.

array([0.9999999 , 0.99999994, 1.        , ..., 1.0000001 , 1.        ,
       1.        ], dtype=float32)