In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import os
import csv
from tensorflow.contrib.tensorboard.plugins import projector
from tensorflow.examples.tutorials.mnist import input_data
import cv2
# Directory that receives the sprite image, the summary-writer output and the
# model checkpoint produced by the cells below.
LOG_DIR = 'tensorboard'
# Name given to the tf.Variable that holds the embedding matrix.
NAME_TO_VISUALISE_VARIABLE = "styleembedding"
# NOTE(review): not referenced by any cell visible in this notebook; presumably
# meant to cap how many products are embedded - confirm or remove.
TO_EMBED_COUNT = 500

In [2]:
def get_product_to_features(product_feats_file,
                            rf_feature_import_file='rf_feat_import.dat',
                            n_top_features=150):
    """Load per-product feature vectors, keeping only the most important columns.

    Per-feature importances are loaded with ``np.load`` from
    ``rf_feature_import_file``. The ``n_top_features`` feature indexes with the
    highest importance are kept (in their original column order); every other
    ranked index is removed from each product's vector.

    Args:
      product_feats_file: Path to a tab-separated file whose rows are
        ``product_id<TAB>feat_0<TAB>feat_1<TAB>...``. Blank rows are skipped.
      rf_feature_import_file: Path to the 1-D array of feature importances.
      n_top_features: How many of the highest-importance features to keep.

    Returns:
      dict mapping product id (str) to a 2-D float array with a single row
      containing the retained features.
    """
    all_feat_importances = np.asarray(np.load(rf_feature_import_file), dtype=float)
    # Rank every feature index by descending importance. The ranking covers the
    # whole importance array rather than a fixed 1024 entries, and the stable
    # sort keeps the lower index first on ties.
    ranked_indexes = np.argsort(-all_feat_importances, kind='stable')
    bottom_feature_indexes = ranked_indexes[n_top_features:]

    product_to_feats = {}
    with open(product_feats_file, 'r') as tsvfile:
        for row in csv.reader(tsvfile, delimiter='\t'):
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            product_id = row[0]
            feats_stored = [float(n) for n in row[1:]]
            feats_reduced = np.delete(feats_stored, bottom_feature_indexes)
            # Stored as shape (1, k); callers index [0] to get the vector.
            product_to_feats[product_id] = np.array([feats_reduced])
    return product_to_feats


def make_data_inputs(product_file, product_to_features):
    """Pair each product listed in ``product_file`` with its feature vector.

    Rows are tab-separated: column 0 is read as the product id, column 2 as the
    category and column 3 as the name. A row with fewer than four columns, or
    whose id has no entry in ``product_to_features``, is reported on stdout and
    skipped, so the two returned lists always stay aligned.

    Args:
      product_file: Path to the tab-separated product file.
      product_to_features: Mapping from product id to a sequence whose first
        element is that product's feature vector.

    Returns:
      Tuple ``(X_product_features, X_product_ids)`` of two parallel lists: the
      feature vectors, and ``(product_id, product_category, product_name)``
      tuples.
    """
    print('Reading file ', product_file)
    X_product_features = []
    X_product_ids = []
    with open(product_file, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            # Drop the line terminator so it does not end up in the last field.
            fields = line.rstrip('\n').split('\t')
            if len(fields) < 4:
                print('Skipping line {} of {}: expected at least 4 columns, got {}'.format(
                    line_no, product_file, len(fields)))
                continue
            product_id, product_category, product_name = fields[0], fields[2], fields[3]

            try:
                features = product_to_features[product_id][0]
            except (KeyError, IndexError):
                print('Skipping line {} of {}: no features for product {!r}'.format(
                    line_no, product_file, product_id))
                continue

            X_product_features.append(features)
            X_product_ids.append((product_id, product_category, product_name))
    return X_product_features, X_product_ids

In [3]:
# Taken from: https://github.com/tensorflow/tensorflow/issues/6322
def images_to_sprite(data):
    """Tile a batch of images into a single square sprite image.

    Each image is min-max normalised on its own to the range [0, 1], the batch
    is zero-padded up to the next perfect square, the images are laid out
    row-major on an n x n grid, and the result is scaled to uint8.
    An image whose pixels are all equal becomes an all-zero tile.

    Args:
      data: NxHxW or NxHxWxC array containing the images. NxHxW input is
        replicated onto 3 channels.
    Returns:
      uint8 array of shape (n*H, n*W, C), where n = ceil(sqrt(N)).
    Raises:
      ValueError: if ``data`` is neither 3- nor 4-dimensional.
    """
    data = np.asarray(data)
    if data.ndim == 3:
        data = np.tile(data[..., np.newaxis], (1, 1, 1, 3))
    if data.ndim != 4:
        raise ValueError(
            'expected an NxHxW or NxHxWxC array, got shape {}'.format(data.shape))
    data = data.astype(np.float32)

    # Per-image statistics are reshaped to (N, 1, 1, 1) so they broadcast over
    # the height, width and channel axes.
    per_image = (-1, 1, 1, 1)
    lows = data.reshape((data.shape[0], -1)).min(axis=1)
    data = data - lows.reshape(per_image)
    highs = data.reshape((data.shape[0], -1)).max(axis=1)
    # A constant image has range 0; divide by 1 instead so it maps to zeros
    # rather than NaN.
    highs[highs == 0] = 1
    data = data / highs.reshape(per_image)
    # Inverting the colors seems to look better for MNIST
    #data = 1 - data

    n = int(np.ceil(np.sqrt(data.shape[0])))
    # Pad only the batch axis, with black images, up to n * n entries.
    padding = ((0, n ** 2 - data.shape[0]), (0, 0), (0, 0), (0, 0))
    data = np.pad(data, padding, mode='constant', constant_values=0)
    # Tile the individual thumbnails into an image:
    # (n*n, H, W, C) -> (row, col, H, W, C) -> (row, H, col, W, C) -> (n*H, n*W, C)
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3, 4))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    data = (data * 255).astype(np.uint8)
    return data

In [25]:
# Relative paths to the "small" outfit dataset files.
product_feats_file = 'data-outfits/outfit_product_features_small.tsv'
product_ids_file = 'data-outfits/outfit_products_small.tsv'
# NOTE(review): not used by any cell visible in this notebook.
outfit_permutations_file = 'data-outfits/outfit_permutations_small.tsv'
# Product id -> reduced feature array, as built by get_product_to_features.
prods_to_feats = get_product_to_features(product_feats_file)

In [5]:
# Sanity check: length of one product's reduced feature vector.
# NOTE(review): the product id is hard-coded, so this raises KeyError if that
# id is absent from the loaded feature file.
len(prods_to_feats['641336745'][0])

150

In [6]:
# Load the feature vectors together with the (id, category, name) triple of
# every product that has features.
X_features, X_prods = make_data_inputs(product_ids_file, prods_to_feats)
# Split the triples into parallel id / category lists.
X_prod_ids = [prod_id for prod_id, _category, _name in X_prods]
X_prod_categories = [category for _prod_id, category, _name in X_prods]

Reading file  data-outfits/outfit_products_small.tsv


In [7]:
# Stack the per-product feature vectors into one 2-D array (one row per product).
# Note: this rebinds X_features from a list to an ndarray.
X_features = np.array(X_features)
X_features.shape

(18673, 150)

In [26]:
def make_image_sprites(product_file, data_path='data-outfits/images_small',
                       thumb_size=(100, 100)):
    """Build one sprite image from the product photos and write it to LOG_DIR.

    For every row of ``product_file`` the first tab-separated column is taken
    as the product id, ``<data_path>/<product_id>.jpg`` is loaded with OpenCV
    and resized to ``thumb_size``. The thumbnails are tiled with
    ``images_to_sprite`` and saved as ``sprite_classes.png`` under ``LOG_DIR``.

    NOTE(review): every row with a non-empty id contributes a tile, whereas
    make_data_inputs skips rows that have no features. The projector presumably
    expects tile i to correspond to embedding row i - confirm both functions
    see the same set of rows.

    Args:
      product_file: Path to the tab-separated product file.
      data_path: Directory containing one ``<product_id>.jpg`` per product.
      thumb_size: ``(width, height)`` passed to ``cv2.resize``; should match
        the ``single_image_dim`` given to the projector config.

    Raises:
      IOError: if an image cannot be read or the sprite cannot be written.
    """
    img_data = []
    with open(product_file, 'r') as f:
        for line in f:
            # Strip the terminator so a single-column row yields a clean id.
            product_id = line.rstrip('\n').split('\t')[0]
            if not product_id:
                continue
            img_path = data_path + '/' + product_id + '.jpg'
            input_img = cv2.imread(img_path)
            if input_img is None:
                # cv2.imread reports failure by returning None, not by raising.
                raise IOError('Could not read image {!r}'.format(img_path))
            img_data.append(cv2.resize(input_img, thumb_size))
    sprite = images_to_sprite(np.array(img_data))

    # cv2.imwrite does not create missing directories, and LOG_DIR may not
    # exist yet when this runs before the summary writer is created.
    os.makedirs(LOG_DIR, exist_ok=True)
    sprite_path = os.path.join(LOG_DIR, 'sprite_classes.png')
    if not cv2.imwrite(sprite_path, sprite):
        raise IOError('Could not write sprite image {!r}'.format(sprite_path))

In [None]:
# Reads one image per row of the product file and writes the combined sprite
# to <LOG_DIR>/sprite_classes.png.
make_image_sprites(product_ids_file)

In [20]:
# Wrap the feature matrix in a TF variable so that it is stored in the
# checkpoint written by the tf.train.Saver cell below.
# NOTE(review): all rows of X_features are embedded; TO_EMBED_COUNT is not applied.
embedding_var = tf.Variable(X_features, name=NAME_TO_VISUALISE_VARIABLE)
summary_writer = tf.summary.FileWriter(LOG_DIR)

config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = embedding_var.name

# Metadata (labels) file for the embedded points.
# NOTE(review): no visible cell writes this file, and the path is presumably
# resolved relative to LOG_DIR - confirm it exists with one row per embedding.
embedding.metadata_path = 'product_names_tsne.tsv'

# Sprite image written by make_image_sprites. single_image_dim has to match the
# 100x100 thumbnail size used there.
embedding.sprite.image_path = 'sprite_classes.png'
embedding.sprite.single_image_dim.extend([100, 100])

# Register the embedding configuration with the summary writer for LOG_DIR.
projector.visualize_embeddings(summary_writer, config)



In [21]:

# Initialise the variables and save them to a checkpoint inside LOG_DIR.
# NOTE(review): the session is never closed in the visible cells.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
# The final argument (1) is the global step, so the file prefix is model.ckpt-1.
saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"), 1)


'tensorboard/model.ckpt-1'

In [None]:
# To view the embeddings, run this in a shell from the notebook directory:
#   tensorboard --logdir tensorboard
# Adapted from:
# https://github.com/anujshah1003/Tensorboard-own-image-data-image-features-embedding-visualization/blob/master/own-data-embedding-visualization.py