In [11]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import os
import csv
from tensorflow.contrib.tensorboard.plugins import projector
from tensorflow.examples.tutorials.mnist import input_data
import cv2


# --- Input data ---------------------------------------------------------
# Tab-separated product feature vectors (product id in the first column).
product_feats_file = 'data-outfits/outfit_product_features_small.tsv'
product_ids_file = 'data-outfits/outfit_products_small.tsv'
# Whitespace-separated outfit combinations (collection id + product ids).
outfit_combinations_file = 'data-outfits/outfit_combinations_small.tsv'

# --- TensorBoard output -------------------------------------------------
# Directory receiving the metadata file, the sprite image and the checkpoint.
LOG_DIR = 'tensorboard_outfits'
METADATA_FILE = '{}/outfits_metadata.tsv'.format(LOG_DIR)
# Name given to the tf.Variable that holds the embedding matrix.
NAME_TO_VISUALISE_VARIABLE = "style_embedding"
# NOTE(review): not referenced anywhere else in the visible notebook.
TO_EMBED_COUNT = 500
# Side length (pixels) of each square thumbnail in the sprite image.
IMAGE_SIZE = 250

In [2]:
def get_product_to_features(product_feats_file, n_top_features=100,
                            feature_importance_file='rf_feat_import.dat'):
    """Load per-product feature vectors, keeping only the most important ones.

    Feature importances are read with ``np.load`` from
    ``feature_importance_file``. Features are ranked by descending importance
    (ties keep their original index order) and every feature outside the top
    ``n_top_features`` is dropped from each product's vector.

    Args:
        product_feats_file: Path to a tab-separated file whose first column is
            the product id and whose remaining columns are float features.
        n_top_features: Number of highest-importance features to keep.
        feature_importance_file: Path to the array of feature importances,
            one value per feature column.

    Returns:
        dict mapping product id (str) to a ``(1, k)`` float array holding the
        retained features in their original column order.

    Raises:
        ValueError: If a feature value cannot be parsed as a float.
        IndexError: If a row has fewer features than the importance array.
    """
    all_feat_importances = np.load(feature_importance_file)
    # Rank every available feature index instead of assuming exactly 1024.
    ranked_indexes = sorted(
        range(len(all_feat_importances)),
        key=lambda idx: all_feat_importances[idx],
        reverse=True,
    )
    bottom_feature_indexes = ranked_indexes[n_top_features:]

    product_to_feats = {}
    # newline='' lets the csv module handle line endings itself.
    with open(product_feats_file, 'r', newline='') as tsvfile:
        for row in csv.reader(tsvfile, delimiter='\t'):
            if not row:
                # Blank lines (e.g. a trailing newline) carry no product.
                continue
            product_id = row[0]
            feats_stored = [float(n) for n in row[1:]]
            feats_reduced = np.delete(feats_stored, bottom_feature_indexes)
            # Stored as a single-row 2-D array so callers can concatenate on axis=1.
            product_to_feats[product_id] = np.array([feats_reduced])
    return product_to_feats


def make_data_inputs(outfit_combo_file, product_to_features, metadata_file=None):
    """Build the embedding matrix and write the matching metadata file.

    Each non-blank line of ``outfit_combo_file`` is split on whitespace;
    token 1 is the collection id and tokens 2 and 5 are the two product ids.
    The two products' feature rows are concatenated into one vector per line.
    (The original notebook had a commented-out third product at token 8;
    only two products are merged.)

    Args:
        outfit_combo_file: Path to the whitespace-separated combinations file.
        product_to_features: dict mapping product id to a ``(1, k)`` array.
        metadata_file: Where to write the metadata TSV (one collection id per
            row, same order as the returned matrix). Defaults to the
            module-level ``METADATA_FILE``.

    Returns:
        Tuple ``(features, metadata)`` where ``features`` is an array with one
        merged feature vector per outfit and ``metadata`` is a list of
        single-element lists ``[collection_id]``.

    Raises:
        KeyError: If a product id has no entry in ``product_to_features``.
        IndexError: If a non-blank line has fewer than six tokens.
    """
    if metadata_file is None:
        metadata_file = METADATA_FILE

    print('Reading file ', outfit_combo_file)
    X_product_features = []
    metadata = []
    with open(outfit_combo_file, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Skip blank lines instead of failing on fields[1].
                continue
            collection_id = fields[1]
            product1_feat = product_to_features[fields[2]]
            product2_feat = product_to_features[fields[5]]

            merged_feat = np.concatenate((product1_feat, product2_feat), axis=1)
            metadata.append([collection_id])
            X_product_features.append(merged_feat[0])

    print('Saving Metadata')
    # The log directory does not exist on a fresh checkout; create it first.
    metadata_dir = os.path.dirname(metadata_file)
    if metadata_dir:
        os.makedirs(metadata_dir, exist_ok=True)
    # newline='' prevents the csv module's "\r\n" from becoming "\r\r\n" on Windows.
    with open(metadata_file, 'w', newline='') as f:
        csv.writer(f, delimiter='\t').writerows(metadata)
    return np.array(X_product_features), metadata

In [3]:
# Taken from: https://github.com/tensorflow/tensorflow/issues/6322
def images_to_sprite(data):
    """Tile a batch of images into one square sprite image.

    Every image is min-max normalised independently to [0, 1], the batch is
    zero-padded up to the next perfect square, and the thumbnails are laid out
    row by row on an n x n grid.

    Args:
      data: NxHxW[x3] array containing the images. A 3-D (grayscale) input is
        replicated across three channels.
    Returns:
      uint8 array of shape (n*H, n*W, 3) with n = ceil(sqrt(N)); unused grid
      cells are black.
    """
    if len(data.shape) == 3:
        data = np.tile(data[..., np.newaxis], (1, 1, 1, 3))
    data = data.astype(np.float32)

    # Shape that broadcasts one scalar per image over all remaining axes.
    per_image = (-1,) + (1,) * (data.ndim - 1)
    lows = data.reshape((data.shape[0], -1)).min(axis=1)
    data = data - lows.reshape(per_image)
    highs = data.reshape((data.shape[0], -1)).max(axis=1)
    # A constant image has zero range; divide by 1 so it becomes all zeros
    # instead of 0/0 = NaN (whose cast to uint8 is undefined).
    highs[highs == 0] = 1
    data = data / highs.reshape(per_image)
    # Inverting the colors seems to look better for MNIST
    #data = 1 - data

    n = int(np.ceil(np.sqrt(data.shape[0])))
    # Pad only the batch axis, up to n*n images.
    padding = ((0, n ** 2 - data.shape[0]),) + ((0, 0),) * (data.ndim - 1)
    data = np.pad(data, padding, mode='constant', constant_values=0)

    # Tile the individual thumbnails into an image:
    # (n, n, H, W, C) -> (n, H, n, W, C) -> (n*H, n*W, C).
    grid = data.reshape((n, n) + data.shape[1:])
    grid = grid.transpose((0, 2, 1, 3) + tuple(range(4, grid.ndim)))
    sprite = grid.reshape((n * grid.shape[1], n * grid.shape[3]) + grid.shape[4:])
    return (sprite * 255).astype(np.uint8)

In [4]:
# Map each product id to its reduced feature row, read from product_feats_file.
prods_to_feats = get_product_to_features(product_feats_file)

In [5]:
# Sanity check: number of features kept for one sample product
# (index 0 selects the single row of its 2-D feature array).
len(prods_to_feats['641336745'][0])

100

In [6]:
# Build the embedding matrix (one row per outfit) and write the metadata file.
# X_collection_ids is the metadata list returned by make_data_inputs:
# one single-element row [collection_id] per outfit.
X_features, X_collection_ids = make_data_inputs(outfit_combinations_file, prods_to_feats)
# Display the matrix shape as the cell output.
X_features.shape

Reading file  data-outfits/outfit_combinations_small.tsv
Saving Metadata


(1000, 200)

In [7]:
def make_image_sprites(outfit_combo_file, data_path='data-outfits/images_collections'):
    """Write the sprite image for every outfit listed in a combinations file.

    For each non-blank line, token 1 (the collection id) names a JPEG
    ``<data_path>/<collection_id>.jpg``. Images are resized to
    IMAGE_SIZE x IMAGE_SIZE, tiled with ``images_to_sprite`` in file order and
    saved as ``sprite_classes.png`` inside ``LOG_DIR``.

    Args:
        outfit_combo_file: Path to the whitespace-separated combinations file.
        data_path: Directory containing the collection images.

    Raises:
        FileNotFoundError: If an image is missing or cannot be decoded.
        IOError: If the sprite image cannot be written.
    """
    img_data = []
    with open(outfit_combo_file, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Skip blank lines instead of failing on fields[1].
                continue
            collection_id = fields[1]
            image_path = os.path.join(data_path, collection_id + '.jpg')
            input_img = cv2.imread(image_path)
            if input_img is None:
                # cv2.imread signals failure by returning None, not by raising.
                # Fail here: silently skipping the image would misalign the
                # sprite with the embedding rows.
                raise FileNotFoundError(
                    'Could not read image for collection %s: %s'
                    % (collection_id, image_path))
            # You can choose what size to resize your data.
            img_data.append(cv2.resize(input_img, (IMAGE_SIZE, IMAGE_SIZE)))

    sprite = images_to_sprite(np.array(img_data))

    os.makedirs(LOG_DIR, exist_ok=True)
    sprite_path = os.path.join(LOG_DIR, 'sprite_classes.png')
    # cv2.imwrite reports failure through its boolean return value.
    if not cv2.imwrite(sprite_path, sprite):
        raise IOError('Could not write sprite image to %s' % sprite_path)

In [8]:
make_image_sprites(outfit_combinations_file)

In [9]:
# Start from an empty graph so that re-running this cell does not add a second
# copy of the variable (TensorFlow would rename it, e.g. "style_embedding_1")
# and save stale variables into the checkpoint.
tf.reset_default_graph()

embedding_var = tf.Variable(X_features, name=NAME_TO_VISUALISE_VARIABLE)
summary_writer = tf.summary.FileWriter(LOG_DIR)

config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = embedding_var.name

# Metadata and sprite paths are both given relative to LOG_DIR, the directory
# that receives projector_config.pbtxt. Passing METADATA_FILE as-is would make
# TensorBoard look for LOG_DIR/LOG_DIR/outfits_metadata.tsv.
embedding.metadata_path = os.path.relpath(METADATA_FILE, LOG_DIR)

# Sprite written by make_image_sprites; one IMAGE_SIZE x IMAGE_SIZE tile per row.
embedding.sprite.image_path = 'sprite_classes.png'
embedding.sprite.single_image_dim.extend([IMAGE_SIZE, IMAGE_SIZE])

# Write projector_config.pbtxt into LOG_DIR.
projector.visualize_embeddings(summary_writer, config)

# Save the embedding variable into a checkpoint that TensorBoard can load.
# The session is closed on exit so repeated runs do not leak sessions.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    checkpoint_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"), 1)
summary_writer.close()

# Display the checkpoint path as the cell output.
checkpoint_path


'tensorboard_outfits/model.ckpt-1'

In [10]:
# To explore the embedding, run this from the notebook's directory:
#   tensorboard --logdir tensorboard_outfits
# Reference:
# https://github.com/anujshah1003/Tensorboard-own-image-data-image-features-embedding-visualization/blob/master/own-data-embedding-visualization.py