In [None]:
from sklearn.manifold import TSNE
from PIL import Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Pick the model whose article embeddings are visualised. The same name is
# reused further down as the file name of the saved montage.
model_name = 'curriculum-adversarial-8-k'

# Embeddings are stored as a tab-separated file; pandas infers the gzip
# compression from the '.gz' suffix.
embedding_path = f'../data/DeepFashion/vectors/{model_name}.tsv.gz'
article_embeddings = pd.read_csv(embedding_path, delimiter='\t')

# Preview the first rows as a quick sanity check of the loaded table.
article_embeddings.head(5)

In [None]:
def _rescale_unit(values):
    """Linearly map `values` so that its minimum becomes 0 and its maximum becomes 1."""
    lowest, highest = np.min(values), np.max(values)
    return (values - lowest) / (highest - lowest)


# Work on a fixed-seed random subset of 10,000 rows so that the projection is
# repeatable between runs.
sample = article_embeddings.sample(n=10000, random_state=42)
images = sample['image'].values
# Everything after the first column is fed to t-SNE.
# NOTE(review): assumes column 0 is `image` and the rest are embedding
# dimensions — confirm against the TSV schema.
x = sample.iloc[:, 1:].values

# Project the vectors to 2-D using cosine distance.
# NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter` —
# confirm against the installed version before upgrading.
tsne_model = TSNE(
    n_components=2,
    learning_rate=100,
    perplexity=16,
    n_iter=5000,
    early_exaggeration=48,
    metric='cosine',
    random_state=42,
)
tsne = tsne_model.fit_transform(x)

# Min-max scale each axis independently into [0, 1]; the montage cell uses
# these as fractional canvas coordinates.
tx = _rescale_unit(tsne[:, 0])
ty = _rescale_unit(tsne[:, 1])

# Quick look at the shape of the projection.
plt.scatter(tx, ty)

In [None]:
# Build one large montage in which every sampled image is pasted at the
# position given by its normalised t-SNE coordinates (tx, ty).

# Size in pixels of the drawable area (the white border is added on top).
height = 10000
width = int(height*1.6)

max_dim = 190   # longest allowed side of a thumbnail, in pixels
padding = 100   # white border around the drawable area, in pixels
spacing = 10    # gap between neighbouring grid cells, in pixels

# When True, thumbnails are snapped to a regular grid so they never overlap;
# only the first image that lands in a cell is drawn.
grid = True

visible_imgs = 0
pos_visited = set()
full_image = Image.new('RGB', (width+padding*2, height+padding*2), (255, 255, 255))

# The loop variables are deliberately NOT called x / y: `x` is the embedding
# matrix defined in the t-SNE cell and must not be overwritten here.
for img, x_norm, y_norm in zip(images, tx, ty):
    # Image.open only reads the header, so opening every file just to learn
    # its size is cheap; the pixel data is decoded only if the tile is pasted.
    # The context manager closes the underlying file handle in either case.
    with Image.open('../data/DeepFashion/' + img) as tile:
        # Shrink factor so the longest side fits into max_dim; never upscale.
        rs = max(1, tile.width/max_dim, tile.height/max_dim)
        # Clamp to 1 px so extreme aspect ratios cannot produce a zero-sized tile.
        tile_w = max(1, int(tile.width/rs))
        tile_h = max(1, int(tile.height/rs))

        # The thumbnail size has to be known BEFORE the position is computed,
        # so that the tile stays completely inside the drawable area.
        x_pos = int((width - tile_w) * x_norm)
        # Image rows grow downwards while the plot's y axis grows upwards.
        y_pos = int((height - tile_h) * (1-y_norm))

        if grid:
            # Snap to the top-left corner of the enclosing grid cell.
            cell = max_dim + spacing
            x_pos -= x_pos % cell
            y_pos -= y_pos % cell

        pos = (x_pos + padding, y_pos + padding)

        if pos not in pos_visited:
            # Image.LANCZOS is the same filter as the former Image.ANTIALIAS
            # alias, which no longer exists in current Pillow releases.
            thumb = tile.resize((tile_w, tile_h), Image.LANCZOS)
            full_image.paste(thumb, pos)
            pos_visited.add(pos)
            visible_imgs += 1

full_image.save('../results/figures/tsne/' + model_name + '.jpg')
full_image

In [None]:
# Number of thumbnails actually pasted onto the montage; the counter is only
# incremented for images whose target position was not already occupied.
visible_imgs