This notebook converts `fingerprints.npy` into `.tsv`-formatted t-SNE embeddings and plots of those embeddings, saved in the `tsne/` and `plot/` folders respectively. If you add multiple values to `perplexities` and `initial_dims`, all combinations will be computed (in parallel). Good perplexities are in the range 1-200, with the best range around 30-100. Good `initial_dims` values are 30 and higher, up to the dimensionality of your input data (e.g., a 32x32 fingerprint has a maximum possible `initial_dims` value of 32x32=1024).

In [None]:
# t-SNE settings to sweep: one run is made for every
# (initial_dims, perplexity) combination of the values listed here.
perplexities = [30]
initial_dims = [30]

# Folder holding the input arrays; outputs are written beneath it.
# Keep the trailing slash: some cells build paths with `data_root + ...`.
data_root = 'data/drums/'

In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
from time import time
from utils import *
from os.path import join
from multiprocessing import Pool
import numpy as np
import itertools

In [None]:
def save_tsv(data, fn):
    """Write `data` to the file `fn` as tab-separated text, five decimals per value."""
    tsv_options = {'delimiter': '\t', 'fmt': '%.5f'}
    np.savetxt(fn, data, **tsv_options)
def tsne(data, data_root, prefix, initial_dims=30, perplexity=30):
    """Compute 2D and 3D t-SNE embeddings of `data`, saving them and a plot.

    Files written beneath `data_root`:
        tsne/{prefix}.{initial_dims}.{perplexity}.2d.tsv
        tsne/{prefix}.{initial_dims}.{perplexity}.3d.tsv
        plot/{prefix}.{initial_dims}.{perplexity}.png

    `mkdir_p`, `bh_tsne` and `normalize` are not defined in this notebook;
    presumably they come from `from utils import *` -- confirm there.

    Args:
        data: input handed unchanged to `bh_tsne` (presumably a
            samples x features float array -- confirm against `bh_tsne`).
        data_root: folder under which `tsne/` and `plot/` are created.
        prefix: leading component of every output file name.
        initial_dims: forwarded to `bh_tsne` as `initial_dims`.
        perplexity: forwarded to `bh_tsne` as `perplexity`.
    """
    # Build the directories with `join`, exactly like the save paths below,
    # so a `data_root` without a trailing slash still creates the folders
    # that the files are written into.
    mkdir_p(join(data_root, 'tsne'))
    mkdir_p(join(data_root, 'plot'))

    figsize = (16, 16)
    pointsize = 2
    stem = '{}.{}.{}'.format(prefix, initial_dims, perplexity)
    plot_path = join(data_root, 'plot/{}.png'.format(stem))

    X_2d = list(bh_tsne(data, initial_dims=initial_dims, perplexity=perplexity, no_dims=2))
    X_2d = normalize(np.array(X_2d))
    save_tsv(X_2d, join(data_root, 'tsne/{}.2d.tsv'.format(stem)))

    # Plain scatter of the 2D embedding. 'none' is the documented way to
    # disable marker edges; an empty string is rejected by newer matplotlib.
    plt.figure(figsize=figsize)
    plt.scatter(X_2d[:, 0], X_2d[:, 1], edgecolor='none', s=pointsize)
    plt.tight_layout()
    plt.savefig(plot_path)
    plt.close()

    X_3d = list(bh_tsne(data, initial_dims=initial_dims, perplexity=perplexity, no_dims=3))
    X_3d = normalize(np.array(X_3d))
    save_tsv(X_3d, join(data_root, 'tsne/{}.3d.tsv'.format(stem)))

    # Same 2D positions, coloured by the 3D embedding (one 3-value row per
    # point; presumably `normalize` scales it into a valid colour range --
    # confirm). Saved to the same path as the plain plot above, so the
    # coloured version replaces it.
    plt.figure(figsize=figsize)
    plt.scatter(X_2d[:, 0], X_2d[:, 1], edgecolor='none', s=pointsize, c=X_3d)
    plt.tight_layout()
    plt.savefig(plot_path)
    plt.close()

In [None]:
# Output file names start with this prefix.
prefix = 'fingerprints'

# Load the fingerprints, promote them to float64, then flatten each
# sample into a single row.
data = np.load(join(data_root, 'fingerprints.npy'))
data = data.astype(np.float64)
data = data.reshape(len(data), -1)

In [None]:
# data = np.load(data_root + 'encoded_mc.npy').astype(np.float64)
# data -= data.min()
# data /= data.max()

In [None]:
# images = np.load(data_root + 'images.half.cqt.1.npy')
# encoded = np.load(data_root + 'encoded_mc.npy')
# predicted = np.load(data_root + 'predicted_mc.npy')
# images = images.reshape(len(images), -1)
# images_norm = images / images.std()
# encoded_std = encoded.std(axis=0)
# encoded_norm = encoded[:, encoded_std > 0] / encoded_std[encoded_std > 0]
# predicted_norm = predicted / predicted.std()
# combined = np.hstack((images_norm, encoded_norm, predicted_norm))
# data = combined.astype(np.float64)

In [None]:
def job(params):
    """Run `tsne` for one parameter pair and print how long it took.

    Reads the notebook-level `data`, `data_root` and `prefix`.

    Args:
        params: sequence whose first item is the `initial_dims` value and
            whose second item is the `perplexity` value.
    """
    start = time()
    dims, perplexity = params[0], params[1]
    tsne(data, data_root, prefix, initial_dims=dims, perplexity=perplexity)
    # Single-argument call form prints the same text on Python 2 and 3,
    # whereas the bare `print` statement is a syntax error on Python 3.
    print('initial_dims={}, perplexity={}, {} seconds'.format(dims, perplexity, time() - start))
# One job per (initial_dims, perplexity) combination.
params = list(itertools.product(initial_dims, perplexities))
pool = Pool()
try:
    pool.map(job, params)
finally:
    # Shut the worker processes down instead of leaking them, including
    # when a job raised. close() + join() lets workers exit normally.
    pool.close()
    pool.join()