## Preamble

In [1]:
import numpy as np
import h5py
import math
import scipy.spatial.distance as scpd
from scipy import linalg
from sklearn.manifold.t_sne import _joint_probabilities
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
from sklearn.datasets import load_digits
from sklearn.manifold import TSNE
from sklearn.metrics import pairwise_distances
from matplotlib import pyplot as plt

from IPython import get_ipython
get_ipython().run_line_magic('matplotlib', 'qt')

# Open the HDF5 input explicitly read-only. The notebook only reads from this
# file, and older h5py releases default to a read/write(/create) mode when no
# mode is given, which could modify the data or silently create an empty file
# if the path is wrong.
f = h5py.File("NLO_3B_for_singV.h5", "r")



In [11]:
# Read the whole 'alphas' dataset from the open HDF5 file into memory.
alphas = f['alphas'][:]
# Keep a plain-text copy of the array, columns separated by three spaces.
np.savetxt(fname='alphas_3_plot.out', X=alphas, delimiter='   ')
# Cell output: number of rows in alphas.
len(alphas)

100

In [3]:
%matplotlib qt
# 3-D scatter of the first three columns of alphas. Points are coloured by
# their row index; the index range is taken from the data itself so the cell
# keeps working when the number of rows changes.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(alphas[:, 0], alphas[:, 1], alphas[:, 2],
           c=np.arange(alphas.shape[0]), marker='o')

<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7f9db40dc160>

In [58]:
# Floating-point epsilon of double precision; used as a lower bound so that
# probabilities never reach zero before taking logarithms.
MACHINE_EPSILON = np.finfo(np.float64).eps

# Number of dimensions of the embedding produced by fit().
n_components = 2

# Perplexity passed to _joint_probabilities in fit().
# NOTE(review): t-SNE perplexities are usually chosen roughly in the 5-50
# range; 0.01 looks unusually small -- confirm this value is intentional.
perplexity = 0.01

In [59]:
def fit(X):
    """Compute a t-SNE embedding of the rows of X.

    Uses the module-level ``perplexity`` and ``n_components`` settings.
    The starting embedding is drawn from NumPy's global random state, so
    results differ between runs unless that state is seeded by the caller.

    Parameters
    ----------
    X : array
        Input data; ``X.shape[0]`` is taken as the number of samples.

    Returns
    -------
    The embedding returned by ``_tsne``.
    """
    sample_count = X.shape[0]

    # Squared Euclidean distance between every pair of rows.
    sq_distances = pairwise_distances(X, metric='euclidean', squared=True)

    # Joint probabilities p_ij derived from those distances.
    P = _joint_probabilities(distances=sq_distances,
                             desired_perplexity=perplexity,
                             verbose=False)

    # Initial embedding: iid Gaussian samples (cast to float32), scaled to a
    # standard deviation of 1e-4.
    gaussian_noise = np.random.randn(sample_count, n_components).astype(np.float32)
    initial_embedding = 1e-4 * gaussian_noise

    # degrees_of_freedom = n_components - 1 follows
    # "Learning a Parametric Embedding by Preserving Local Structure",
    # Laurens van der Maaten, 2009 (never allowed to drop below 1).
    dof = max(n_components - 1, 1)

    return _tsne(P, dof, sample_count, X_embedded=initial_embedding)

In [60]:
def _kl_divergence(params, P, degrees_of_freedom, n_samples, n_components):
    """Objective for the embedding: KL divergence of P and Q, plus its gradient.

    Parameters
    ----------
    params : ndarray
        Flat embedding; reshaped to ``(n_samples, n_components)``.
    P : ndarray
        Joint probabilities of the input space. It is combined element-wise
        with the output of ``pdist``, so it is expected in the same condensed
        (one entry per pair) layout.
    degrees_of_freedom : float
        Degrees of freedom of the Student-t kernel used for Q.
    n_samples : int
        Number of embedded points.
    n_components : int
        Dimension of the embedding.

    Returns
    -------
    kl_divergence : float
        ``2 * sum(P * log(max(P, eps) / Q))`` over the condensed pairs.
    grad : ndarray
        Flat gradient with ``n_samples * n_components`` entries and the
        dtype of ``params``.
    """
    embedding = params.reshape(n_samples, n_components)

    # Student-t kernel on the condensed squared distances:
    # (1 + d / dof) ** (-(dof + 1) / 2), computed in place.
    kernel = pdist(embedding, "sqeuclidean")
    kernel /= degrees_of_freedom
    kernel += 1.
    kernel **= (degrees_of_freedom + 1.0) / -2.0

    # Normalise to probabilities; the factor 2 accounts for each pair
    # appearing once in the condensed vector. Floor at machine epsilon.
    Q = np.maximum(kernel / (2.0 * np.sum(kernel)), MACHINE_EPSILON)

    # Kullback-Leibler divergence of P and Q.
    log_ratio = np.log(np.maximum(P, MACHINE_EPSILON) / Q)
    kl_divergence = 2.0 * np.dot(P, log_ratio)

    # Gradient dC/dY, one embedded point at a time.
    pair_weights = squareform((P - Q) * kernel)
    grad = np.ndarray((n_samples, n_components), dtype=params.dtype)
    for row, (weights_row, point) in enumerate(zip(pair_weights, embedding)):
        grad[row] = np.dot(weights_row, point - embedding)

    scale = 2.0 * (degrees_of_freedom + 1.0) / degrees_of_freedom
    grad = grad.ravel()
    grad *= scale

    return kl_divergence, grad

In [61]:
def _tsne(P, degrees_of_freedom, n_samples, X_embedded):
    """Optimise the embedding by running gradient descent on ``_kl_divergence``.

    Parameters
    ----------
    P : ndarray
        Joint probabilities forwarded to the objective.
    degrees_of_freedom : float
        Forwarded to the objective.
    n_samples : int
        Number of embedded points.
    X_embedded : ndarray
        Initial embedding; flattened before optimisation.

    Returns
    -------
    ndarray of shape ``(n_samples, n_components)``, using the module-level
    ``n_components``.
    """
    # The optimiser works on a flat parameter vector.
    flat_start = X_embedded.ravel()
    objective_args = [P, degrees_of_freedom, n_samples, n_components]

    flat_result = _gradient_descent(_kl_divergence, flat_start, objective_args)

    return flat_result.reshape(n_samples, n_components)

In [62]:
def _gradient_descent(obj_func, p0, args, it=0, n_iter=1000,
                      n_iter_check=1, n_iter_without_progress=300,
                      momentum=0.8, learning_rate=200.0, min_gain=0.01,
                      min_grad_norm=1e-7):
    
    p = p0.copy().ravel()
    update = np.zeros_like(p)
    gains = np.ones_like(p)
    error = np.finfo(np.float).max
    best_error = np.finfo(np.float).max
    best_iter = i = it
    
    for i in range(it, n_iter):
        error, grad = obj_func(p, *args)
        grad_norm = linalg.norm(grad)
        inc = update * grad < 0.0
        dec = np.invert(inc)
        gains[inc] += 0.2
        gains[dec] *= 0.8
        np.clip(gains, min_gain, np.inf, out=gains)
        grad *= gains
        update = momentum * update - learning_rate * grad
        p += update
        
        print("[t-SNE] Iteration %d: error = %.7f,"
                      " gradient norm = %.7f"
                      % (i + 1, error, grad_norm))
        
        if error < best_error:
                best_error = error
                best_iter = i
        elif i - best_iter > n_iter_without_progress:
            break
        
        if grad_norm <= min_grad_norm:
            break
            
        return p

In [63]:
# Run the t-SNE pipeline (fit) on the alphas array; X_embedded holds the
# embedding that fit() returns.
X_embedded = fit(alphas)

[t-SNE] Iteration 1: error = 4.3072240, gradient norm = 0.0000503


In [64]:
# Draw the embedding on its own figure. A bare plt.plot() targets whatever
# figure is current, which with the interactive qt backend can be the earlier
# 3-D scatter window if it is still open.
fig_embedded, ax_embedded = plt.subplots()
ax_embedded.set_title('t-SNE embedding of alphas')
ax_embedded.set_xlabel('embedding dimension 1')
ax_embedded.set_ylabel('embedding dimension 2')
ax_embedded.plot(X_embedded[:, 0], X_embedded[:, 1], 'o')

[<matplotlib.lines.Line2D at 0x7f9da88060f0>]