# t-SNE Explorations

## Initialization
GitHub: https://github.com/gauss256/t-SNE/blob/master/t-SNE%20Explorations.ipynb

Interactive notebook: https://mybinder.org/


In [None]:
%matplotlib notebook

import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
from   sklearn.decomposition import PCA
from   sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding, MDS, SpectralEmbedding
from   sklearn.preprocessing import StandardScaler

# Print floats in fixed-point notation (no scientific notation for small
# values) with 4 digits of precision.
np.set_printoptions(suppress=True, precision=4)

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever spelling this
# installation provides and fall back to the default style otherwise.
plt_style = next(
    (name for name in ('seaborn-talk', 'seaborn-v0_8-talk')
     if name in plt.style.available),
    'default',
)

# Experimental: HDBSCAN is a state-of-the-art clustering algorithm.
# It is optional, so record whether it could be imported.
try:
    import hdbscan
except ImportError:
    hdbscan_available = False
else:
    hdbscan_available = True

## Plotting function

In [None]:
def do_plot(X_fit, title, labels):
    """Scatter-plot a 2-D or 3-D embedding, one colour per distinct label.

    Parameters
    ----------
    X_fit : array-like of shape (n_samples, 2) or (n_samples, 3)
        Coordinates to plot, one row per sample.
    title : str
        Title placed above the axes.
    labels : array-like of length n_samples
        Label of each sample; rows sharing a label share a colour.

    Raises
    ------
    ValueError
        If ``X_fit`` does not have exactly 2 or 3 columns.
    """
    X_fit = np.asarray(X_fit)
    # Comparing a plain list with ``==`` gives a single bool instead of an
    # element-wise mask, so always work with an array.
    labels = np.asarray(labels)

    # Validate before touching matplotlib so a bad input does not leave an
    # empty figure behind.
    dimension = X_fit.shape[1]
    if dimension not in (2, 3):
        raise ValueError('Unknown dimension: %d' % dimension)

    label_types = np.unique(labels)  # sorted distinct labels
    colors = cm.Accent(np.linspace(0, 1, len(label_types)))

    with plt.style.context(plt_style):
        fig = plt.figure()
        if dimension == 3:
            ax = fig.add_subplot(111, projection='3d')
        else:
            ax = fig.add_subplot(111)
        for lab, col in zip(label_types, colors):
            mask = labels == lab
            # One positional argument per coordinate axis (x, y[, z]).
            ax.scatter(*(X_fit[mask, axis] for axis in range(dimension)),
                       c=[col])
        ax.set_title(title)
        plt.show()

## Generate data

In [None]:
# Hand-entered sample points: (x, y, label) per row.
data = [
    (-0.137163, -0.390162, 0),
    (-1.091668, -0.915984, 0),
    (-0.400630, -0.733990, 1),
    (1.455796, -1.655627, 1),
    (0.236798, -0.841211, 2),
    (-2.608761, -2.829113, 2),
    (0.783135, -0.303087, 3),
    (0.787389, 1.011995, 3),
    (1, 2, 1),
]

# Tabular view of the points; the bare expression below displays it in the notebook.
column_names = ['x', 'y', 'label']
data_df = pd.DataFrame(data, columns=column_names)
data_df

## Prep the data for fitting and visualizing

In [None]:
# Feature matrix: every column except the trailing one.
X = data_df.iloc[:, :-1]
# Label of each row as a NumPy array (passed to do_plot for colouring).
y = data_df['label'].to_numpy()
# Standardised copy of the features.
X_std = StandardScaler().fit_transform(X)

In [None]:
# Shape of the feature matrix with its last column sliced off.
# NOTE(review): X already excludes 'label', so this slice drops 'y' too — confirm intended.
X.to_numpy()[:, :-1].shape

## Dumb 2D projection

In [None]:
# Plot the raw (unscaled) feature columns directly, coloured by label.
do_plot(X.to_numpy(), title='Plane $x=0$', labels=y)

## t-SNE (2D)

In [None]:
# t-SNE requires perplexity < n_samples; the default (30) exceeds this tiny
# data set, so cap it at n_samples - 1 while keeping 30 for larger inputs.
tsne_perplexity = min(30.0, len(X) - 1)
# Fixed random_state keeps the embedding reproducible between runs.
tsne2 = TSNE(n_components=2, perplexity=tsne_perplexity, random_state=0)
do_plot(tsne2.fit_transform(X), 't-SNE', y)