In [74]:
print(__doc__)

from time import time
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import NullFormatter

from sklearn import manifold, datasets

# Next line to silence pyflakes. This import is needed.
Axes3D
# FILENAME here

# Path to the preprocessed CSV. Layout relied on below: one header row, then
# numeric rows with the class label in column 1 and features from column 2 on.
filename = "../preprocessed/los.csv"

# Read the file once and close it deterministically: the header line is kept
# for the column names, the remaining lines are parsed as floats.
with open(filename) as f:
    header = f.readline().rstrip('\r\n').split(',')
    data = np.loadtxt(fname=f, delimiter=',', dtype='double')

# Dataset has the label at index 1; everything after it is a feature column.
label = data[:, 1]
X = data[:, 2:]

# Column names from the header row, starting at the label column
# (same slice as before: header fields 1..end).
labels_ = np.array(header[1:])

Automatically created module for IPython interactive environment


In [75]:
# Single-argument print(...) with %-formatting produces the same text under
# Python 2 (print statement) and Python 3 (print function). The two spaces
# after the colon reproduce the separator the old `print a, b` form inserted.
print('X:  %s' % (X,))
print('labels:  %s' % (label,))

X:  [[  2.  60.   2. ...,   1.   1.   1.]
 [  2.  52.   2. ...,   1.   1.   1.]
 [  1.  54.   1. ...,   1.   1.   1.]
 ..., 
 [  1.  65.   1. ...,   3.   3.   3.]
 [  2.  65.   1. ...,   1.   1.   1.]
 [  2.  49.   1. ...,   2.   1.   2.]]
labels:  [ 3.  4.  4. ...,  4.  5.  3.]


#### Normalizing to Zero Mean Unit Variance

In [76]:
# Column-wise standardization: subtract each feature's mean and divide by its
# standard deviation.
mean = X.mean(axis=0)
std = X.std(axis=0)

# A constant column has std == 0, and (X - mean) / std would then be 0 / 0 = NaN.
# Divide such columns by 1 instead so they come out as all zeros.
std_safe = np.where(std == 0, 1.0, std)
X = (X - mean) / std_safe

  app.launch_new_instance()


In [77]:
# Make sure the feature matrix is float64 before handing it to the estimators.
X = X.astype('float64')

# Single-argument print(...) gives identical output on Python 2 and Python 3;
# the two spaces after the colon match the old `print a, b` separator.
print('X:  %s' % (X,))

X:  [[ 0.5585039   0.46364645  1.12915898 ..., -1.06976351 -0.7998895
  -1.07366621]
 [ 0.5585039  -0.57000632  1.12915898 ..., -1.06976351 -0.7998895
  -1.07366621]
 [-1.79049779 -0.31159313 -0.88561489 ..., -1.06976351 -0.7998895
  -1.07366621]
 ..., 
 [-1.79049779  1.10967943 -0.88561489 ...,  1.71523849  2.63654566
   1.71424528]
 [ 0.5585039   1.10967943 -0.88561489 ..., -1.06976351 -0.7998895
  -1.07366621]
 [ 0.5585039  -0.95762611 -0.88561489 ...,  0.32273749 -0.7998895
   0.32028953]]


In [78]:
# NOTE(review): `prev` is not read anywhere in the visible notebook; it is
# kept only in case another cell depends on it.
prev = 0

# Replace every NaN entry of X with 0, in place. One boolean-mask assignment
# does the same work as visiting each (row, column) pair individually.
X[np.isnan(X)] = 0

### Validate No. of neighbors & Components

#### Restriction: n_neighbors > [n_components * (n_components + 3) / 2]

**The number of neighbors is to be validated.**

In [79]:
# Neighborhood size and output dimensionality shared by the embedding cells.
n_neighbors = 15
n_components = 2

# Enforce the restriction stated for this notebook:
#     n_neighbors > n_components * (n_components + 3) / 2
# Failing here gives a clear message instead of an error deep inside a later
# fit. The 2.0 keeps the division floating-point on Python 2 as well.
_min_neighbors = n_components * (n_components + 3) / 2.0
if not n_neighbors > _min_neighbors:
    raise ValueError(
        "n_neighbors (%d) must be greater than "
        "n_components * (n_components + 3) / 2 = %g"
        % (n_neighbors, _min_neighbors))

## Locally Linear Embeddings and Variations

In [80]:

# One wide figure; the cells below add their panels to it through
# fig.add_subplot, using a 2 x 5 grid of subplot slots.
fig = plt.figure(figsize=(18, 12))
plt.suptitle("Trying out different Manifold Techniques", fontsize=14)

# Slot 1: the first three feature columns as a 3D scatter, coloured by label.
try:
    ax = fig.add_subplot(251, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=label, cmap=plt.cm.Spectral)
    ax.view_init(4, -72)
except Exception:
    # 3D plotting failed (e.g. the '3d' projection is unavailable). Fall back
    # to an ordinary 2D panel; asking for projection='3d' again would only
    # fail the same way. `except Exception` (not a bare except) lets
    # KeyboardInterrupt / SystemExit propagate.
    ax = fig.add_subplot(251)
    ax.scatter(X[:, 0], X[:, 2], c=label, cmap=plt.cm.Spectral)

# Locally Linear Embedding variants to compare, with their panel titles.
methods = ['standard', 'ltsa', 'hessian', 'modified']
labels = ['LLE', 'LTSA', 'Hessian LLE', 'Modified LLE']

for i, method in enumerate(methods):
    t0 = time()
    # Pass every option by keyword: recent scikit-learn releases no longer
    # accept n_neighbors / n_components positionally, and keywords work on
    # older releases too.
    lle = manifold.LocallyLinearEmbedding(n_neighbors=n_neighbors,
                                          n_components=n_components,
                                          eigen_solver='dense',
                                          method=method)
    Y = lle.fit_transform(X)
    t1 = time()
    print("%s: %.2g sec" % (method, t1 - t0))

    # Panels 2-5 of the 2 x 5 grid, one per LLE variant.
    ax = fig.add_subplot(252 + i)
    plt.scatter(Y[:, 0], Y[:, 1], c=label, cmap=plt.cm.Spectral)
    plt.title("%s (%.2g sec)" % (labels[i], t1 - t0))
    # Hide tick labels: the embedding coordinates have no meaningful units.
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')


standard: 0.52 sec
ltsa: 0.77 sec
hessian: 0.9 sec
modified: 0.76 sec


## Isomap Learning

In [81]:
# Isomap embedding, timed and drawn into slot 7 of the 2 x 5 grid.
t0 = time()
# Keyword arguments: recent scikit-learn releases make Isomap's parameters
# keyword-only, and keywords are accepted by older releases as well.
isomap = manifold.Isomap(n_neighbors=n_neighbors, n_components=n_components)
Y = isomap.fit_transform(X)
t1 = time()
print("Isomap: %.2g sec" % (t1 - t0))
ax = fig.add_subplot(257)
plt.scatter(Y[:, 0], Y[:, 1], c=label, cmap=plt.cm.Spectral)
plt.title("Isomap (%.2g sec)" % (t1 - t0))
# Hide tick labels: the embedding coordinates have no meaningful units.
ax.xaxis.set_major_formatter(NullFormatter())
ax.yaxis.set_major_formatter(NullFormatter())
plt.axis('tight')

Isomap: 3.1 sec


(-19.545780304539473,
 18.873987702830298,
 -15.633952117573005,
 25.627832982654684)

## Multi-dimensional Scaling (MDS)

In [82]:

# Multi-dimensional scaling, timed and drawn into slot 8 of the 2 x 5 grid.
t0 = time()
# MDS starts from a random configuration, so seed it (as the t-SNE cell does)
# to get the same layout on every run. n_init=1 / max_iter=100 are unchanged.
mds = manifold.MDS(n_components=n_components, max_iter=100, n_init=1,
                   random_state=0)
Y = mds.fit_transform(X)
t1 = time()
print("MDS: %.2g sec" % (t1 - t0))
ax = fig.add_subplot(258)
plt.scatter(Y[:, 0], Y[:, 1], c=label, cmap=plt.cm.Spectral)
plt.title("MDS (%.2g sec)" % (t1 - t0))
# Hide tick labels: the embedding coordinates have no meaningful units.
ax.xaxis.set_major_formatter(NullFormatter())
ax.yaxis.set_major_formatter(NullFormatter())
plt.axis('tight')

MDS: 6.5 sec


(-25.964452524697073,
 33.349313371106668,
 -30.658623292444005,
 24.734142333470906)

## Spectral Embeddings

In [83]:

# Spectral embedding: time the fit, then draw the 2D result in slot 9 of the
# 2 x 5 grid.
t0 = time()
se = manifold.SpectralEmbedding(n_neighbors=n_neighbors,
                                n_components=n_components)
Y = se.fit_transform(X)
t1 = time()
elapsed = t1 - t0
print("SpectralEmbedding: %.2g sec" % elapsed)

ax = fig.add_subplot(2, 5, 9)
plt.scatter(Y[:, 0], Y[:, 1], c=label, cmap=plt.cm.Spectral)
plt.title("SpectralEmbedding (%.2g sec)" % elapsed)
# Blank out the tick labels on both axes (a fresh formatter for each).
for tick_axis in (ax.xaxis, ax.yaxis):
    tick_axis.set_major_formatter(NullFormatter())
plt.axis('tight')


SpectralEmbedding: 1.9 sec


(-0.13364700397732138,
 1.1025395641478406,
 -0.46371817229191514,
 0.48239151558541471)

## t-Distributed Stochastic Neighbor Embedding (t-SNE)

In [None]:
# t-SNE (PCA-initialised, fixed seed): time the fit, then draw the 2D result
# in the last slot of the 2 x 5 grid.
t0 = time()
tsne = manifold.TSNE(random_state=0, init='pca', n_components=n_components)
Y = tsne.fit_transform(X)
t1 = time()
elapsed = t1 - t0
print("t-SNE: %.2g sec" % elapsed)

ax = fig.add_subplot(2, 5, 10)
plt.scatter(Y[:, 0], Y[:, 1], c=label, cmap=plt.cm.Spectral)
plt.title("t-SNE (%.2g sec)" % elapsed)
# Blank out the tick labels on both axes (a fresh formatter for each).
for tick_axis in (ax.xaxis, ax.yaxis):
    tick_axis.set_major_formatter(NullFormatter())
plt.axis('tight')

t-SNE: 14 sec


(-27.572424532981387,
 21.700036500087339,
 -22.354509702234321,
 24.148444640427904)

In [None]:
# Display the open matplotlib figure(s), i.e. the grid assembled by the cells above.
plt.show()