## Clustering

In [72]:
from sklearn import cluster, datasets
import scipy as sp
import numpy as np
import matplotlib.pyplot as plt

from sklearn.feature_extraction.image import grid_to_graph
from sklearn.cluster import AgglomerativeClustering

In [73]:
iris = datasets.load_iris()
X_iris = iris.data
y_iris = iris.target

In [74]:
k_means = cluster.KMeans(n_clusters=3)
k_means.fit(X_iris)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=3, n_init=10, n_jobs=1, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

In [75]:
print(k_means.labels_[::10])

[1 1 1 1 1 0 0 0 0 0 2 2 2 2 2]


In [76]:
print(y_iris[::10])

[0 0 0 0 0 1 1 1 1 1 2 2 2 2 2]


### Vector Quantization

In [77]:
try:
    face = sp.face(gray=True)
except AttributeError:
    from scipy import misc
    face = misc.face(gray=True)

In [78]:
face.shape

(768, 1024)

In [79]:
X = face.reshape((-1, 1))

In [80]:
k_means = cluster.KMeans(n_clusters=5, n_init=1)
k_means.fit(X)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=5, n_init=1, n_jobs=1, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

In [81]:
values = k_means.cluster_centers_.squeeze()
labels = k_means.labels_

In [82]:
face_compressed = np.choose(labels, values)
face_compressed.shape = face.shape

In [83]:
face_compressed.shape

(768, 1024)

In [84]:
len(face_compressed)

768

## Hierarchical agglomerative clustering

### Connectivity-constrained clustering

In [85]:
try:  # SciPy >= 0.16 have face in misc
    from scipy.misc import face
    face = face(gray=True)
except ImportError:
    face = sp.face(gray=True)
    
face = sp.misc.imresize(face, 0.10) / 255.
X = np.reshape(face, (-1, 1))

connectivity = grid_to_graph(*face.shape)

AttributeError: module 'scipy.misc' has no attribute 'imresize'

## Decompositions

### PCA

In [86]:
# Create a signal with only 2 useful dimensions
x1 = np.random.normal(size=100)
x2 = np.random.normal(size=100)
x3 = x1 + x2
X = np.c_[x1, x2, x3]

In [87]:
from sklearn import decomposition
pca = decomposition.PCA()
pca.fit(X)

PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [88]:
print(pca.explained_variance_)

[3.44124123e+00 1.07093468e+00 7.10736745e-32]


In [89]:
pca.n_components = 2
X_reduced = pca.fit(X)
X_reduced = pca.transform(X)
X_reduced.shape

(100, 2)

### ICA

In [90]:
from scipy import signal

time = np.linspace(0, 10, 2000)
s1 = np.sin(2 * time)  # Signal 1 : sinusoidal signal
s2 = np.sign(np.sin(3 * time))  # Signal 2 : square signal
s3 = signal.sawtooth(2 * np.pi * time)  # Signal 3: saw tooth signal
S = np.c_[s1, s2, s3]
S += 0.2 * np.random.normal(size=S.shape)  # Add noise
S /= S.std(axis=0)  # Standardize data
# Mix data
A = np.array([[1, 1, 1], [0.5, 2, 1], [1.5, 1, 2]])  # Mixing matrix
X = np.dot(S, A.T)  # Generate observations

# Compute ICA
ica = decomposition.FastICA()
S_ = ica.fit_transform(X)  # Get the estimated sources
A_ = ica.mixing_.T
np.allclose(X,  np.dot(S_, A_) + ica.mean_)



True