## Unsupervised learning: seeking representations of the data

### Clustering: grouping observations together

**K-means clustering**

In [17]:
# Load the iris dataset as a (features, targets) pair of arrays.
from sklearn import cluster, datasets
X_iris, y_iris = datasets.load_iris(return_X_y=True)

In [18]:
# Partition the iris samples into three clusters. `fit` returns the fitted
# estimator itself, so the trailing bare name displays the estimator repr.
k_means = cluster.KMeans(n_clusters=3).fit(X_iris)
k_means

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=3, n_init=10, n_jobs=1, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

In [21]:
# Cluster index assigned to every 10th sample. Cluster ids are arbitrary
# integers, so they are not guaranteed to coincide with the ids in y_iris.
k_means.labels_[::10]

array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2], dtype=int32)

In [22]:
# Ground-truth targets for the same every-10th-sample slice, for comparison.
y_iris[::10]

array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2])

In [23]:
# Load the grayscale sample "face" image, trying each location SciPy has
# shipped it in, newest first.
import scipy as sp

try:
    # Current location. Imported under an alias so it does not shadow the
    # sklearn `datasets` module used elsewhere in this notebook.
    from scipy.datasets import face as _load_face
    face = _load_face(gray=True)
except ImportError:
    # scipy.datasets is unavailable (older SciPy, or its optional download
    # dependency is missing): fall back to the legacy locations.
    try:
        face = sp.face(gray=True)
    except AttributeError:
        from scipy import misc
        face = misc.face(gray=True)

In [24]:
# Flatten the image into a single column: one sample per pixel, with the
# pixel intensity as its only feature.
X = face.reshape(-1, 1)
# Cluster the intensities into 5 groups using a single initialisation.
k_means = cluster.KMeans(n_clusters=5, n_init=1).fit(X)
k_means

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=5, n_init=1, n_jobs=1, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

In [26]:
# Show the image dimensions, the 2-D image and its flattened single-column
# form, one per line.
print(face.shape, face, X, sep="\n")

(768, 1024)
[[114 130 145 ... 119 129 137]
 [ 83 104 123 ... 118 134 146]
 [ 68  88 109 ... 119 134 145]
 ...
 [ 98 103 116 ... 144 143 143]
 [ 94 104 120 ... 143 142 142]
 [ 94 106 119 ... 142 141 140]]
[[114]
 [130]
 [145]
 ...
 [142]
 [141]
 [140]]


In [31]:
# Fitted cluster centres (squeezed to drop length-1 axes) and the cluster
# index assigned to each sample.
values, labels = k_means.cluster_centers_.squeeze(), k_means.labels_

In [34]:
import numpy as np

# Replace every pixel by the centre of the cluster it was assigned to.
# Integer-array indexing is used instead of np.choose, which only accepts a
# limited number of choice arrays and therefore fails for larger n_clusters.
# reshape() yields the 2-D image without assigning to the array's .shape
# attribute in place.
face_compressed = values[labels].reshape(face.shape)
print(face_compressed.shape)
print(face_compressed)

(768, 1024)
[[109.71975211 148.25700663 148.25700663 ... 109.71975211 148.25700663
  148.25700663]
 [ 71.49771602 109.71975211 109.71975211 ... 109.71975211 148.25700663
  148.25700663]
 [ 71.49771602  71.49771602 109.71975211 ... 109.71975211 148.25700663
  148.25700663]
 ...
 [109.71975211 109.71975211 109.71975211 ... 148.25700663 148.25700663
  148.25700663]
 [109.71975211 109.71975211 109.71975211 ... 148.25700663 148.25700663
  148.25700663]
 [109.71975211 109.71975211 109.71975211 ... 148.25700663 148.25700663
  148.25700663]]


**Connectivity-constrained clustering**

In [36]:
# gaussian_filter is imported from the public scipy.ndimage namespace; the
# scipy.ndimage.filters submodule path is deprecated.
from scipy.ndimage import gaussian_filter

import matplotlib.pyplot as plt

import skimage
from skimage.data import coins
from skimage.transform import rescale

from sklearn.feature_extraction.image import grid_to_graph
from sklearn.cluster import AgglomerativeClustering

In [39]:
# Load the sample coins image provided by skimage.data.
orig_coins = coins()

In [40]:
# Gaussian-smooth the coins image (sigma=2).
smoothened_coins = gaussian_filter(orig_coins, sigma=2)

**Feature agglomeration**

In [41]:
# Digit images as a stack of 2-D arrays.
digits = datasets.load_digits()
images = digits.images
# One row per image, with every pixel flattened into a feature column.
X = images.reshape(len(images), -1)
# Pixel-adjacency graph for the grid of a single image.
connectivity = grid_to_graph(*images.shape[1:])

In [42]:
# Merge features into 32 clusters, constrained by the pixel connectivity
# graph. `fit` returns the estimator, so the bare name displays its repr.
agglo = cluster.FeatureAgglomeration(n_clusters=32, connectivity=connectivity).fit(X)
agglo

FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto',
           connectivity=<64x64 sparse matrix of type '<class 'numpy.int64'>'
	with 288 stored elements in COOrdinate format>,
           linkage='ward', memory=None, n_clusters=32,
           pooling_func=<function mean at 0x10ca37510>)

In [44]:
# Project onto the agglomerated features, then map back to the original
# feature space.
X_reduced = agglo.transform(X)
X_approx = agglo.inverse_transform(X_reduced)
# Restore the image-stack layout of the approximation.
images_approx = X_approx.reshape(images.shape)

### Principal component analysis: PCA

In [47]:
# Build a 3-column signal whose third column is the sum of the first two,
# so only 2 dimensions carry independent information.
x1 = np.random.normal(size=100)
x2 = np.random.normal(size=100)
x3 = np.add(x1, x2)
X = np.column_stack((x1, x2, x3))
print(x1[:10], x2[:10], x3[:10])
print(X[:10])

[ 0.00888623  0.16479087  2.12727434  1.71355223  1.63674518  0.77503819
 -1.10710398  0.20903618  1.51977496  0.60370425] [-0.93095089  0.72476615 -0.2426903   0.20269196  1.36901722 -1.83578985
  1.21601555 -0.72778767 -0.61191741  0.1205131 ] [-0.92206465  0.88955702  1.88458405  1.91624419  3.0057624  -1.06075166
  0.10891157 -0.51875149  0.90785755  0.72421735]
[[ 0.00888623 -0.93095089 -0.92206465]
 [ 0.16479087  0.72476615  0.88955702]
 [ 2.12727434 -0.2426903   1.88458405]
 [ 1.71355223  0.20269196  1.91624419]
 [ 1.63674518  1.36901722  3.0057624 ]
 [ 0.77503819 -1.83578985 -1.06075166]
 [-1.10710398  1.21601555  0.10891157]
 [ 0.20903618 -0.72778767 -0.51875149]
 [ 1.51977496 -0.61191741  0.90785755]
 [ 0.60370425  0.1205131   0.72421735]]


In [49]:
from sklearn import decomposition

# Fit a PCA that keeps every component and show the variance of each one.
pca = decomposition.PCA().fit(X)
print(pca.explained_variance_)

[3.16564232e+00 1.09536551e+00 8.75543105e-32]


In [51]:
# Only the first two components carry variance, so keep just those and
# re-fit while projecting the data.
pca.set_params(n_components=2)
X_reduced = pca.fit_transform(X)
print(X_reduced)
X_reduced.shape

[[-1.00353142  0.60591993]
 [ 1.21436336 -0.45644581]
 [ 2.43473398  1.61432402]
 [ 2.47300618  1.00681358]
 [ 3.80665915  0.12668085]
 [-1.17240845  1.7876293 ]
 [ 0.25723907 -1.70238438]
 [-0.50957762  0.60337992]
 [ 1.23835401  1.4468314 ]
 [ 1.01247576  0.28135179]
 [-3.08859078 -2.09113524]
 [ 2.01632848  1.08072888]
 [-1.8148905  -0.99343992]
 [ 0.03362237 -0.65273302]
 [ 0.61056536  1.04696137]
 [ 0.56923497  0.96474434]
 [ 0.71474682  0.95053013]
 [-0.72889572  0.84873062]
 [-0.32140781 -0.40569113]
 [-4.52158451 -0.89600683]
 [ 0.8656881   0.52502407]
 [-1.1491441   0.04846375]
 [ 0.85971791  1.21573184]
 [-2.32551579 -0.09998278]
 [ 1.12675572  0.94327741]
 [ 0.20162727 -0.05663225]
 [-1.56829148 -0.29140343]
 [ 0.02726632 -1.34488363]
 [ 0.01872432 -0.83899062]
 [-0.73011779 -1.14662835]
 [-0.45331462 -0.75217515]
 [ 1.00546099 -1.43054978]
 [-3.54144673  1.34397328]
 [ 1.26551132 -0.27756249]
 [-0.68328381  0.81184471]
 [ 0.28617529 -0.8078148 ]
 [-1.02112755 -1.75724136]
 

(100, 2)

**Independent Component Analysis: ICA**

In [53]:
# Build three noisy source signals and mix them linearly into observations.
import numpy as np
from scipy import signal

time = np.linspace(0, 10, 2000)
s1 = np.sin(2 * time)                    # source 1: sinusoid
s2 = np.sign(np.sin(3 * time))           # source 2: square wave
s3 = signal.sawtooth(2 * np.pi * time)   # source 3: sawtooth
S = np.column_stack((s1, s2, s3))
S += 0.2 * np.random.normal(size=S.shape)  # additive Gaussian noise
S /= S.std(axis=0)                         # scale each source to unit std

# Mixing matrix: each row gives the weights of the sources in one observation.
A = np.array([[1, 1, 1], [0.5, 2, 1], [1.5, 1, 2]])
X = S @ A.T  # observed mixtures

In [54]:
# Unmix the observations with FastICA.
ica = decomposition.FastICA()
S_ = ica.fit_transform(X)          # estimated sources
A_ = np.transpose(ica.mixing_)     # transposed estimated mixing matrix
# Sanity check: sources times mixing, plus the stored mean, reproduces X.
np.allclose(X, S_ @ A_ + ica.mean_)

True

In [55]:
# Display the sources estimated by FastICA (the result of fit_transform).
S_

array([[ 3.92354981e-05,  3.83796543e-02,  1.25002565e-02],
       [ 4.95033714e-03,  3.36125491e-02, -1.81928265e-02],
       [ 1.43351077e-03,  2.92367839e-02, -1.59301801e-02],
       ...,
       [ 1.58981951e-02, -4.50493138e-02,  1.40377393e-02],
       [ 1.43537144e-02, -3.32467468e-02,  3.03019443e-02],
       [ 3.72095277e-02,  4.47319000e-02,  2.69802230e-02]])

In [56]:
# Display the transposed estimated mixing matrix (ica.mixing_.T).
A_

array([[ 44.48704788,  21.33099325,  67.44794734],
       [-45.22514513, -43.9374162 , -90.53016874],
       [-42.08493469, -87.50810177, -40.17562733]])