# Clustering

## Generate data

In [1]:
import numpy as np
from numpy.random import normal

# Seed NumPy's global random generator so the data sampled below is the
# same on every run of the notebook.
np.random.seed(37)

def get_cluster_data(means, variances, label, N=1000):
    """Sample one labelled cluster of independent Gaussian features.

    Parameters
    ----------
    means : sequence of float
        Centre of each feature; one output column is produced per entry.
    variances : sequence of float
        Spread of each feature, paired with ``means`` by position.
        NOTE: each value is handed to ``numpy.random.normal`` as its second
        positional argument (``scale``), so it acts as a standard deviation
        rather than a variance.
    label : int
        Class label assigned to every sampled row.
    N : int, optional
        Number of rows to sample (default 1000).

    Returns
    -------
    X : numpy.ndarray
        Samples, one row per observation and one column per
        ``(mean, variance)`` pair.
    y : numpy.ndarray
        Integer array of length ``N`` filled with ``label``.
    """
    # Draw each feature as an (N, 1) column, then place the columns side by side.
    columns = [normal(m, v, N).reshape(-1, 1) for m, v in zip(means, variances)]
    X = np.hstack(columns)

    # ``np.int`` was only an alias for the builtin ``int`` and no longer exists
    # in current NumPy releases; the builtin yields the same dtype.
    y = np.full(N, label, dtype=int)

    return X, y

# Two 2-D Gaussian blobs, labelled 0 and 1, centred at (5, 5) and (6, 6).
X1, y1 = get_cluster_data(means=[5.0, 5.0], variances=[1.0, 1.0], label=0)
X2, y2 = get_cluster_data(means=[6.0, 6.0], variances=[1.0, 1.0], label=1)

# Stack the blobs row-wise into one feature matrix and join their labels
# in the same order.
X = np.concatenate((X1, X2), axis=0)
y = np.concatenate((y1, y2))

## Types of clustering

### K-means

In [2]:
from sklearn.cluster import KMeans

# Ask k-means for two clusters; random_state is pinned so repeated runs
# start from the same initialisation.
km = KMeans(n_clusters=2, random_state=37)
# Bare expression on purpose: the notebook echoes the fitted estimator.
km.fit(X)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=2, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=37, tol=0.0001, verbose=0)

### Affinity propagation

In [3]:
from sklearn.cluster import AffinityPropagation

# All estimator parameters are left at their defaults; no cluster count
# is passed.
ap = AffinityPropagation()
# Bare expression on purpose: the notebook echoes the fitted estimator.
ap.fit(X)

AffinityPropagation(affinity='euclidean', convergence_iter=15, copy=True,
                    damping=0.5, max_iter=200, preference=None, verbose=False)

### Mean-shift

In [4]:
from sklearn.cluster import MeanShift

# Defaults only: neither a bandwidth nor a cluster count is supplied
# (the echoed repr shows bandwidth=None).
ms = MeanShift()
# Bare expression on purpose: the notebook echoes the fitted estimator.
ms.fit(X)

MeanShift(bandwidth=None, bin_seeding=False, cluster_all=True, min_bin_freq=1,
          n_jobs=None, seeds=None)

### Spectral

In [5]:
from sklearn.cluster import SpectralClustering

# Two clusters with a pinned seed; the affinity is left at its default
# ('rbf' in the echoed repr).
sc = SpectralClustering(n_clusters=2, random_state=37)
# Bare expression on purpose: the notebook echoes the fitted estimator.
sc.fit(X)

SpectralClustering(affinity='rbf', assign_labels='kmeans', coef0=1, degree=3,
                   eigen_solver=None, eigen_tol=0.0, gamma=1.0,
                   kernel_params=None, n_clusters=2, n_init=10, n_jobs=None,
                   n_neighbors=10, random_state=37)

### Agglomerative

In [6]:
from sklearn.cluster import AgglomerativeClustering

# Two clusters; linkage is left at its default ('ward' in the echoed repr).
# No random_state argument is passed to this estimator.
ac = AgglomerativeClustering(n_clusters=2)
# Bare expression on purpose: the notebook echoes the fitted estimator.
ac.fit(X)

AgglomerativeClustering(affinity='euclidean', compute_full_tree='auto',
                        connectivity=None, distance_threshold=None,
                        linkage='ward', memory=None, n_clusters=2,
                        pooling_func='deprecated')

### DBSCAN

### OPTICS

### Gaussian mixture models

### Birch