In [None]:
from sklearn.datasets import make_circles
from sklearn.datasets import make_moons
from sklearn.datasets import make_swiss_roll
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans

In [None]:
# Shared settings for the synthetic datasets; the later cells reuse both values.
no_samples = 1500  # number of generated points; vary if you want
noise_factor = .05  # std of the Gaussian noise; vary if you want, e.g. .5 -> circles become one

# Create the two concentric circles (inner circle scaled by `factor`) and add some noise.
# random_state pins the sample so the figure is identical on every Restart & Run All.
X_circle, _ = make_circles(n_samples=no_samples, factor=.5, noise=noise_factor,
                           random_state=42)

# You can vary random_state, but k-means will never put the inner circle in one
# cluster and the outer circle in the other: each point goes to its nearest
# centroid, so two centroids always cut the plane with a straight line.
# n_init is given explicitly because its default differs between scikit-learn
# releases (10 in older versions, 'auto' in newer ones).
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
kmeans.fit(X_circle)

# Explicit figure/axes so re-running the cell never draws into a stale figure.
fig, ax = plt.subplots()
ax.scatter(X_circle[:, 0], X_circle[:, 1], c=kmeans.labels_)
# Cluster centres are drawn on top in green.
ax.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=50, c='green')
ax.set_title('k-means (k=2) on two concentric circles');

In [None]:
# Create two interleaving half moons and try to fit them with k-means.
# random_state pins the sample so this cell (and the later cells that reuse
# X_moon) produce the same figure on every run.
X_moon, _ = make_moons(n_samples=no_samples, noise=noise_factor, random_state=42)

# You can vary random_state, but k-means will never recover one cluster per
# half moon: the nearest-centroid rule splits the plane with a straight line,
# which necessarily cuts through both moons.
# n_init is given explicitly because its default differs between scikit-learn
# releases (10 in older versions, 'auto' in newer ones).
kmeans = KMeans(n_clusters=2, n_init=10, random_state=1)
kmeans.fit(X_moon)

# Explicit figure/axes so re-running the cell never draws into a stale figure.
fig, ax = plt.subplots()
ax.scatter(X_moon[:, 0], X_moon[:, 1], c=kmeans.labels_)
# Cluster centres are drawn on top in green.
ax.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=50, c='green')
ax.set_title('k-means (k=2) on half moon data');

In [None]:
# Agglomerative (hierarchical) clustering on the datasets; try the different
# linkages (single, average, complete) and noise levels yourself.
# Shown here for single linkage on the half moon data; the other variants only
# differ in the `linkage` argument and the dataset passed to `fit`.
# The scikit-learn documentation also has extensive clustering tutorials,
# including code that runs all available procedures in one executable loop.
from sklearn.cluster import AgglomerativeClustering

# `fit` returns the estimator itself, so construction and fitting can be chained.
aggloCluster_single = AgglomerativeClustering(linkage='single', n_clusters=2).fit(X_moon)

# X_moon has one column per coordinate; transposing and unpacking yields (x, y).
plt.scatter(*X_moon.T, c=aggloCluster_single.labels_)
plt.title('Hierarchical clustering with single linkage for half moon data')

In [None]:
# Density-based clustering (DBSCAN) on the half moon data.
from sklearn.cluster import DBSCAN

# Every keyword is spelled out; eps (neighbourhood radius) and min_samples
# (points required in that neighbourhood) are the ones worth experimenting with.
# `fit` returns the estimator itself, so construction and fitting are chained.
dbscan = DBSCAN(
    eps=0.3,
    min_samples=20,
    metric='euclidean',
    metric_params=None,
    algorithm='auto',
    leaf_size=30,
    p=None,
    n_jobs=None,
).fit(X_moon)

# X_moon has one column per coordinate; transposing and unpacking yields (x, y).
plt.scatter(*X_moon.T, c=dbscan.labels_)

In [None]:
# Example showing that DBSCAN identifies outliers: the noise level is raised
# (0.1 here) while eps is tightened, so sparse points end up unassigned.
# random_state pins the sample so the same points show up as outliers on every run.
X_moon_noisy, _ = make_moons(n_samples=no_samples, noise=0.1, random_state=42)
dbscan = DBSCAN(algorithm='auto', eps=0.1, leaf_size=30, metric='euclidean',
                metric_params=None, min_samples=10, n_jobs=None, p=None)

dbscan.fit(X_moon_noisy)

# DBSCAN marks noise points with the label -1 (see the scikit-learn DBSCAN
# docs), so they get their own colour in the scatter plot.
fig, ax = plt.subplots()
ax.scatter(X_moon_noisy[:, 0], X_moon_noisy[:, 1], c=dbscan.labels_)
ax.set_title('DBSCAN on noisy half moon data (label -1 = outlier)');

In [None]:
%matplotlib notebook
X_swiss, y = make_swiss_roll(no_samples, noise=noise_factor)

kmeans = KMeans(n_clusters = 6, random_state = 42) #for six clusters here good results; focus here is that when projecting this roll into 2D, local neighbourhood should be preserved (geodesic distance, embedding learning, see ML)
kmeans.fit(X_swiss)

# Plot result
import mpl_toolkits.mplot3d.axes3d as p3
fig = plt.figure()
ax = p3.Axes3D(fig)
ax.view_init(7, -80)
for l in np.unique(y):
    ax.scatter(X_swiss[y == l, 0], X_swiss[y == l, 1], X_swiss[y == l, 2],
               color=plt.cm.jet(float(l) / np.max(y + 1)),
               s=20, edgecolor='k')
plt.show()