In [4]:
# Numerical / tabular libraries used throughout the notebook.
import numpy as np
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(0)
import pandas as pd
# Not referenced by name anywhere visible; presumably imported so that the
# DataFrame `.hvplot` accessor used in the plotting cell is available.
import hvplot.pandas
# `datasets` supplies the make_moons generator used to build the data.
from sklearn import datasets
import warnings

In [5]:
warnings.filterwarnings('ignore')

### Build the Dataset

In [2]:
# Draw 500 samples from the make_moons generator (noise=0.05);
# random_state pins the draw so the cell is reproducible.
X, y = datasets.make_moons(n_samples=500, noise=0.05, random_state=1)
# Preview the first ten rows of the feature array.
X[:10]

array([[ 0.26990344, -0.08961617],
       [ 0.65960878, -0.44401893],
       [ 0.85049952,  0.56270289],
       [ 0.60950684,  0.69134729],
       [ 2.00353027,  0.19446353],
       [ 1.98790193,  0.40053406],
       [ 0.24847592, -0.18050231],
       [ 0.94871933,  0.37843451],
       [-0.96065183, -0.10227327],
       [ 0.44418573,  0.90246804]])

### Fit and Predict a K-Means Model

In [8]:
from sklearn.cluster import KMeans, AgglomerativeClustering, Birch

# Fit a 3-cluster K-Means model on X (fit returns the estimator itself),
# then assign each sample of X to its nearest learned centroid.
k_model = KMeans(n_clusters=3, random_state=0).fit(X)
k_predictions = k_model.predict(X)

### Fit and Predict Birch and Agglomerative Models

In [9]:
# Fit a 2-cluster Birch model on X and label the same samples with it.
b_model = Birch(n_clusters=2).fit(X)
b_predictions = b_model.predict(X)

In [10]:
# Fit a 3-cluster agglomerative model on X; the labels it assigned
# during fitting are read from the fitted estimator.
a_model = AgglomerativeClustering(n_clusters=3)
a_predictions = a_model.fit(X).labels_

### Plot Model Predictions for Birch

In [11]:
# Build a frame holding the two feature columns plus the Birch cluster label.
# The feature columns are named with the strings '0' and '1' explicitly:
# without `columns=`, pandas labels them with the integers 0 and 1, which
# mixes int and str labels in one frame and does not literally match the
# string names ('0', '1') that the scatter-plot cell passes as x and y.
b_predictions_df = pd.DataFrame(X, columns=['0', '1'])
b_predictions_df['Birch predict'] = b_predictions
b_predictions_df

Unnamed: 0,0,1,Birch predict
0,0.269903,-0.089616,0
1,0.659609,-0.444019,0
2,0.850500,0.562703,0
3,0.609507,0.691347,0
4,2.003530,0.194464,0
...,...,...,...
495,0.024589,0.392571,1
496,0.673013,0.715418,0
497,1.277861,-0.457502,0
498,-0.980544,0.194562,1


In [12]:
# Scatter the two feature columns, grouping the points by their Birch label.
b_predictions_df.hvplot.scatter(x='0', y='1', by='Birch predict')

### Estimate Scores for two Versions of the Birch Model

In [13]:
# Birch variant with two clusters: fit on X, then label the same samples.
B2_clusters = Birch(n_clusters=2).fit(X)
B2_predict = B2_clusters.predict(X)

In [14]:
# Birch variant with three clusters: fit on X, then label the same samples.
B3_clusters = Birch(n_clusters=3).fit(X)
B3_predict = B3_clusters.predict(X)

In [15]:
from sklearn import metrics

# Calinski-Harabasz score of the labels stored on the fitted
# 2-cluster Birch model.
labels = B2_clusters.labels_
score = metrics.calinski_harabasz_score(X, labels=labels)
score

588.1123857523019

In [16]:
# Calinski-Harabasz score of the labels stored on the fitted
# 3-cluster Birch model.
labels = B3_clusters.labels_
score = metrics.calinski_harabasz_score(X, labels=labels)
score

654.2904571777168