In [1]:
import warnings
# Silence every warning for the rest of the session. The filter is installed
# before the library imports below, so it is already active while they load.
# NOTE(review): this also hides deprecation and convergence warnings raised by
# the libraries used later in the notebook.
warnings.filterwarnings('ignore')

import numpy as np
# Print NumPy floats with 5 digits of precision.
np.set_printoptions(precision=5)

import pandas as pd
# Render pandas floats with 5 decimal places.
pd.set_option('display.precision', 5)

In [2]:
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans

# Generate the synthetic blob dataset with make_blobs' default settings and a
# fixed seed (random_state=1) so the same points are drawn on every run.
# The second return value (the generated blob labels) is discarded into `_`.
X_train, _ = make_blobs(random_state=1)
print('X_train.shape:', X_train.shape)

X_train.shape: (100, 2)


In [3]:
# Fit a 3-cluster k-means model on the blob data.
# k-means starts from randomly chosen centroids, so without a seed the cluster
# numbering (and possibly the centres) differ from run to run. random_state
# pins the run; n_init=10 keeps the best of ten initialisations instead of
# relying on whatever default the installed scikit-learn version uses.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)
kmeans.fit(X_train)

In [4]:
# Show the fitted centroids: one row per cluster, one column per input feature.
print(kmeans.cluster_centers_)

[[ -6.58197  -8.17239]
 [ -1.47108   4.33722]
 [-10.04935  -3.85954]]


In [5]:
# labels_ holds the cluster index assigned to each training sample by fit().
assignments_X_train = kmeans.labels_
print(assignments_X_train)

[1 2 2 2 0 0 0 2 1 1 2 2 0 1 0 0 0 1 2 2 0 2 0 1 2 0 0 1 1 0 1 1 0 1 2 0 2
 2 2 0 0 2 1 2 2 0 1 1 1 1 2 0 0 0 1 0 2 2 1 1 2 0 0 2 2 0 1 0 1 2 2 2 0 1
 1 2 0 0 1 2 1 2 2 0 1 1 1 1 2 1 0 1 1 2 2 0 0 1 0 1]


In [6]:
# Draw a second batch of blob data to demonstrate predict() on points the
# model was not fitted on. The draw is seeded so the printed assignments are
# repeatable on every run; the seed is different from the one used for the
# training data so that X_new is not simply a copy of X_train.
X_new, _ = make_blobs(random_state=2)

# Assign every new point to the closest of the already-fitted centroids.
assignments_X_new = kmeans.predict(X_new)
print(assignments_X_new)

[0 0 0 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 1 0 1 1 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 0 1]


In [7]:
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

# Load the iris dataset and pull the feature matrix and the species labels
# out of the returned bunch.
iris = load_iris()
X_train = iris.data
y_train = iris.target
print('X_train.shape:', X_train.shape)

X_train.shape: (150, 4)


In [8]:
# Fit a 3-cluster k-means model on the raw (unscaled) iris features.
# k-means starts from randomly chosen centroids, so the run is seeded with
# random_state to make the centres and the ARI reported below repeatable, and
# n_init=10 keeps the best of ten initialisations instead of relying on the
# default of the installed scikit-learn version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)
kmeans.fit(X_train)

In [9]:
# Show the fitted centroids: one row per cluster, one column per iris feature.
print(kmeans.cluster_centers_)

[[5.006   3.428   1.462   0.246  ]
 [6.85385 3.07692 5.71538 2.05385]
 [5.88361 2.74098 4.38852 1.43443]]


In [10]:
# Compare the k-means assignments with the true species labels using the
# adjusted Rand index, which does not depend on how the clusters are numbered.
assignments_X_train = kmeans.labels_
ari_raw = adjusted_rand_score(y_train, assignments_X_train)
print('adjusted_rand_score: %.5f' % ari_raw)

adjusted_rand_score: 0.71634


In [11]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

# Reload iris so this section starts from the untouched features and labels.
iris = load_iris()
X_train = iris.data
y_train = iris.target
print('X_train.shape:', X_train.shape)

X_train.shape: (150, 4)


In [12]:
# Standardise the features: learn the per-column statistics from X_train,
# then apply that same transformation to X_train.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

In [13]:
# Fit a 3-cluster k-means model on the standardised iris features.
# A single unseeded initialisation can settle in a poor local optimum, which
# would make the scaled-vs-unscaled ARI comparison reflect the random start
# rather than the effect of scaling. n_init=10 keeps the best of ten
# initialisations and random_state makes the outcome repeatable.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)
kmeans.fit(X_train_scaled)

In [14]:
# Pass the fitted centroids through the scaler's inverse transform before
# printing, so they are shown in the original (unscaled) feature units.
print(scaler.inverse_transform(kmeans.cluster_centers_))

[[6.262   2.872   4.906   1.676  ]
 [4.87838 3.25946 1.44054 0.23243]
 [5.36923 3.90769 1.52308 0.28462]]


In [15]:
# Score the assignments of the current k-means model against the true species
# labels with the adjusted Rand index (independent of cluster numbering).
assignments_X_train_scaled = kmeans.labels_
ari_scaled = adjusted_rand_score(y_train, assignments_X_train_scaled)
print('adjusted_rand_score: %.5f' % ari_scaled)

adjusted_rand_score: 0.46995


In [16]:
from sklearn.datasets import make_blobs
from sklearn.cluster import AgglomerativeClustering

# Regenerate the seeded synthetic blob dataset (random_state=1) for the
# agglomerative clustering example; the generated labels are discarded into `_`.
X_train, _ = make_blobs(random_state=1)
print('X_train.shape:', X_train.shape)

X_train.shape: (100, 2)


In [17]:
# Build a 3-cluster agglomerative (hierarchical) clustering model and fit it
# to the blob data. No linkage is passed, so the estimator's default is used.
agg = AgglomerativeClustering(n_clusters=3)
agg.fit(X_train)

In [18]:
# labels_ holds the cluster index assigned to each training sample by fit().
assignments_X_train = agg.labels_
print(assignments_X_train)

[0 2 2 2 1 1 1 2 0 0 2 2 1 0 1 1 1 0 2 2 1 2 1 0 2 1 1 0 0 1 0 0 1 0 2 1 2
 2 2 1 1 2 0 2 2 1 0 0 0 0 2 1 1 1 0 1 2 2 0 0 2 1 1 2 2 1 0 1 0 2 2 2 1 0
 0 2 1 1 0 2 0 2 2 1 0 0 0 0 2 0 1 0 0 2 2 1 1 0 1 0]


In [19]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import adjusted_rand_score

# Load iris for the linkage comparison: feature matrix plus true species labels.
iris = load_iris()
X_train = iris.data
y_train = iris.target
print('X_train.shape:', X_train.shape)

X_train.shape: (150, 4)


In [20]:
# Linkage criteria to compare, and the ARI obtained with each (same order).
linkage_settings = ['ward', 'average', 'single', 'complete']
clustering_ari = []

for linkage in linkage_settings:
    # Fit a 3-cluster agglomerative model on the raw features with this linkage.
    agg = AgglomerativeClustering(linkage=linkage, n_clusters=3).fit(X_train)

    # Adjusted Rand index of the training-set assignments vs. the true labels.
    assignments_X_train = agg.labels_
    linkage_ari = adjusted_rand_score(y_train, assignments_X_train)
    clustering_ari.append(linkage_ari)

In [21]:
# Tabulate the results: one row per linkage criterion with its ARI.
pd.DataFrame(
    data={
        'linkage': linkage_settings,
        'ARI': clustering_ari,
    }
)

Unnamed: 0,linkage,ARI
0,ward,0.7312
1,average,0.7592
2,single,0.56375
3,complete,0.64225


In [22]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import adjusted_rand_score

# Reload iris so the scaled linkage comparison starts from the untouched data.
iris = load_iris()
X_train = iris.data
y_train = iris.target
print('X_train.shape:', X_train.shape)

X_train.shape: (150, 4)


In [23]:
# Standardise the features: learn the per-column statistics from X_train,
# then apply that same transformation to X_train.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

In [24]:
# Linkage criteria to compare, and the ARI obtained with each (same order).
linkage_settings = ['ward','average','single','complete']
clustering_ari = []

for linkage in linkage_settings:
    # Fit a 3-cluster agglomerative model on the standardised features.
    agg = AgglomerativeClustering(linkage=linkage, n_clusters=3).fit(X_train_scaled)

    # Adjusted Rand index of the training-set assignments vs. the true labels.
    assignments_X_train = agg.labels_
    linkage_ari = adjusted_rand_score(y_train, assignments_X_train)
    clustering_ari.append(linkage_ari)

In [25]:
# Tabulate the results: one row per linkage criterion with its ARI.
pd.DataFrame(
    data={
        'linkage': linkage_settings,
        'ARI': clustering_ari,
    }
)

Unnamed: 0,linkage,ARI
0,ward,0.61532
1,average,0.56214
2,single,0.55837
3,complete,0.57263
