# <center>`Hartigan index`</center>

# 1. Import required libraries

In [1]:
import river
# Record the installed river version next to the results so the run can be reproduced.
print('river module version:', river.__version__)

from river.stream import iter_array
from river.metrics.cluster import Hartigan

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn import datasets
from sklearn.cluster import KMeans

from MODULE.ClustersFeatures import ClustersCharacteristics # custom module

river module version: 0.7.0


# 2. Create dataset

In [2]:
# Synthetic dataset: 500 samples drawn around 3 blob centres, seeded for reproducibility.
# Only the sample matrix is kept; the second return value is discarded.
features, _ = datasets.make_blobs(
    n_samples=500,
    centers=3,
    random_state=0,
)

In [3]:
# Sanity check on the generated array: (number of samples, number of features).
features.shape

(500, 2)

# 3. Perform preprocessing

In [4]:
# Fit the standardiser on the raw features first, then apply it to the same data.
# `scaler` stays available for transforming further data with the fitted statistics.
scaler = StandardScaler().fit(features)
scaled_features = scaler.transform(features)

# 4. Perform `K-Means clustering`

In [5]:
# Build and fit the 3-cluster model in one expression (`fit` returns the estimator),
# then keep the per-sample cluster assignments for the metric computations below.
kmeans = KMeans(
    n_clusters=3,
    n_init='auto',
    random_state=0,
).fit(scaled_features)
preds = kmeans.labels_

# 5. Create a dataframe

In [6]:
# Assemble the scaled features and their cluster label in a single expression.
# `class` is a Python keyword, so the column name is passed through a dict.
df = pd.DataFrame(scaled_features).assign(**{'class': preds})
df.head()

Unnamed: 0,0,1,class
0,0.387897,1.127112,2
1,-1.366036,0.535399,0
2,-1.424717,-0.452852,0
3,0.684388,0.003832,1
4,-0.804022,1.008488,0


# 6. Calculate Hartigan index using `ClustersFeatures` module

In [7]:
# Wrap the labelled frame in the custom ClustersFeatures helper; the 'class' column
# (the K-Means predictions added above) is declared as the cluster label.
cc = ClustersCharacteristics(df, label_target='class')

In [8]:
# Ratio of between-group dispersion to pooled within-cluster dispersion (no log yet).
between_dispersion = cc.score_between_group_dispersion()
within_dispersion = cc.score_pooled_within_cluster_dispersion()
round(between_dispersion / within_dispersion, 5)

2.53056

In [9]:
# Natural log of the between/within dispersion ratio, computed by hand so it can be
# compared with the value the module reports directly in the next cell.
dispersion_ratio = (
    cc.score_between_group_dispersion()
    / cc.score_pooled_within_cluster_dispersion()
)
round(np.log(dispersion_ratio), 5)

0.92844

In [10]:
# Value reported by the module's own log-SS-ratio score, rounded to 5 decimals.
hartigan_clustersfeatures = round(cc.score_index_log_ss_ratio(), 5)
print('Hartigan index using ClustersFeatures module:', hartigan_clustersfeatures)

Hartigan index using ClustersFeatures module: 0.92844


# 7. Calculate Hartigan index using `river` module

In [11]:
# Centroid table from ClustersFeatures: one column per cluster label and one row per
# feature. It is passed to the river metric as the cluster centres below.
cc.data_centroids

class,0,1,2
0,-1.125549,0.888798,0.24754
1,0.127389,-1.062069,1.006024


In [12]:
# Fresh river Hartigan metric; it is fed one sample at a time in the next cell.
metric = Hartigan()

In [13]:
# Stream every (sample, predicted label) pair into the incremental river metric.
#
# The centroid table is the same for every sample, so it is converted once, outside
# the loop, into a {cluster label: centroid Series} mapping instead of being rebuilt
# on each iteration.
# NOTE(review): presumably re-running this cell without re-creating `metric` feeds
# the samples a second time — re-run the `metric = Hartigan()` cell first.
centers = dict(cc.data_centroids)
for x, y_pred in iter_array(scaled_features, preds):
    # Keep the object returned by `update`; the final cell queries it with `.get()`.
    clustering_metric = metric.update(x, y_pred, centers)

In [14]:
# Final value of the streamed river metric, rounded to 5 decimals for comparison
# with the ClustersFeatures result.
hartigan_river = round(clustering_metric.get(), 5)
print('Hartigan index using river module:', hartigan_river)

Hartigan index using river module: 0.92844
