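# Milligan 1980 Ward-based Algorithm

Cluster Iris with Ward's hierarchical method, use the resulting cluster means as the initial centroids for K-Means, and compare the ARI of both partitions against the true labels.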

In [6]:
import imports

import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
import sklearn.datasets as skdatasets

from initialisations import milligan1980 as milligan
from metrics import ari

### Config

In [7]:
# Load the Iris data set that ships with scikit-learn and split it into
# the feature matrix and the ground-truth class labels.
dataset = skdatasets.load_iris()
data, target = dataset.data, dataset.target

# Number of clusters requested from the clustering estimators.
K = 3

### Run Ward to get initial labels and ARI

In [8]:
# Fit Ward-linkage agglomerative clustering; fit() returns the estimator,
# so the fitted model can be bound in a single statement.
ward = AgglomerativeClustering(linkage='ward', n_clusters=K).fit(data)

# U holds the cluster index assigned to every sample.
U = ward.labels_
ward_ari = ari.score(target, U)

print(U)
print("ARI:", ward_ari)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 2 0 2 2 2 2
 2 2 0 0 2 2 2 2 0 2 0 2 0 2 2 0 0 2 2 2 2 2 0 0 2 2 2 0 2 2 2 0 2 2 2 0 2
 2 0]
ARI: 0.7311985567707745


### Calculate means to get initial centroids

In [9]:
# One centroid per Ward cluster: the feature-wise mean of the samples
# labelled k. Rows are stacked in label order, so Z_init[k] belongs to
# cluster k.
Z_init = np.array(
    [data[U == k].mean(axis=0) for k in range(K)],
    dtype=float,
)

print(Z_init)

[[5.9203125  2.7515625  4.4203125  1.434375  ]
 [5.006      3.428      1.462      0.246     ]
 [6.86944444 3.08611111 5.76944444 2.10555556]]


### Run K-Means from the Ward centroids and get ARI

In [12]:
# Seed K-Means with the centroids computed above. n_init=1 because the
# starting centroids are passed explicitly via init.
est = KMeans(init=Z_init, n_clusters=K, n_init=1).fit(data)

kmeans_labels = est.labels_
kmeans_ari = ari.score(target, kmeans_labels)

print(kmeans_labels)
print("ARI:", kmeans_ari)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 2 0 2 2 2 2
 2 2 0 0 2 2 2 2 0 2 0 2 0 2 2 0 0 2 2 2 2 2 0 2 2 2 2 0 2 2 2 0 2 2 2 0 2
 2 0]
ARI: 0.7302382722834697
