# K-Means trial run

## Initial setup and configuration

Import statements:

In [41]:
#import numpy as np
import sklearn.datasets as skdatasets
import sklearn.cluster as skcluster
import sklearn.metrics as skmetrics
import kmeans
import utils
from initialisations import random, ikmeans, erisoglu

Set up the options for this run:

In [43]:
args = {
    'dataset':'iris',
    #'dataset':'wine',
    #'dataset':'bc',
    #'algorithm':'random',
    #'algorithm':'ikmeans',
    'algorithm':'erisoglu',
    'K':3
}

Configuration available:

In [44]:
datasets = {
    'iris':  skdatasets.load_iris,
    'wine':  skdatasets.load_wine,
    'bc':    skdatasets.load_breast_cancer,
}

algorithms = {
    'random': random.generate,
    'ikmeans': ikmeans.generate,
    'erisoglu': erisoglu.generate,
}                

Run some setup code:

In [45]:
dataloader = datasets[args['dataset']]
initialiser = algorithms[args['algorithm']]
K = args['K']

dataset = dataloader()

data = utils.standardise(dataset.data)
target = dataset.target

## Discover some centroids:

In [46]:
centroids = initialiser(data, K)
print("Centroids:\n", centroids)

Centroids:
 [[-0.22222222  1.         -0.83050847 -0.75      ]
 [ 0.88888889 -0.5         1.          0.83333333]
 [-0.88888889 -0.75       -0.89830508 -0.83333333]]


## Run k-means clustering algorithm:

In [47]:
# Homemade version:
Z, U, clusters, iterations = kmeans.cluster(data, K, centroids.copy())

# SKLearn version
est1 = skcluster.KMeans(n_clusters=K, n_init=1, init=centroids.copy())
est1.fit(data)

KMeans(algorithm='auto', copy_x=True,
    init=array([[-0.22222,  1.     , -0.83051, -0.75   ],
       [ 0.88889, -0.5    ,  1.     ,  0.83333],
       [-0.88889, -0.75   , -0.89831, -0.83333]]),
    max_iter=300, n_clusters=3, n_init=1, n_jobs=None,
    precompute_distances='auto', random_state=None, tol=0.0001, verbose=0)

## Output

In [48]:
print('Me:\n', U)
print("SKL:\n", est1.labels_)
print("Target:\n", target)

print("\nIterations:")
print("Me:", iterations, "| SKL:", est1.n_iter_)

Me:
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2
 2 2 1 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 1 1 2 1 1 1 1
 1 1 2 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 2 1 1 1 1 1
 1 1]
SKL:
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2
 2 2 1 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 1 1 2 1 1 1 1
 1 1 2 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 2 1 1 1 1 1
 1 1]
Target:
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]

Iterations:
Me: 11 | SKL: 11


## Run metrics

In [49]:
acc_me = skmetrics.accuracy_score(target, U)
acc_them = skmetrics.accuracy_score(target, est1.labels_)

print("\nAccuracy Score:")
print("Me:", acc_me, "| SKL:", acc_them)

ari_me = skmetrics.adjusted_rand_score(target, U)
ari_them = skmetrics.adjusted_rand_score(target, est1.labels_)

print("\nAdjusted Rand Index:")
print("Me:", ari_me, "| SKL:", ari_them)


Accuracy Score:
Me: 0.4533333333333333 | SKL: 0.4533333333333333

Adjusted Rand Index:
Me: 0.7008666982225341 | SKL: 0.7008666982225341


## Confusion matrices

In [50]:
cm_me = skmetrics.confusion_matrix(target, U)
cm_them = skmetrics.confusion_matrix(target, est1.labels_)

print("Me:\n", cm_me, "\n")
print("SKL:\n", cm_them)

Me:
 [[50  0  0]
 [ 0 10 40]
 [ 0 42  8]] 

SKL:
 [[50  0  0]
 [ 0 10 40]
 [ 0 42  8]]
