In [3]:
import pandas as pd
import numpy as np
import math 
from sklearn.metrics.pairwise import pairwise_distances
from kmeans import StandardKMeans1, StandardKMeans2, mpKMeans, allowKMeans1,  allowKMeans2, chop
import numpy as np

from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score
LOW_PREC = chop(prec='q43', rmode=1)

def load_data(file):
    data = pd.read_csv(file,sep="\\s+", header = None)
    return np.asarray(data.values).copy(order='C')

def sigificant_digit(number, digits=10):
    return round(number, digits - int(math.floor(math.log10(abs(number)))) - 1)

SSETS = ['dim032.txt', 'dim064.txt', 'dim128.txt', 'dim256.txt', 'dim512.txt', 'dim1024.txt']
SSETS_LABELS = ['dim032.txt', 'dim064.txt', 'dim128.txt', 'dim256.txt', 'dim512.txt', 'dim1024.txt']

In [4]:
for i in range(len(SSETS)):
    print(SSETS[i])
    X = load_data('data/Dim/'+SSETS[0])
    y = load_data('data/Dim/cb/'+SSETS_LABELS[0])
    y = pairwise_distances(X, y, metric='euclidean')
    y = np.argmin(y, axis=1)
    X = (X - X.mean(axis=0)) / X.std(axis=0)

    clusters = len(np.unique(y))
    kmeans_d2 = StandardKMeans2(n_clusters=clusters, seeding='d2')
    kmeans_d2.fit(X)

    kmeans_psa = StandardKMeans2(alpha=1, seeding='psa')
    kmeans_psa.fit(X)

    mpkmeans_d2 = mpKMeans(n_clusters=clusters, seeding='d2', low_prec=LOW_PREC)
    mpkmeans_d2.fit(X)


    print("kmeans++|", 'kmeans psa|', 'mixed precision kmeans++|', 'mixed precision kmeans psa|')

    print("clusters:", kmeans_d2.centers.shape[0], kmeans_psa.centers.shape[0],
         mpkmeans_d2.centers.shape[0], mpkmeans_psa.centers.shape[0])

    print('SSE:', sigificant_digit(kmeans_d2.inertia[-1]), 
                  sigificant_digit(kmeans_psa.inertia[-1]), 
                  sigificant_digit(mpkmeans_d2.inertia[-1]), 
                  sigificant_digit(mpkmeans_psa.inertia[-1]))

    print('homogeneity:', sigificant_digit(homogeneity_score(y, kmeans_d2.labels)), 
                           sigificant_digit(homogeneity_score(y, kmeans_psa.labels)),
                          sigificant_digit(homogeneity_score(y, mpkmeans_d2.labels)),
                          sigificant_digit(homogeneity_score(y, mpkmeans_psa.labels)))

    print('completeness:', sigificant_digit(completeness_score(y, kmeans_d2.labels)), 
                           sigificant_digit(completeness_score(y, kmeans_psa.labels)),
                          sigificant_digit(completeness_score(y, mpkmeans_d2.labels)),
                          sigificant_digit(completeness_score(y, mpkmeans_psa.labels)))

    print('v_measure:', sigificant_digit(v_measure_score(y, kmeans_d2.labels)), 
                           sigificant_digit(v_measure_score(y, kmeans_psa.labels)),
                          sigificant_digit(v_measure_score(y, mpkmeans_d2.labels)),
                          sigificant_digit(v_measure_score(y, mpkmeans_psa.labels)))

dim032.txt
kmeans++| kmeans psa| mixed precision kmeans++| mixed precision kmeans psa|
clusters: 16 16 16 16
SSE: 88.00265273 88.00265273 88.00265273 88.00760024
homogeneity: 1.0 1.0 1.0 1.0
completeness: 1.0 1.0 1.0 1.0
v_measure: 1.0 1.0 1.0 1.0
dim064.txt
kmeans++| kmeans psa| mixed precision kmeans++| mixed precision kmeans psa|
clusters: 16 16 16 16
SSE: 88.00265273 88.00265273 88.00265273 88.00760024
homogeneity: 1.0 1.0 1.0 1.0
completeness: 1.0 1.0 1.0 1.0
v_measure: 1.0 1.0 1.0 1.0
dim128.txt
kmeans++| kmeans psa| mixed precision kmeans++| mixed precision kmeans psa|
clusters: 16 16 16 16
SSE: 88.00265273 88.00265273 88.00265273 88.00760024
homogeneity: 1.0 1.0 1.0 1.0
completeness: 1.0 1.0 1.0 1.0
v_measure: 1.0 1.0 1.0 1.0
dim256.txt
kmeans++| kmeans psa| mixed precision kmeans++| mixed precision kmeans psa|
clusters: 16 16 16 16
SSE: 88.00265273 88.00265273 88.00265273 88.00760024
homogeneity: 1.0 1.0 1.0 1.0
completeness: 1.0 1.0 1.0 1.0
v_measure: 1.0 1.0 1.0 1.0
dim512.t