This notebook compares k-means, HDBSCAN, Quickshift++, and CLASSIX in terms of runtime and clustering quality (ARI and AMI).

## Phoneme

In [1]:
import hdbscan
from sklearn.cluster import KMeans
from quickshift.QuickshiftPP import *
from threadpoolctl import threadpool_limits

In [2]:
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from classix import CLASSIX, loadData
import classix
from time import time
import cv2
import os


import pandas as pd
import numpy as np

# Show which CLASSIX release produced the results below.
print(classix.__version__)

from HiPart.clustering import DePDDP
from sklearn.metrics import adjusted_rand_score as ari
from sklearn.metrics import adjusted_mutual_info_score as ami

import collections


1.0.5


In [3]:
# Benchmark CLASSIX, HDBSCAN, Quickshift++ and k-means on the Phoneme data:
# fit each model once, record wall-clock fit time plus ARI/AMI against `y`,
# then persist the scores, runtimes and predicted labels under result/exp1/.
X, y = loadData('Phoneme')
clusterNum = len(np.unique(y))

lari = list()
lami = list()
runtime = list()
# Size of the smallest label group in y (not used further in this cell).
min_cluster = min(collections.Counter(y).values())

# Restrict BLAS to a single thread for every timed fit.
with threadpool_limits(limits=1, user_api='blas'):
    clx = classix.CLASSIX(radius=0.43, minPts=4, verbose=0)
    _hdbscan = hdbscan.HDBSCAN(min_cluster_size=11, algorithm='best')
    quicks = QuickshiftPP(k=245, beta=0.3)
    kmeans = KMeans(n_clusters=clusterNum, init='k-means++', random_state=1)

    # (printed name, timed fit call, accessor for the predicted labels).
    # The C-order copy for Quickshift stays inside the timed call, exactly as
    # in the original measurement.
    _experiments = [
        ("CLASSIX", lambda: clx.fit(X), lambda: clx.labels_),
        ("HDBSCAN", lambda: _hdbscan.fit(X), lambda: _hdbscan.labels_),
        ("Quickshift", lambda: quicks.fit(X.copy(order='C')), lambda: quicks.memberships),
        ("kmeans", lambda: kmeans.fit(X), lambda: kmeans.labels_),
    ]

    for _algo_name, _run_fit, _get_labels in _experiments:
        st = time()
        _run_fit()
        et = time()

        # Score once and reuse the values for both the log and the results.
        _pred = _get_labels()
        _score_ari = ari(_pred, y)
        _score_ami = ami(_pred, y)

        print(_algo_name + " uses time:", et - st)
        print("ARI:", _score_ari)
        print("AMI:", _score_ami)
        lari.append(_score_ari)
        lami.append(_score_ami)
        runtime.append(et - st)

    # NOTE: DePDDP is currently excluded from this comparison (its block was
    # commented out and never timed its own fit).

labels_ = [clx.labels_, _hdbscan.labels_, quicks.memberships, kmeans.labels_]
lari = np.asarray(lari)
lami = np.asarray(lami)
runtime = np.asarray(runtime)

# Create the output directory up front so a fresh checkout does not lose the
# whole (slow) run to a missing-folder error at save time.
os.makedirs("result/exp1", exist_ok=True)
np.save("result/exp1/Phoneme_ari.npy", lari)
np.save("result/exp1/Phoneme_ami.npy", lami)
np.save("result/exp1/Phoneme_runtime.npy", runtime)
# Assumes all four label vectors have the same length so they stack cleanly.
np.save("result/exp1/Phoneme_labels.npy", labels_)


CLASSIX uses time: 3.954103946685791
ARI: 0.7654435029179179
AMI: 0.8507474986036857
HDBSCAN uses time: 11.235790014266968
ARI: 0.41513243389557386
AMI: 0.6011646109072462
Quickshift uses time: 9.186430215835571
ARI: 0.7472841247124256
AMI: 0.8363803822795519




kmeans uses time: 0.9533700942993164
ARI: 0.6828648832770431
AMI: 0.7328508171299603
