In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pychop import chop
from sklearn.datasets import make_blobs
from sklearn.metrics.cluster import adjusted_rand_score, adjusted_mutual_info_score
from tqdm import tqdm

from src.kmeans import mpKMeans, StandardKMeans2,chop as kchop

# Experiment configuration shared by every sweep cell below.

# random_state values handed to make_blobs; every metric is averaged over them.
sample_seeds = [0, 42, 2024]

# Used both as the number of blob centers and as n_clusters for the models.
ct = 10

# Grid of `delta` values passed to mpKMeans: 1.0, 1.1, ..., 2.0 (11 points).
# linspace is used instead of a float-step arange because arange's length and
# values depend on floating-point rounding of the step; linspace pins both
# endpoints and the number of points exactly.
DELTAS = np.linspace(1, 2, 11)

# cluster_std values passed to make_blobs.
STDS = [1, 2]


In [2]:
# Sweep over every (cluster_std, delta) pair with LOW_PREC built from np.float16.
LOW_PREC = kchop(np.float16)

# Accumulators are indexed [std index, delta index]. Each seed adds its metric
# divided by the number of seeds, so the final entries are means over seeds.
grid_shape = (len(STDS), len(DELTAS))
n_seeds = len(sample_seeds)

# Metrics for models fitted on the raw blobs.
sse_arr, trigger_arr, ari_arr, ami_arr = (np.zeros(grid_shape) for _ in range(4))
# The same metrics for models fitted on the standardized blobs.
norm_sse_arr, norm_trigger_arr, norm_ari_arr, norm_ami_arr = (
    np.zeros(grid_shape) for _ in range(4)
)

for s, std in enumerate(STDS):
    # One progress bar per cluster_std value, advancing once per delta.
    for d, delta in enumerate(tqdm(DELTAS)):
        for seed in sample_seeds:
            X, y = make_blobs(
                n_samples=2000,
                n_features=2,
                cluster_std=std,
                centers=ct,
                random_state=seed,
            )

            # Per-feature standardization: subtract the column mean and divide
            # by the column standard deviation.
            mu, sigma = X.mean(axis=0), X.std(axis=0)
            norm_X = (X - mu) / sigma

            # The raw data is fitted first, then the standardized copy.
            mpkmeans = mpKMeans(
                n_clusters=ct, seeding='d2', low_prec=LOW_PREC, delta=delta, verbose=0
            )
            mpkmeans.fit(X)

            norm_mpkmeans = mpKMeans(
                n_clusters=ct, seeding='d2', low_prec=LOW_PREC, delta=delta, verbose=0
            )
            norm_mpkmeans.fit(norm_X)

            # Pair each fitted model with the accumulators it feeds.
            runs = (
                (mpkmeans, ari_arr, ami_arr, sse_arr, trigger_arr),
                (norm_mpkmeans, norm_ari_arr, norm_ami_arr, norm_sse_arr, norm_trigger_arr),
            )
            for model, ari_acc, ami_acc, sse_acc, trigger_acc in runs:
                # Agreement between predicted labels and the make_blobs labels.
                ari_acc[s, d] += adjusted_rand_score(y, model.labels) / n_seeds
                ami_acc[s, d] += adjusted_mutual_info_score(y, model.labels) / n_seeds
                # Last entry of the model's inertia history
                # (presumably the final SSE -- confirm against src.kmeans).
                sse_acc[s, d] += model.inertia[-1] / n_seeds
                trigger_acc[s, d] += model.low_prec_trigger / n_seeds

100%|██████████| 11/11 [00:30<00:00,  2.77s/it]
100%|██████████| 11/11 [00:42<00:00,  3.90s/it]


In [3]:
# Persist the fp16 sweep results, one CSV per metric, using to_csv defaults.
# The output directory is created first so the writes do not fail when
# 'results/' is missing.
os.makedirs('results', exist_ok=True)

# Metrics from the models fitted on the raw data.
pd.DataFrame(ari_arr).to_csv('results/ari_arr_fp16.csv')
pd.DataFrame(ami_arr).to_csv('results/ami_arr_fp16.csv')
pd.DataFrame(sse_arr).to_csv('results/sse_arr_fp16.csv')
pd.DataFrame(trigger_arr).to_csv('results/trigger_arr_fp16.csv')

# Metrics from the models fitted on the standardized data.
pd.DataFrame(norm_ari_arr).to_csv('results/norm_ari_arr_fp16.csv')
pd.DataFrame(norm_ami_arr).to_csv('results/norm_ami_arr_fp16.csv')
pd.DataFrame(norm_sse_arr).to_csv('results/norm_sse_arr_fp16.csv')
pd.DataFrame(norm_trigger_arr).to_csv('results/norm_trigger_arr_fp16.csv')

In [4]:
# Sweep over every (cluster_std, delta) pair with LOW_PREC built by pychop's
# chop(prec='q52', rmode=1).
# NOTE: this cell rebinds LOW_PREC and all eight result arrays, so the values
# from the previous sweep are no longer reachable under these names afterwards.
# NOTE(review): the recorded run of this cell shows warnings pointing at
# `X[labels == i].mean(axis=0)` inside the k-means implementation -- presumably
# some clusters end up empty; confirm whether NaNs can reach the metrics below.
LOW_PREC = chop(prec='q52', rmode=1)

# Accumulators are indexed [std index, delta index]. Each seed adds its metric
# divided by the number of seeds, so the final entries are means over seeds.
grid_shape = (len(STDS), len(DELTAS))
n_seeds = len(sample_seeds)

# Metrics for models fitted on the raw blobs.
sse_arr, trigger_arr, ari_arr, ami_arr = (np.zeros(grid_shape) for _ in range(4))
# The same metrics for models fitted on the standardized blobs.
norm_sse_arr, norm_trigger_arr, norm_ari_arr, norm_ami_arr = (
    np.zeros(grid_shape) for _ in range(4)
)

for s, std in enumerate(STDS):
    # One progress bar per cluster_std value, advancing once per delta.
    for d, delta in enumerate(tqdm(DELTAS)):
        for seed in sample_seeds:
            X, y = make_blobs(
                n_samples=2000,
                n_features=2,
                cluster_std=std,
                centers=ct,
                random_state=seed,
            )

            # Per-feature standardization: subtract the column mean and divide
            # by the column standard deviation.
            mu, sigma = X.mean(axis=0), X.std(axis=0)
            norm_X = (X - mu) / sigma

            # The raw data is fitted first, then the standardized copy.
            mpkmeans = mpKMeans(
                n_clusters=ct, seeding='d2', low_prec=LOW_PREC, delta=delta, verbose=0
            )
            mpkmeans.fit(X)

            norm_mpkmeans = mpKMeans(
                n_clusters=ct, seeding='d2', low_prec=LOW_PREC, delta=delta, verbose=0
            )
            norm_mpkmeans.fit(norm_X)

            # Pair each fitted model with the accumulators it feeds.
            runs = (
                (mpkmeans, ari_arr, ami_arr, sse_arr, trigger_arr),
                (norm_mpkmeans, norm_ari_arr, norm_ami_arr, norm_sse_arr, norm_trigger_arr),
            )
            for model, ari_acc, ami_acc, sse_acc, trigger_acc in runs:
                # Agreement between predicted labels and the make_blobs labels.
                ari_acc[s, d] += adjusted_rand_score(y, model.labels) / n_seeds
                ami_acc[s, d] += adjusted_mutual_info_score(y, model.labels) / n_seeds
                # Last entry of the model's inertia history
                # (presumably the final SSE -- confirm against src.kmeans).
                sse_acc[s, d] += model.inertia[-1] / n_seeds
                trigger_acc[s, d] += model.low_prec_trigger / n_seeds

  new_centers = np.array([X[labels == i].mean(axis=0) for i in range(self.centers.shape[0])])
  ret = um.true_divide(
100%|██████████| 11/11 [07:33<00:00, 41.20s/it]
  new_centers = np.array([X[labels == i].mean(axis=0) for i in range(self.centers.shape[0])])
  ret = um.true_divide(
100%|██████████| 11/11 [10:24<00:00, 56.78s/it]


In [5]:
# Persist the q52 sweep results, one CSV per metric, using to_csv defaults.
# The output directory is created first so the writes do not fail when
# 'results/' is missing.
os.makedirs('results', exist_ok=True)

# Metrics from the models fitted on the raw data.
pd.DataFrame(ari_arr).to_csv('results/ari_arr_q52.csv')
pd.DataFrame(ami_arr).to_csv('results/ami_arr_q52.csv')
pd.DataFrame(sse_arr).to_csv('results/sse_arr_q52.csv')
pd.DataFrame(trigger_arr).to_csv('results/trigger_arr_q52.csv')

# Metrics from the models fitted on the standardized data.
pd.DataFrame(norm_ari_arr).to_csv('results/norm_ari_arr_q52.csv')
pd.DataFrame(norm_ami_arr).to_csv('results/norm_ami_arr_q52.csv')
pd.DataFrame(norm_sse_arr).to_csv('results/norm_sse_arr_q52.csv')
pd.DataFrame(norm_trigger_arr).to_csv('results/norm_trigger_arr_q52.csv')