In [1]:
import ctypes

# Upper bound on the number of threads requested from MKL in this notebook.
MAX_NUM_THREADS = 4

# Handle to the MKL runtime shared library; it has to be resolvable by the
# dynamic loader under exactly this file name.
mkl_rt = ctypes.CDLL('libmkl_rt.so')
mkl_get_max_threads = mkl_rt.mkl_get_max_threads


def mkl_set_num_threads(cores):
    """Ask the MKL runtime to use ``cores`` threads.

    The value is wrapped in a C ``int`` and handed to the library by
    reference.
    """
    requested = ctypes.c_int(cores)
    mkl_rt.mkl_set_num_threads(ctypes.byref(requested))


mkl_set_num_threads(MAX_NUM_THREADS)
print(f'Number of threads was limited to {mkl_get_max_threads()}.')

Number of threads was limited to 4.


In [2]:
import os
import time
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

import numpy_tools
import loaders.eth80 as eth80
import loaders.smni_eeg as smni_eeg

from experiment_tools import temporal_sscal
from experiment_tools import kmeans_gmm_clustering_experiment
from experiment_tools import hac_clustering_experiment

import cluster.cobe_contrasting

#from sklearn.metrics import accuracy_score

#from gtcd import reshape
#from computational_utilities import reshape



#from cluster.gica_contrasting import GICAContrast
#from cluster.gtcd_contrasting import GLROContrast
#from cluster.gtcd_contrasting import GTLDContrast

#from sklearn.cluster import AgglomerativeClustering
#from sklearn.cluster import KMeans
#from sklearn.mixture import GaussianMixture as GMM

# clustering performance measures
#from sklearn.metrics import fowlkes_mallows_score
#from sklearn.metrics import adjusted_rand_score
#from sklearn.metrics import normalized_mutual_info_score
#from sklearn.metrics import adjusted_mutual_info_score

#from sklearn.metrics import pairwise_distances


In [3]:
# Directories holding the two datasets; relative paths, so they are resolved
# against the working directory of the kernel.
data_eth80_dirname = '../data/eth80/'
data_smni_dirname = '../data/eeg_smni/'

In [4]:
# load ETH80
data_eth80_path = os.path.join(data_eth80_dirname, 'eth80-cropped-close128')

image_shape = 32
Nclasses, Nobjects = 8, 10
# image_shape is an integer, so its square is already an integer pixel count
Npixels = image_shape ** 2

data, labels, classes = eth80.eth80_dataset(data_eth80_path, image_shape)
# inverse of the mapping returned by the loader (its values become the keys)
classes_reverse_dict = {value: key for key, value in classes.items()}

# The loader returns a 5-D array; the sizes of axes 2 and 3 are not needed
# here (presumably image height and width -- TODO confirm in loaders.eth80).
Nsamples, Nangles, _, _, Ncolors = data.shape
data = numpy_tools.reshape_np(
    data,
    [Nangles, -1, Ncolors],
    order='F',
    use_batch=True
)
# Swap axes 1 and 2 of the 4-D result so that the per-sample axis order
# matches `n` below (assumes reshape_np keeps the leading sample axis
# when use_batch=True -- TODO confirm in numpy_tools).
permutation = [0, 2, 1, 3]
data = np.transpose(data, permutation)

n = [Npixels, Nangles, Ncolors]

eth80_dataset = dict(
    data=data,
    n=n,
    labels=labels,
    classes=classes,
    classes_reverse_dict=classes_reverse_dict,
)

In [5]:
# load SMNI EEG
data_smni_path = os.path.join(data_smni_dirname, 'smni_eeg_processed.npz')
# np.load on an .npz file returns an archive object that keeps the file open
# and reads each array on access; pull both arrays out while it is open and
# let the context manager close the file afterwards.
with np.load(data_smni_path) as df:
    data, labels = df['data'], df['labels']

# Stored axis order: (subjects, channels, time, conditions).
Nsubjects, Nchannels, Ntime, Nconditions = data.shape
# Swap the channel and time axes -> (subjects, time, channels, conditions),
# which is the per-sample order recorded in `n`.
permutation = [0, 2, 1, 3]
data = np.transpose(data, permutation)
n = [Ntime, Nchannels, Nconditions]

smni_dataset = {
    'data': data,
    'n': n,
    'labels': labels
}

In [6]:
# Registry of the datasets used in the experiments, keyed by short name.
datasets = dict(eth80=eth80_dataset, smni=smni_dataset)
# Names in insertion order of the registry.
dataset_names = list(datasets)

## Clustering with COBE contrast

In [7]:
# Output directory (created if missing) and file-name prefix for fitted models.
save_model_dirname = '../models/clustering/cobe/'
save_filename_base = 'COBE_contrasted'
os.makedirs(save_model_dirname, exist_ok=True)

Ntrials = 20
Ntrials_cobe = 10  # number of random states drawn below
np_random_seed = 720

# Seed the legacy global NumPy generator so the drawn states are reproducible.
np.random.seed(np_random_seed)
random_states = np.random.randint(low=1, high=1000, size=Ntrials_cobe)

# Common ranks to evaluate: 1, 2, ..., 5.
commonRanks = np.arange(1, 6)

In [9]:
# Fit one COBE contrasting model per (dataset, common rank, random state),
# time every fit, and save the model together with its fit diagnostics.
for dataset_name in dataset_names:
    print(f'\t\t\t {dataset_name}')
    n = datasets[dataset_name]['n']
    T = numpy_tools.reshape_np(
        datasets[dataset_name]['data'],
        [n[0], -1],
        use_batch=True
    ).astype(np.float32)
    # One timing table per dataset, sized by the number of COBE trials that
    # are actually run. A single table shared by all datasets would let a
    # later dataset overwrite earlier timings and would leak stale entries
    # into the support files saved below.
    times = np.zeros([len(commonRanks), Ntrials_cobe])
    for k_comr, commonRank in enumerate(commonRanks):
        print(f'\t COBE: crank={commonRank}')
        for k_trial in range(Ntrials_cobe):
            contrasting = cluster.cobe_contrasting.COBEContrast(
                commonRank,
                shapeObject=[n[0], int(np.prod(n[1:]))],
                maxitnum=100,
                epsilon=1e-5,
                random_state=random_states[k_trial]
            )
            # process_time() reports CPU time of the process, not wall-clock time
            t1 = time.process_time()
            _, inform = contrasting.fit_transform(T, commonRank)
            t2 = time.process_time()
            times[k_comr, k_trial] = t2-t1
            save_filename = (
                f'{save_filename_base}_dataset={dataset_name}_'
                f'crank={commonRank}_trial={k_trial}'
            )
            # Side file with the fit diagnostics and a snapshot of the
            # timings collected so far for this dataset.
            np.savez_compressed(
                os.path.join(save_model_dirname, f'support_{save_filename}'),
                inform=inform,
                times=times,
                random_states=random_states,
                dataset_names=dataset_names
            )
            contrasting.saveParameters(
                os.path.join(save_model_dirname, f'{save_filename}')
            )
            print(
                f'{commonRank}: t={times[k_comr, k_trial]:.3e}, '
                f'fval={inform["fval"][-1]:.3e}'
            )

			 eth80
	 COBE: crank=1
1: t=1.033e+01, fval=9.356e-05
1: t=1.097e+01, fval=9.259e-04
1: t=1.028e+01, fval=6.458e-03
1: t=9.778e+00, fval=3.387e-03
1: t=1.419e+01, fval=1.196e-04
1: t=1.223e+01, fval=4.178e-03
1: t=1.116e+01, fval=8.077e-04
1: t=1.096e+01, fval=1.930e-03
1: t=1.216e+01, fval=2.629e-03
1: t=1.218e+01, fval=8.210e-03
	 COBE: crank=2
2: t=3.797e+01, fval=2.774e-03
2: t=3.771e+01, fval=8.845e-04
2: t=3.922e+01, fval=2.079e-03
2: t=3.827e+01, fval=3.594e-03
2: t=3.367e+01, fval=6.691e-04
2: t=4.309e+01, fval=1.998e-03
2: t=3.353e+01, fval=6.397e-04
2: t=3.622e+01, fval=5.502e-03
2: t=3.571e+01, fval=1.254e-03
2: t=3.842e+01, fval=6.125e-04
	 COBE: crank=3
3: t=5.109e+01, fval=7.976e-04
3: t=4.436e+01, fval=2.667e-03
3: t=4.653e+01, fval=2.756e-03
3: t=4.500e+01, fval=1.601e-03
3: t=4.161e+01, fval=4.668e-04
3: t=4.514e+01, fval=8.147e-03
3: t=4.301e+01, fval=1.035e-03
3: t=4.453e+01, fval=9.618e-04
3: t=4.315e+01, fval=4.262e-03
3: t=4.694e+01, fval=1.112e-03
	 COBE: cran

In [9]:
# Location and intended file-name prefix of the previously fitted COBE models.
model_dirname = '../models/clustering/cobe/'
model_filename_base = 'COBE_contrasted'

# Directory for clustering results (created if missing).
save_results_dirname = '../results/clustering/'
os.makedirs(save_results_dirname, exist_ok=True)

# Base names for result files; presumably "ac" = agglomerative clustering and
# "kg" = k-means / GMM -- TODO confirm.
save_results_filename_ac = 'cobe_contrasted_ac'
save_results_filename_kg = 'cobe_contrasted_kg'

# Same values as in the model-fitting configuration cell.
Ntrials = 20
Ntrials_cobe = 10
np_random_seed = 720
   
def preprocess_cobe(dataset, **kwargs):
    """Transform a dataset with a stored COBE contrasting model.

    Parameters
    ----------
    dataset : dict
        Must provide the keys 'n', 'data' and 'labels'.
    **kwargs
        commonRank : required; passed to ``COBEContrast``.
        model_path : required; passed to ``loadParameters``.
        sscal : optional, default False. When true, the transformed data is
            centred and divided by the sample standard deviation (ddof=1),
            both taken along axis 1.

    Returns
    -------
    tuple
        ``(transformed_data, dataset['labels'])``.

    Raises
    ------
    AssertionError
        If 'commonRank' or 'model_path' is missing from ``kwargs``.
    """
    assert 'commonRank' in kwargs
    assert 'model_path' in kwargs
    apply_scaling = kwargs.get('sscal', False)

    object_shape = dataset['n']
    model = cluster.cobe_contrasting.COBEContrast(
        commonRank=kwargs['commonRank'],
        shapeObject=[object_shape[0], int(np.prod(object_shape[1:]))],
        maxitnum=100,
        epsilon=1e-5
    )
    model.loadParameters(kwargs['model_path'])

    # Work on a copy so the caller's array is never handed to the model.
    transformed = model._transform_fun(dataset['data'].copy())
    if apply_scaling:
        transformed = transformed - np.mean(transformed, axis=1, keepdims=True)
        transformed /= np.std(transformed, axis=1, keepdims=True, ddof=1)
    return transformed, dataset['labels']

# Evaluate every stored COBE model: transform both datasets with it, run the
# hierarchical clustering experiment on the result, then the k-means / GMM
# experiment on the already-transformed datasets returned by the first one.
result_ac, result_kg = [], []
affinity_names_old, linkage_old, dataset_names_old, n_clusters_old = None, None, None, None
random_states_old = None
for k_comr, commonRank in enumerate(commonRanks):
    print(f'\t COBE: crank={commonRank}')
    result_ac_rank, result_kg_rank = [], []
    for k_trial in range(Ntrials_cobe):
        is_first_run = (k_comr == 0) and (k_trial == 0)
        # Model files are located through this cell's own configuration
        # (model_dirname / model_filename_base), so changing those settings
        # actually changes which models are loaded.
        model_paths = {
            dataset_name: os.path.join(
                model_dirname,
                f'{model_filename_base}_dataset={dataset_name}_'
                f'crank={commonRank}_trial={k_trial}.npz'
            )
            for dataset_name in ('eth80', 'smni')
        }
        # Per-dataset preprocessing callables; they read commonRank and
        # model_paths when called, so they are only valid while this
        # iteration's values are current.
        preprocess = {
            'eth80': lambda x: preprocess_cobe(
                x,
                commonRank=commonRank,
                model_path=model_paths['eth80']
            ),
            'smni': lambda x: preprocess_cobe(
                x,
                commonRank=commonRank,
                model_path=model_paths['smni'],
                sscal=True
            )
        }
        result, affinity_names, linkage, dataset_names, n_clusters, df = hac_clustering_experiment(
            datasets,
            save_results_path=None,#save_results_path,
            preprocess=preprocess,
            verbose=True,
            return_datasets=True
        )
        # The dataset order must not change between runs.
        if not is_first_run:
            assert np.all([dataset_names[l] == dataset_names_old[l] for l in range(len(datasets))])
        dataset_names_old = dataset_names
        result_ac_rank.append(result)

        # NOTE(review): this rebinds the notebook-level `random_states`
        # (also used as COBE seeds by the model-fitting cell) to the seeds
        # returned by the k-means / GMM experiment.
        result, dataset_names, random_states, clust_alg_names, n_clusters = (
            kmeans_gmm_clustering_experiment(
                df,#datasets,
                save_results_path=None,#save_results_path,
                preprocess=None,#preprocess,
                Ntrials=Ntrials,
                np_random_seed=np_random_seed,
                verbose=True
            )
        )
        assert dataset_names == dataset_names_old
        # Every run must have used the same clustering seeds.
        if not is_first_run:
            assert np.all([random_states_old[l] == random_states[l] for l in range(Ntrials)])
        random_states_old = random_states
        result_kg_rank.append(result)
        # Checkpoint after every trial so partial results survive an interrupt.
        np.savez_compressed(
            'cobe_contrasted_clustering_temp',
            result_ac=result_ac,
            result_kg=result_kg,
            result_ac_rank=result_ac_rank,
            result_kg_rank=result_kg_rank
        )
    result_ac.append(result_ac_rank)
    result_kg.append(result_kg_rank)
result_ac, result_kg = np.array(result_ac), np.array(result_kg)
np.savez_compressed(
    'cobe_contrasted_clustering',
    result_ac=result_ac,
    result_kg=result_kg,
    random_states=random_states,
    clust_alg_names=clust_alg_names,
    Ntrials=Ntrials,
    Ntrials_cobe=Ntrials_cobe,
    np_random_seed=np_random_seed,
    affinity_names=affinity_names,
    linkage=linkage,
    dataset_names=dataset_names
)

	 COBE: crank=1
			 eth80
		 Affinity: l1
	 Linkage: complete
ARI=0.441 AMI=0.648 FMI=0.533 
	 Linkage: average
ARI=0.435 AMI=0.677 FMI=0.546 
		 Affinity: l2
	 Linkage: complete
ARI=0.521 AMI=0.706 FMI=0.599 
	 Linkage: average
ARI=0.436 AMI=0.659 FMI=0.538 
		 Affinity: cosine
	 Linkage: complete
ARI=0.383 AMI=0.587 FMI=0.495 
	 Linkage: average
ARI=0.299 AMI=0.551 FMI=0.461 
		 Affinity: canberra
	 Linkage: complete
ARI=0.497 AMI=0.639 FMI=0.562 
	 Linkage: average
ARI=0.514 AMI=0.711 FMI=0.595 
		 Affinity: correlation
	 Linkage: complete
ARI=0.383 AMI=0.587 FMI=0.495 
	 Linkage: average
ARI=0.299 AMI=0.551 FMI=0.461 
		 Affinity: rbf
	 Linkage: complete
ARI=0.016 AMI=0.032 FMI=0.327 
	 Linkage: average
ARI=0.016 AMI=0.032 FMI=0.327 
			 smni
		 Affinity: l1
	 Linkage: complete
ARI=-0.012 AMI=-0.007 FMI=0.610 
	 Linkage: average
ARI=0.013 AMI=0.009 FMI=0.729 
		 Affinity: l2
	 Linkage: complete
ARI=0.036 AMI=0.042 FMI=0.535 
	 Linkage: average
ARI=0.013 AMI=0.009 FMI=0.729 
		 Affi

In [14]:
# Convert the collected results to arrays and write the final archive
# (same file name and keys as the save at the end of the experiment cell).
result_ac = np.array(result_ac)
result_kg = np.array(result_kg)
final_payload = dict(
    result_ac=result_ac,
    result_kg=result_kg,
    random_states=random_states,
    clust_alg_names=clust_alg_names,
    Ntrials=Ntrials,
    Ntrials_cobe=Ntrials_cobe,
    np_random_seed=np_random_seed,
    affinity_names=affinity_names,
    linkage=linkage,
    dataset_names=dataset_names,
)
np.savez_compressed('cobe_contrasted_clustering', **final_payload)

In [25]:
# Summary statistics of `result` taken over its axis 2.
# NOTE(review): the indexing below expects `result` laid out as
# (dataset, rank, <reduced axis>, affinity, linkage, metric); presumably the
# reduced axis is the COBE trial -- confirm where `result` is built.
stat_min = np.min(result, axis=2)
stat_max = np.max(result, axis=2)
stat_mean = np.mean(result, axis=2)
stat_median = np.median(result, axis=2)

# Metric labels in the order of the last axis, and the statistics in the
# order they appear on each printed line: min/mean/median/max.
metric_names = ('ARI', 'AMI', 'FMI')
stats_in_print_order = (stat_min, stat_mean, stat_median, stat_max)

# The range starts at 1, so the first entry of dataset_names is not reported.
for i in range(1, len(dataset_names)):
    print(dataset_names[i])
    for k_affinity, current_affinity in enumerate(affinity_names):
        print(f'\t\t Affinity: {current_affinity}')
        for k_linkage, current_linkage in enumerate(linkage):
            print(f'\t Linkage: {current_linkage}')
            for k_comr, commonRank in enumerate(commonRanks):
                print(f'rank={commonRank}')
                for k_metric, metric_name in enumerate(metric_names):
                    position = (i, k_comr, k_affinity, k_linkage, k_metric)
                    values = '/'.join(
                        f'{stat[position]:.3f}' for stat in stats_in_print_order
                    )
                    print(f'{metric_name}={values}/')
            print('\n')
    

smni
		 Affinity: l1
	 Linkage: complete
rank=1
ARI=-0.048/0.033/0.023/0.198/
AMI=-0.006/0.042/0.045/0.117/
FMI=0.535/0.590/0.591/0.645/
rank=2
ARI=-0.013/0.064/0.067/0.189/
AMI=-0.014/0.045/0.038/0.133/
FMI=0.521/0.583/0.573/0.712/
rank=3
ARI=-0.026/0.065/0.073/0.188/
AMI=-0.007/0.054/0.051/0.139/
FMI=0.527/0.590/0.585/0.674/
rank=4
ARI=-0.034/0.049/0.015/0.207/
AMI=-0.007/0.049/0.033/0.180/
FMI=0.512/0.573/0.572/0.729/
rank=5
ARI=-0.035/0.029/0.011/0.125/
AMI=-0.014/0.028/0.019/0.096/
FMI=0.522/0.591/0.560/0.712/
rank=6
ARI=-0.029/0.034/0.017/0.159/
AMI=-0.011/0.042/0.041/0.147/
FMI=0.523/0.601/0.574/0.729/
rank=7
ARI=-0.037/-0.003/-0.008/0.066/
AMI=-0.014/0.006/0.002/0.053/
FMI=0.515/0.607/0.602/0.720/
rank=8
ARI=-0.040/0.018/-0.001/0.128/
AMI=-0.014/0.027/0.027/0.075/
FMI=0.518/0.595/0.570/0.729/
rank=9
ARI=-0.044/0.015/0.009/0.118/
AMI=-0.011/0.025/0.010/0.119/
FMI=0.514/0.593/0.574/0.704/
rank=10
ARI=-0.041/0.002/-0.002/0.087/
AMI=-0.014/0.018/0.011/0.080/
FMI=0.519/0.593/0.577/0