In [1]:
MAX_NUM_THREADS = 10

import ctypes

# Handle to the MKL runtime shared library; its thread-control entry points
# are called directly through ctypes.
mkl_rt = ctypes.CDLL('libmkl_rt.so')
mkl_get_max_threads = mkl_rt.mkl_get_max_threads


def mkl_set_num_threads(cores):
    """Request that MKL use `cores` threads.

    The value is wrapped in a C int and handed to the library by reference.
    """
    requested = ctypes.c_int(cores)
    mkl_rt.mkl_set_num_threads(ctypes.byref(requested))


mkl_set_num_threads(MAX_NUM_THREADS)
print(f'Number of threads was limited to {mkl_get_max_threads()}.')

Number of threads was limited to 10.


In [2]:
import os
import time
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

import numpy_tools
import loaders.eth80 as eth80
import loaders.smni_eeg as smni_eeg

from experiment_tools import temporal_sscal
from experiment_tools import kmeans_gmm_clustering_experiment
from experiment_tools import hac_clustering_experiment

import cluster.gtcd_contrasting

#from cluster.gtcd_contrasting import GLROContrast
#from cluster.gtcd_contrasting import GTLDContrast

In [3]:
# Dataset locations. The defaults are the original machine-specific paths;
# set ETH80_DATA_DIR / SMNI_EEG_DATA_DIR in the environment to run the
# notebook on another machine without editing this cell.
data_eth80_dirname = os.environ.get('ETH80_DATA_DIR', '/home/hariyuki/data/eth80/')
data_smni_dirname = os.environ.get('SMNI_EEG_DATA_DIR', '/home/hariyuki/data/eeg_smni/')

In [4]:
# Load ETH80 and bundle it, with its metadata, into `eth80_dataset`.
image_shape = 32
Nclasses, Nobjects = 8, 10
Npixels = int(round(image_shape**2))

data_eth80_path = os.path.join(data_eth80_dirname, 'eth80-cropped-close128')
data, labels, classes = eth80.eth80_dataset(data_eth80_path, image_shape)

# Inverse of `classes`: maps each value back to its key.
classes_reverse_dict = {value: key for key, value in classes.items()}

# The loader returns a 5-D array; the two middle axes are merged into one by
# the reshape (presumably the image rows/columns -> pixels; confirm against
# numpy_tools.reshape_np), then axes 1 and 2 are swapped.
Nsamples, Nangles, _, _, Ncolors = data.shape
data = numpy_tools.reshape_np(
    data, [Nangles, -1, Ncolors], order='F', use_batch=True
)
permutation = [0, 2, 1, 3]
data = np.transpose(data, permutation)

# Per-sample shape after the transpose.
n = [Npixels, Nangles, Ncolors]

eth80_dataset = dict(
    data=data,
    n=n,
    labels=labels,
    classes=classes,
    classes_reverse_dict=classes_reverse_dict,
)

In [5]:
# load SMNI EEG
data_smni_path = os.path.join(data_smni_dirname, 'smni_eeg_processed.npz')
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open;
# the context manager closes it as soon as both arrays have been read.
with np.load(data_smni_path) as df:
    data, labels = df['data'], df['labels']

# The stored array is 4-D; swap axes 1 and 2 so the per-sample shape becomes
# [Ntime, Nchannels, Nconditions].
Nsubjects, Nchannels, Ntime, Nconditions = data.shape
permutation = [0, 2, 1, 3]
data = np.transpose(data, permutation)
n = [Ntime, Nchannels, Nconditions]

smni_dataset = {
    'data': data,
    'n': n,
    'labels': labels
}

In [6]:
# Registry of the datasets used by the experiments, keyed by short name.
datasets = dict(eth80=eth80_dataset, smni=smni_dataset)
dataset_names = list(datasets)

## GLRO contrasting

In [9]:
# Rank grids swept by the GLRO experiment: 1..5 for both parts.
commonRanks = np.arange(1, 6)
individualRanks = np.arange(1, 6)

In [None]:
# Output directory for clustering results (created if missing).
save_results_dirname = '../results/clustering/'
os.makedirs(save_results_dirname, exist_ok=True)

# Trial count and seed passed to kmeans_gmm_clustering_experiment.
Ntrials, np_random_seed = 20, 720

# One GLRO random state per contrasting trial, drawn reproducibly from a
# fixed seed of the global NumPy generator.
Ntrials_glro, np_random_state_glro = 10, 800
np.random.seed(np_random_state_glro)
random_states_glro = np.random.randint(1, 1000, size=Ntrials_glro)
   
def preprocess_glro(dataset, **kwargs):
    """Contrast a dataset with GLROContrast and return (features, labels).

    Required keyword arguments (checked with ``assert``): ``commonRank``,
    ``individualRank`` and ``random_state``. Optional ``sscal`` (default
    False) standardises the transformed array along axis 1 (zero mean, unit
    sample standard deviation).

    ``dataset`` must provide the keys 'data', 'n' and 'labels'; a copy of
    'data' is what gets passed to ``fit_transform``.
    """
    for required in ('commonRank', 'individualRank', 'random_state'):
        assert required in kwargs
    standardize = kwargs.get('sscal', False)

    model = cluster.gtcd_contrasting.GLROContrast(
        individualRank=kwargs['individualRank'],
        commonRank=kwargs['commonRank'],
        shapeObject=dataset['n'],
        sourceModes=[0],
        method='als',
        nShortModes=None,
        constraintMethod='projected',
        fullModesConstraint=None,
        maxitnum=10,
        epsilon=1e-5,
        random_state=kwargs['random_state']
    )
    features = model.fit_transform(dataset['data'].copy())
    if standardize:
        features = features - np.mean(features, axis=1, keepdims=True)
        features /= np.std(features, axis=1, keepdims=True, ddof=1)
    return features, dataset['labels']

# Sweep every (commonRank, individualRank) pair. Each pair is contrasted
# Ntrials_glro times; every contrasted result is clustered with the HAC
# experiment and with the k-means/GMM experiment. Results are nested as
# [common rank][individual rank][GLRO trial].
result_ac, result_kg = [], []
affinity_names_old, linkage_old, dataset_names_old, n_clusters_old = None, None, None, None
for k_comr, commonRank in enumerate(commonRanks):
    result_ac_crank, result_kg_crank = [], []
    for k_indr, individualRank in enumerate(individualRanks):
        print(f'\t GLRO: crank={commonRank}, irank={individualRank}')
        result_ac_crank_irank, result_kg_crank_irank = [], []
        # The cross-run consistency asserts are skipped for the whole first
        # rank pair (the "previous run" values do not exist at its start).
        is_first_pair = (k_comr == 0) and (k_indr == 0)
        for k_trial in range(Ntrials_glro):
            # Settings shared by both datasets for this trial.
            glro_settings = dict(
                commonRank=commonRank,
                individualRank=individualRank,
                random_state=random_states_glro[k_trial],
            )
            preprocess = {
                'eth80': lambda x: preprocess_glro(x, **glro_settings),
                'smni': lambda x: preprocess_glro(x, sscal=False, **glro_settings),
            }

            # Hierarchical agglomerative clustering; with return_datasets=True
            # the experiment also hands back `df` (presumably the preprocessed
            # datasets -- confirm in experiment_tools).
            result, affinity_names, linkage, dataset_names, n_clusters, df = hac_clustering_experiment(
                datasets,
                save_results_path=None,
                preprocess=preprocess,
                verbose=True,
                return_datasets=True
            )
            if not is_first_pair:
                assert np.all([dataset_names[l] == dataset_names_old[l] for l in range(len(datasets))])
            dataset_names_old = dataset_names
            result_ac_crank_irank.append(result)

            # k-means / GMM on `df` as returned above, hence preprocess=None.
            result, dataset_names, random_states, clust_alg_names, n_clusters = (
                kmeans_gmm_clustering_experiment(
                    df,
                    save_results_path=None,
                    preprocess=None,
                    Ntrials=Ntrials,
                    np_random_seed=np_random_seed,
                    verbose=True
                )
            )
            assert dataset_names == dataset_names_old
            if not is_first_pair:
                assert np.all([random_states_old[l] == random_states[l] for l in range(Ntrials)])
            random_states_old = random_states
            result_kg_crank_irank.append(result)

            # Checkpoint after every trial: completed rank levels plus the
            # partially filled lists of the levels still in progress.
            np.savez_compressed(
                'glro_contrasted_clustering_temporal',
                result_ac=result_ac,
                result_kg=result_kg,
                result_ac_crank=result_ac_crank,
                result_kg_crank=result_kg_crank,
                result_ac_crank_irank=result_ac_crank_irank,
                result_kg_crank_irank=result_kg_crank_irank,
            )

        result_ac_crank.append(result_ac_crank_irank)
        result_kg_crank.append(result_kg_crank_irank)
    result_ac.append(result_ac_crank)
    result_kg.append(result_kg_crank)

# Final archive: stacked results together with the settings that produced them.
result_ac, result_kg = np.array(result_ac), np.array(result_kg)
np.savez_compressed(
    'glro_contrasted_clustering',
    result_ac=result_ac,
    result_kg=result_kg,
    random_states=random_states,
    clust_alg_names=clust_alg_names,
    Ntrials=Ntrials,
    np_random_seed=np_random_seed,
    affinity_names=affinity_names,
    linkage=linkage,
    dataset_names=dataset_names,
    random_states_glro=random_states_glro,
    np_random_state_glro=np_random_state_glro
)

	 GLRO: crank=1, irank=1
			 eth80
		 Affinity: l1
	 Linkage: complete
ARI=0.431 AMI=0.595 FMI=0.513 
	 Linkage: average
ARI=0.356 AMI=0.591 FMI=0.484 
		 Affinity: l2
	 Linkage: complete
ARI=0.354 AMI=0.598 FMI=0.479 
	 Linkage: average
ARI=0.373 AMI=0.607 FMI=0.499 
		 Affinity: cosine
	 Linkage: complete
ARI=0.401 AMI=0.564 FMI=0.485 
	 Linkage: average
ARI=0.399 AMI=0.646 FMI=0.521 
		 Affinity: canberra
	 Linkage: complete
ARI=0.213 AMI=0.420 FMI=0.366 
	 Linkage: average
ARI=0.336 AMI=0.573 FMI=0.468 
		 Affinity: correlation
	 Linkage: complete
ARI=0.401 AMI=0.564 FMI=0.485 
	 Linkage: average
ARI=0.399 AMI=0.646 FMI=0.521 
		 Affinity: rbf
	 Linkage: complete
ARI=0.354 AMI=0.598 FMI=0.479 
	 Linkage: average
ARI=0.373 AMI=0.607 FMI=0.499 
			 smni
		 Affinity: l1
	 Linkage: complete
ARI=0.013 AMI=0.009 FMI=0.729 
	 Linkage: average
ARI=0.013 AMI=0.009 FMI=0.729 
		 Affinity: l2
	 Linkage: complete
ARI=0.013 AMI=0.009 FMI=0.729 
	 Linkage: average
ARI=0.013 AMI=0.009 FMI=0.729 


In [None]:
 smni
	 kmeans min/mean/median/max
ARI=0.013/0.013/0.013/0.013
AMI=0.009/0.009/0.009/0.009
FMI=0.729/0.729/0.729/0.729
	 GMM min/mean/median/max
ARI=0.005/0.009/0.009/0.013
AMI=-0.014/-0.002/-0.002/0.009
FMI=0.720/0.725/0.725/0.729

In [22]:
dataset_num = 0

# For each statistic over the GLRO trials (axis 2), keep only the selected
# dataset; the best value over all remaining settings is printed per metric.
trial_stats = (np.min, np.mean, np.median, np.max)
ac_per_stat = [stat(result_ac, axis=2)[:, :, dataset_num] for stat in trial_stats]
# The k-means/GMM results are first averaged over their second-to-last axis.
kg_averaged = result_kg.mean(axis=-2)
kg_per_stat = [stat(kg_averaged, axis=2)[:, :, dataset_num] for stat in trial_stats]

for k in range(3):
    ac_text = '/'.join(f'{values[..., k].max():.3f}' for values in ac_per_stat)
    kg_text = '/'.join(f'{values[..., k].max():.3f}' for values in kg_per_stat)
    print(f'{ac_text}\t\t{kg_text}\t\t')

0.407/0.472/0.520/0.687		0.442/0.473/0.482/0.519		
0.637/0.693/0.730/0.810		0.628/0.662/0.657/0.712		
0.526/0.580/0.609/0.731		0.530/0.554/0.561/0.597		


In [None]:
from td.utils import reshape, prodTenMat

import group_tcd
#import gtcd_jit as gtcd
import gtcd

# Iteration budget, tolerance and verbosity for the decomposition run in this cell.
maxitnum, epsilon, verbose = 100, 1e-8, True

# Inner-iteration cap and stopping tolerances forwarded to gtcd.tcd by the
# alsM / doglegM wrappers.
_maxInnerIt = 15
_tolRes = _tolGrad = _tolSwamp = 1e-8

def alsM(a, x0, cdN, ldN, tdN, maxitnum, constraints, verbose):
    """Call gtcd.tcd on `a` with method='als' and return its result unchanged.

    `cdN`, `ldN` and `tdN` are forwarded as the canonical, LRO and Tucker
    dictionaries; tolerances come from the module-level `_tol*` constants.
    """
    solver_options = dict(
        x0=x0,
        canonical_dict=cdN,
        lro_dict=ldN,
        tucker_dict=tdN,
        maxitnum=maxitnum,
        tolRes=_tolRes,
        tolGrad=_tolGrad,
        tolSwamp=_tolSwamp,
        method='als',
        verbose=verbose,
        regTGD=None,
        regPGD=None,
        doSA=0,
        constraints=constraints,
    )
    return gtcd.tcd(a, **solver_options)

def doglegM(a, x0, cdN, ldN, tdN, maxitnum, constraints, verbose):
    """Call gtcd.tcd on `a` with method='tr' and trStep='dogleg'.

    `cdN`, `ldN` and `tdN` are forwarded as the canonical, LRO and Tucker
    dictionaries; the inner-iteration cap and tolerances come from the
    module-level `_maxInnerIt` / `_tol*` constants. Returns whatever
    gtcd.tcd returns.
    """
    solver_options = dict(
        x0=x0,
        canonical_dict=cdN,
        lro_dict=ldN,
        tucker_dict=tdN,
        maxitnum=maxitnum,
        maxInnerIt=_maxInnerIt,
        tolRes=_tolRes,
        tolGrad=_tolGrad,
        tolSwamp=_tolSwamp,
        method='tr',
        verbose=verbose,
        doSA=0,
        constraints=constraints,
        trStep='dogleg',
        trDelta0=1.2,
        trEta=0.23,
    )
    return gtcd.tcd(a, **solver_options)
        
import copy  # needed for the deep copy of the fitted LRO dictionary below

groupConstraintMethod = 'projected'


source_mode = [1]
nShortModes = 1
fullModesConstraint = None

individualRank = 3
commonRank = 4
random_state = 576

# Take the ETH80 tensor and its per-sample shape from `eth80_dataset`
# explicitly: the bare globals `data` and `n` are rebound by the SMNI loading
# cell (and `n` again further down), while this example -- and the file it
# writes -- is about ETH80. Reading from the dict also keeps the cell
# re-runnable.
shape = [-1] + list(eth80_dataset['n'])
T = reshape(eth80_dataset['data'], shape)
# Move the leading (sample) axis to the end.
transposition = list(range(1, len(shape))) + [0]
T = np.transpose(T, transposition)
T = T.astype(np.float32)
n = T.shape
groupConstraint = group_tcd.group_constraint(
    n, source_mode, groupConstraintMethod
)
# Fit on the unit-norm tensor.
normT = np.linalg.norm(T)
T /= normT
cdN = None
P = len(T.shape)-nShortModes
# One term of rank `individualRank` per entry of the last axis, followed by a
# single term of rank `commonRank`.
ldN = {
    'L': [individualRank]*n[-1] + [commonRank],
    'P': P,
    'fullModesConstraint': fullModesConstraint
}
tdN = None
x0 = None
np.random.seed(random_state)
cdN, ldN, tdN, info = doglegM(
    T, x0, cdN, ldN, tdN, maxitnum, groupConstraint, verbose
)

# Split the fitted factors: `ldN` keeps everything except the trailing
# (common) term, `group_part` keeps only that trailing term.
indCommon = commonRank
fmc = fullModesConstraint
contrasted = [None]*len(n)
group_part = copy.deepcopy(ldN)
for k in range(len(n)):
    if k < P:
        if (fmc is not None) and (fmc[k] is not None):
            contrasted[k] = ldN['B'][k][2][:, -indCommon:].copy()
            # Mirrors the unconstrained branch; this path is not exercised
            # while fullModesConstraint is None.
            group_part['B'][k][2] = group_part['B'][k][2][:, -indCommon:].copy()
            if ldN['B'][k][2].shape[1] == indCommon:
                ldN['B'][k][2] = None
            else:
                ldN['B'][k][2] = ldN['B'][k][2][:, :-indCommon].copy()
        else:
            contrasted[k] = ldN['B'][k][:, -indCommon:].copy()
            ldN['B'][k] = ldN['B'][k][:, :-indCommon].copy()
            # Slice (with the trailing colon) so all `indCommon` columns are
            # kept as a 2-D factor, matching group_part['L'] below.
            group_part['B'][k] = group_part['B'][k][:, -indCommon:]
    else:
        contrasted[k] = ldN['B'][k][:, -1:].copy()
        ldN['B'][k] = ldN['B'][k][:, :-1].copy()
        group_part['B'][k] = group_part['B'][k][:, -1:]
ldN['L'] = ldN['L'][:-1]
group_part['L'] = group_part['L'][-1:]
del ldN['E'], group_part['E']

# Rebuild both parts and move the last axis back to the front.
contrastedT = gtcd.recover(n, lro_dict=ldN)
permutation = np.roll(np.arange(contrastedT.ndim), 1)
contrastedT = np.transpose(contrastedT, permutation)
groupT = gtcd.recover(n, lro_dict=group_part)
groupT = np.transpose(groupT, permutation)

np.savez_compressed(
    'example_glro_eth80.npz',
    groupT=groupT,
    contrastedT=contrastedT,
    individualRank=individualRank,
    commonRank=commonRank,
    random_state=random_state
)

In [None]:
from PIL import Image

# Indices handed to get_image: `nobj` selects along the first tensor axis,
# `nframe` along the third.
nframe, nobj = 25, 71

def get_image(tensor, nobj, nframe, image_shape=(32, 32, 3)):
    """Extract one slice of `tensor` as a displayable uint8 image.

    Takes ``tensor[nobj, :, nframe, :]``, stretches its values linearly to
    [0, 255], reshapes it with the project's ``reshape`` helper to
    `image_shape` and casts to ``np.uint8``.

    Parameters
    ----------
    tensor : 4-D array
    nobj, nframe : int
        Indices along the first and third axes.
    image_shape : sequence of int, optional
        Target shape; defaults to the previously hard-coded (32, 32, 3).

    A constant slice (zero value range) yields an all-zero image instead of
    dividing 0 by 0.
    """
    image = tensor[nobj, :, nframe, :]
    image = image - image.min()
    peak = image.max()
    # Guard the normalisation: with a zero range there is nothing to stretch.
    image = image / (peak if peak > 0 else 1.0)
    image = reshape(image, list(image_shape))
    image *= 255.
    return image.astype(np.uint8)

# Show and save the same object/frame from the original ETH80 tensor and
# from the two recovered parts. The ETH80 tensor is taken from
# `eth80_dataset` because the bare global `data` is rebound to the SMNI
# tensor by the SMNI loading cell.
for tensor, filename in (
    (eth80_dataset['data'], 'original.jpg'),
    (groupT, 'glro_common.jpg'),
    (contrastedT, 'glro_individual.jpg'),
):
    image = get_image(tensor, nobj, nframe)
    plt.imshow(image)
    Image.fromarray(image).save(filename)
    plt.show()