In [1]:
# Upper bound on the number of MKL threads requested by this notebook.
MAX_NUM_THREADS = 8

import ctypes

# Handle to the MKL runtime shared library; raises OSError if it cannot be loaded.
mkl_rt = ctypes.CDLL('libmkl_rt.so')
mkl_get_max_threads = mkl_rt.mkl_get_max_threads


def mkl_set_num_threads(cores):
    """Ask MKL to use at most ``cores`` threads.

    The value is wrapped in a C ``int`` and handed to the library by
    reference (``ctypes.byref``).
    """
    requested = ctypes.c_int(cores)
    mkl_rt.mkl_set_num_threads(ctypes.byref(requested))


mkl_set_num_threads(MAX_NUM_THREADS)
# Report what MKL itself says, not the value we asked for.
print(f'Number of threads was limited to {mkl_get_max_threads()}.')

Number of threads was limited to 8.


In [2]:
import os
import time
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

import numpy_tools
import loaders.eth80 as eth80
import loaders.smni_eeg as smni_eeg

from experiment_tools import temporal_sscal
from experiment_tools import kmeans_gmm_clustering_experiment
from experiment_tools import hac_clustering_experiment

import cluster.gtcd_contrasting


In [3]:
# Dataset root directories. The defaults are the original author's local
# paths; set ETH80_DATA_DIR / SMNI_DATA_DIR in the environment to run the
# notebook on another machine without editing this cell.
data_eth80_dirname = os.environ.get('ETH80_DATA_DIR', '/home/hariyuki/data/eth80/')
data_smni_dirname = os.environ.get('SMNI_DATA_DIR', '/home/hariyuki/data/eeg_smni/')

In [4]:
# Load ETH80 and bring it into the layout recorded in `n`.
data_eth80_path = os.path.join(data_eth80_dirname, 'eth80-cropped-close128')

image_shape = 32
Nclasses, Nobjects = 8, 10
Npixels = int(round(image_shape**2))

data, labels, classes = eth80.eth80_dataset(data_eth80_path, image_shape)
# Inverse lookup of `classes` (values become keys).
classes_reverse_dict = {value: key for key, value in classes.items()}
Nsamples, Nangles, _, _, Ncolors = data.shape

# Merge the two middle axes of every sample, then swap axes 1 and 2 so the
# non-batch axes are ordered as listed in `n` below.
permutation = [0, 2, 1, 3]
data = np.transpose(
    numpy_tools.reshape_np(data, [Nangles, -1, Ncolors], order='F', use_batch=True),
    permutation
)

n = [Npixels, Nangles, Ncolors]

eth80_dataset = dict(
    data=data,
    n=n,
    labels=labels,
    classes=classes,
    classes_reverse_dict=classes_reverse_dict
)

In [5]:
# load SMNI EEG
# NOTE: this cell rebinds the notebook-level names `data`, `labels`, `n` and
# `permutation` that the ETH80 cell also assigns; after it runs, ETH80 values
# are reachable only through `eth80_dataset[...]`.
data_smni_path = os.path.join(data_smni_dirname, 'smni_eeg_processed.npz')
# np.load on an .npz archive returns an NpzFile that keeps the file open;
# the context manager closes it once both arrays have been read into memory.
with np.load(data_smni_path) as df:
    data, labels = df['data'], df['labels']

Nsubjects, Nchannels, Ntime, Nconditions = data.shape
# Swap axes 1 and 2 so the non-batch axes are ordered as listed in `n`.
permutation = [0, 2, 1, 3]
data = np.transpose(data, permutation)
n = [Ntime, Nchannels, Nconditions]

smni_dataset = {
    'data': data,
    'n': n,
    'labels': labels
}

In [6]:
# Registry of the loaded datasets, keyed by the short name used throughout
# the experiments.
datasets = dict(eth80=eth80_dataset, smni=smni_dataset)
dataset_names = list(datasets)

## GTLD contrasting

In [7]:
# Rank grids swept by the GTLD experiment: 1, 2, ..., 5 for both the common
# and the individual part.
commonRanks = np.arange(1, 6)
individualRanks = np.arange(1, 6)

In [None]:
# Directory for clustering results; created if it does not exist yet.
save_results_dirname = '../results/clustering/'
os.makedirs(save_results_dirname, exist_ok=True)

# Ntrials is forwarded to the k-means/GMM experiment; Ntrials_gtld is the
# number of GTLD runs (one random_state each) per rank combination.
Ntrials, Ntrials_gtld = 20, 10
np_random_seed = 720

# Seed NumPy's global generator, then draw one random_state per GTLD run.
np_random_state_gtld = 800
np.random.seed(np_random_state_gtld)
random_states_gtld = np.random.randint(1, 1000, size=Ntrials_gtld)
   
def preprocess_gtld(dataset, **kwargs):
    """Transform one dataset with ``GTLDContrast`` and return it with its labels.

    Parameters
    ----------
    dataset : dict
        Must provide the keys ``'data'``, ``'n'`` and ``'labels'``.
    **kwargs
        Required: ``commonRank``, ``individualRank`` and ``random_state``,
        all forwarded to ``GTLDContrast``. Optional: ``sscal`` (default
        ``False``); when truthy, the transformed array is centred and divided
        by its sample standard deviation (``ddof=1``) along axis 1.

    Returns
    -------
    tuple
        ``(T, labels)``: the result of ``fit_transform`` (optionally
        standardised) and ``dataset['labels']`` unchanged.

    Raises
    ------
    AssertionError
        If one of the required keywords is missing.
    """
    for required_key in ('commonRank', 'individualRank', 'random_state'):
        assert required_key in kwargs
    standardize = kwargs.get('sscal', False)

    contrasting = cluster.gtcd_contrasting.GTLDContrast(
        individualRank=kwargs['individualRank'],
        commonRank=kwargs['commonRank'],
        shapeObject=dataset['n'],
        sourceModes=[0],
        method='als',
        nShortModes=None,
        constraintMethod='projected',
        fullModesConstraint=None,
        maxitnum=10,
        epsilon=1e-5,
        random_state=kwargs['random_state']
    )
    # Hand over a copy so the shared dataset array stays untouched even if
    # fit_transform modifies its input.
    T = contrasting.fit_transform(dataset['data'].copy())
    if standardize:
        centred = T - np.mean(T, axis=1, keepdims=True)
        centred /= np.std(centred, axis=1, keepdims=True, ddof=1)
        T = centred
    return T, dataset['labels']

# Sweep every (commonRank, individualRank) pair; for each pair run
# Ntrials_gtld GTLD preprocessings (one random_state each) followed by the
# hierarchical (HAC) and k-means/GMM clustering experiments.
# Results are nested as [commonRank][individualRank][gtld trial]:
#   result_ac* - results of hac_clustering_experiment
#   result_kg* - results of kmeans_gmm_clustering_experiment
result_ac, result_kg = [], []
# Only dataset_names_old is used below (and random_states_old, which is first
# assigned inside the loop); the other *_old names are never read in this cell.
affinity_names_old, linkage_old, dataset_names_old, n_clusters_old = None, None, None, None
for k_comr in range(len(commonRanks)):
    commonRank = commonRanks[k_comr]
    result_ac_crank, result_kg_crank = [], []
    for k_indr in range(len(individualRanks)):
        individualRank = individualRanks[k_indr]
        print(f'\t GTLD: crank={commonRank}, irank={individualRank}')
        result_ac_crank_irank, result_kg_crank_irank = [], []
        for k_trial in range(Ntrials_gtld):
            # NOTE(review): the lambdas look up commonRank, individualRank and
            # k_trial when they are called, not when they are created. This is
            # correct only if hac_clustering_experiment invokes them before the
            # loop variables change - TODO confirm.
            preprocess = {
                'eth80': lambda x: preprocess_gtld(
                    x,
                    commonRank=commonRank,
                    individualRank=individualRank,
                    random_state=random_states_gtld[k_trial]
                ),
                'smni': lambda x: preprocess_gtld(
                    x,
                    commonRank=commonRank,
                    individualRank=individualRank,
                    random_state=random_states_gtld[k_trial],
                    sscal=False
                )
            }
            # With return_datasets=True a sixth value `df` is returned; it is
            # fed to the k-means/GMM experiment below in place of `datasets`
            # (presumably the preprocessed datasets - verify in experiment_tools).
            # This rebinds the notebook-level names `dataset_names` and `df`.
            result, affinity_names, linkage, dataset_names, n_clusters, df = hac_clustering_experiment(
                datasets,
                save_results_path=None,#save_results_path,
                preprocess=preprocess,
                verbose=True,
                return_datasets=True
            )
            # Consistency check against the previous run; skipped for every
            # trial of the first rank pair.
            if not ((k_comr == 0) and (k_indr == 0)):
                assert np.all([dataset_names[l] == dataset_names_old[l] for l in range(len(datasets))])
            dataset_names_old = dataset_names
            #result = np.empty([3, 4, 5])
            result_ac_crank_irank.append(result)

            # preprocess=None: the input `df` comes from the HAC call above,
            # so no preprocessing is requested here.
            result, dataset_names, random_states, clust_alg_names, n_clusters = (
                kmeans_gmm_clustering_experiment(
                    df,#datasets,
                    save_results_path=None,#save_results_path,
                    preprocess=None,#preprocess,
                    Ntrials=Ntrials,
                    np_random_seed=np_random_seed,
                    verbose=True
                )
            )
            assert dataset_names == dataset_names_old
            # random_states_old is first bound two lines below; the guard keeps
            # this check from running before that has happened.
            if not ((k_comr == 0) and (k_indr == 0)):
                assert np.all([random_states_old[l] == random_states[l] for l in range(Ntrials)])
            random_states_old = random_states
            #result = np.empty([8, 5])
            result_kg_crank_irank.append(result)
            
            # Checkpoint after every GTLD trial: all three nesting levels are
            # written so partial progress can be inspected. The file goes to
            # the current working directory (np.savez_compressed appends
            # '.npz'), not to save_results_dirname.
            np.savez_compressed(
                'gtld_contrasted_clustering_temporal',
                result_ac=result_ac,
                result_kg=result_kg,
                result_ac_crank=result_ac_crank,
                result_kg_crank=result_kg_crank,
                result_ac_crank_irank=result_ac_crank_irank,
                result_kg_crank_irank=result_kg_crank_irank,
            )
            
            
        result_ac_crank.append(result_ac_crank_irank)
        result_kg_crank.append(result_kg_crank_irank)
    result_ac.append(result_ac_crank)
    result_kg.append(result_kg_crank)
# Final archive: stacked results plus the settings and seeds of the sweep.
# The metadata values (random_states, clust_alg_names, affinity_names,
# linkage, dataset_names) are those returned by the last loop iteration.
result_ac, result_kg = np.array(result_ac), np.array(result_kg)
np.savez_compressed(
    'gtld_contrasted_clustering',
    result_ac=result_ac,
    result_kg=result_kg,
    random_states=random_states,
    clust_alg_names=clust_alg_names,
    Ntrials=Ntrials,
    np_random_seed=np_random_seed,
    affinity_names=affinity_names,
    linkage=linkage,
    dataset_names=dataset_names,
    random_states_gtld=random_states_gtld,
    np_random_state_gtld=np_random_state_gtld
)

	 GTLD: crank=1, irank=1
			 eth80
		 Affinity: l1
	 Linkage: complete
ARI=0.397 AMI=0.676 FMI=0.540 
	 Linkage: average
ARI=0.414 AMI=0.703 FMI=0.555 
		 Affinity: l2
	 Linkage: complete
ARI=0.379 AMI=0.661 FMI=0.521 
	 Linkage: average
ARI=0.414 AMI=0.703 FMI=0.555 
		 Affinity: cosine
	 Linkage: complete
ARI=0.410 AMI=0.626 FMI=0.504 
	 Linkage: average
ARI=0.367 AMI=0.674 FMI=0.520 
		 Affinity: canberra
	 Linkage: complete
ARI=0.395 AMI=0.672 FMI=0.539 
	 Linkage: average
ARI=0.414 AMI=0.703 FMI=0.555 
		 Affinity: correlation
	 Linkage: complete
ARI=0.371 AMI=0.678 FMI=0.525 
	 Linkage: average
ARI=0.345 AMI=0.638 FMI=0.500 
		 Affinity: rbf
	 Linkage: complete
ARI=0.379 AMI=0.661 FMI=0.521 
	 Linkage: average
ARI=0.414 AMI=0.703 FMI=0.555 
			 smni
		 Affinity: l1
	 Linkage: complete
ARI=0.013 AMI=0.009 FMI=0.729 
	 Linkage: average
ARI=0.013 AMI=0.009 FMI=0.729 
		 Affinity: l2
	 Linkage: complete
ARI=0.013 AMI=0.009 FMI=0.729 
	 Linkage: average
ARI=0.013 AMI=0.009 FMI=0.729 


In [None]:
from td.utils import reshape, prodTenMat

import group_tcd
#import gtcd_jit as gtcd
import gtcd

# Iteration budget and verbosity handed to the solver by the example cell.
maxitnum = 200
epsilon = 1e-8
verbose = True

# Solver tolerances shared by every alsM call.
_maxInnerIt = 15
_tolRes = _tolGrad = _tolSwamp = 1e-8


def alsM(a, x0, cdN, ldN, tdN, maxitnum, constraints, verbose):
    """Run ``gtcd.tcd`` on ``a`` with ``method='als'`` and the module tolerances.

    ``cdN``, ``ldN`` and ``tdN`` are forwarded as ``canonical_dict``,
    ``lro_dict`` and ``tucker_dict``; ``regTGD``/``regPGD`` are disabled and
    ``doSA`` is 0. Returns whatever ``gtcd.tcd`` returns.
    """
    solver_options = dict(
        x0=x0,
        canonical_dict=cdN,
        lro_dict=ldN,
        tucker_dict=tdN,
        maxitnum=maxitnum,
        tolRes=_tolRes,
        tolGrad=_tolGrad,
        tolSwamp=_tolSwamp,
        method='als',
        verbose=verbose,
        regTGD=None,
        regPGD=None,
        doSA=0,
        constraints=constraints,
    )
    return gtcd.tcd(a, **solver_options)
        
# Example GTLD decomposition of ETH80: split the tensor into a common (group)
# part `groupT` and an individual (contrasted) part `contrastedT`.
groupConstraintMethod = 'projected'

source_mode = [1]
nShortModes = 1
fullModesConstraint = None
modeSizeFirstPriority = True

individualRank = 3
commonRank = 5
random_state = 576

# Read the ETH80 tensor explicitly. The notebook-level `data` and `n` are
# rebound by the SMNI loading cell, so relying on them here would silently
# decompose the EEG data under Restart & Run All.
eth80_data = eth80_dataset['data']
eth80_n = eth80_dataset['n']

# Move the sample (group) axis to the end: (samples, *n) -> (*n, samples).
shape = [-1] + list(eth80_n)
T = reshape(eth80_data, shape)
transposition = list(range(1, len(eth80_n)+1)) + [0]
T = np.transpose(T, transposition)
# astype makes a copy, so the in-place normalisation below cannot touch the
# array stored in eth80_dataset.
T = T.astype(np.float32)
# Kept under its own name so `n` is not clobbered and the cell can be re-run.
shapeT = T.shape
groupConstraint = group_tcd.group_constraint(
    shapeT, source_mode, groupConstraintMethod
)
normT = np.linalg.norm(T)
T /= normT
cdN = None
P = len(T.shape)-nShortModes

# Individual part: one block of rank `individualRank` per group member.
ldN = {
    'L': [individualRank]*shapeT[-1],
    'P': P,
    'fullModesConstraint': fullModesConstraint
}
# Common part: Tucker ranks, optionally capped by the corresponding mode size.
r = np.zeros([1, len(T.shape)])
r[0, :-1] = commonRank
if modeSizeFirstPriority:
    r[0, :-1] = np.minimum(r[0, :-1], shapeT[:-1])
# last mode - group axis
r[:, -1] = shapeT[-1]
tdN = {
    'r': r.astype('i')
}
x0 = None
np.random.seed(random_state)
cdN, ldN, tdN, info = alsM(
    T, x0, cdN, ldN, tdN, maxitnum, groupConstraint, verbose
)

# Drop the 'E' entry before recovering the individual part.
del ldN['E']
contrastedT = gtcd.recover(shapeT, lro_dict=ldN)
# Rotate the group axis back to the front: (*n, samples) -> (samples, *n).
permutation = np.roll(np.arange(T.ndim), 1)
contrastedT = np.transpose(contrastedT, permutation)
groupT = gtcd.recover(shapeT, tucker_dict=tdN)
groupT = np.transpose(groupT, permutation)

In [None]:
# Persist the example decomposition together with the settings that produced it.
example_payload = dict(
    groupT=groupT,
    contrastedT=contrastedT,
    individualRank=individualRank,
    commonRank=commonRank,
    random_state=random_state
)
np.savez_compressed('example_gtld_eth80.npz', **example_payload)

In [None]:
from PIL import Image

# Index along axis 2 (nframe) and axis 0 (nobj) of the frame to render.
nframe = 25
nobj = 71

def get_image(tensor, nobj, nframe, image_shape=(32, 32, 3)):
    """Extract one frame from ``tensor`` and rescale it to an 8-bit image.

    Parameters
    ----------
    tensor : array
        Indexed as ``tensor[nobj, :, nframe, :]``; the selected slice must
        hold ``prod(image_shape)`` values.
    nobj, nframe : int
        Indices along axis 0 and axis 2 of ``tensor``.
    image_shape : sequence of int, optional
        Target shape passed to ``reshape``; defaults to ``(32, 32, 3)``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``image_shape`` with values min-max scaled
        to ``[0, 255]``.
    """
    image = tensor[nobj, :, nframe, :]
    image = image - image.min()
    peak = image.max()
    # A constant frame has peak == 0; dividing by it would produce NaNs that
    # turn into arbitrary values when cast to uint8. Leave it all-zero instead.
    if peak > 0:
        image = image / peak
    image = reshape(image, list(image_shape))
    # Out-of-place so that an integer frame that skipped the division above
    # is promoted to float instead of failing an in-place cast.
    image = image * 255.
    return image.astype(np.uint8)

# Show and save the same frame from the original ETH80 data and from both
# GTLD parts. The original is read from eth80_dataset explicitly: the
# notebook-level `data` is rebound to the SMNI EEG tensor by the SMNI loading
# cell, so it no longer refers to the images after a full run.
for tensor, filename in (
    (eth80_dataset['data'], 'original.jpg'),
    (groupT, 'gtld_common.jpg'),
    (contrastedT, 'gtld_individual.jpg'),
):
    image = get_image(tensor, nobj, nframe)
    plt.imshow(image)
    Image.fromarray(image).save(filename)
    plt.show()