In [1]:
%load_ext autoreload
%autoreload 2

from source.read_HAR_dataset import read_har_dataset, har_dimensions, har_activities, har_activities_map, har_ind_IDS
from source.utils import  filter_dimensions
from source.tserie import TSerie
from source.utils import classify_dataset
from itertools import chain, combinations
from sklearn.ensemble import AdaBoostClassifier
from source.utils import idsStd
from sklearn import metrics
from sklearn import svm
import numpy as np
import umap
import sys
sys.path.insert(0, '/home/texs/Documentos/Repositories/mts_viz')

from server.source.storage import MTSStorage

def powerset(iterable):
    "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)  # allows duplicate elements
    return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))

Z_SCORE_NORM = True
DATASET = 'HAR-UML20'
KFOLDS = 3
N_TESTS = 2
METRIC  = 'braycurtis'
N_COMPONENTS_LIST = [32,64,128]
# N_DIMS_NAMES = ['Acc', 'Gyro', 'Mag']
N_DIMS_NAMES = ['Acc', 'Gyro']
N_DIMENSIONS = [
    [
        'Accelerometer-X',	
        'Accelerometer-Y',	
        'Accelerometer-Z',
    ],
    [
        'Gyrometer-X',
        'Gyrometer-Y',
        'Gyrometer-Z',
    ],
    [
        'Magnetometer-X',
        'Magnetometer-Y',
        'Magnetometer-Z'
    ]
]

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
storage = MTSStorage('har_kfold')
storage.delete()
storage.load()



results_map = {}
for N_COMPONENTS in N_COMPONENTS_LIST:
    components_map = {}

    for k in range(KFOLDS):
        if DATASET == 'HAR-UML20':
            all_ids = har_ind_IDS
            test_ids = all_ids[k: k + N_TESTS]
            train_ids = all_ids[:k] + all_ids[k + N_TESTS:]        
            
            data = read_har_dataset('./datasets/HAR-UML20/', train_ids=train_ids, test_ids=test_ids, val_ids=[])
            ids_train, X_train, y_train, I_train, train_kcal_MET = data['train']
            # ids_val, X_val, y_val, I_val, val_kcal_MET = data['val']
            ids_test, X_test, y_test, I_test, test_kcal_MET = data['test']
            
            all_dimensions = har_dimensions
            activities_map = har_activities_map
        
        train_embeddings = []
        test_embeddings = []
        train_ind = train_ids
        test_ind = test_ids
        
        print('----------------------------------------------------')
        print(X_train.shape)
        print('----------------------------------------------------')
        
        ind_std_train = idsStd(train_ids , X_train, I_train)
        ind_std_test = idsStd(test_ids, X_test, I_test)
        
        
        if Z_SCORE_NORM:
            N_tr, T, D = X_train.shape
            N_te, T, D = X_test.shape
            X_train_sh = np.zeros(X_train.shape)
            X_test_sh = np.zeros(X_test.shape)
            for i in range(N_tr):
                
                for k in range(D):
                    mag = np.mean(X_train[i, :, k], axis = 0)
                    indice = np.where(train_ind ==  I_train[i])[0][0]
                    std = ind_std_train[indice][k] * 6
                    X_train_sh[i, :, k] = (X_train[i, :, k] - mag) / std
                    # X_train_sh[i, :, k] = np.concatenate([[0], fft(X_train[i, :, k])[1:]])

            for i in range(N_te):
                for k in range(D):
                    mag = np.mean(X_test[i, :, k], axis = 0)
                    indice = np.where(test_ind ==  I_test[i])[0][0]
                    std = ind_std_test[indice][k] * 6
                    X_test_sh[i, :, k] = (X_test[i, :, k] - mag) / std
                    # X_test_sh[i, :, k] = fft(X_test[i, :, k])[0:]
        else:
            X_train_sh = np.copy(X_train)
            X_test_sh = np.copy(X_test)
        
        X_train = X_train_sh
        X_test = X_test_sh
        
        
        names_comb = []
        embeddings_comb = []
        
        for i, combo in enumerate(powerset(list(range(len(N_DIMS_NAMES)))), 1):
            indexes = list(combo)
            name = ''
            dimensions = []
            
            if len(indexes) == 0:
                continue
            for ind in indexes:
                name = name + ' ' + N_DIMS_NAMES[ind]
                dimensions = dimensions + N_DIMENSIONS[ind]
            
            names_comb.append(name)
            
            X_train_f = filter_dimensions(X_train, all_dimensions, dimensions)
            X_test_f = filter_dimensions(X_test, all_dimensions, dimensions)

            mts_train = TSerie(X = X_train_f, y = y_train, I = I_train, dimensions = dimensions, classLabels=activities_map)
            mts_test = TSerie(X = X_test_f, y = y_test, I = I_test, dimensions = dimensions, classLabels=activities_map)
            
            mts_train.folding_features_v2()
            mts_test.folding_features_v2()
            
            n_comp = int(N_COMPONENTS * len(dimensions) / 3)
            print('----------')
            print('N components: ', n_comp)
            reducer = umap.UMAP(n_components= n_comp, metric=METRIC)
            
            embeddings_train = reducer.fit_transform(mts_train.features, y = mts_train.y)
            embeddings_test = reducer.transform(mts_test.features)
            
            embeddings_comb.append((embeddings_train, embeddings_test))
            
            
            # Save visualizations of embeddings
            
            reducer2D = umap.UMAP(n_components=2, metric=METRIC)
            coords_train = reducer2D.fit_transform(mts_train.features, y = mts_train.y)
            coords_test = reducer2D.transform(mts_test.features)
            
            indMapTrain = {int(ind): 'sub_' + str(ind) for ind in np.unique(mts_train.I)}
            indMapTest = {int(ind): 'sub_' + str(ind) for ind in np.unique(mts_test.I)}
            
            storage.add_mts(
                '{}_{}_train_{}'.format(n_comp, name, ' '.join([str(idd) for idd in ids_test]) ), X_train_f, 
                dimensions,
                coords={'umap': coords_train}, 
                labels={'activities': mts_train.y, 'participants': mts_train.I}, 
                labelsNames={'activities': activities_map, 'participants': indMapTrain},
                sampling = True,
                n_samples = 400
            )
            
            storage.add_mts(
                '{}_{}_test_{}'.format(n_comp, name, ' '.join([str(idd) for idd in ids_test])), X_test_f, 
                dimensions,
                coords={'umap': coords_test}, 
                labels={'activities': mts_test.y, 'participants': mts_test.I}, 
                labelsNames={'activities': activities_map, 'participants': indMapTest},
                sampling = True,
                n_samples = 400
            )
            storage.save()
            print('VIZ SAVED')
        
        print(names_comb)
        # print(embeddings_comb)
        
        for j in range(len(names_comb)):
            name = names_comb[j]
            clf = AdaBoostClassifier()
            # clf = svm.SVC()
            train_feat, test_feat = embeddings_comb[j]
            clf.fit(train_feat, mts_train.y)
            pred_train, pred_test = clf.predict(train_feat), clf.predict(test_feat)
            
            f1_tr = metrics.f1_score(mts_train.y, pred_train, average='weighted')
            f1_te = metrics.f1_score(mts_test.y, pred_test, average='weighted')
            
            if name not in components_map:
                components_map[name] = [(f1_tr, f1_te)]
            else:
                components_map[name] = components_map[name] + [(f1_tr, f1_te)]
    
    results_map[N_COMPONENTS] = components_map
            
        

IDS: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Train IDS: [2, 3, 4, 5, 6, 7, 8, 9]
Test IDS: [0, 1]
Val IDS: []
----------------------------------------------------
(6300, 200, 9)
----------------------------------------------------
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  32


OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  32
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 6 
Loaded mts - N: 2100, T: 200, D: 6 
Features shape: (6300, 1200)
Features shape: (2100, 1200)
----------
N components:  64
VIZ SAVED
[' Acc', ' Gyro', ' Acc Gyro']


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


IDS: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Train IDS: [0, 3, 4, 5, 6, 7, 8, 9]
Test IDS: [1, 2]
Val IDS: []
----------------------------------------------------
(6300, 200, 9)
----------------------------------------------------
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  32
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  32
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 6 
Loaded mts - N: 2100, T: 200, D: 6 
Features shape: (6300, 1200)
Features shape: (2100, 1200)
----------
N components:  64
VIZ SAVED
[' Acc', ' Gyro', ' Acc Gyro']


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


IDS: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Train IDS: [0, 1, 4, 5, 6, 7, 8, 9]
Test IDS: [2, 3]
Val IDS: []
----------------------------------------------------
(6300, 200, 9)
----------------------------------------------------
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  32
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  32
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 6 
Loaded mts - N: 2100, T: 200, D: 6 
Features shape: (6300, 1200)
Features shape: (2100, 1200)
----------
N components:  64
VIZ SAVED
[' Acc', ' Gyro', ' Acc Gyro']


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


IDS: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Train IDS: [2, 3, 4, 5, 6, 7, 8, 9]
Test IDS: [0, 1]
Val IDS: []
----------------------------------------------------
(6300, 200, 9)
----------------------------------------------------
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  64
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  64
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 6 
Loaded mts - N: 2100, T: 200, D: 6 
Features shape: (6300, 1200)
Features shape: (2100, 1200)
----------
N components:  128
VIZ SAVED
[' Acc', ' Gyro', ' Acc Gyro']


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


IDS: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Train IDS: [0, 3, 4, 5, 6, 7, 8, 9]
Test IDS: [1, 2]
Val IDS: []
----------------------------------------------------
(6300, 200, 9)
----------------------------------------------------
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  64
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  64
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 6 
Loaded mts - N: 2100, T: 200, D: 6 
Features shape: (6300, 1200)
Features shape: (2100, 1200)
----------
N components:  128
VIZ SAVED
[' Acc', ' Gyro', ' Acc Gyro']


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


IDS: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Train IDS: [0, 1, 4, 5, 6, 7, 8, 9]
Test IDS: [2, 3]
Val IDS: []
----------------------------------------------------
(6300, 200, 9)
----------------------------------------------------
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  64
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  64
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 6 
Loaded mts - N: 2100, T: 200, D: 6 
Features shape: (6300, 1200)
Features shape: (2100, 1200)
----------
N components:  128
VIZ SAVED
[' Acc', ' Gyro', ' Acc Gyro']


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


IDS: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Train IDS: [2, 3, 4, 5, 6, 7, 8, 9]
Test IDS: [0, 1]
Val IDS: []
----------------------------------------------------
(6300, 200, 9)
----------------------------------------------------
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  128
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  128
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 6 
Loaded mts - N: 2100, T: 200, D: 6 
Features shape: (6300, 1200)
Features shape: (2100, 1200)
----------
N components:  256
VIZ SAVED
[' Acc', ' Gyro', ' Acc Gyro']


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


IDS: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Train IDS: [0, 3, 4, 5, 6, 7, 8, 9]
Test IDS: [1, 2]
Val IDS: []
----------------------------------------------------
(6300, 200, 9)
----------------------------------------------------
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  128
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  128
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 6 
Loaded mts - N: 2100, T: 200, D: 6 
Features shape: (6300, 1200)
Features shape: (2100, 1200)
----------
N components:  256
VIZ SAVED
[' Acc', ' Gyro', ' Acc Gyro']


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


IDS: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Train IDS: [0, 1, 4, 5, 6, 7, 8, 9]
Test IDS: [2, 3]
Val IDS: []
----------------------------------------------------
(6300, 200, 9)
----------------------------------------------------
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  128
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 3 
Loaded mts - N: 2100, T: 200, D: 3 
Features shape: (6300, 600)
Features shape: (2100, 600)
----------
N components:  128
VIZ SAVED
Loaded mts - N: 6300, T: 200, D: 6 
Loaded mts - N: 2100, T: 200, D: 6 
Features shape: (6300, 1200)
Features shape: (2100, 1200)
----------
N components:  256
VIZ SAVED
[' Acc', ' Gyro', ' Acc Gyro']


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [3]:
# results_map
# N_COMPONENTS_LIST  = [32]
results_map

{32: {' Acc': [(0.4516731756242877, 0.4015983383082013),
   (0.5082319198668502, 0.4235643274687192),
   (0.453816460527926, 0.45306883869398307)],
  ' Gyro': [(0.7623354721879803, 0.49693082750572676),
   (0.34062746197691735, 0.30195283268583395),
   (0.6189582637886307, 0.4072825984118079)],
  ' Acc Gyro': [(0.6576563324708304, 0.49750543931264757),
   (0.6396236240070627, 0.39008987007794244),
   (0.33161478683791484, 0.29153402143981133)]},
 64: {' Acc': [(0.4693371443916082, 0.31737124648532017),
   (0.6593793629292395, 0.40806688412618825),
   (0.18162038206255202, 0.18090746388257567)],
  ' Gyro': [(0.80581552595177, 0.48262065767480605),
   (0.5113736773136923, 0.33893095820139985),
   (0.6166169745977387, 0.4772600196284271)],
  ' Acc Gyro': [(0.4588618949218736, 0.3442218832332953),
   (0.5732328858589028, 0.35941831327871987),
   (0.3791949187374127, 0.3254876128822025)]},
 128: {' Acc': [(0.814978118913221, 0.6204212988811901),
   (0.5983367276119792, 0.4758555395447992),


In [4]:
import csv

path = 'teste.csv'
with open(path, 'w', newline='') as csvfile:
    row = ['Sensors']
    row = row + [str(com) for com in N_COMPONENTS_LIST]
    spamwriter = csv.writer(csvfile, delimiter=',',
                            quoting=csv.QUOTE_MINIMAL)
    spamwriter.writerow(row)
    for name in names_comb:
        row = [name]
        for comp in N_COMPONENTS_LIST:
            mean_tr = np.array([ f1[0] for f1 in results_map[comp][name]]).mean()
            stds_tr = np.array([ f1[0] for f1 in results_map[comp][name]]).std()
            mean_te = np.array([ f1[1] for f1 in results_map[comp][name]]).mean()
            stds_te = np.array([ f1[1] for f1 in results_map[comp][name]]).std()
            
            cell_str = '{:.3f} ({:.3f}) - {:.3f} ({:.3f})'.format(mean_tr, stds_tr, mean_te, stds_te)
            row.append(cell_str)
            
        spamwriter.writerow(row)
            