In [1]:
%load_ext autoreload
%autoreload 2

from source.read_HAR_dataset import read_har_dataset, har_dimensions, har_activities, har_activities_map, har_ind_IDS
from source.utils import  filter_dimensions
from source.tserie import TSerie
from source.utils import classify_dataset
from itertools import chain, combinations
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn import svm
from source.utils import idsStd
from sklearn import metrics
import numpy as np
from sklearn import svm
import umap
from imblearn.under_sampling import RandomUnderSampler
from source.augmentation import  * 
# from cuml.datasets import make_blobs
# from cuml.neighbors import NearestNeighbors
# from cuml.manifold import UMAP
# from cuml.cluster import DBSCAN


# `sys` was previously only available if the star import above happened to
# leak it; import it explicitly so the cell works on a fresh kernel.
import sys

# Make the sibling `mts_viz` checkout importable so that `server.source.storage`
# resolves.
# NOTE(review): this is a machine-specific absolute path; anyone else running
# the notebook has to edit it (or install `mts_viz` as a package instead).
sys.path.insert(0, '/home/texs/Documentos/Repositories/mts_viz')
from server.source.storage import MTSStorage

def powerset(iterable):
    """Return an iterator over every subset of ``iterable``, smallest first.

    Each subset is a tuple. The input is materialised into a list first, so
    duplicate elements are kept and treated as distinct positions.

    Example: powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    items = list(iterable)
    subsets_by_size = (combinations(items, size) for size in range(len(items) + 1))
    return chain.from_iterable(subsets_by_size)

# ----------------------------- Experiment setup -----------------------------
# Per-series normalisation applied before feature extraction:
#   0: no normalisation, 1: centering, 2: z-score normalisation
NORM = 0
DATASET = 'HAR-UML20'
KFOLDS = 1    # number of folds evaluated by the main loop
N_TESTS = 2   # number of subject ids held out for testing in each fold

# ------------------------------ Encoder training ----------------------------
EPOCHS = 10   # 15 was also tried
FEATURE_SIZE = 512
ENCODING_SIZE = 8
METRIC = 'braycurtis'   # only referenced by the (disabled) UMAP projection

RESULTS_PATH = 'outputs/augmentation/'

# ------------------------------- Augmentations ------------------------------
# Every augmentation name understood by the notebook; each one is applied to
# the test split at evaluation time.
ALL_AUGMENTATIONS = ['none', 'rotation', 'permutation', 'time_warp', 'magnitude_warp', 'scaling', 'jitter']
# Augmentations applied to the training split (any subset of ALL_AUGMENTATIONS).
AUGMENTATIONS = ['none']
REPEATS_PER_AUGMENTATION = 1
INCLUDE_ORIGINAL = False

# ------------------------------- Sensor groups ------------------------------
# N_DIMS_NAMES[i] is the short name of the channel group N_DIMENSIONS[i].
# A third group, 'Mag' (Magnetometer-X/Y/Z), is currently left out.
N_DIMS_NAMES = ['Acc', 'Gyro']
N_DIMENSIONS = [
    ['Accelerometer-X', 'Accelerometer-Y', 'Accelerometer-Z'],
    ['Gyrometer-X', 'Gyrometer-Y', 'Gyrometer-Z'],
]

firstTimeSave = True

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class FeatureExtractor:
    """Fit/transform style wrapper around ``getContrastiveFeatures``.

    ``fit_transform`` trains the contrastive encoder and keeps the returned
    model and device on the instance; ``transform`` then encodes new data with
    that model.

    Parameters
    ----------
    epochs : int
        Forwarded to ``getContrastiveFeatures`` as ``epochs``.
    batch_size : int
        Stored on the instance only.
        NOTE(review): it is not forwarded to ``getContrastiveFeatures`` --
        confirm whether the trainer accepts a batch size.
    loss_metric : str
        Forwarded as ``loss_metric`` (the notebook uses 'SimCLR' / 'SupConLoss').
    encoding_size : int
        Forwarded as ``encoding_size``.
    mode : str
        Forwarded as ``mode``.
    """

    def __init__(self, epochs = 100, batch_size = 32, loss_metric = 'SimCLR', encoding_size = 8, mode = 'subsequences'):
        self.model = None    # set by fit_transform
        self.device = None   # set by fit_transform
        self.epochs = epochs
        self.batch_size = batch_size
        self.loss_metric = loss_metric
        self.encoding_size = encoding_size
        self.mode = mode

    def fit_transform(self, X, y=None):
        """Train the encoder on ``X`` (and optional labels ``y``) and return the embeddings.

        ``X`` must be a 3-axis array; its last two axes are swapped before it
        is handed to ``getContrastiveFeatures``. The feature size comes from
        the notebook-level ``FEATURE_SIZE`` constant.
        """
        embeddings, self.model, self.device = getContrastiveFeatures(
            X.transpose([0, 2, 1]), y,
            epochs=self.epochs,
            loss_metric=self.loss_metric,
            feat_size=FEATURE_SIZE,
            # Use the value given to the constructor; the global ENCODING_SIZE
            # was used here before, which silently ignored the argument.
            encoding_size=self.encoding_size,
            mode=self.mode,
        )
        print(X.shape)
        return embeddings

    def transform(self, X):
        """Encode ``X`` with the model trained by ``fit_transform``.

        ``X`` gets the same last-two-axes swap as in ``fit_transform``.
        ``fit_transform`` must have been called first, otherwise ``self.model``
        is still ``None``.
        """
        print(X.shape)
        return self.model.encode(X.transpose([0, 2, 1]), self.device)


def augmentData(X, y, augmentation, repeat = 3):
    """Apply one augmentation to ``X`` ``repeat`` times and stack the results.

    Parameters
    ----------
    X : array
        Data to augment; copies are stacked along axis 0.
    y : array
        Labels for ``X``; repeated once per copy, in the same order.
    augmentation : str
        One of 'rotation', 'permutation', 'time_warp', 'magnitude_warp',
        'scaling', 'jitter'. Any other value (the notebook uses 'none') means
        "no augmentation" and yields plain copies of ``X``.
    repeat : int
        Number of augmented copies to produce.

    Returns
    -------
    (X_out, y_out)
        ``repeat`` augmented copies of ``X`` stacked block after block along
        axis 0, with the matching labels. For ``repeat <= 0`` both are empty
        slices of the inputs rather than bare Python lists.
    """
    # The lambdas look the augmentation functions up lazily, so only the one
    # actually requested has to exist.
    transforms = {
        'rotation': lambda data: rotation(data, angle_range=[-np.pi/4, np.pi/4]),
        'permutation': lambda data: permutation(data),
        'time_warp': lambda data: time_warp(data, sigma=0.03),
        'magnitude_warp': lambda data: magnitude_warp(data, sigma=0.04, knot=4),
        'scaling': lambda data: scaling(data, sigma=0.05),
        'jitter': lambda data: jitter(data, sigma=0.01),
    }
    # Unknown names fall back to an identity copy, as the original if/elif did.
    transform = transforms.get(augmentation, lambda data: data.copy())

    if repeat <= 0:
        return X[:0].copy(), y[:0].copy()

    # Collect all copies first and concatenate once instead of growing the
    # array on every iteration.
    X_parts = [transform(X) for _ in range(repeat)]
    X_out = np.concatenate(X_parts, axis=0)
    y_out = np.concatenate([y] * repeat, axis=0)
    return X_out, y_out

def augment(X, y, augmentations, repeats_per_augmentation=1, include_original=False):
    """Build a data set from several augmentations of ``X``.

    For every name in ``augmentations`` (in order), ``augmentData`` is called
    with ``repeat=repeats_per_augmentation`` and its output is appended along
    axis 0. With ``include_original=True`` a copy of ``X``/``y`` comes first.

    Returns ``(X_aug, y_aug)``. When nothing is produced (no augmentations and
    no original) both are empty Python lists.
    """
    if include_original:
        X_acc, y_acc = X.copy(), y.copy()
    else:
        X_acc, y_acc = [], []

    for name in augmentations:
        X_new, y_new = augmentData(X, y, name, repeat=repeats_per_augmentation)
        if len(X_acc) > 0:
            X_acc = np.concatenate((X_acc, X_new), axis=0)
            y_acc = np.concatenate((y_acc, y_new), axis=0)
        else:
            # Nothing accumulated yet: take the first block as is.
            X_acc, y_acc = X_new, y_new

    return X_acc, y_acc
        
def minoritySampling(X, y):
    """Randomly under-sample a 3-axis array ``X`` according to the labels ``y``.

    ``RandomUnderSampler`` (``sampling_strategy='not minority'``, fixed
    ``random_state=1``) works on 2-D data, so ``X`` is flattened to one row per
    sample for resampling and reshaped back afterwards.

    Returns the resampled ``(X, y)``; the last two axes of ``X`` keep their
    original sizes.
    """
    n_samples, n_t, n_d = X.shape
    sampler = RandomUnderSampler(sampling_strategy='not minority', random_state=1)
    flat_X, y = sampler.fit_resample(X.reshape([n_samples, n_t * n_d]), y)
    return flat_X.reshape([flat_X.shape[0], n_t, n_d]), y


# Display names for the integer class labels, keyed by label id (0..4).
# NOTE(review): load_data() merges the first three dataset activities into
# label 0 and shifts the remaining ones down by two -- confirm these names
# match that remapped order.
activities_map = dict(enumerate((
    "Sedentary",
    "Walking",
    "Running",
    "Downstairs",
    "Upstairs",
)))

# Names of all channels in the dataset, used to select sensor groups.
all_dimensions = har_dimensions

def load_data(k):
    """Load the HAR-UML20 data with ``N_TESTS`` subject ids held out for testing.

    The ids ``har_ind_IDS[k : k + N_TESTS]`` form the test split, all remaining
    ids the training split. Labels are remapped in place: the original labels
    0, 1 and 2 are merged into class 0, and every label ``i`` in
    ``3 .. len(har_activities) - 1`` becomes ``i - 2``.

    Parameters
    ----------
    k : int
        Index into ``har_ind_IDS`` of the first held-out subject.

    Returns
    -------
    (X_train, y_train, I_train, X_test, y_test, I_test)
        ``X_*`` are the resampled series, ``y_*`` the remapped labels and
        ``I_*`` the per-sample ids with an extra axis inserted at position 1.
        All three arrays of a split are indexed by the same sample selection.
    """
    all_ids = har_ind_IDS
    test_ids = all_ids[k: k + N_TESTS]
    train_ids = all_ids[:k] + all_ids[k + N_TESTS:]

    data = read_har_dataset('./datasets/HAR-UML20/', train_ids=train_ids, test_ids=test_ids, val_ids=[], cache=True)
    ids_train, X_train, y_train, I_train, train_kcal_MET = data['train']
    ids_test, X_test, y_test, I_test, test_kcal_MET = data['test']

    # Merge labels 0/1/2 into class 0, then close the gap for the rest.
    # Ascending order is safe: a remapped value i - 2 is always below any
    # label that is still waiting to be remapped.
    for labels in (y_train, y_test):
        labels[(labels == 1) | (labels == 2)] = 0
        for i in range(3, len(har_activities)):
            labels[labels == i] = i - 2

    # NOTE(review): the results are not used below; the calls are kept in case
    # idsStd has side effects -- confirm and drop them if it is pure.
    ind_std_train = idsStd(train_ids, X_train, I_train)
    ind_std_test = idsStd(test_ids, X_test, I_test)

    I_train = np.expand_dims(I_train, axis=1)
    I_test = np.expand_dims(I_test, axis=1)

    # The sampler is fed sample *indices* as labels so that the rows it keeps
    # can be recovered afterwards.
    # NOTE(review): with one unique label per sample the under-sampler
    # presumably has nothing to discard; if balancing by activity class was
    # intended, this needs to resample on y instead -- confirm.
    X_train, kept_train = minoritySampling(X_train, np.arange(len(y_train)))
    X_test, kept_test = minoritySampling(X_test, np.arange(len(y_test)))

    # Index with what the sampler actually returned (the full, un-sampled
    # index range was used here before), so y and I always stay aligned
    # with X.
    y_train = y_train[kept_train]
    I_train = I_train[kept_train]
    y_test = y_test[kept_test]
    I_test = I_test[kept_test]

    return X_train, y_train, I_train, X_test, y_test, I_test

In [3]:
from source.torch_utils import getContrastiveFeatures
import torch

# Open the 'har_augmentations' storage and reset it before the run.
# NOTE(review): presumably delete() removes previously stored data and load()
# re-initialises the store -- confirm against MTSStorage; `storage` is not
# used again in the cells shown here.
storage = MTSStorage('har_augmentations')
storage.delete()
storage.load()


# Results accumulator: sensor-combination name -> list with one entry per
# fold, each entry being [f1 train, f1 test for every name in ALL_AUGMENTATIONS].
components_map = {}

# Main experiment: for every fold, train one contrastive encoder per sensor
# group, embed the (augmented) test split, and score an SVM on every
# combination of sensor-group embeddings.
for k in range(KFOLDS):

    print('FOLD: {}'.format(k))
    # ------------------------ Reading the dataset ------------------------
    # NOTE(review): the fold offset of 3 is hard-coded -- confirm it is intended.
    X_train, y_train, I_train, X_test, y_test, I_test = load_data(k+3)
    # ---------------------------------------------------------------------

    # -------------------------------- AugmentData ------------------------
    all_train_mts = TSerie(X = X_train, y = y_train, I = I_train, classLabels = activities_map)
    all_test_mts = TSerie(X = X_test, y = y_test, I = I_test, classLabels = activities_map)

    X_train, y_train = augment(all_train_mts.X, y_train, repeats_per_augmentation = REPEATS_PER_AUGMENTATION, augmentations = AUGMENTATIONS, include_original = INCLUDE_ORIGINAL)
    X_test = all_test_mts.X

    additional = 1 if INCLUDE_ORIGINAL else 0
    n_copies = REPEATS_PER_AUGMENTATION * len(AUGMENTATIONS) + additional
    # augment() stacks whole copies of the training set one after another
    # (block 1, block 2, ...), so the ids must be tiled the same way.
    # np.repeat would interleave them element by element and misalign the ids
    # as soon as more than one copy is produced.
    I_train = np.tile(I_train, (n_copies,) + (1,) * (I_train.ndim - 1))
    # ---------------------------------------------------------------------

    # One entry per sensor group, in N_DIMENSIONS order.
    train_embeddings = []
    test_embeddings = []
    for t in range(len(N_DIMENSIONS)):
        dimensions = N_DIMENSIONS[t]
        X_train_f = filter_dimensions(X_train, all_dimensions, dimensions)
        X_test_f = filter_dimensions(X_test, all_dimensions, dimensions)

        mts_train = TSerie(X = X_train_f, y = y_train, I = I_train, dimensions = dimensions, classLabels=activities_map)
        mts_test = TSerie(X = X_test_f, y = y_test, I = I_test, dimensions = dimensions, classLabels=activities_map)

        if NORM == 1:
            mts_train.center()
            mts_test.center()
        elif NORM == 2:
            mts_train.znorm()
            mts_test.znorm()

        # One transformed copy of the test data per known augmentation;
        # augment() returns (X, y) and only X is needed here.
        test_tranformations = [
            augment(X_test_f.copy(), y_test, repeats_per_augmentation = 1, augmentations = [aug_name], include_original = False)[0]
            for aug_name in ALL_AUGMENTATIONS
        ]

        print('LEN')
        print(len(test_tranformations))

        reducer = FeatureExtractor(epochs = EPOCHS, loss_metric='SupConLoss')

        embeddings_train = reducer.fit_transform(mts_train.X, mts_train.y)
        test_embes = []
        for test_tr in test_tranformations:
            mts_aug = TSerie(X = test_tr, y = y_test, I = I_test, dimensions = dimensions, classLabels=activities_map)
            if NORM == 1:
                mts_aug.center()
            elif NORM == 2:
                mts_aug.znorm()

            embed = reducer.transform(mts_aug.X)
            test_embes.append(embed)

        # augmentation name -> test embedding for this sensor group
        test_map = dict(zip(ALL_AUGMENTATIONS, test_embes))

        train_embeddings.append(embeddings_train)
        test_embeddings.append(test_map)

        # Drop the encoder before training the next one.
        reducer = None
        torch.cuda.empty_cache()

        mts_train.features = embeddings_train

    # Concatenate the per-group embeddings (feature axis) for every non-empty
    # combination of sensor groups.
    names_comb = []
    embeddings_comb = []
    for combo in powerset(list(range(len(N_DIMS_NAMES)))):
        indexes = list(combo)
        if len(indexes) == 0:
            continue
        name = ''.join(' ' + N_DIMS_NAMES[ind] for ind in indexes)
        if len(indexes) == 1:
            train_embedding = train_embeddings[indexes[0]]
            test_embedding = {aug: test_embeddings[indexes[0]][aug] for aug in ALL_AUGMENTATIONS}
        else:
            train_embedding = np.concatenate([train_embeddings[ind] for ind in indexes], axis=1)
            test_embedding = {
                aug: np.concatenate([test_embeddings[ind][aug] for ind in indexes], axis=1)
                for aug in ALL_AUGMENTATIONS
            }
        names_comb.append(name)
        embeddings_comb.append((train_embedding, test_embedding))

    print('Classifying')
    # mts_train is the TSerie of the last sensor group; all groups were built
    # from the same y_train. Flatten the labels so scikit-learn does not have
    # to convert column vectors itself (DataConversionWarning).
    train_labels = np.ravel(mts_train.y)
    test_labels = np.ravel(y_test)
    for j in range(len(names_comb)):
        name = names_comb[j]
        clf = svm.SVC()
        train_feat, test_feat_map = embeddings_comb[j]
        clf.fit(train_feat, train_labels)

        pred_train = clf.predict(train_feat)
        f1_tr = metrics.f1_score(train_labels, pred_train, average='weighted')

        # Score layout: [train, then one test score per ALL_AUGMENTATIONS entry].
        f1_scores = [f1_tr]

        for aug in ALL_AUGMENTATIONS:
            test_feat = test_feat_map[aug]
            pred_test = clf.predict(test_feat)
            f1_te = metrics.f1_score(test_labels, pred_test, average='weighted')
            f1_scores.append(f1_te)

        components_map.setdefault(name, []).append(f1_scores)
    print('Classifying done')
    


        
    

FOLD: 0
Train IDS: [2, 3, 4, 5, 6, 7, 8, 9]
Test IDS: [0, 1]
Val IDS: []
Loading dataset from cache...


  warp = np.concatenate(np.random.permutation(splits)).ravel()


LEN
7
Subsequence length: 180
Epoch[1] Train loss    avg: 3.935458023536024
Epoch[10] Train loss    avg: 3.4930665831880523
(6300, 200, 3)
(2100, 200, 3)
(2100, 200, 3)
(2100, 200, 3)
(2100, 200, 3)
(2100, 200, 3)
(2100, 200, 3)
(2100, 200, 3)
LEN
7
Subsequence length: 180
Epoch[1] Train loss    avg: 3.881516998794478
Epoch[10] Train loss    avg: 3.4603446788594203
(6300, 200, 3)
(2100, 200, 3)
(2100, 200, 3)
(2100, 200, 3)
(2100, 200, 3)
(2100, 200, 3)
(2100, 200, 3)
(2100, 200, 3)
Classifying


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Classifying done


In [4]:
import csv
import os

# Write one CSV row per sensor combination with "mean (std)" of the weighted
# F1 scores over all folds. Column order follows the score layout built in the
# training loop: train first, then one test column per ALL_AUGMENTATIONS entry.

# Create the output directory if needed so open() cannot fail on a fresh checkout.
if RESULTS_PATH:
    os.makedirs(RESULTS_PATH, exist_ok=True)
path = os.path.join(RESULTS_PATH, 'har_kfold_{}.csv'.format('_'.join(AUGMENTATIONS)))

# One header cell per data column (the header used to stop after 'f1 test').
header = ['Sensors', 'f1 train'] + [
    'f1 test' if aug == 'none' else 'f1 test ({})'.format(aug)
    for aug in ALL_AUGMENTATIONS
]

with open(path, 'w', newline='') as csvfile:
    spamwriter = csv.writer(csvfile, delimiter=',',
                            quoting=csv.QUOTE_MINIMAL)
    spamwriter.writerow(header)
    for name in names_comb:
        # Rows are folds, columns are [train, test per augmentation]; reducing
        # over axis 0 adapts automatically when ALL_AUGMENTATIONS changes.
        scores = np.array(components_map[name])
        means = scores.mean(axis=0)
        stds = scores.std(axis=0)

        row = [name] + ['{:.3f} ({:.3f})'.format(m, s) for m, s in zip(means, stds)]
        print(row)
        spamwriter.writerow(row)
            

[' Acc', '0.998 (0.000)', '0.637 (0.000)', '0.668 (0.000)', '0.581 (0.000)', '0.612 (0.000)', '0.614 (0.000)', '0.680 (0.000)', '0.607 (0.000)']
[' Gyro', '0.999 (0.000)', '0.678 (0.000)', '0.635 (0.000)', '0.621 (0.000)', '0.614 (0.000)', '0.664 (0.000)', '0.668 (0.000)', '0.675 (0.000)']
[' Acc Gyro', '0.999 (0.000)', '0.680 (0.000)', '0.696 (0.000)', '0.606 (0.000)', '0.637 (0.000)', '0.670 (0.000)', '0.708 (0.000)', '0.646 (0.000)']
