In [1]:
%load_ext autoreload
%autoreload 2

from source.torch_utils import getContrastiveFeatures
import torch
from source.read_HAR_dataset import read_har_dataset, har_dimensions, har_activities, har_activities_map, har_ind_IDS
from source.utils import  filter_dimensions
from source.tserie import TSerie
from source.utils import classify_dataset
from itertools import chain, combinations
from sklearn.ensemble import AdaBoostClassifier
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn import svm
from source.utils import idsStd
from sklearn import metrics
import numpy as np
from sklearn import svm
import umap
from imblearn.under_sampling import RandomUnderSampler
from source.augmentation import  * 
# from cuml.datasets import make_blobs
# from cuml.neighbors import NearestNeighbors
# from cuml.manifold import UMAP
# from cuml.cluster import DBSCAN

from scipy.spatial import distance
import matplotlib.pyplot as plt


# `sys` (and `os`) are imported explicitly here: no earlier line of this cell imports
# `sys`, so the call below would otherwise depend on another import leaking the name.
import os
import sys

# Directory that is put first on the import path so that `server.source.storage` resolves.
# Set the MTS_VIZ_PATH environment variable to point at a different checkout; when it is
# unset the original hard-coded location is used, so existing setups keep working.
sys.path.insert(0, os.environ.get('MTS_VIZ_PATH', '/home/texs/Documentos/Repositories/mts_viz'))
from server.source.storage import MTSStorage

def powerset(iterable):
    """Return an iterator over every subset of ``iterable``, each as a tuple.

    Subsets come out grouped by size, smallest first:
    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    items = tuple(iterable)  # materialise once; repeated elements are kept as-is
    subset_sizes = range(len(items) + 1)
    return chain.from_iterable(map(lambda size: combinations(items, size), subset_sizes))

# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------

# Output locations and dataset identifier.
EXPERIMENTS_DIR = 'distance_experiments_cont'
RESULTS_PATH = 'outputs/augmentation/'
DATASET = 'HAR-UML20'

# Normalisation selector -- 0: No normalization, 1: centering 2: z_score_norm
# Z_SCORE_NORM = True
NORM = 2

# Cross-validation layout: number of folds and how many subject ids are held out per fold.
KFOLDS = 9
N_TESTS = 2

# Contrastive encoder settings.
EPOCHS = 5
FEATURE_SIZE = 1024
ENCODING_SIZE = 16

# Distance metric name.
# METRIC  = 'braycurtis'
METRIC = 'cosine'

# Augmentations applied to the series, and how often each one is repeated.
AUGMENTATIONS = ['rotation', 'permutation', 'time_warp', 'magnitude_warp', 'scaling', 'jitter']
# Alternatives kept from earlier experiments:
# AUGMENTATIONS = ['none']
# AUGMENTATIONS = ['scaling']
# AUGMENTATIONS = ['magnitude_warp']
# ALL_AUGMENTATIONS = ['none', 'rotation', 'permutation', 'time_warp', 'magnitude_warp', 'scaling', 'jitter']
# ALL_AUGMENTATIONS = ['none', 'rotation', 'rotation', 'rotation', 'rotation', 'rotation', 'rotation']
# ALL_AUGMENTATIONS = ['none'] * 7
REPEATS_PER_AUGMENTATION = 1
INCLUDE_ORIGINAL = True

# Sensor groups: short names and, in the same order, the X/Y/Z channel names of each group.
# N_DIMS_NAMES = ['Acc', 'Gyro', 'Mag']
# N_DIMS_NAMES = ['Acc']
N_DIMS_NAMES = ['Acc', 'Gyro']
N_DIMENSIONS = [
    ['{}-{}'.format(sensor, axis) for axis in 'XYZ']
    for sensor in ('Accelerometer', 'Gyrometer')  # append 'Magnetometer' to add the third group
]


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class FeatureExtractor:
    """Thin fit_transform/transform wrapper around ``getContrastiveFeatures``.

    ``fit_transform`` trains the encoder and keeps the returned model and device on the
    instance; ``transform`` then encodes further arrays with that model.

    NOTE(review): ``batch_size`` and ``encoding_size`` are stored on the instance but are
    not forwarded to ``getContrastiveFeatures``. The feature and encoding sizes actually
    used come from the module-level ``FEATURE_SIZE`` and ``ENCODING_SIZE`` constants.
    Confirm whether the constructor arguments were meant to take effect before wiring
    them in, since doing so would change the encoder used by existing calls.
    """

    def __init__(self, epochs = 100, batch_size = 32, loss_metric = 'SimCLR', encoding_size = 8, mode = 'subsequences'):
        # Populated by fit_transform(); None until then.
        self.model = None
        self.device = None
        self.epochs = epochs
        self.batch_size = batch_size
        self.loss_metric = loss_metric
        self.encoding_size = encoding_size
        self.mode = mode

    def fit_transform(self, X, y=None):
        """Train the encoder on ``X`` (and optional labels ``y``) and return its embeddings.

        ``X`` must be a 3-D array; its last two axes are swapped before it is handed to
        ``getContrastiveFeatures`` (presumably (N, T, D) -> (N, D, T); confirm against
        that helper). The trained model and device are stored for later ``transform`` calls.
        """
        embeddings, self.model, self.device =  getContrastiveFeatures(X.transpose([0, 2, 1]), y,
                epochs = self.epochs,
                loss_metric=self.loss_metric,
                feat_size=FEATURE_SIZE,
                encoding_size=ENCODING_SIZE,
                mode=self.mode,
                conv_filters = [16, 16, 16],
                conv_kernels = [5, 5, 5]
        )
        return embeddings

    def transform(self, X):
        """Encode ``X`` with the model trained by ``fit_transform``.

        Raises:
            RuntimeError: if ``fit_transform`` has not been called yet, instead of the
                opaque ``AttributeError`` on ``None`` that would otherwise surface.
        """
        if self.model is None:
            raise RuntimeError('FeatureExtractor.transform() called before fit_transform()')
        print(X.shape)
        # Same axis swap as in fit_transform so both paths feed the model the same layout.
        return self.model.encode(X.transpose([0, 2, 1]), self.device)


def augmentData(X, y, I, augmentation, repeat = 3):
    """Apply one named augmentation to ``X`` ``repeat`` times and stack the results.

    Args:
        X: series to augment; each repetition is generated from this same input.
        y: labels belonging to ``X``; repeated once per repetition.
        I: ids belonging to ``X``; repeated once per repetition.
        augmentation: one of 'rotation', 'permutation', 'time_warp', 'magnitude_warp',
            'scaling' or 'jitter'. Any other value (e.g. 'none') leaves ``X`` unchanged.
        repeat: number of augmented copies to produce.

    Returns:
        ``(X_out, y_out, I_out)`` concatenated along axis 0. With ``repeat == 0`` three
        empty lists are returned.
    """
    # Each entry wraps the augmentation call together with its fixed hyper-parameters.
    transforms = {
        'rotation': lambda data: rotation(data, angle_range=[-np.pi/4, np.pi/4]),
        # 'rotation' (narrow variant): rotation(data, angle_range=[-np.pi/64, np.pi/64])
        'permutation': lambda data: permutation(data),
        'time_warp': lambda data: time_warp(data, sigma=0.03),
        'magnitude_warp': lambda data: magnitude_warp(data, sigma=0.04, knot=4),
        'scaling': lambda data: scaling(data, sigma=0.05),
        'jitter': lambda data: jitter(data, sigma=0.01),
        # 'magnitude_pert': magnitude_pert(data, prange=[0, 1])
    }
    apply_transform = transforms.get(augmentation, lambda data: data)

    X_out, y_out, I_out = [], [], []
    for _ in range(repeat):
        augmented = apply_transform(X)
        if len(X_out) > 0:
            X_out = np.concatenate((X_out, augmented), axis=0)
            y_out = np.concatenate((y_out, y), axis=0)
            I_out = np.concatenate((I_out, I), axis=0)
        else:
            # Nothing accumulated yet: start from this repetition.
            X_out, y_out, I_out = augmented, y, I
    return X_out, y_out, I_out

def augment(X, y, I, augmentations, repeats_per_augmentation=1, include_original=False):
    """Run several augmentations over the same data and stack everything along axis 0.

    Args:
        X, y, I: series, labels and ids handed unchanged to every ``augmentData`` call.
        augmentations: iterable of augmentation names, processed in order.
        repeats_per_augmentation: forwarded to ``augmentData`` as ``repeat``.
        include_original: when true, the result starts with the untouched ``X``/``y``/``I``.

    Returns:
        ``(X_aug, train_y_aug, train_I_aug)``; three empty lists if nothing was produced.
    """
    if include_original:
        X_aug, train_y_aug, train_I_aug = X, y, I
    else:
        X_aug, train_y_aug, train_I_aug = [], [], []

    for name in augmentations:
        batch_X, batch_y, batch_I = augmentData(X, y, I, name, repeat=repeats_per_augmentation)
        if len(X_aug) > 0:
            X_aug = np.concatenate((X_aug, batch_X), axis=0)
            train_y_aug = np.concatenate((train_y_aug, batch_y), axis=0)
            train_I_aug = np.concatenate((train_I_aug, batch_I), axis=0)
        else:
            # Nothing accumulated so far: this batch becomes the start of the result.
            X_aug, train_y_aug, train_I_aug = batch_X, batch_y, batch_I
    return X_aug, train_y_aug, train_I_aug

def minoritySampling(X, y):
    """Under-sample ``X`` with ``RandomUnderSampler(sampling_strategy='not minority')``.

    ``X`` must be 3-D. It is flattened to two dimensions for the sampler and the
    resampled rows are reshaped back to three dimensions afterwards.

    Returns:
        ``(X_resampled, y_resampled)`` as produced by ``fit_resample``, with
        ``X_resampled`` restored to 3-D. The sampler uses a fixed ``random_state=1``.
    """
    sampler = RandomUnderSampler(sampling_strategy='not minority', random_state=1)
    n_series, n_steps, n_dims = X.shape
    # The sampler is given one flat feature vector per series.
    flat_X, y = sampler.fit_resample(X.reshape([n_series, n_steps * n_dims]), y)
    return flat_X.reshape([flat_X.shape[0], n_steps, n_dims]), y


# Class id -> display name; ids are the positions in this tuple (0..4).
activities_map = dict(enumerate((
    "Sedentary",
    "Walking",
    "Running",
    "Downstairs",
    "Upstairs",
)))
def load_data(k):
    """Load the HAR-UML20 split whose test subjects are ``har_ind_IDS[k:k + N_TESTS]``.

    All remaining ids form the training set; no validation ids are requested.

    Labels are remapped in place on the arrays returned by ``read_har_dataset``:
    original labels 0, 1 and 2 all become 0, and every label ``i`` from 3 up to
    ``len(har_activities) - 1`` becomes ``i - 2``.

    Args:
        k: start index into ``har_ind_IDS`` of the held-out test ids.

    Returns:
        ``(X_train, y_train, I_train, X_test, y_test, I_test)``; the ``I_*`` arrays have
        an extra axis inserted at position 1.
    """
    all_ids = har_ind_IDS
    test_ids = all_ids[k: k + N_TESTS]
    train_ids = all_ids[:k] + all_ids[k + N_TESTS:]

    data = read_har_dataset('./datasets/HAR-UML20/', train_ids=train_ids, test_ids=test_ids, val_ids=[], cache=True)
    ids_train, X_train, y_train, I_train, train_kcal_MET = data['train']
    # ids_val, X_val, y_val, I_val, val_kcal_MET = data['val']
    ids_test, X_test, y_test, I_test, test_kcal_MET = data['test']

    for labels in (y_train, y_test):
        # Merge original labels 1 and 2 into 0 (label 0 already has the target value).
        labels[(labels == 1) | (labels == 2)] = 0
        # Shift the remaining labels down so the ids stay contiguous. Ascending order is
        # required: each target value i - 2 has already been vacated by an earlier step.
        for i in range(3, len(har_activities)):
            labels[labels == i] = i - 2

    # NOTE(review): these results are not used below. The calls are kept because any side
    # effects of idsStd are not visible from this notebook; drop them if it is pure.
    ind_std_train = idsStd(train_ids , X_train, I_train)
    ind_std_test = idsStd(test_ids, X_test, I_test)

    I_train = np.expand_dims(I_train, axis=1)
    I_test = np.expand_dims(I_test, axis=1)

    # The sampler is fed row indices as "labels", so what it returns identifies exactly
    # which rows of X survived and in which order. y and I are indexed with those returned
    # indices (rather than the pre-sampling range) so they always stay aligned with X.
    # NOTE(review): since every index is unique, each "class" has one member; if
    # class balancing was the intent, the sampler presumably should receive y instead.
    X_train, kept_train = minoritySampling(X_train, np.arange(len(y_train)))
    X_test, kept_test = minoritySampling(X_test, np.arange(len(y_test)))

    y_train = y_train[kept_train]
    I_train = I_train[kept_train]
    y_test = y_test[kept_test]
    I_test = I_test[kept_test]

    return X_train, y_train, I_train, X_test, y_test, I_test

In [3]:
# Load the split whose held-out test ids start at index 0 of har_ind_IDS.
(X_train, y_train, I_train,
 X_test, y_test, I_test) = load_data(k=0)

Train IDS: [2, 3, 4, 5, 6, 7, 8, 9]
Test IDS: [0, 1]
Val IDS: []
Loading dataset from cache...


In [4]:
# Channels kept for this experiment: the three accelerometer and three gyrometer axes.
dimensions = [
    'Accelerometer-X', 'Accelerometer-Y', 'Accelerometer-Z',
    'Gyrometer-X', 'Gyrometer-Y', 'Gyrometer-Z',
]

# NOTE(review): X_train / X_test are overwritten with their filtered versions, so this
# cell presumably cannot be re-run without first re-running the load cell above.
X_train, X_test = (
    filter_dimensions(split, har_dimensions, dimensions)
    for split in (X_train, X_test)
)

# Wrap each split with its labels, ids and channel names.
mts_train = TSerie(
    X=X_train,
    y=y_train,
    I=I_train,
    dimensions=dimensions,
    classLabels=har_activities_map,
)
mts_test = TSerie(
    X=X_test,
    y=y_test,
    I=I_test,
    dimensions=dimensions,
    classLabels=har_activities_map,
)


In [5]:
import os

# Storage that receives the four splits below. delete() followed by load() presumably
# resets it to an empty state before anything is added -- confirm against MTSStorage.
storage = MTSStorage('har_augmentations_cont')
storage.delete()
storage.load()

mode = 0 # 0: original - 1: minMax - 2: Centered - 3: Zscore

# Work on the series stored in X_o.
mts_train.X = mts_train.X_o
mts_test.X = mts_test.X_o

# if mode == 1 or mode == 2 or mode == 3:
        # minl, maxl = mts_train.minMaxNormalization()
        # mts_test.minMaxNormalization(minl=minl, maxl=maxl)

# Training data: one pass of every augmentation, without the original series.
train_aug, train_y_aug, train_I_aug = augment(
    mts_train.X, mts_train.y, mts_train.I,
    augmentations=['rotation', 'permutation', 'time_warp', 'magnitude_warp', 'scaling', 'jitter'],
    repeats_per_augmentation=1,
    include_original=False,
)

# Test data: six unmodified copies, one for each augmentation used on the training side.
# augmentations = ['rotation', 'permutation', 'time_warp', 'magnitude_warp', 'scaling', 'jitter'],
test_aug, test_y_aug, test_I_aug = augment(
    mts_test.X, mts_test.y, mts_test.I,
    augmentations=['none'] * 6,
    repeats_per_augmentation=1,
    include_original=False,
)

mts_aug_train = TSerie(X=train_aug, y=train_y_aug, I=train_I_aug, dimensions=dimensions, classLabels=har_activities_map)
mts_aug_test = TSerie(X=test_aug, y=test_y_aug, I=test_I_aug, dimensions=dimensions, classLabels=har_activities_map)

# Optional normalisation, applied to all four series in the same order as before.
all_series = (mts_train, mts_test, mts_aug_train, mts_aug_test)
if mode == 2:
    for series in all_series:
        series.center()
elif mode == 3:
    for series in all_series:
        series.znorm()

# Train the contrastive encoder on the original training series only, then encode the rest.
reducer = FeatureExtractor(epochs=EPOCHS, loss_metric='SupConLoss')
mts_train.features = reducer.fit_transform(mts_train.X, mts_train.y)
mts_test.features = reducer.transform(mts_test.X)
mts_aug_train.features = reducer.transform(mts_aug_train.X)
mts_aug_test.features = reducer.transform(mts_aug_test.X)

# 2-D UMAP projection fitted on the training features (with labels) and reused for the others.
umapRed = umap.UMAP(n_components=2)
train_coords = umapRed.fit_transform(mts_train.features, mts_train.y)
test_coords = umapRed.transform(mts_test.features)
train_aug_coords = umapRed.transform(mts_aug_train.features)
test_aug_coords = umapRed.transform(mts_aug_test.features)

# Every split is stored with identical settings; only name, series and coordinates differ.
stored_splits = [
    ('train_{}_{}', mts_train, train_coords),
    ('test_{}_{}', mts_test, test_coords),
    ('train_augmented_{}_{}', mts_aug_train, train_aug_coords),
    ('test_augmented_{}_{}', mts_aug_test, test_aug_coords),
]
for name_template, series, coords in stored_splits:
    storage.add_mts(
        name_template.format('all', mode),
        series.X,
        dimensions,
        coords={'umap': coords},
        labels={'activities': series.y},
        labelsNames={'activities': activities_map},
        sampling=True,
        n_samples=400,
    )

storage.save()


  warp = np.concatenate(np.random.permutation(splits)).ravel()


Subsequence length: 180
Using contrastive metric!!!!!!!!!!!!
Epoch[1] Train loss    avg: 3.8824811199594875
(2100, 200, 6)
(37800, 200, 6)
(12600, 200, 6)


OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


mts shape: N: 6300 -  T: 200 - D: 6
mts shape: N: 2100 -  T: 200 - D: 6
mts shape: N: 37800 -  T: 200 - D: 6
mts shape: N: 12600 -  T: 200 - D: 6
