# Plot datasets

This notebook visualizes data from different datasets (KuHar, MotionSense, UCI-HAR, WISDM, and ExtraSensory). Gravity has been removed from the data with a high-pass filter.

1. Apply DFT over dataset windows
2. Plot UMAP and t-SNE

In [1]:
from pathlib import Path  # For defining dataset Paths
import sys
# Relative path from this notebook to the repository root. It is the base of
# every dataset path used below and is also added to the import search path
# (presumably so the project packages can be imported -- confirm).
Root = "../../../.."
# Reuse the constant instead of repeating the literal, and guard the append so
# re-running this cell does not keep adding duplicate entries to sys.path.
if Root not in sys.path:
    sys.path.append(Root)

In [2]:
import time
import warnings
# Install a global "ignore" filter so warnings do not clutter the cell outputs.
# NOTE(review): this also hides deprecation/convergence warnings from every
# library used below -- consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from umap import UMAP
#from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE

import plotly.express as px
import plotly.graph_objects as go
import itertools
from itertools import combinations

# Librep imports
from librep.utils.dataset import PandasDatasetsIO          # For quick load train, test and validation CSVs
from librep.datasets.har.loaders import (
    KuHar_BalancedView20HzMotionSenseEquivalent,
    MotionSense_BalancedView20HZ,
    ExtraSensorySense_UnbalancedView20HZ,
    CHARM_BalancedView20Hz,
    WISDM_UnbalancedView20Hz,
    UCIHAR_UnbalancedView20Hz
)
from librep.datasets.multimodal import PandasMultiModalDataset, TransformMultiModalDataset, WindowedTransform
from librep.transforms.fft import FFT
from librep.transforms. stats import StatsTransform
from librep.utils.workflow import SimpleTrainEvalWorkflow, MultiRunWorkflow
from librep.estimators import RandomForestClassifier, SVC, KNeighborsClassifier
from librep.metrics.report import ClassificationReport
from librep.transforms.resampler import SimpleResampler

2022-11-15 02:56:32.104924: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-11-15 02:56:32.104944: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# Datasets to train the manifold

In [4]:
# Standard activity code -> human-readable activity name. The code is simply the
# position of the name in this list (0 = "sit", ..., 6 = "stair up and down").
labels_activity = dict(enumerate([
    "sit",
    "stand",
    "walk",
    "stair up",
    "stair down",
    "run",
    "stair up and down",
]))

# The activity codes themselves, in ascending order.
classes = [*labels_activity]
print(labels_activity)

# Dataset tag -> display name (currently the identity mapping).
labels_dataset = {
    name: name
    for name in ('KuHar', 'MotionSense', 'ExtraSensory', 'WISDM', 'UCI')
}

{0: 'sit', 1: 'stand', 2: 'walk', 3: 'stair up', 4: 'stair down', 5: 'run', 6: 'stair up and down'}


## Load KuHar

In [5]:
# KuHar view (20 Hz, MotionSense-equivalent). `download=False` is passed, so the
# files are presumably expected to already exist under Root -- confirm.
loader = KuHar_BalancedView20HzMotionSenseEquivalent(
    Root + "/data/views/KuHar/balanced_20Hz_motionsense_equivalent-v1",
    download=False,
)
# Two datasets come back: train(+validation, as requested by the flag) and test,
# both labelled with the "standard activity code" column.
train_val_kuhar, test_kuhar = loader.load(
    concat_train_validation=True,
    label="standard activity code",
)
train_val_kuhar, test_kuhar

(PandasMultiModalDataset: samples=3114, features=360, no. window=6, label_columns='standard activity code',
 PandasMultiModalDataset: samples=246, features=360, no. window=6, label_columns='standard activity code')

In [6]:
# Preview the KuHar train+validation frame without its last 10 columns; the
# output shows that what remains are the accel-*/gyro-* sample columns.
train_val_kuhar.data.iloc[:,:-10]

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-50,gyro-z-51,gyro-z-52,gyro-z-53,gyro-z-54,gyro-z-55,gyro-z-56,gyro-z-57,gyro-z-58,gyro-z-59
0,-1.042828,7.069610,3.455672,0.858443,-13.806092,-5.950121,-0.608514,2.916913,4.881929,8.610287,...,-0.472805,0.343294,0.260441,-0.082865,0.299246,0.215924,0.322555,0.508903,-0.113054,-0.380190
1,-2.437900,-2.628449,-2.166861,-3.854701,0.527599,11.567884,6.840191,-2.435600,-3.324599,-0.796095,...,0.267483,-0.713088,0.337778,-0.016247,-0.170998,0.067450,-0.153206,-0.094137,-0.238300,0.377260
2,-0.664337,0.483465,3.090238,3.559119,3.888318,1.113559,3.656104,4.866379,0.073993,-14.136122,...,0.371043,0.478258,-0.547497,-0.010739,0.005712,-0.275884,0.048012,0.288451,0.118925,0.288595
3,-2.269775,-3.209509,-2.508545,-2.923223,-1.269270,7.864898,7.683389,2.325797,-3.086381,-3.675483,...,0.011855,0.110627,0.131676,0.085040,0.143971,0.129493,0.189543,0.057577,-0.121568,-0.037776
4,-0.487718,0.226401,0.609332,1.490074,-0.295221,-1.111420,-0.627382,0.028419,-0.823724,-0.469242,...,-0.224966,0.039142,-0.232840,0.027406,-0.153478,-0.047371,0.020793,0.186724,0.132858,0.318369
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3109,-16.047706,-10.232285,-1.393564,22.982999,13.706479,7.789294,-4.720361,-15.633462,-8.674399,-11.634360,...,-0.110008,1.042713,-1.098746,-0.203113,0.062445,0.489244,-0.153889,-0.825626,1.408920,-0.048886
3110,1.911780,-6.910686,1.625745,12.975344,12.066726,5.300426,-10.848059,-14.261385,-3.988711,-4.235381,...,-0.255121,0.348626,1.796430,-0.504031,-0.271142,-0.141103,-0.355877,0.450288,-0.041400,-0.955341
3111,3.927244,19.311380,0.403673,-14.236972,-10.454149,-4.218160,-4.307016,17.655546,12.570647,7.031853,...,-0.825732,-0.816069,-0.332729,-0.034519,0.551195,-0.635052,-1.169866,0.926100,1.036354,-0.090644
3112,-7.208894,-11.734247,13.677432,21.571222,3.191808,2.215446,-12.397302,-13.905091,-8.743007,11.580156,...,0.261531,1.145154,-0.355114,1.075646,-1.983060,-1.722173,2.191585,-0.139135,0.742427,0.765420


In [7]:
# KuHar train+validation split: feature frame, activity codes, a per-row dataset
# tag and a combined "<activity> - <dataset>" label.
train_kuhar_X = train_val_kuhar.data.iloc[:, :-10]  # all but the last 10 columns
train_kuhar_Y = train_val_kuhar.data['standard activity code']
tam = len(train_kuhar_Y)
train_kuhar_id_dataset = np.array(tam * ['KuHar'])
train_kuhar_label = np.array([
    f"{labels_activity[code]} - {labels_dataset[source]}"
    for code, source in zip(train_kuhar_Y, train_kuhar_id_dataset)
])

In [8]:
# KuHar test split: feature frame, activity codes, a per-row dataset tag and a
# combined "<activity> - <dataset>" label.
test_kuhar_X = test_kuhar.data.iloc[:, :-10]  # all but the last 10 columns
test_kuhar_Y = test_kuhar.data['standard activity code']
tam = len(test_kuhar_Y)
test_kuhar_id_dataset = np.array(tam * ['KuHar'])
test_kuhar_label = np.array([
    f"{labels_activity[code]} - {labels_dataset[source]}"
    for code, source in zip(test_kuhar_Y, test_kuhar_id_dataset)
])

## Load MotionSense

In [9]:
# MotionSense view (20 Hz, filtered), read from the local directory under Root.
loader = MotionSense_BalancedView20HZ(
    Root + "/data/views/MotionSense/balanced_20Hz_filtered",
    download=False,
)
# Two datasets come back: train(+validation, as requested by the flag) and test.
train_val_motion, test_motion = loader.load(
    concat_train_validation=True,
    label="standard activity code",
)
train_val_motion, test_motion

(PandasMultiModalDataset: samples=4092, features=360, no. window=6, label_columns='standard activity code',
 PandasMultiModalDataset: samples=936, features=360, no. window=6, label_columns='standard activity code')

In [10]:
# Preview columns 540 up to (excluding) the last 6 of the MotionSense frame; the
# output shows these are the accel-x/y/z sample columns.
train_val_motion.data.iloc[:,540:-6]

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,accel-z-50,accel-z-51,accel-z-52,accel-z-53,accel-z-54,accel-z-55,accel-z-56,accel-z-57,accel-z-58,accel-z-59
0,1.013198,1.213734,2.009169,-2.182658,-2.550637,-2.970507,-0.217632,0.999594,-1.246883,-1.457703,...,0.234732,0.376987,-2.081169,1.446188,-0.180460,-0.182989,0.666390,4.116260,-3.067699,-0.439384
1,3.468220,1.409920,-3.214915,-2.867658,-2.020174,1.271113,2.866810,3.731912,1.499058,-2.534589,...,-7.070344,-8.429237,-2.872219,1.445719,1.175976,1.904625,2.944915,2.950987,1.207228,0.922734
2,1.421498,0.017303,5.173924,-2.088614,-5.692213,-1.351758,3.589653,-0.080849,-0.191445,-0.111856,...,0.407130,1.247785,2.920146,1.520683,-1.583850,-1.367630,0.809506,1.110136,0.587751,5.467525
3,-0.497819,1.410787,-1.164364,0.479037,3.013279,-1.456027,1.731780,0.791390,1.319055,-0.111974,...,0.264238,0.378184,0.725342,0.523087,0.079355,0.776904,-1.858955,2.016718,0.997634,-1.641303
4,1.050593,-1.057788,-3.268201,-3.543602,-1.746914,3.277555,1.059173,2.462798,4.433639,-0.412389,...,4.262298,2.839317,-1.736072,5.396047,-0.261701,1.531596,-2.827182,3.128759,0.924363,-0.535934
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4087,-17.668994,-15.796567,13.450008,18.471052,-4.040552,1.667676,-8.170134,-11.118836,13.521934,15.424149,...,-6.841650,6.033155,3.450336,-11.016209,10.950529,2.824244,0.281256,-5.484301,1.684478,-5.569913
4088,-0.393061,27.099271,8.426433,3.189037,-1.072904,-7.403098,-13.889129,-3.711910,26.909272,0.370784,...,3.142812,-0.932790,2.929048,0.187074,0.553981,-0.055033,-0.426271,0.132747,0.651569,-1.746332
4089,0.662944,6.660645,4.749940,1.016570,1.108675,4.230326,1.817753,-13.157193,4.937717,5.632297,...,-9.620893,-2.601771,-1.346564,-6.743621,-0.734221,-7.479054,9.195418,4.792734,17.335256,-16.206342
4090,7.479763,5.519249,4.721840,-7.289376,-12.255056,10.381261,-4.060757,-1.835244,-1.076286,1.147647,...,0.005425,0.441949,14.997873,-5.182443,-6.984730,10.626246,7.581343,9.989122,-17.096584,-10.032150


In [11]:
# Preview columns 360..539 of the MotionSense frame; the output shows these are
# the gyro-x/y/z sample columns.
train_val_motion.data.iloc[:,360:540]

Unnamed: 0,gyro-x-0,gyro-x-1,gyro-x-2,gyro-x-3,gyro-x-4,gyro-x-5,gyro-x-6,gyro-x-7,gyro-x-8,gyro-x-9,...,gyro-z-50,gyro-z-51,gyro-z-52,gyro-z-53,gyro-z-54,gyro-z-55,gyro-z-56,gyro-z-57,gyro-z-58,gyro-z-59
0,2.416168,1.425987,0.432724,-0.442331,-1.008685,-0.968557,-1.727375,-0.785960,0.301713,-0.265985,...,-0.000940,0.192489,-0.019042,-0.486224,-0.136334,0.208895,0.523736,-0.763385,-0.543151,0.782424
1,-0.392869,0.680290,2.331535,2.825752,3.159755,0.603375,-1.526515,-1.490733,-2.338737,-2.199652,...,0.040037,-0.545322,-1.036972,-1.589284,-0.734425,-0.297475,-0.168993,1.026077,-0.110496,-0.984670
2,3.877532,1.026625,0.482123,0.281094,0.799710,-0.169250,-0.605498,-1.113426,-2.861108,-2.588045,...,0.680082,1.321175,1.452527,-0.171931,-0.036254,-0.526024,0.032629,-0.227881,-0.063752,0.299167
3,0.024890,0.100237,-0.765744,-0.135315,0.360571,1.073031,-0.325650,-0.951654,-0.412893,0.330772,...,0.462495,0.408306,0.067837,-0.175092,-0.132694,-0.127267,-0.038517,-0.349052,0.243316,0.035424
4,0.676135,-1.723360,-0.990657,-0.941559,0.313503,0.238453,-0.321289,-0.665136,0.447474,0.180847,...,-0.023703,0.183257,0.743221,0.550107,-0.560860,-0.092277,0.532015,0.227761,0.579212,0.218732
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4087,-0.247605,-2.099500,4.699621,-2.269430,-5.143161,1.582621,0.336849,-0.324414,2.572621,4.087930,...,0.107509,-1.943026,-0.868964,-2.458795,2.723768,-2.128262,-2.784944,0.044596,-0.869363,1.245218
4088,2.128638,1.996030,-3.607358,-3.396977,1.379970,-0.079131,0.628603,1.622539,2.050493,3.398936,...,-0.360710,-0.584986,-0.425965,-0.163156,0.446713,1.328129,1.617562,1.924690,2.626158,2.448484
4089,1.024417,-1.225617,-1.827302,-0.416473,1.235665,2.614283,2.736754,1.895785,0.936654,0.496502,...,-0.146796,-0.671290,0.084034,0.031287,0.393411,1.333125,1.798631,1.044272,0.613779,-1.959926
4090,1.159540,1.912327,4.633749,3.953739,4.575195,3.220003,-0.081139,-3.831612,-3.317124,-0.587382,...,1.611007,0.880478,0.544124,-1.220530,0.903206,2.609368,-0.707003,-3.845836,2.029022,-0.999886


In [12]:
# MotionSense train+validation split. The feature frame is assembled as
# accelerometer columns followed by gyroscope columns, matching the column order
# shown for the KuHar features.
train_motion_X = pd.concat(
    [
        train_val_motion.data.iloc[:, 540:-6],   # accel-* columns
        train_val_motion.data.iloc[:, 360:540],  # gyro-* columns
    ],
    axis="columns",
)
train_motion_Y = train_val_motion.data['standard activity code']
tam = len(train_motion_Y)
train_motion_id_dataset = np.array(tam * ['MotionSense'])
train_motion_label = np.array([
    f"{labels_activity[code]} - {labels_dataset[source]}"
    for code, source in zip(train_motion_Y, train_motion_id_dataset)
])

In [13]:
# MotionSense test split. The feature frame is assembled as accelerometer
# columns followed by gyroscope columns, as for the train split.
test_motion_X = pd.concat(
    [
        test_motion.data.iloc[:, 540:-6],   # accel-* columns
        test_motion.data.iloc[:, 360:540],  # gyro-* columns
    ],
    axis="columns",
)
test_motion_Y = test_motion.data['standard activity code']
tam = len(test_motion_Y)
test_motion_id_dataset = np.array(tam * ['MotionSense'])
test_motion_label = np.array([
    f"{labels_activity[code]} - {labels_dataset[source]}"
    for code, source in zip(test_motion_Y, test_motion_id_dataset)
])

In [14]:
# Inspect the assembled MotionSense test features (accel-* columns first, then
# gyro-* columns).
test_motion_X

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-50,gyro-z-51,gyro-z-52,gyro-z-53,gyro-z-54,gyro-z-55,gyro-z-56,gyro-z-57,gyro-z-58,gyro-z-59
0,2.392525,1.010849,-1.837046,2.382687,0.560566,1.558890,-0.536123,1.448614,-1.111429,1.088511,...,-0.063269,-0.832593,-1.022303,-1.465600,-0.889994,0.092372,0.938172,1.395964,1.854584,1.528382
1,-0.764337,-1.559951,0.768210,-0.889451,-0.986200,1.002285,1.308556,0.608890,-0.955361,0.035943,...,0.530857,-0.629762,0.447728,0.356770,-0.410738,0.255747,-0.354545,-0.178889,0.279129,-0.123669
2,2.276473,3.670672,-1.032184,-7.535935,-2.765599,3.404802,0.155799,-0.769830,-3.665570,2.033248,...,1.809926,1.371561,1.526474,-1.776912,-2.808992,-1.648125,1.527892,0.108182,0.330011,0.150026
3,-0.959420,1.098418,2.567931,1.799847,-0.305560,-2.365533,-1.567886,-2.357492,0.074104,1.559147,...,-0.609650,0.039361,0.109677,-0.138885,0.461325,1.073265,2.949285,1.302511,-0.092762,-0.591284
4,-1.875666,-0.105881,3.336611,1.758897,-0.433328,3.734179,-0.264658,-0.722431,-1.178098,-2.066781,...,-0.914891,0.572047,2.059948,2.356221,1.231093,0.896176,-0.887437,-0.932334,-0.212734,0.485264
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
931,2.004535,2.609273,-0.777566,5.790095,4.359054,-4.121035,0.958092,-0.469032,4.947858,2.126789,...,3.305324,2.149209,1.545474,-1.451223,-1.679996,0.619296,-0.576180,-3.000535,-2.343239,-2.079869
932,0.791806,1.613315,6.142404,-1.948661,0.721081,-7.445180,7.764421,-3.382841,-0.655162,-9.244234,...,-0.513408,0.059170,-0.869835,1.418709,-0.333015,2.028773,1.078272,0.475017,-0.198779,-1.514206
933,0.025046,0.818777,-9.129934,1.533859,3.172891,-0.118608,1.699464,2.342641,-5.414453,-4.136918,...,-0.248408,1.259049,-0.569404,-0.527184,-1.586541,-1.975594,-0.571789,-0.385179,0.536645,0.489831
934,1.136342,0.959323,-0.002209,0.267273,-4.759702,2.358702,0.207890,-2.672748,-3.667548,-2.131495,...,-0.667787,0.185832,-0.737232,2.638622,1.365777,-0.041777,1.050452,-0.329185,1.534976,0.465610


## Load UCI-HAR

In [15]:
# UCI-HAR view (20 Hz, filtered), read from the local directory under Root.
# NOTE(review): the loader class is named "Unbalanced" while the directory is
# "balanced_20Hz_filtered" -- confirm this pairing is intended.
loader = UCIHAR_UnbalancedView20Hz(
    Root + "/data/views/UCI-HAR/balanced_20Hz_filtered",
    download=False,
)
# Two datasets come back: train(+validation, as requested by the flag) and test.
train_val_uci, test_uci = loader.load(
    concat_train_validation=True,
    label="standard activity code",
)
train_val_uci, test_uci

(PandasMultiModalDataset: samples=2169, features=360, no. window=6, label_columns='standard activity code',
 PandasMultiModalDataset: samples=671, features=360, no. window=6, label_columns='standard activity code')

In [16]:
# Preview the UCI-HAR train+validation frame without its last 3 columns; the
# output shows that what remains are the accel-*/gyro-* sample columns.
train_val_uci.data.iloc[:,:-3]

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-50,gyro-z-51,gyro-z-52,gyro-z-53,gyro-z-54,gyro-z-55,gyro-z-56,gyro-z-57,gyro-z-58,gyro-z-59
0,0.156566,-1.424124,-2.905373,-1.449460,-1.068465,-1.123434,-1.195326,-0.833242,1.702248,2.030561,...,0.080595,-0.112580,0.156026,0.313386,0.000744,-0.047502,-0.569485,-0.652966,0.050269,0.477185
1,-0.537229,-0.186480,1.051567,0.605424,-0.185871,3.032825,4.907514,4.161293,1.228430,-0.243705,...,0.307554,0.768617,0.822427,0.841789,0.214277,-0.155207,-0.054555,0.275682,0.461437,-0.171866
2,-2.151572,-2.340571,0.602388,2.951933,1.112684,3.037523,-0.053386,-1.381455,-0.637672,-2.754747,...,0.239563,0.095893,0.056733,-0.278544,-0.050879,0.363474,0.356592,0.452005,0.304909,-0.237935
3,-0.121271,-1.496275,-1.492793,-1.547828,-0.905671,-0.681165,-0.371455,0.291828,1.207175,1.622875,...,-0.232457,-0.074255,0.416932,0.290052,0.337902,0.202508,-0.316462,-0.537158,-0.476181,-0.320307
4,1.557988,1.860748,1.597169,2.202641,3.586844,1.869756,-1.940422,-3.407308,-3.873758,-2.844197,...,-0.143105,0.161871,0.269892,0.205870,-0.161104,-0.067245,-0.006595,-0.044617,-0.235705,0.503788
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2164,-0.026208,-0.024057,-0.196275,-0.139232,-0.018196,-0.031718,-0.064043,-0.024471,-0.033220,0.005276,...,-0.008467,-0.024507,-0.035031,-0.013156,0.002951,0.015558,0.016599,0.009087,0.002833,0.004040
2165,-0.125996,0.076052,-0.152721,-0.113567,-0.076924,0.261904,0.006202,0.149830,-0.070275,-0.137767,...,-0.117224,-0.145218,-0.158068,-0.169206,-0.138596,-0.130194,-0.112169,-0.091970,-0.074266,-0.100618
2166,-0.015568,-0.002566,-0.021435,-0.005103,-0.019504,0.006622,-0.036400,-0.016476,0.013177,0.025060,...,-0.000391,0.002910,0.002480,-0.002914,-0.003599,-0.005550,-0.008568,-0.012022,-0.003929,0.008076
2167,0.002989,-0.074446,-0.064812,-0.093166,-0.091162,-0.102577,-0.128314,-0.114664,-0.079710,-0.081442,...,-0.011717,-0.008635,0.000560,-0.016810,-0.021231,-0.084481,0.003491,0.123360,-0.004188,-0.140925


In [17]:
# UCI-HAR train+validation split: feature frame, activity codes, a per-row
# dataset tag and a combined "<activity> - <dataset>" label.
train_uci_X = train_val_uci.data.iloc[:, :-3]  # all but the last 3 columns
train_uci_Y = train_val_uci.data['standard activity code']
tam = len(train_uci_Y)
train_uci_id_dataset = np.array(tam * ['UCI'])
train_uci_label = np.array([
    f"{labels_activity[code]} - {labels_dataset[source]}"
    for code, source in zip(train_uci_Y, train_uci_id_dataset)
])

In [18]:
# UCI-HAR test split: feature frame, activity codes, a per-row dataset tag and a
# combined "<activity> - <dataset>" label.
test_uci_X = test_uci.data.iloc[:, :-3]  # all but the last 3 columns
test_uci_Y = test_uci.data['standard activity code']
tam = len(test_uci_Y)
test_uci_id_dataset = np.array(tam * ['UCI'])
test_uci_label = np.array([
    f"{labels_activity[code]} - {labels_dataset[source]}"
    for code, source in zip(test_uci_Y, test_uci_id_dataset)
])

## Load WISDM

In [19]:
# WISDM view (20 Hz, filtered), read from the local directory under Root.
# Use the WISDM-specific loader class imported at the top of the notebook for
# the WISDM directory, rather than the UCI-HAR loader class.
# NOTE(review): the loader class is named "Unbalanced" while the directory is
# "balanced_20Hz_filtered" -- confirm this pairing is intended.
loader = WISDM_UnbalancedView20Hz(
    Root + "/data/views/WISDM/balanced_20Hz_filtered",
    download=False,
)
# Two datasets come back: train(+validation, as requested by the flag) and test.
train_val_wisdm, test_wisdm = loader.load(
    concat_train_validation=True,
    label="standard activity code",
)
train_val_wisdm, test_wisdm

(PandasMultiModalDataset: samples=12305, features=360, no. window=6, label_columns='standard activity code',
 PandasMultiModalDataset: samples=2891, features=360, no. window=6, label_columns='standard activity code')

In [20]:
# List the distinct activity codes present in the WISDM train+validation split
# (the output shows only a subset of the codes in `labels_activity`).
train_val_wisdm.data['standard activity code'].unique()

array([2, 5, 6, 0, 1])

In [21]:
# Preview the WISDM train+validation frame without its last 3 columns; the
# output shows that what remains are the accel-*/gyro-* sample columns.
train_val_wisdm.data.iloc[:,:-3]

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-50,gyro-z-51,gyro-z-52,gyro-z-53,gyro-z-54,gyro-z-55,gyro-z-56,gyro-z-57,gyro-z-58,gyro-z-59
0,-0.074538,-0.580834,2.338327,0.883443,-1.666145,-3.929086,-0.039061,2.076679,0.325360,-1.234530,...,-0.400735,-0.302704,-0.193205,0.021654,-0.008257,-0.392786,-0.329143,-0.074698,-0.027220,0.314905
1,1.221374,1.089056,1.585969,2.236866,-0.839452,-3.612999,-0.900370,1.857142,1.516450,-0.299742,...,-0.425415,0.143507,-0.229825,0.345166,0.294343,-0.417068,-0.172208,0.067521,0.406141,0.615971
2,1.496420,0.450270,0.064726,1.625363,-0.522916,-3.635099,2.741516,2.868284,0.081997,-1.445224,...,-0.369517,-0.031353,-0.265816,0.341863,0.039910,-0.253467,-0.607339,-0.102324,0.100644,0.608968
3,2.913555,-0.473575,1.045040,1.709677,-1.312416,-4.159398,-0.479272,2.969279,1.993486,-1.367073,...,-0.667743,-0.639873,-0.394629,0.408067,0.268658,-0.341150,-0.359142,-0.065309,-0.015974,0.365056
4,1.110426,0.817269,2.589152,3.346747,-2.516123,-5.412037,-1.846764,0.277373,2.061178,1.953869,...,-0.832488,0.009191,0.124568,-0.459501,0.421399,-0.182202,-0.406128,-0.590792,-0.174049,0.307581
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12300,0.005137,0.001071,-0.029377,0.052400,-0.027917,0.050822,0.056932,0.064868,-0.013144,-0.015911,...,0.009147,0.005244,0.009650,0.013886,0.011750,0.011679,0.010992,0.017275,0.022407,0.022379
12301,0.000525,-0.057251,-0.023739,0.020029,-0.026601,0.033515,0.008574,-0.014161,0.016582,0.028816,...,0.023069,0.020255,0.016610,0.016003,0.017558,0.014994,0.015921,0.006687,-0.006651,-0.014813
12302,0.025295,-0.001123,-0.068839,-0.051851,-0.016945,-0.054127,-0.033314,-0.046895,-0.031655,0.016124,...,-0.000458,-0.002151,0.000580,0.001983,0.004424,0.004452,0.003483,0.004856,0.002237,0.002204
12303,0.000131,-0.001482,0.021021,0.011645,0.006958,-0.006430,-0.022609,-0.016584,0.011639,0.013780,...,-0.013222,-0.015457,-0.016099,-0.015105,-0.012509,-0.012392,-0.005629,0.003152,0.003607,0.009492


In [22]:
# WISDM train+validation split: feature frame, activity codes, a per-row dataset
# tag and a combined "<activity> - <dataset>" label.
train_wisdm_X = train_val_wisdm.data.iloc[:, :-3]  # all but the last 3 columns
train_wisdm_Y = train_val_wisdm.data['standard activity code']
tam = len(train_wisdm_Y)
train_wisdm_id_dataset = np.array(tam * ['WISDM'])
train_wisdm_label = np.array([
    f"{labels_activity[code]} - {labels_dataset[source]}"
    for code, source in zip(train_wisdm_Y, train_wisdm_id_dataset)
])

In [23]:
# WISDM test split: feature frame, activity codes, a per-row dataset tag and a
# combined "<activity> - <dataset>" label.
test_wisdm_X = test_wisdm.data.iloc[:, :-3]  # all but the last 3 columns
test_wisdm_Y = test_wisdm.data['standard activity code']
tam = len(test_wisdm_Y)
test_wisdm_id_dataset = np.array(tam * ['WISDM'])
test_wisdm_label = np.array([
    f"{labels_activity[code]} - {labels_dataset[source]}"
    for code, source in zip(test_wisdm_Y, test_wisdm_id_dataset)
])

## Load ExtraSensory

In [24]:
# ExtraSensory view (unbalanced, 20 Hz), read from the local directory under
# Root. The recorded output shows that the test split of this view is empty
# (samples=0), so ExtraSensory only contributes training data.
loader = ExtraSensorySense_UnbalancedView20HZ(
    Root + "/data/views/ExtraSensory/unbalanced_20Hz_train-gnoravity-v1",
    download=False,
)
train_val_extrasensory, test_extrasensory = loader.load(
    concat_train_validation=True,
    label="standard activity code",
)
train_val_extrasensory, test_extrasensory

(PandasMultiModalDataset: samples=25018, features=360, no. window=6, label_columns='standard activity code',
 PandasMultiModalDataset: samples=0, features=360, no. window=6, label_columns='standard activity code')

In [25]:
# Preview the ExtraSensory frame without its first column and its last 8
# columns; the output shows that what remains are the accel-*/gyro-* columns.
train_val_extrasensory.data.iloc[:,1:-8]

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-50,gyro-z-51,gyro-z-52,gyro-z-53,gyro-z-54,gyro-z-55,gyro-z-56,gyro-z-57,gyro-z-58,gyro-z-59
0,0.180988,0.166653,0.220385,0.180553,0.172057,0.193328,0.183921,0.217724,0.165645,0.188824,...,-0.011666,-0.017572,0.009088,0.004291,-0.009793,-0.015108,0.018131,0.003135,-0.001602,0.004429
1,0.213085,0.156338,0.232402,0.167833,0.199010,0.156184,0.279663,0.116217,0.145872,0.130112,...,-0.010852,-0.009537,0.017913,0.015976,-0.031632,0.006427,-0.009688,0.041417,-0.029518,0.000592
2,0.168877,0.179419,0.237898,0.153882,0.156423,0.187238,0.226915,0.190233,0.139495,0.202597,...,-0.038088,0.004451,-0.017425,0.043681,-0.019011,-0.007205,-0.006057,0.021553,0.001779,-0.021204
3,0.217478,0.166874,0.262751,0.156685,0.156064,0.183014,0.266148,0.155898,0.146684,0.195494,...,0.000372,0.006173,-0.012053,0.014293,0.009693,0.023829,-0.035405,0.011010,-0.001287,0.033465
4,0.167281,0.143630,0.183344,0.210805,0.208415,0.129704,0.211796,0.219654,0.162489,0.176982,...,-0.004364,0.000689,-0.000075,-0.002294,-0.003565,0.002105,-0.002895,0.001102,-0.002336,0.002777
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25013,0.120023,0.102523,0.182343,0.192553,0.130164,0.132290,0.170037,0.157019,0.177253,0.155428,...,-0.079363,-0.077876,-0.076816,-0.078549,-0.077157,-0.077373,-0.077243,-0.079048,-0.078291,-0.075607
25014,0.173010,0.145972,0.167845,0.168520,0.136331,0.150179,0.158759,0.166362,0.139440,0.144607,...,-0.076338,-0.074947,-0.076408,-0.079775,-0.077221,-0.076154,-0.075042,-0.076802,-0.076328,-0.077452
25015,0.157320,0.160179,0.144302,0.150981,0.160996,0.156867,0.156504,0.149133,0.151943,0.160496,...,-0.076381,-0.074026,-0.075965,-0.079001,-0.077446,-0.076203,-0.076593,-0.075095,-0.075350,-0.076196
25016,0.163902,0.146540,0.147255,0.150095,0.156038,0.157817,0.141753,0.147672,0.149712,0.149800,...,-0.075934,-0.077367,-0.076920,-0.076633,-0.076951,-0.076240,-0.074629,-0.075549,-0.075781,-0.078337


In [26]:
# ExtraSensory train+validation split: feature frame, activity codes, a per-row
# dataset tag and a combined "<activity> - <dataset>" label.
train_extrasensory_X = train_val_extrasensory.data.iloc[:, 1:-8]  # skip first column and last 8
train_extrasensory_Y = train_val_extrasensory.data['standard activity code']
tam = len(train_extrasensory_Y)
train_extrasensory_id_dataset = np.array(tam * ['ExtraSensory'])
train_extrasensory_label = np.array([
    f"{labels_activity[code]} - {labels_dataset[source]}"
    for code, source in zip(train_extrasensory_Y, train_extrasensory_id_dataset)
])

In [27]:
# ExtraSensory test split: feature frame, activity codes, a per-row dataset tag
# and a combined "<activity> - <dataset>" label.
test_extrasensory_X = test_extrasensory.data.iloc[:, 1:-8]  # skip first column and last 8
test_extrasensory_Y = test_extrasensory.data['standard activity code']

tam = len(test_extrasensory_Y)
# The loaded test split has 0 samples. Building an array from an empty list
# defaults to float64, which would then be mixed with the string arrays of the
# other datasets when everything is concatenated. Give both arrays an explicit
# string dtype so they stay string-typed even when empty.
test_extrasensory_id_dataset = np.full(tam, 'ExtraSensory')
test_extrasensory_label = np.array(
    [
        f"{labels_activity[code]} - {labels_dataset[source]}"
        for code, source in zip(test_extrasensory_Y, test_extrasensory_id_dataset)
    ],
    dtype=str,
)

## Concatenate datasets

In [28]:
# Stack the per-dataset training pieces row-wise, always in the same dataset
# order (KuHar, MotionSense, UCI, WISDM, ExtraSensory) so that rows, codes, tags
# and labels stay aligned by position. Each frame keeps its own row index, so
# index values repeat across datasets.
train_data_X = pd.concat(
    [train_kuhar_X, train_motion_X, train_uci_X, train_wisdm_X, train_extrasensory_X]
)
train_data_y = pd.concat(
    [train_kuhar_Y, train_motion_Y, train_uci_Y, train_wisdm_Y, train_extrasensory_Y]
)
train_data_id_dataset = np.concatenate((
    train_kuhar_id_dataset,
    train_motion_id_dataset,
    train_uci_id_dataset,
    train_wisdm_id_dataset,
    train_extrasensory_id_dataset,
))
train_data_label = np.concatenate((
    train_kuhar_label,
    train_motion_label,
    train_uci_label,
    train_wisdm_label,
    train_extrasensory_label,
))

In [29]:
# Stack the per-dataset test pieces row-wise, always in the same dataset order
# (KuHar, MotionSense, UCI, WISDM, ExtraSensory) so that rows, codes, tags and
# labels stay aligned by position. Each frame keeps its own row index, so index
# values repeat across datasets.
test_data_X = pd.concat(
    [test_kuhar_X, test_motion_X, test_uci_X, test_wisdm_X, test_extrasensory_X]
)
test_data_y = pd.concat(
    [test_kuhar_Y, test_motion_Y, test_uci_Y, test_wisdm_Y, test_extrasensory_Y]
)
test_data_id_dataset = np.concatenate((
    test_kuhar_id_dataset,
    test_motion_id_dataset,
    test_uci_id_dataset,
    test_wisdm_id_dataset,
    test_extrasensory_id_dataset,
))
test_data_label = np.concatenate((
    test_kuhar_label,
    test_motion_label,
    test_uci_label,
    test_wisdm_label,
    test_extrasensory_label,
))

In [30]:
# Sanity check: the per-dataset feature frames should all have the same number
# of columns (the ExtraSensory frame is not included in this check).
train_kuhar_X.shape, train_motion_X.shape, train_uci_X.shape, train_wisdm_X.shape

((3114, 360), (4092, 360), (2169, 360), (12305, 360))

In [31]:
# NOTE: `train_data` is bound to the very same DataFrame as `train_data_X` (no
# copy is made), so the three columns added here also appear on `train_data_X`.
train_data = train_data_X
# Values are attached as plain arrays so they are assigned by position rather
# than aligned on the (repeating) row index.
for _column, _values in {
    'standard activity code': train_data_y,
    'DataSet': train_data_id_dataset,
    'label': train_data_label,
}.items():
    train_data[_column] = np.array(_values)
train_data

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-53,gyro-z-54,gyro-z-55,gyro-z-56,gyro-z-57,gyro-z-58,gyro-z-59,standard activity code,DataSet,label
0,-1.042828,7.069610,3.455672,0.858443,-13.806092,-5.950121,-0.608514,2.916913,4.881929,8.610287,...,-0.082865,0.299246,0.215924,0.322555,0.508903,-0.113054,-0.380190,4,KuHar,stair down - KuHar
1,-2.437900,-2.628449,-2.166861,-3.854701,0.527599,11.567884,6.840191,-2.435600,-3.324599,-0.796095,...,-0.016247,-0.170998,0.067450,-0.153206,-0.094137,-0.238300,0.377260,4,KuHar,stair down - KuHar
2,-0.664337,0.483465,3.090238,3.559119,3.888318,1.113559,3.656104,4.866379,0.073993,-14.136122,...,-0.010739,0.005712,-0.275884,0.048012,0.288451,0.118925,0.288595,4,KuHar,stair down - KuHar
3,-2.269775,-3.209509,-2.508545,-2.923223,-1.269270,7.864898,7.683389,2.325797,-3.086381,-3.675483,...,0.085040,0.143971,0.129493,0.189543,0.057577,-0.121568,-0.037776,4,KuHar,stair down - KuHar
4,-0.487718,0.226401,0.609332,1.490074,-0.295221,-1.111420,-0.627382,0.028419,-0.823724,-0.469242,...,0.027406,-0.153478,-0.047371,0.020793,0.186724,0.132858,0.318369,4,KuHar,stair down - KuHar
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25013,0.120023,0.102523,0.182343,0.192553,0.130164,0.132290,0.170037,0.157019,0.177253,0.155428,...,-0.078549,-0.077157,-0.077373,-0.077243,-0.079048,-0.078291,-0.075607,1,ExtraSensory,stand - ExtraSensory
25014,0.173010,0.145972,0.167845,0.168520,0.136331,0.150179,0.158759,0.166362,0.139440,0.144607,...,-0.079775,-0.077221,-0.076154,-0.075042,-0.076802,-0.076328,-0.077452,1,ExtraSensory,stand - ExtraSensory
25015,0.157320,0.160179,0.144302,0.150981,0.160996,0.156867,0.156504,0.149133,0.151943,0.160496,...,-0.079001,-0.077446,-0.076203,-0.076593,-0.075095,-0.075350,-0.076196,1,ExtraSensory,stand - ExtraSensory
25016,0.163902,0.146540,0.147255,0.150095,0.156038,0.157817,0.141753,0.147672,0.149712,0.149800,...,-0.076633,-0.076951,-0.076240,-0.074629,-0.075549,-0.075781,-0.078337,1,ExtraSensory,stand - ExtraSensory


In [32]:
# NOTE: `test_data` is bound to the very same DataFrame as `test_data_X` (no
# copy is made), so the three columns added here also appear on `test_data_X`.
test_data = test_data_X
# Values are attached as plain arrays so they are assigned by position rather
# than aligned on the (repeating) row index.
for _column, _values in {
    'standard activity code': test_data_y,
    'DataSet': test_data_id_dataset,
    'label': test_data_label,
}.items():
    test_data[_column] = np.array(_values)
test_data

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-53,gyro-z-54,gyro-z-55,gyro-z-56,gyro-z-57,gyro-z-58,gyro-z-59,standard activity code,DataSet,label
0,-1.817547,-0.844842,-3.420241,-0.649158,-8.713420,-13.048061,0.046454,20.817153,-3.396631,-6.481073,...,0.302485,-0.478398,-0.691531,-0.525984,-0.478862,-0.257241,-1.223968,4,KuHar,stair down - KuHar
1,8.300473,-1.277434,5.480062,4.009002,-2.322498,-2.248010,2.682446,-2.853009,-1.585743,0.013649,...,0.793676,-0.399001,0.049087,-0.175016,-0.346793,-0.641922,0.460233,4,KuHar,stair down - KuHar
2,-0.198026,-1.105365,-2.081579,6.499143,5.225471,-0.733483,0.159785,-0.881280,-2.586387,-3.968907,...,-0.509164,-0.618710,-0.038927,0.259971,0.409172,0.391531,-0.156667,4,KuHar,stair down - KuHar
3,-0.137963,-2.363386,2.721758,0.512723,-4.357867,-3.255659,-0.376147,-0.317403,-2.430310,2.866916,...,0.388173,-0.286754,-0.383855,-0.224480,0.234658,0.465154,0.612124,4,KuHar,stair down - KuHar
4,4.258530,0.139909,0.092678,0.209390,-2.416187,-3.454770,-2.104965,-0.627667,3.377186,8.627660,...,-0.275851,-0.043464,-0.000114,-0.099552,-0.041100,0.212678,0.452012,4,KuHar,stair down - KuHar
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2886,0.068292,0.098343,-0.076416,-0.023492,0.086462,0.079259,0.028139,-0.022419,-0.075487,-0.037037,...,-0.062382,-0.032012,-0.139899,-0.004694,0.085539,-0.072366,0.102914,1,WISDM,stand - WISDM
2887,-0.307594,0.071253,0.417730,1.086465,0.310087,-0.287224,-0.160821,0.061372,0.334338,1.086662,...,-0.014175,-0.029097,-0.037724,-0.003698,0.017190,-0.016007,-0.031203,1,WISDM,stand - WISDM
2888,-0.021779,-0.092840,0.099584,0.072275,-0.015085,-0.114407,0.034932,0.093248,0.053823,0.025874,...,-0.000124,0.005841,0.003804,0.000856,-0.006622,-0.010246,0.000292,1,WISDM,stand - WISDM
2889,-0.187424,-0.135107,-0.025969,0.048096,0.004855,-0.018962,-0.038663,-0.024901,0.024983,0.065581,...,-0.003824,0.006343,-0.000464,0.009976,-0.060380,0.009847,0.067267,1,WISDM,stand - WISDM


In [33]:
# Column-name prefixes of the six sensor axes selected as features.
features = ["accel-x", "accel-y", "accel-z", "gyro-x", "gyro-y", "gyro-z"]

# Options shared by the train and test datasets.
_dataset_options = dict(
    feature_prefixes=features,
    label_columns="standard activity code",
    as_array=True,
)

# NOTE: both names are rebound from a DataFrame to a PandasMultiModalDataset, so
# this cell expects the previous two cells to have been (re-)run just before it.
train_data = PandasMultiModalDataset(train_data, **_dataset_options)
test_data = PandasMultiModalDataset(test_data, **_dataset_options)

In [34]:
# FFT transform object (constructed with centered=True).
fft_transform = FFT(centered=True)

# Dataset-level transformer whose only step is the FFT; the resulting windows
# get the "fft." name prefix.
transformer = TransformMultiModalDataset(
    new_window_name_prefix="fft.",
    transforms=[fft_transform],
)

In [35]:
# Datasets that may be used to fit the UMAP manifold.
# (Alternative without ExtraSensory: ['KuHar', 'MotionSense', 'WISDM', 'UCI'].)
data_umap_name = ['KuHar', 'MotionSense', 'ExtraSensory', 'WISDM', 'UCI']

# Datasets that may be used to train the classifiers (same candidates).
data_train_name = list(data_umap_name)

# Datasets used for testing: every candidate except ExtraSensory.
data_test_name = [name for name in data_umap_name if name != 'ExtraSensory']

# Parallel lists filled later: entry i of each list describes experiment i.
combinations_sets = dict(Umap=[], Train=[], Test=[])

In [36]:
from itertools import combinations

# Every non-empty subset of the UMAP candidates, grouped by subset size
# (group 0 holds the 1-element subsets, group 1 the pairs, and so on).
tam = len(data_umap_name)
combination_umap = [
    list(combinations(data_umap_name, size)) for size in range(1, tam + 1)
]
# Extra option placed first among the 1-element subsets: the ('-',) marker.
combination_umap[0].insert(0, ('-',))

# Same size-grouped subsets for the training candidates.
tam = len(data_train_name)
combination_train = [
    list(combinations(data_train_name, size)) for size in range(1, tam + 1)
]

# Test sets are single datasets only: a flat list of 1-tuples.
combination_test = list(combinations(data_test_name, 1))

In [37]:
# Number of candidate sets per role: total element count over every group.
un = sum(len(group) for group in combination_umap)
tr = sum(len(group) for group in combination_train)
ts = sum(len(group) for group in combination_test)

# The full experiment grid is the product of the three counts.
print(f'Total of combinations: {un * tr * ts}',
      f'\nTotal of combinations - umap: {un}',
      f'\nTotal of combinations - train: {tr}',
      f'\nTotal of combinations - test: {ts}')

Total of combinations: 3968 
Total of combinations - umap: 32 
Total of combinations - train: 31 
Total of combinations - test: 4


In [38]:
# Flatten the size-grouped combinations into plain lists of candidates. UMAP and
# train candidates are tuples of dataset names; flattening `combination_test`
# (a list of 1-tuples) yields the bare dataset names.
_umap_sets = [candidate for group in combination_umap for candidate in group]
_train_sets = [candidate for group in combination_train for candidate in group]
_test_sets = [name for group in combination_test for name in group]

# Enumerate the grid in nested-loop order (umap outermost, test innermost) and
# drop every experiment whose training set is ExtraSensory alone.
_kept = [
    (set_umap, set_train, set_test)
    for set_umap, set_train, set_test in itertools.product(
        _umap_sets, _train_sets, _test_sets
    )
    if set_train != ('ExtraSensory',)
]

# Rebuild the three parallel lists from scratch instead of appending to them, so
# that re-running this cell does not duplicate every experiment.
combinations_sets['Umap'] = [set_umap for set_umap, _, _ in _kept]
combinations_sets['Train'] = [set_train for _, set_train, _ in _kept]
combinations_sets['Test'] = [set_test for _, _, set_test in _kept]

In [39]:
# Number of experiments kept after the filtering done in the previous cell; the
# three lists in `combinations_sets` are filled in parallel, so 'Umap' is used
# as the representative length.
n = len(combinations_sets['Umap'])
print(f'Total of combinations without only ExtraSensory as train data: {n}')

Total of combinations without only ExtraSensory as train data: 3840


In [40]:
# Identification columns of the results table.
columns = ['Classifier', 'Umap', 'Train', 'Test']

# Overall metrics: one "<metric> - <stat>" column per combination.
metrics = ['accuracy', 'f1 score (weighted)']
stats = ['mean', 'std']
columns += [f'{metric} - {stat}' for metric in metrics for stat in stats]

# Per-activity metrics: one "<metric> - <stat> - <activity>" column each.
metrics_class = ['f1-score', 'precision', 'recall', 'support']
columns += [
    f'{metric} - {stat} - {activity}'
    for metric in metrics_class
    for stat in stats
    for activity in labels_activity.values()
]

# Column name -> (initially empty) list of values.
df_results = {column: [] for column in columns}

# Per-classifier storage: four parallel lists describing each experiment and
# holding its result.
results_dict = {
    classifier: {'Umap': [], 'Train': [], 'Test': [], 'result': []}
    for classifier in ('RandomForest', 'SVC', 'KNN')
}
        
# metrics_class

In [41]:
def create_data_multimodal(data):
    """Wrap `data` in a PandasMultiModalDataset.

    The dataset selects the accelerometer and gyroscope columns by prefix
    (x, y and z axes of each) and uses "standard activity code" as the label
    column. It is created with ``as_array=True``.
    """
    sensor_prefixes = [
        f"{sensor}-{axis}"
        for sensor in ("accel", "gyro")
        for axis in ("x", "y", "z")
    ]

    return PandasMultiModalDataset(
        data,
        feature_prefixes=sensor_prefixes,
        label_columns="standard activity code",
        as_array=True
    )

In [42]:
def _is_no_umap(umap_name):
    """Return True when `umap_name` is the '-' sentinel, bare or wrapped in a list/tuple."""
    if isinstance(umap_name, str):
        return umap_name == '-'
    return list(umap_name) == ['-']


def evaluate(umap, train, test, evaluators, df, results_dict, umap_name, train_name, test_name, labels_activity, metrics_class, reporter):
    """Run every evaluator on one (umap, train, test) combination and record the results.

    Parameters
    ----------
    umap : dataset or None
        Data used to fit the UMAP reducer. When it is None but `umap_name`
        is not the '-' sentinel, the reducer is fitted on `train`.
    train, test : dataset
        Data passed (after transformation) to each workflow.
    evaluators : dict
        Maps a classifier name to a dict with keys 'experiment' (the workflow)
        and 'num_runs'.
    df : dict
        Column name -> list accumulator; one entry is appended to every column
        per evaluator. Mutated in place.
    results_dict : dict
        Classifier name -> dict of lists ('Umap', 'Train', 'Test', 'result').
        Mutated in place.
    umap_name, train_name, test_name
        Labels stored with the results. `umap_name` equal to '-' or ['-']
        selects the FFT-only baseline (no dimensionality reduction).
    labels_activity : dict
        Activity code -> activity name, used to build per-class column names.
    metrics_class : list
        Per-class metric names read from the classification report.
    reporter
        Unused here; the reporter is already embedded in each workflow.

    Returns
    -------
    tuple
        The same `df` and `results_dict` objects that were passed in.
    """
    fft_transform = FFT(centered=True)

    if _is_no_umap(umap_name):
        # Baseline: classify directly on the FFT features.
        transformer = TransformMultiModalDataset(transforms=[fft_transform],
                                                 new_window_name_prefix="fft.")
    else:
        transformer_fft = TransformMultiModalDataset(transforms=[fft_transform],
                                                     new_window_name_prefix="reduced.")

        # Fit the manifold on the dataset selected for UMAP, not on the
        # training data; fall back to `train` only if no UMAP data was given.
        manifold_data = umap if umap is not None else train
        manifold_fft = transformer_fft(manifold_data)

        reducer = UMAP(n_components=10, random_state=42)
        reducer.fit(manifold_fft[:][0])

        # The reducer is already fitted, hence fit_on=None.
        umap_transform = WindowedTransform(
            transform=reducer, fit_on=None, transform_on="all"
        )

        transformer = TransformMultiModalDataset(transforms=[fft_transform, umap_transform],
                                                 new_window_name_prefix="reduced.")
    train_fft = transformer(train)
    test_fft = transformer(test)

    # Activity codes present in the test data; the same for every evaluator.
    labels = test.data['standard activity code'].unique()

    for estimator, evaluator in evaluators.items():
        multi_run_experiment = MultiRunWorkflow(
            workflow=evaluator['experiment'],
            num_runs=evaluator['num_runs'],
            debug=False)

        results = multi_run_experiment(train_fft, test_fft)
        results_dict[estimator]['Umap'].append(umap_name)
        results_dict[estimator]['Train'].append(train_name)
        results_dict[estimator]['Test'].append(test_name)
        results_dict[estimator]['result'].append(results)

        df['Classifier'].append(estimator)
        df['Umap'].append(umap_name)
        df['Train'].append(train_name)
        df['Test'].append(test_name)

        # Report of each run (first entry of that run's "result" list).
        reports = [run["result"][0] for run in results["runs"]]

        for metric in ('accuracy', 'f1 score (weighted)'):
            values = [report[metric] for report in reports]
            df[f'{metric} - mean'].append(np.average(values))
            df[f'{metric} - std'].append(np.std(values))

        for metric in metrics_class:
            for index, activity in labels_activity.items():
                if index in labels:
                    values = [
                        report['classification report'][str(index)][metric]
                        for report in reports
                    ]
                    mean, std = np.average(values), np.std(values)
                else:
                    # Activity absent from this test set: keep the columns aligned.
                    mean = std = np.nan
                df[f'{metric} - mean - {activity}'].append(mean)
                df[f'{metric} - std - {activity}'].append(std)
    return df, results_dict

In [49]:
start = time.time()

# A single reporter instance is shared by all workflows below.
reporter = ClassificationReport(
    use_accuracy=True,
    use_f1_score=True,
    use_classification_report=True,
    use_confusion_matrix=True,
    plot_confusion_matrix=False,
)

# (name, estimator class, constructor kwargs, number of runs)
estimator_specs = [
    ('RandomForest', RandomForestClassifier, {'n_estimators': 100}, 10),
    ('SVC', SVC, {'C': 3.0, 'kernel': "rbf"}, 1),
    ('KNN', KNeighborsClassifier, {'n_neighbors': 1}, 1),
]

evaluators = {
    name: {
        'experiment': SimpleTrainEvalWorkflow(
            estimator=estimator_cls,
            # Keyword spelling ("kwags") kept exactly as in the original calls.
            estimator_creation_kwags=creation_kwargs,
            do_not_instantiate=False,
            do_fit=True,
            evaluator=reporter),
        'num_runs': num_runs,
    }
    for name, estimator_cls, creation_kwargs, num_runs in estimator_specs
}

# Make sure the activity codes are integers in both splits.
train_data.data['standard activity code'] = train_data.data['standard activity code'].astype('int')
test_data.data['standard activity code'] = test_data.data['standard activity code'].astype('int')

combination_triples = zip(combinations_sets['Umap'], combinations_sets['Train'], combinations_sets['Test'])
for k, (umap_name, train_name, test_name) in enumerate(combination_triples, start=1):
    umap_name, train_name = list(umap_name), list(train_name)

    # ['-'] is the sentinel for "no UMAP": no manifold-training data is selected.
    if umap_name != ['-']:
        umap = train_data.data[train_data.data['DataSet'].isin(umap_name)]
        umap = create_data_multimodal(umap)
    else:
        umap = None

    train = train_data.data[train_data.data['DataSet'].isin(train_name)]
    train = create_data_multimodal(train)

    test = test_data.data[test_data.data['DataSet'].isin([test_name])]
    test = create_data_multimodal(test)

    new_start = time.time()
    df_results, results_dict = evaluate(umap, train, test, evaluators, df_results, results_dict,
                                        umap_name, train_name, test_name, labels_activity,
                                        metrics_class, reporter)
    new_end = time.time()

    # Split the elapsed seconds into hours / minutes / seconds (3600 s per hour).
    hours, remainder = divmod(int(new_end - new_start), 3600)
    minutes, seconds = divmod(remainder, 60)
    print(f'Combination: {k} \t Time of execution: {hours} hours, {minutes} minutes and {seconds} seconds')

end = time.time()
total = int(end - start)
total_hours, total_remainder = divmod(total, 3600)
print(f'Time of execution: {total} seconds')
print(f'Time of execution: {total // 60} minutes and {total % 60} seconds')
print(f'Time of execution: {total_hours} hours, {total_remainder // 60} minutes and {total_remainder % 60} seconds')

Combination: 1 	 Time of execution: 0 hours, 0 minutes and 15 seconds
Combination: 2 	 Time of execution: 0 hours, 0 minutes and 20 seconds
Combination: 3 	 Time of execution: 0 hours, 0 minutes and 30 seconds
Combination: 4 	 Time of execution: 0 hours, 0 minutes and 18 seconds


KeyboardInterrupt: 

In [None]:
# Convert the column -> list accumulator into a DataFrame for display.
# NOTE: this rebinds `df_results`; the cells below save this DataFrame.
df_results = pd.DataFrame(df_results)
df_results
# Optional: df_results.dropna(axis=1) would drop the columns containing NaN.

In [None]:
# Save results
# Both objects are pickled into the current working directory so they can be
# reloaded later without re-running the experiments.
import pickle

# Summary table (`df_results`).
with open('df_results.pkl', 'wb') as file:
    pickle.dump(df_results, file)

# Raw per-classifier results (`results_dict`).
with open('results_dict.pkl', 'wb') as file:
    pickle.dump(results_dict, file)

In [None]:
# Reload the saved summary table. Imported here so the cell also works on a
# fresh kernel where the save cell was not run.
import pickle

# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('df_results.pkl', 'rb') as f:
    df_results = pickle.load(f)  # bind to the name that is displayed below
df_results

In [None]:
# Reload the saved raw results. Imported here so the cell also works on a
# fresh kernel where the save cell was not run.
import pickle

# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('results_dict.pkl', 'rb') as f:
    results_dict = pickle.load(f)  # bind to the name that is displayed below
results_dict