# Plot datasets

This notebook visualizes data from different datasets (KuHar, MotionSense, UCI-HAR, WISDM, and ExtraSensory). Gravity has been removed from the data with a high-pass filter.

1. Apply DFT over dataset windows
2. Plot UMAP and T-SNE

In [1]:
from pathlib import Path  # For defining dataset Paths
import sys
sys.path.append("../../..")

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from umap import UMAP
#from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE

import plotly.express as px
import plotly.graph_objects as go

# Librep imports
from librep.utils.dataset import PandasDatasetsIO          # For quick load train, test and validation CSVs
from librep.datasets.har.loaders import (
    KuHar_BalancedView20HzMotionSenseEquivalent,
    MotionSense_BalancedView20HZ,
    ExtraSensorySense_UnbalancedView20HZ,
    CHARM_BalancedView20Hz,
    WISDM_UnbalancedView20Hz,
    UCIHAR_UnbalancedView20Hz
)
from librep.datasets.multimodal import PandasMultiModalDataset, TransformMultiModalDataset, WindowedTransform
from librep.transforms.fft import FFT
from librep.transforms. stats import StatsTransform
from librep.utils.workflow import SimpleTrainEvalWorkflow, MultiRunWorkflow
from librep.estimators import RandomForestClassifier, SVC, KNeighborsClassifier
from librep.metrics.report import ClassificationReport
from librep.transforms.resampler import SimpleResampler

2022-11-18 01:37:29.967731: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-11-18 01:37:29.967750: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


## Datasets to train the manifold

In [3]:
# Standard activity code -> human-readable activity name (codes are 0..6 in this order)
labels_activity = dict(enumerate([
    "sit",
    "stand",
    "walk",
    "stair up",
    "stair down",
    "run",
    "stair up and down",
]))

# All known activity codes, in ascending order
classes = list(labels_activity)
print(labels_activity)

# One-letter dataset id -> dataset display name
labels_dataset = dict(
    K='KuHar',
    M='MotionSense',
    E='ExtraSensory',
    W='WISDM',
    U='UCI',
)

{0: 'sit', 1: 'stand', 2: 'walk', 3: 'stair up', 4: 'stair down', 5: 'run', 6: 'stair up and down'}


### Load KuHar

In [4]:
# Load KuHar, creating PandasMultiModalDatasets with the correct pre-defined windows.
# The view is read from local disk (download=False) and labelled by "standard activity code".
# concat_all=True yields a single dataset object (see the repr below) — presumably all
# splits merged; confirm against the librep loader.
loader = KuHar_BalancedView20HzMotionSenseEquivalent("../../../data/views/KuHar/balanced_20Hz_motionsense_equivalent-v1", 
                              download=False)
train_val_kuhar = loader.load(concat_all=True, label="standard activity code")
train_val_kuhar

PandasMultiModalDataset: samples=3360, features=360, no. window=6, label_columns='standard activity code'

In [5]:
# Inspect the underlying KuHar frame: sensor sample columns followed by metadata columns
train_val_kuhar.data

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,accel-start-time,gyro-start-time,accel-end-time,gyro-end-time,activity code,length,serial,index,user,standard activity code
0,-1.042828,7.069610,3.455672,0.858443,-13.806092,-5.950121,-0.608514,2.916913,4.881929,8.610287,...,9.159,9.160,12.199,12.201,0,300,23,900,1101,4
1,-2.437900,-2.628449,-2.166861,-3.854701,0.527599,11.567884,6.840191,-2.435600,-3.324599,-0.796095,...,9.167,9.166,12.211,12.208,0,300,43,900,1101,4
2,-0.664337,0.483465,3.090238,3.559119,3.888318,1.113559,3.656104,4.866379,0.073993,-14.136122,...,15.262,15.263,18.298,18.299,0,300,24,1500,1101,4
3,-2.269775,-3.209509,-2.508545,-2.923223,-1.269270,7.864898,7.683389,2.325797,-3.086381,-3.675483,...,33.572,33.583,36.615,36.626,0,300,41,3300,1101,4
4,-0.487718,0.226401,0.609332,1.490074,-0.295221,-1.111420,-0.627382,0.028419,-0.823724,-0.469242,...,0.008,0.007,3.047,3.048,0,300,30,0,1101,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3355,-4.064457,0.710153,-0.210519,-0.085273,-0.689988,0.591841,0.208143,-0.555237,-2.460645,2.797614,...,0.002,0.003,2.992,2.992,5,300,4,0,1036,5
3356,11.277711,7.928652,18.739669,1.170475,-16.387407,-6.588531,-9.052969,7.167498,28.822861,-2.055094,...,3.513,3.508,6.503,6.500,5,300,3,300,1023,5
3357,2.147823,-2.314857,-0.819528,-0.204672,-1.121992,-5.432771,4.140002,-1.748817,2.392677,0.545084,...,1.007,1.002,3.997,3.992,5,300,4,0,1023,5
3358,-8.015342,2.348301,0.955636,1.112485,-3.618251,-3.723044,-0.024734,2.381249,3.211661,7.933797,...,2.006,2.007,4.997,4.998,5,300,5,0,1036,5


In [6]:
# Sensor samples: everything except the 10 trailing metadata columns
kuhar_X = train_val_kuhar.data.iloc[:, :-10]
kuhar_Y = train_val_kuhar.data['standard activity code']
kuhar_user = train_val_kuhar.data['user']

# One dataset id ('K') per sample, so rows stay traceable after merging datasets
tam = len(kuhar_Y)
kuhar_id_dataset = np.array(['K'] * tam)

# Per-sample text label in the form "<activity> - <dataset>"
kuhar_label = np.array([
    f"{labels_activity[activity_code]} - {labels_dataset[dataset_id]}"
    for activity_code, dataset_id in zip(kuhar_Y, kuhar_id_dataset)
])

### Load MotionSense

In [7]:
# Load the MotionSense view from local disk (download=False), labelled by
# "standard activity code"; concat_all=True yields a single dataset object.
loader = MotionSense_BalancedView20HZ("../../../data/views/MotionSense/balanced_20Hz_filtered", 
                                   download=False) 
train_val_motion = loader.load(concat_all=True, label="standard activity code")
train_val_motion

PandasMultiModalDataset: samples=5028, features=360, no. window=6, label_columns='standard activity code'

In [8]:
# Inspect the underlying MotionSense frame (it starts with attitude.* columns, unlike KuHar)
train_val_motion.data

Unnamed: 0,attitude.roll-0,attitude.roll-1,attitude.roll-2,attitude.roll-3,attitude.roll-4,attitude.roll-5,attitude.roll-6,attitude.roll-7,attitude.roll-8,attitude.roll-9,...,accel-z-56,accel-z-57,accel-z-58,accel-z-59,activity code,length,trial_code,index,user,standard activity code
0,0.361761,0.179350,0.202526,0.211447,0.222368,0.254928,0.286169,0.334803,0.431301,0.519485,...,0.666390,4.116260,-3.067699,-0.439384,0,60,1,360,2,4
1,0.998364,1.005298,0.851995,0.614079,0.494198,0.522378,0.696275,0.741020,0.764224,0.851471,...,2.944915,2.950987,1.207228,0.922734,0,60,1,540,16,4
2,-2.559295,-2.513393,-2.279217,-2.287133,-2.011615,-1.848937,-2.274790,-2.727606,-2.928327,-2.838619,...,0.809506,1.110136,0.587751,5.467525,0,60,1,120,24,4
3,-1.906789,-1.803415,-1.985550,-2.072027,-2.080793,-1.814693,-1.636071,-2.028029,-2.247766,-2.174807,...,-1.858955,2.016718,0.997634,-1.641303,0,60,2,420,18,4
4,-0.432330,-0.353399,-0.569593,-0.764132,-0.751537,-0.562869,-0.305289,-0.523283,-0.423675,-0.264825,...,-2.827182,3.128759,0.924363,-0.535934,0,60,2,300,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5023,0.945985,0.835755,0.689149,-2.325232,-2.145454,-1.022318,0.673976,0.381796,0.954072,0.627430,...,5.226585,5.615873,0.981034,-3.673905,5,60,9,360,22,5
5024,0.169756,-0.204503,0.459178,-0.568309,2.126020,2.105488,-0.950820,0.811170,-3.444055,0.164661,...,-14.707570,4.514682,-3.539342,2.991253,5,60,9,180,15,5
5025,-2.761080,-2.847438,-1.904134,0.242494,-0.020656,0.088319,-0.133732,1.987494,1.128165,0.442080,...,-6.899139,-5.999598,-1.444385,-4.138246,5,60,9,1200,6,5
5026,-0.234010,0.290870,-0.067315,0.639471,3.334376,-1.647199,2.985571,2.219747,-2.771499,-0.195250,...,-8.503529,1.643911,-9.312668,1.771203,5,60,16,240,15,5


In [9]:
# Columns 360..539 are the gyroscope samples (gyro-x-0 ... gyro-z-59, per the output below)
train_val_motion.data.iloc[:,360:540]

Unnamed: 0,gyro-x-0,gyro-x-1,gyro-x-2,gyro-x-3,gyro-x-4,gyro-x-5,gyro-x-6,gyro-x-7,gyro-x-8,gyro-x-9,...,gyro-z-50,gyro-z-51,gyro-z-52,gyro-z-53,gyro-z-54,gyro-z-55,gyro-z-56,gyro-z-57,gyro-z-58,gyro-z-59
0,2.416168,1.425987,0.432724,-0.442331,-1.008685,-0.968557,-1.727375,-0.785960,0.301713,-0.265985,...,-0.000940,0.192489,-0.019042,-0.486224,-0.136334,0.208895,0.523736,-0.763385,-0.543151,0.782424
1,-0.392869,0.680290,2.331535,2.825752,3.159755,0.603375,-1.526515,-1.490733,-2.338737,-2.199652,...,0.040037,-0.545322,-1.036972,-1.589284,-0.734425,-0.297475,-0.168993,1.026077,-0.110496,-0.984670
2,3.877532,1.026625,0.482123,0.281094,0.799710,-0.169250,-0.605498,-1.113426,-2.861108,-2.588045,...,0.680082,1.321175,1.452527,-0.171931,-0.036254,-0.526024,0.032629,-0.227881,-0.063752,0.299167
3,0.024890,0.100237,-0.765744,-0.135315,0.360571,1.073031,-0.325650,-0.951654,-0.412893,0.330772,...,0.462495,0.408306,0.067837,-0.175092,-0.132694,-0.127267,-0.038517,-0.349052,0.243316,0.035424
4,0.676135,-1.723360,-0.990657,-0.941559,0.313503,0.238453,-0.321289,-0.665136,0.447474,0.180847,...,-0.023703,0.183257,0.743221,0.550107,-0.560860,-0.092277,0.532015,0.227761,0.579212,0.218732
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5023,-1.023721,-2.244999,-2.284888,-0.665339,0.025202,2.835075,3.173697,3.050147,1.327054,-0.045744,...,3.305324,2.149209,1.545474,-1.451223,-1.679996,0.619296,-0.576180,-3.000535,-2.343239,-2.079869
5024,0.462645,1.835205,-1.996374,-4.109942,-6.315888,-2.112793,-0.866276,2.483855,3.432113,5.302295,...,-0.513408,0.059170,-0.869835,1.418709,-0.333015,2.028773,1.078272,0.475017,-0.198779,-1.514206
5025,2.817022,3.871177,4.221806,2.749225,1.046050,-2.013996,-2.614715,-0.940133,2.111500,-1.088026,...,-0.248408,1.259049,-0.569404,-0.527184,-1.586541,-1.975594,-0.571789,-0.385179,0.536645,0.489831
5026,0.021670,-0.808959,-4.067501,-5.917265,-5.009484,-1.712742,2.579381,3.326548,5.654234,7.323675,...,-0.667787,0.185832,-0.737232,2.638622,1.365777,-0.041777,1.050452,-0.329185,1.534976,0.465610


In [10]:
# Rebuild the feature matrix in accelerometer-then-gyroscope column order so it lines up
# with the other datasets; the columns before position 360 and the 6 trailing metadata
# columns are left out.
motion_X = pd.concat(
    [
        train_val_motion.data.iloc[:, 540:-6],   # accel-* samples
        train_val_motion.data.iloc[:, 360:540],  # gyro-* samples
    ],
    axis="columns",
)
motion_Y = train_val_motion.data['standard activity code']
motion_user = train_val_motion.data['user']

# One dataset id ('M') per sample
tam = len(motion_Y)
motion_id_dataset = np.array(['M'] * tam)

# Per-sample text label in the form "<activity> - <dataset>"
motion_label = np.array([
    f"{labels_activity[activity_code]} - {labels_dataset[dataset_id]}"
    for activity_code, dataset_id in zip(motion_Y, motion_id_dataset)
])

In [11]:
# Check the reordered MotionSense features: accel-* columns first, gyro-* columns last
motion_X

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-50,gyro-z-51,gyro-z-52,gyro-z-53,gyro-z-54,gyro-z-55,gyro-z-56,gyro-z-57,gyro-z-58,gyro-z-59
0,1.013198,1.213734,2.009169,-2.182658,-2.550637,-2.970507,-0.217632,0.999594,-1.246883,-1.457703,...,-0.000940,0.192489,-0.019042,-0.486224,-0.136334,0.208895,0.523736,-0.763385,-0.543151,0.782424
1,3.468220,1.409920,-3.214915,-2.867658,-2.020174,1.271113,2.866810,3.731912,1.499058,-2.534589,...,0.040037,-0.545322,-1.036972,-1.589284,-0.734425,-0.297475,-0.168993,1.026077,-0.110496,-0.984670
2,1.421498,0.017303,5.173924,-2.088614,-5.692213,-1.351758,3.589653,-0.080849,-0.191445,-0.111856,...,0.680082,1.321175,1.452527,-0.171931,-0.036254,-0.526024,0.032629,-0.227881,-0.063752,0.299167
3,-0.497819,1.410787,-1.164364,0.479037,3.013279,-1.456027,1.731780,0.791390,1.319055,-0.111974,...,0.462495,0.408306,0.067837,-0.175092,-0.132694,-0.127267,-0.038517,-0.349052,0.243316,0.035424
4,1.050593,-1.057788,-3.268201,-3.543602,-1.746914,3.277555,1.059173,2.462798,4.433639,-0.412389,...,-0.023703,0.183257,0.743221,0.550107,-0.560860,-0.092277,0.532015,0.227761,0.579212,0.218732
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5023,2.004535,2.609273,-0.777566,5.790095,4.359054,-4.121035,0.958092,-0.469032,4.947858,2.126789,...,3.305324,2.149209,1.545474,-1.451223,-1.679996,0.619296,-0.576180,-3.000535,-2.343239,-2.079869
5024,0.791806,1.613315,6.142404,-1.948661,0.721081,-7.445180,7.764421,-3.382841,-0.655162,-9.244234,...,-0.513408,0.059170,-0.869835,1.418709,-0.333015,2.028773,1.078272,0.475017,-0.198779,-1.514206
5025,0.025046,0.818777,-9.129934,1.533859,3.172891,-0.118608,1.699464,2.342641,-5.414453,-4.136918,...,-0.248408,1.259049,-0.569404,-0.527184,-1.586541,-1.975594,-0.571789,-0.385179,0.536645,0.489831
5026,1.136342,0.959323,-0.002209,0.267273,-4.759702,2.358702,0.207890,-2.672748,-3.667548,-2.131495,...,-0.667787,0.185832,-0.737232,2.638622,1.365777,-0.041777,1.050452,-0.329185,1.534976,0.465610


### Load UCI-HAR

In [12]:
# Load the UCI-HAR view from local disk (download=False), labelled by "standard activity code".
# NOTE(review): the loader class is named "Unbalanced" but the path points at a
# "balanced_20Hz_filtered" view — confirm this pairing is intended.
loader = UCIHAR_UnbalancedView20Hz("../../../data/views/UCI-HAR/balanced_20Hz_filtered", 
                                   download=False) 
train_val_uci = loader.load(concat_all=True, label="standard activity code")
train_val_uci

PandasMultiModalDataset: samples=2840, features=360, no. window=6, label_columns='standard activity code'

In [13]:
# Inspect the underlying UCI-HAR frame: sensor columns plus 3 trailing metadata columns
train_val_uci.data

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-53,gyro-z-54,gyro-z-55,gyro-z-56,gyro-z-57,gyro-z-58,gyro-z-59,activity code,user,standard activity code
0,0.156566,-1.424124,-2.905373,-1.449460,-1.068465,-1.123434,-1.195326,-0.833242,1.702248,2.030561,...,0.313386,0.000744,-0.047502,-0.569485,-0.652966,0.050269,0.477185,1,23,2
1,-0.537229,-0.186480,1.051567,0.605424,-0.185871,3.032825,4.907514,4.161293,1.228430,-0.243705,...,0.841789,0.214277,-0.155207,-0.054555,0.275682,0.461437,-0.171866,1,8,2
2,-2.151572,-2.340571,0.602388,2.951933,1.112684,3.037523,-0.053386,-1.381455,-0.637672,-2.754747,...,-0.278544,-0.050879,0.363474,0.356592,0.452005,0.304909,-0.237935,1,16,2
3,-0.121271,-1.496275,-1.492793,-1.547828,-0.905671,-0.681165,-0.371455,0.291828,1.207175,1.622875,...,0.290052,0.337902,0.202508,-0.316462,-0.537158,-0.476181,-0.320307,1,16,2
4,1.557988,1.860748,1.597169,2.202641,3.586844,1.869756,-1.940422,-3.407308,-3.873758,-2.844197,...,0.205870,-0.161104,-0.067245,-0.006595,-0.044617,-0.235705,0.503788,1,29,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2835,-0.000958,-0.019664,0.026211,-0.001207,-0.037750,-0.038691,-0.020334,0.027428,-0.032307,-0.025282,...,0.003225,0.007405,0.004294,0.003416,0.007763,0.007707,0.009001,5,3,1
2836,0.045167,-0.037133,-0.067926,0.139481,-0.016318,0.010556,0.019199,-0.015156,-0.062317,-0.172797,...,0.003043,-0.021028,-0.013950,-0.064271,-0.085593,-0.067198,-0.029561,5,21,1
2837,-0.032533,-0.004799,-0.024588,-0.051822,0.042807,0.019523,-0.034658,-0.007025,0.006493,-0.001434,...,0.002107,-0.001484,-0.000134,0.003454,-0.001465,-0.004153,-0.002858,5,21,1
2838,0.037385,-0.018655,0.003293,0.028780,0.009934,0.006147,0.007297,-0.013579,0.013963,-0.001217,...,-0.001573,-0.001164,0.006791,0.007795,0.006621,-0.025012,-0.060256,5,26,1


In [14]:
# Sensor samples: everything except the 3 trailing columns
# (activity code, user, standard activity code)
uci_X = train_val_uci.data.iloc[:, :-3]
uci_Y = train_val_uci.data['standard activity code']
uci_user = train_val_uci.data['user']

# One dataset id ('U') per sample
tam = len(uci_Y)
uci_id_dataset = np.array(['U'] * tam)

# Per-sample text label in the form "<activity> - <dataset>"
uci_label = np.array([
    f"{labels_activity[activity_code]} - {labels_dataset[dataset_id]}"
    for activity_code, dataset_id in zip(uci_Y, uci_id_dataset)
])

### Load WISDM

In [15]:
# Load the WISDM view from local disk (download=False), labelled by "standard activity code".
# NOTE(review): the loader class is named "Unbalanced" but the path points at a
# "balanced_20Hz_filtered" view — confirm this pairing is intended.
loader = WISDM_UnbalancedView20Hz("../../../data/views/WISDM/balanced_20Hz_filtered", 
                                   download=False) 
train_val_wisdm = loader.load(concat_all=True, label="standard activity code")
train_val_wisdm

PandasMultiModalDataset: samples=13714, features=360, no. window=6, label_columns='standard activity code'

In [16]:
# List which standard activity codes actually occur in WISDM
train_val_wisdm.data['standard activity code'].unique()

array([2, 5, 6, 0, 1], dtype=object)

In [17]:
# Inspect the underlying WISDM frame: sensor columns plus 3 trailing metadata columns
train_val_wisdm.data

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-53,gyro-z-54,gyro-z-55,gyro-z-56,gyro-z-57,gyro-z-58,gyro-z-59,activity code,user,standard activity code
0,-0.074538,-0.580834,2.338327,0.883443,-1.666145,-3.929086,-0.039061,2.076679,0.325360,-1.234530,...,0.021654,-0.008257,-0.392786,-0.329143,-0.074698,-0.027220,0.314905,0,1600,2
1,1.221374,1.089056,1.585969,2.236866,-0.839452,-3.612999,-0.900370,1.857142,1.516450,-0.299742,...,0.345166,0.294343,-0.417068,-0.172208,0.067521,0.406141,0.615971,0,1600,2
2,1.496420,0.450270,0.064726,1.625363,-0.522916,-3.635099,2.741516,2.868284,0.081997,-1.445224,...,0.341863,0.039910,-0.253467,-0.607339,-0.102324,0.100644,0.608968,0,1600,2
3,2.913555,-0.473575,1.045040,1.709677,-1.312416,-4.159398,-0.479272,2.969279,1.993486,-1.367073,...,0.408067,0.268658,-0.341150,-0.359142,-0.065309,-0.015974,0.365056,0,1600,2
4,1.110426,0.817269,2.589152,3.346747,-2.516123,-5.412037,-1.846764,0.277373,2.061178,1.953869,...,-0.459501,0.421399,-0.182202,-0.406128,-0.590792,-0.174049,0.307581,0,1600,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13709,0.068292,0.098343,-0.076416,-0.023492,0.086462,0.079259,0.028139,-0.022419,-0.075487,-0.037037,...,-0.062382,-0.032012,-0.139899,-0.004694,0.085539,-0.072366,0.102914,4,1645,1
13710,-0.307594,0.071253,0.417730,1.086465,0.310087,-0.287224,-0.160821,0.061372,0.334338,1.086662,...,-0.014175,-0.029097,-0.037724,-0.003698,0.017190,-0.016007,-0.031203,4,1645,1
13711,-0.021779,-0.092840,0.099584,0.072275,-0.015085,-0.114407,0.034932,0.093248,0.053823,0.025874,...,-0.000124,0.005841,0.003804,0.000856,-0.006622,-0.010246,0.000292,4,1645,1
13712,-0.187424,-0.135107,-0.025969,0.048096,0.004855,-0.018962,-0.038663,-0.024901,0.024983,0.065581,...,-0.003824,0.006343,-0.000464,0.009976,-0.060380,0.009847,0.067267,4,1645,1


In [18]:
# Sensor samples: everything except the 3 trailing columns
# (activity code, user, standard activity code)
wisdm_X = train_val_wisdm.data.iloc[:,:-3]
wisdm_Y = train_val_wisdm.data['standard activity code']

# One dataset id ('W') per sample
tam = len(wisdm_Y)
wisdm_id_dataset = np.array(['W']*tam)

# User id of every sample, named consistently with the other wisdm_* variables
wisdm_user = train_val_wisdm.data['user']
# Legacy alias: the original (misspelled) name is still read by the concatenation cell
wisdam_user = wisdm_user

# Per-sample text label in the form "<activity> - <dataset>"
wisdm_label = np.array([labels_activity[i] + ' - ' + labels_dataset[j] for i, j in zip(wisdm_Y, wisdm_id_dataset)])

### Load ExtraSensory

In [19]:
# Load the ExtraSensory view from local disk (download=False), labelled by
# "standard activity code"; concat_all=True yields a single dataset object.
loader = ExtraSensorySense_UnbalancedView20HZ("../../../data/views/ExtraSensory/unbalanced_20Hz_train-gnoravity-v1", 
                                   download=False) 
train_val_extrasensory = loader.load(concat_all=True, label="standard activity code")
train_val_extrasensory

PandasMultiModalDataset: samples=25018, features=360, no. window=6, label_columns='standard activity code'

In [20]:
# Inspect the underlying ExtraSensory frame; note the extra leading "Unnamed: 0" column
# and that user ids are UUID strings here
train_val_extrasensory.data

Unnamed: 0.1,Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,...,gyro-z-58,gyro-z-59,timestamp source,activity code,timestamp source index,accelerometer start timestamp,gyroscope start timestamp,gravity start timestamp,user,standard activity code
0,0,0.180988,0.166653,0.220385,0.180553,0.172057,0.193328,0.183921,0.217724,0.165645,...,-0.001602,0.004429,1440548096,0,0,113715.940,113716.086,113716.086,CA820D43-E5E2-42EF-9798-BE56F776370B,1
1,1,0.213085,0.156338,0.232402,0.167833,0.199010,0.156184,0.279663,0.116217,0.145872,...,-0.029518,0.000592,1440548096,0,120,113719.484,113719.086,113719.086,CA820D43-E5E2-42EF-9798-BE56F776370B,1
2,2,0.168877,0.179419,0.237898,0.153882,0.156423,0.187238,0.226915,0.190233,0.139495,...,0.001779,-0.021204,1440548096,0,240,113723.030,113722.086,113722.086,CA820D43-E5E2-42EF-9798-BE56F776370B,1
3,3,0.217478,0.166874,0.262751,0.156685,0.156064,0.183014,0.266148,0.155898,0.146684,...,-0.001287,0.033465,1440548096,0,360,113726.580,113725.086,113725.086,CA820D43-E5E2-42EF-9798-BE56F776370B,1
4,4,0.167281,0.143630,0.183344,0.210805,0.208415,0.129704,0.211796,0.219654,0.162489,...,-0.002336,0.002777,1440548096,0,480,113730.120,113728.086,113728.086,CA820D43-E5E2-42EF-9798-BE56F776370B,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25013,25013,0.120023,0.102523,0.182343,0.192553,0.130164,0.132290,0.170037,0.157019,0.177253,...,-0.078291,-0.075607,1446657536,0,120,204774.520,204774.530,204774.530,81536B0A-8DBF-4D8A-AC24-9543E2E4C8E0,1
25014,25014,0.173010,0.145972,0.167845,0.168520,0.136331,0.150179,0.158759,0.166362,0.139440,...,-0.076328,-0.077452,1446657536,0,240,204776.920,204776.940,204776.940,81536B0A-8DBF-4D8A-AC24-9543E2E4C8E0,1
25015,25015,0.157320,0.160179,0.144302,0.150981,0.160996,0.156867,0.156504,0.149133,0.151943,...,-0.075350,-0.076196,1446657536,0,360,204779.310,204777.810,204777.810,81536B0A-8DBF-4D8A-AC24-9543E2E4C8E0,1
25016,25016,0.163902,0.146540,0.147255,0.150095,0.156038,0.157817,0.141753,0.147672,0.149712,...,-0.075781,-0.078337,1446657536,0,480,204781.720,204778.610,204778.610,81536B0A-8DBF-4D8A-AC24-9543E2E4C8E0,1


In [21]:
# Sensor samples: skip the leading "Unnamed: 0" column and the 8 trailing metadata columns
extrasensory_X = train_val_extrasensory.data.iloc[:, 1:-8]
extrasensory_Y = train_val_extrasensory.data['standard activity code']
extrasensory_user = train_val_extrasensory.data['user']

# One dataset id ('E') per sample
tam = len(extrasensory_Y)
extrasensory_id_dataset = np.array(['E'] * tam)

# Per-sample text label in the form "<activity> - <dataset>"
extrasensory_label = np.array([
    f"{labels_activity[activity_code]} - {labels_dataset[dataset_id]}"
    for activity_code, dataset_id in zip(extrasensory_Y, extrasensory_id_dataset)
])

### Concatenate datasets

In [22]:
# Stack the five datasets row-wise, always in the same order
# (KuHar, MotionSense, UCI, WISDM, ExtraSensory) so that features, labels,
# dataset ids and users stay aligned position by position.
# NOTE(review): pd.concat is called without ignore_index, so each frame keeps its own
# index and index values repeat in the result.
data_X = pd.concat([kuhar_X, motion_X, uci_X, wisdm_X, extrasensory_X])
data_y = pd.concat([kuhar_Y, motion_Y, uci_Y, wisdm_Y, extrasensory_Y])
data_id_dataset = np.concatenate([kuhar_id_dataset, motion_id_dataset, uci_id_dataset, wisdm_id_dataset, 
                                  extrasensory_id_dataset])
# `wisdam_user` (sic) is the WISDM user Series; the spelling matches the cell that defines it
data_user = pd.concat([kuhar_user, motion_user, uci_user, wisdam_user, extrasensory_user])
data_label = np.concatenate([kuhar_label, motion_label, uci_label, wisdm_label, extrasensory_label])

In [23]:
# Sanity check: every dataset must expose the same number of feature columns before merging
kuhar_X.shape, motion_X.shape, uci_X.shape, wisdm_X.shape, extrasensory_X.shape

((3360, 360), (5028, 360), (2840, 360), (13714, 360), (25018, 360))

In [24]:
# `data` is the very same object as data_X (no copy is made), so the columns
# appended below show up on data_X as well.
data = data_X

# Metadata columns appended after the sensor features, in this order.
# Values go through np.array so they are assigned by position, not by index label.
extra_columns = {
    'standard activity code': data_y,
    'DataSet': data_id_dataset,
    'label': data_label,
    'user': data_user,
}
for column_name, column_values in extra_columns.items():
    data[column_name] = np.array(column_values)

data

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-54,gyro-z-55,gyro-z-56,gyro-z-57,gyro-z-58,gyro-z-59,standard activity code,DataSet,label,user
0,-1.042828,7.069610,3.455672,0.858443,-13.806092,-5.950121,-0.608514,2.916913,4.881929,8.610287,...,0.299246,0.215924,0.322555,0.508903,-0.113054,-0.380190,4,K,stair down - KuHar,1101
1,-2.437900,-2.628449,-2.166861,-3.854701,0.527599,11.567884,6.840191,-2.435600,-3.324599,-0.796095,...,-0.170998,0.067450,-0.153206,-0.094137,-0.238300,0.377260,4,K,stair down - KuHar,1101
2,-0.664337,0.483465,3.090238,3.559119,3.888318,1.113559,3.656104,4.866379,0.073993,-14.136122,...,0.005712,-0.275884,0.048012,0.288451,0.118925,0.288595,4,K,stair down - KuHar,1101
3,-2.269775,-3.209509,-2.508545,-2.923223,-1.269270,7.864898,7.683389,2.325797,-3.086381,-3.675483,...,0.143971,0.129493,0.189543,0.057577,-0.121568,-0.037776,4,K,stair down - KuHar,1101
4,-0.487718,0.226401,0.609332,1.490074,-0.295221,-1.111420,-0.627382,0.028419,-0.823724,-0.469242,...,-0.153478,-0.047371,0.020793,0.186724,0.132858,0.318369,4,K,stair down - KuHar,1101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25013,0.120023,0.102523,0.182343,0.192553,0.130164,0.132290,0.170037,0.157019,0.177253,0.155428,...,-0.077157,-0.077373,-0.077243,-0.079048,-0.078291,-0.075607,1,E,stand - ExtraSensory,81536B0A-8DBF-4D8A-AC24-9543E2E4C8E0
25014,0.173010,0.145972,0.167845,0.168520,0.136331,0.150179,0.158759,0.166362,0.139440,0.144607,...,-0.077221,-0.076154,-0.075042,-0.076802,-0.076328,-0.077452,1,E,stand - ExtraSensory,81536B0A-8DBF-4D8A-AC24-9543E2E4C8E0
25015,0.157320,0.160179,0.144302,0.150981,0.160996,0.156867,0.156504,0.149133,0.151943,0.160496,...,-0.077446,-0.076203,-0.076593,-0.075095,-0.075350,-0.076196,1,E,stand - ExtraSensory,81536B0A-8DBF-4D8A-AC24-9543E2E4C8E0
25016,0.163902,0.146540,0.147255,0.150095,0.156038,0.157817,0.141753,0.147672,0.149712,0.149800,...,-0.076951,-0.076240,-0.074629,-0.075549,-0.075781,-0.078337,1,E,stand - ExtraSensory,81536B0A-8DBF-4D8A-AC24-9543E2E4C8E0


In [25]:
# Column-name prefixes of the six sensor channels to select:
# accel-x, accel-y, accel-z, gyro-x, gyro-y, gyro-z
features = [f"{sensor}-{axis}" for sensor in ("accel", "gyro") for axis in "xyz"]

# Wrap the merged frame in a multimodal dataset, labelled by the standard activity code
data_activity = PandasMultiModalDataset(
    data,
    feature_prefixes=features,
    label_columns='standard activity code',
    as_array=True,
)

In [26]:
# Create the objects

# FFT transform from librep; centered=True is forwarded as-is (see librep's FFT for its meaning)
fft_transform = FFT(centered=True)

# Compose the transform
# NOTE(review): FFT is the only transform in this pipeline — no normalizer is applied.
# Whether the FFT runs per window is decided inside TransformMultiModalDataset — confirm.
# The resulting windows get their names prefixed with "fft.".
transformer = TransformMultiModalDataset(
    transforms=[fft_transform], new_window_name_prefix="fft."
)

In [27]:
# Apply the FFT pipeline to the merged dataset; this produces a new dataset object
# (an ArrayMultiModalDataset, per the repr below).
data_activity_fft = transformer(data_activity)
data_activity_fft

ArrayMultiModalDataset: samples=49960, shape=49960, no. window=6

In [28]:
# Checking the whole data: slice every sample and show the first element of the result
# (the transformed feature array, per the output below)
data_activity_fft[:][0]

array([[9.17338790e-01, 2.94403289e-01, 1.23462554e+00, ...,
        1.12933705e-01, 2.64129222e-01, 1.39964048e-01],
       [6.52337076e-01, 1.97729668e+00, 2.15721029e+00, ...,
        6.15419090e-02, 1.04666318e-01, 3.16286858e-01],
       [3.45508609e+00, 4.17256855e+00, 4.84083478e+00, ...,
        2.34285066e-01, 1.36510397e-01, 2.39903888e-01],
       ...,
       [1.65514873e+00, 6.37964691e-03, 1.04148032e-02, ...,
        1.94106388e-04, 3.63986445e-04, 1.29327550e-03],
       [1.65367211e+00, 7.49444425e-03, 8.37975156e-03, ...,
        1.15764963e-03, 1.10827792e-04, 3.11394983e-04],
       [1.66994198e+00, 9.41050302e-03, 1.70479396e-02, ...,
        1.44141392e-04, 7.73899905e-04, 5.10091201e-04]])

In [29]:
def create_dataset(data, label_columns="standard activity code"):
    """Wrap a frame in a PandasMultiModalDataset over the six accel/gyro channels.

    Args:
        data: Frame handed to PandasMultiModalDataset. It is expected to hold
            columns named after the "accel-{x,y,z}" / "gyro-{x,y,z}" prefixes
            (assumption based on how the prefixes are used; confirm with librep).
        label_columns: Forwarded unchanged as the dataset's ``label_columns``.

    Returns:
        The PandasMultiModalDataset built with ``as_array=True``.
    """
    # Same six channel prefixes, in accel-x/y/z then gyro-x/y/z order
    sensor_prefixes = [
        f"{sensor}-{axis}" for sensor in ("accel", "gyro") for axis in "xyz"
    ]
    return PandasMultiModalDataset(
        data,
        feature_prefixes=sensor_prefixes,
        label_columns=label_columns,
        as_array=True,
    )

In [30]:
# Identity mapping from every "<activity> - <dataset>" label to its legend text,
# in order of first appearance, plus an extra 'all' entry
labels = {label_name: label_name for label_name in data['label'].unique()}
labels['all'] = 'all'
labels

{'stair down - KuHar': 'stair down - KuHar',
 'stair up - KuHar': 'stair up - KuHar',
 'sit - KuHar': 'sit - KuHar',
 'stand - KuHar': 'stand - KuHar',
 'walk - KuHar': 'walk - KuHar',
 'run - KuHar': 'run - KuHar',
 'stair down - MotionSense': 'stair down - MotionSense',
 'stair up - MotionSense': 'stair up - MotionSense',
 'sit - MotionSense': 'sit - MotionSense',
 'stand - MotionSense': 'stand - MotionSense',
 'walk - MotionSense': 'walk - MotionSense',
 'run - MotionSense': 'run - MotionSense',
 'walk - UCI': 'walk - UCI',
 'stair up - UCI': 'stair up - UCI',
 'stair down - UCI': 'stair down - UCI',
 'sit - UCI': 'sit - UCI',
 'stand - UCI': 'stand - UCI',
 'walk - WISDM': 'walk - WISDM',
 'run - WISDM': 'run - WISDM',
 'stair up and down - WISDM': 'stair up and down - WISDM',
 'sit - WISDM': 'sit - WISDM',
 'stand - WISDM': 'stand - WISDM',
 'stand - ExtraSensory': 'stand - ExtraSensory',
 'walk - ExtraSensory': 'walk - ExtraSensory',
 'run - ExtraSensory': 'run - ExtraSensory',
 

## Plot UMAP and T-SNE

In [31]:
def plot(
    df, max_x, max_y, min_x, min_y, file_name,
    hovertext: str = "standard activity code - str",
    title: str = None,
    labels: dict = None,
    legend_title: str = 'Dataset',
):
    """Scatter-plot a 2-D embedding, one trace per group, and export it to disk.

    Parameters
    ----------
    df
        Table with columns 'x', 'y', 'label' (the grouping key) and the column
        named by `hovertext`.
    max_x, max_y, min_x, min_y
        Fixed axis limits, so that figures built from subsets of the same
        embedding stay visually comparable.
    file_name
        Base name (no extension) of the exported files.
    hovertext
        Name of the `df` column shown when hovering a point.
    title
        Figure title. Line breaks written as "\n" are converted to "<br>",
        because plotly renders a plain newline as a space.
    labels
        Mapping from group key to legend entry. When omitted, or when a key is
        missing from it, the key itself (as a string) is used.
    legend_title
        Title shown above the legend.

    The figure is not displayed and nothing is returned. It is written as
    pdfFile/<name>.pdf, pngFile/<name>.png, Charts/<name>.svg,
    htmlFile/<name>.html and jsonFile/<name>.json; the folders are created on
    demand.
    """
    fig = go.Figure()
    for key, group in df.groupby('label'):
        # Fall back to the raw key so a missing/None mapping does not abort the plot
        legend_name = str(key) if labels is None else labels.get(key, str(key))
        # The synthetic 'all' group is drawn in white; other groups keep the default colours
        marker = {'size': 4, 'color': 'white'} if key == 'all' else {'size': 4}
        fig.add_trace(
            go.Scatter(
                x=group['x'],
                y=group['y'],
                name=legend_name,       # this sets its legend entry
                mode='markers',
                marker=marker,
                hovertext=group[hovertext],
            )
        )
    fig.update_xaxes(range=[min_x, max_x])
    fig.update_yaxes(range=[min_y, max_y])

    fig.update_layout(
        # plotly only breaks lines on <br>; a bare "\n" would be shown as a space
        title=title.replace("\n", "<br>") if title is not None else None,
        xaxis_title="X",
        yaxis_title="Y",
        legend_title=legend_title,
        template='simple_white',
        # Other templates are listed on https://plotly.com/python/templates/
        # e.g. "plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"
        width=900,
        height=900,
    )

    # fig.show()
    exports = (
        ("pdfFile", "pdf", fig.write_image),
        ("pngFile", "png", fig.write_image),
        ("Charts", "svg", fig.write_image),
        ("htmlFile", "html", fig.write_html),
        ("jsonFile", "json", fig.write_json),
    )
    for folder, extension, write in exports:
        # Make sure the target folder exists so a fresh checkout can export
        Path(folder).mkdir(parents=True, exist_ok=True)
        write(f"{folder}/{file_name}.{extension}")


In [32]:
# Number of trailing columns that are dropped when slicing the windows out of the table.
# NOTE(review): presumably the DataSet / standard activity code / label / user columns — confirm
num_info = 4
data_activity.data.iloc[:, :-num_info]

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-50,gyro-z-51,gyro-z-52,gyro-z-53,gyro-z-54,gyro-z-55,gyro-z-56,gyro-z-57,gyro-z-58,gyro-z-59
0,-1.042828,7.069610,3.455672,0.858443,-13.806092,-5.950121,-0.608514,2.916913,4.881929,8.610287,...,-0.472805,0.343294,0.260441,-0.082865,0.299246,0.215924,0.322555,0.508903,-0.113054,-0.380190
1,-2.437900,-2.628449,-2.166861,-3.854701,0.527599,11.567884,6.840191,-2.435600,-3.324599,-0.796095,...,0.267483,-0.713088,0.337778,-0.016247,-0.170998,0.067450,-0.153206,-0.094137,-0.238300,0.377260
2,-0.664337,0.483465,3.090238,3.559119,3.888318,1.113559,3.656104,4.866379,0.073993,-14.136122,...,0.371043,0.478258,-0.547497,-0.010739,0.005712,-0.275884,0.048012,0.288451,0.118925,0.288595
3,-2.269775,-3.209509,-2.508545,-2.923223,-1.269270,7.864898,7.683389,2.325797,-3.086381,-3.675483,...,0.011855,0.110627,0.131676,0.085040,0.143971,0.129493,0.189543,0.057577,-0.121568,-0.037776
4,-0.487718,0.226401,0.609332,1.490074,-0.295221,-1.111420,-0.627382,0.028419,-0.823724,-0.469242,...,-0.224966,0.039142,-0.232840,0.027406,-0.153478,-0.047371,0.020793,0.186724,0.132858,0.318369
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25013,0.120023,0.102523,0.182343,0.192553,0.130164,0.132290,0.170037,0.157019,0.177253,0.155428,...,-0.079363,-0.077876,-0.076816,-0.078549,-0.077157,-0.077373,-0.077243,-0.079048,-0.078291,-0.075607
25014,0.173010,0.145972,0.167845,0.168520,0.136331,0.150179,0.158759,0.166362,0.139440,0.144607,...,-0.076338,-0.074947,-0.076408,-0.079775,-0.077221,-0.076154,-0.075042,-0.076802,-0.076328,-0.077452
25015,0.157320,0.160179,0.144302,0.150981,0.160996,0.156867,0.156504,0.149133,0.151943,0.160496,...,-0.076381,-0.074026,-0.075965,-0.079001,-0.077446,-0.076203,-0.076593,-0.075095,-0.075350,-0.076196
25016,0.163902,0.146540,0.147255,0.150095,0.156038,0.157817,0.141753,0.147672,0.149712,0.149800,...,-0.075934,-0.077367,-0.076920,-0.076633,-0.076951,-0.076240,-0.074629,-0.075549,-0.075781,-0.078337


## UMAP

### Time

In [33]:
# Fit a 2-D UMAP embedding on every column except the last `num_info` ones
model = UMAP(n_components=2, random_state=42)
time_windows = np.array(data_activity.data.iloc[:, :-num_info])
result = pd.DataFrame(model.fit_transform(time_windows), columns=["x", "y"])

# Copy the per-row descriptive columns next to the embedding coordinates
for target, source in (
    ("DataSet", "DataSet"),
    ("standard activity code", "standard activity code"),
    ("labels", "label"),
    ("user", "user"),
):
    result[target] = np.array(data_activity.data[source])

# Readable activity names; y_activity is reused by the later embedding cells
y_activity = [labels_activity[code] for code in result['standard activity code']]
result['standard activity code - str'] = np.array(y_activity)

# Axis limits passed to every plot of this embedding
max_x, max_y = max(result['x']), max(result['y'])
min_x, min_y = min(result['x']), min(result['y'])



In [34]:
# One trace per source dataset: the 'DataSet' column becomes the grouping key
new_data = result.rename(columns={'DataSet': 'label'})
file_name = 'Time_data_Umap_label_Dataset'

plot(
    new_data, max_x, max_y, min_x, min_y, file_name,
    title="Umap on KuHar, MotionSense, UCI-HAR and WISDM \n Time data",
    labels=labels_dataset,
)

In [35]:
# Stack the embedding with a second copy whose 'labels' are all 'all', so the
# figure gets one trace per combined label plus one trace with every point.
new_data = result.copy()
tam = len(new_data['labels'])
y = np.full(tam, 'all')
new_data2 = result.copy()
new_data2['labels'] = y

new_data3 = pd.concat([new_data, new_data2], axis=0).rename(columns={'labels': 'label'})

file_name = 'Time_data_Umap_label_Activity_Dataset'
plot(
    new_data3, max_x, max_y, min_x, min_y, file_name,
    title="Umap on KuHar, MotionSense, UCI-HAR and WISDM \n Time data",
    labels=labels,
)

In [36]:
# One trace per activity code; hovering a point shows its source dataset
new_data = result.rename(columns={'standard activity code': 'label'})
file_name = 'Time_data_Umap_label_Activity'

plot(
    new_data, max_x, max_y, min_x, min_y, file_name,
    hovertext='DataSet',
    title="Umap on KuHar, MotionSense, UCI-HAR and WISDM \n Time data",
    labels=labels_activity,
    legend_title='Activity',
)

In [37]:
# One figure per activity class, with the points grouped by source dataset
for activity_code in classes:
    in_class = result['standard activity code'] == activity_code
    new_data = result[in_class].rename(columns={'DataSet': 'label'})
    activity_name = labels_activity[activity_code]

    file_name = f'Time_data_Umap_label_{activity_name}'
    plot(
        new_data, max_x, max_y, min_x, min_y, file_name,
        title=f"Umap on KuHar, MotionSense, UCI-HAR and WISDM \n Time data \n Class: {activity_name}",
        labels=labels_dataset,
    )

In [38]:
# Same per-class figures as above, but hovering a point shows its user
for activity_code in classes:
    in_class = result['standard activity code'] == activity_code
    new_data = result[in_class].rename(columns={'DataSet': 'label'})
    activity_name = labels_activity[activity_code]

    file_name = f'Time_data_Umap_label_{activity_name}_and_user'
    plot(
        new_data, max_x, max_y, min_x, min_y, file_name,
        hovertext='user',
        title=f"Umap on KuHar, MotionSense, UCI-HAR and WISDM \n Time data \n Class: {activity_name}",
        labels=labels_dataset,
        legend_title='DataSet',
    )

In [39]:
# One figure per source dataset: points grouped by activity, user shown on hover.
# The former per-iteration `labels_user` dict was never read (the legend uses
# labels_activity), so it is no longer rebuilt from every row on each pass.
for dataset_key, dataset_name in labels_dataset.items():
    in_dataset = result['DataSet'] == dataset_key
    new_data = result[in_dataset].rename(columns={'standard activity code': 'label'})

    file_name = f'Time_data_Umap_label_user_on_{dataset_name}'
    plot(
        new_data, max_x, max_y, min_x, min_y, file_name,
        hovertext='user',
        title=f"Umap on KuHar, MotionSense, UCI-HAR and WISDM \n Time data \n DataSet: {dataset_name}",
        labels=labels_activity,
        legend_title='Activity',
    )

### Frequency

In [40]:
# Fit a 2-D UMAP embedding on the first element of data_activity_fft[:]
# NOTE(review): presumably the FFT feature matrix — confirm against the dataset class
model = UMAP(n_components=2, random_state=42)
fft_features = data_activity_fft[:][0]
result = pd.DataFrame(model.fit_transform(fft_features), columns=["x", "y"])

# Attach the descriptive columns; y_activity comes from the time-domain UMAP cell
result = result.assign(**{
    "DataSet": np.array(data_activity.data['DataSet']),
    "standard activity code": np.array(data_activity.data['standard activity code']),
    "labels": np.array(data_activity.data['label']),
    "standard activity code - str": np.array(y_activity),
    "user": np.array(data_activity.data['user']),
})

# Axis limits passed to every plot of this embedding
max_x, max_y = max(result['x']), max(result['y'])
min_x, min_y = min(result['x']), min(result['y'])


Graph is not fully connected, spectral embedding may not work as expected.



In [41]:
# One trace per source dataset: the 'DataSet' column becomes the grouping key
new_data = result.rename(columns={'DataSet': 'label'})
file_name = 'Frequency_data_Umap_label_DataSet'

plot(
    new_data, max_x, max_y, min_x, min_y, file_name,
    title="Umap on KuHar, MotionSense, UCI-HAR and WISDM \n FFT data",
    labels=labels_dataset,
)

In [42]:
# Stack the embedding with a second copy whose 'labels' are all 'all', so the
# figure gets one trace per combined label plus one trace with every point.
new_data = result.copy()
tam = len(new_data['labels'])
y = np.full(tam, 'all')
new_data2 = result.copy()
new_data2['labels'] = y

new_data3 = pd.concat([new_data, new_data2], axis=0).rename(columns={'labels': 'label'})

file_name = 'Frequency_data_Umap_label_Activity_Dataset'
plot(
    new_data3, max_x, max_y, min_x, min_y, file_name,
    title="Umap on KuHar, MotionSense, UCI-HAR and WISDM \n FFT data",
    labels=labels,
)

In [43]:
# One trace per activity code (hover text falls back to plot's default column)
new_data = result.rename(columns={'standard activity code': 'label'})
file_name = 'Frequency_data_Umap_label_Activity'

plot(
    new_data, max_x, max_y, min_x, min_y, file_name,
    title="Umap on KuHar, MotionSense, UCI-HAR and WISDM \n FFT data",
    labels=labels_activity,
    legend_title='Activity',
)

In [44]:
# One figure per activity class, with the points grouped by source dataset
for activity_code in classes:
    in_class = result['standard activity code'] == activity_code
    new_data = result[in_class].rename(columns={'DataSet': 'label'})
    activity_name = labels_activity[activity_code]

    file_name = f'Frequency_data_Umap_label_{activity_name}'
    plot(
        new_data, max_x, max_y, min_x, min_y, file_name,
        title=f"Umap on KuHar, MotionSense, UCI-HAR and WISDM \n FFT data \n Class: {activity_name}",
        labels=labels_dataset,
    )

In [45]:
# Same per-class figures as above, but hovering a point shows its user
for activity_code in classes:
    in_class = result['standard activity code'] == activity_code
    new_data = result[in_class].rename(columns={'DataSet': 'label'})
    activity_name = labels_activity[activity_code]

    file_name = f'Frequency_data_Umap_label_{activity_name}_and_user'
    plot(
        new_data, max_x, max_y, min_x, min_y, file_name,
        hovertext='user',
        title=f"Umap on KuHar, MotionSense, UCI-HAR and WISDM \n FFT data \n Class: {activity_name}",
        labels=labels_dataset,
        legend_title='DataSet',
    )

In [46]:
# One figure per source dataset: points grouped by activity, user shown on hover.
# The former per-iteration `labels_user` dict was never read (the legend uses
# labels_activity), so it is no longer rebuilt from every row on each pass.
for dataset_key, dataset_name in labels_dataset.items():
    in_dataset = result['DataSet'] == dataset_key
    new_data = result[in_dataset].rename(columns={'standard activity code': 'label'})

    file_name = f'Frequency_data_Umap_label_user_on_{dataset_name}'
    plot(
        new_data, max_x, max_y, min_x, min_y, file_name,
        hovertext='user',
        title=f"Umap on KuHar, MotionSense, UCI-HAR and WISDM \n FFT data \n DataSet: {dataset_name}",
        labels=labels_activity,
        legend_title='Activity',
    )

## T-SNE

### Time

In [47]:
# Fit a 2-D t-SNE embedding on every column except the last `num_info` ones.
# `init` and `learning_rate` are pinned to the values this notebook ran with:
# scikit-learn warns that both defaults change in 1.2, which would silently
# alter the embedding (and every figure below) after an upgrade.
model = TSNE(n_components=2, random_state=42, init="random", learning_rate=200.0)
time_windows = np.array(data_activity.data.iloc[:, :-num_info])
result = pd.DataFrame(model.fit_transform(time_windows), columns=["x", "y"])

# Attach the descriptive columns; y_activity comes from the time-domain UMAP cell
result["DataSet"] = np.array(data_activity.data['DataSet'])
result["standard activity code"] = np.array(data_activity.data['standard activity code'])
result['labels'] = np.array(data_activity.data['label'])
result['standard activity code - str'] = np.array(y_activity)
result['user'] = np.array(data_activity.data['user'])

# Axis limits passed to every plot of this embedding
max_x, max_y = max(result['x']), max(result['y'])
min_x, min_y = min(result['x']), min(result['y'])


The default initialization in TSNE will change from 'random' to 'pca' in 1.2.


The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.



In [48]:
# One trace per source dataset: the 'DataSet' column becomes the grouping key
new_data = result.rename(columns={'DataSet': 'label'})
file_name = 'Time_data_tSNE_label_DataSet'

plot(
    new_data, max_x, max_y, min_x, min_y, file_name,
    title="t-SNE on KuHar, MotionSense, UCI-HAR and WISDM \n Time data",
    labels=labels_dataset,
)

In [49]:
# Stack the embedding with a second copy whose 'labels' are all 'all', so the
# figure gets one trace per combined label plus one trace with every point.
new_data = result.copy()
new_data2 = result.copy()
tam = new_data['labels'].shape[0]
y = np.array(['all']*tam)
new_data2['labels'] = y

new_data3 = pd.concat([new_data, new_data2], axis=0)
new_data3.rename(columns={'labels': 'label'}, inplace=True)

file_name = 'Time_data_tSNE_label_Activity_Dataset'
# Title fixed: this figure shows the time-domain embedding, not the FFT one
plot(new_data3, max_x, max_y, min_x, min_y, file_name,
     title="t-SNE on KuHar, MotionSense, UCI-HAR and WISDM \n Time data",
     labels = labels)

In [50]:
# One trace per activity code (hover text falls back to plot's default column)
new_data = result.rename(columns={'standard activity code': 'label'})
file_name = 'Time_data_tSNE_label_Activity'

plot(
    new_data, max_x, max_y, min_x, min_y, file_name,
    title="t-SNE on KuHar, MotionSense, UCI-HAR and WISDM \n Time data",
    labels=labels_activity,
    legend_title='Activity',
)

In [51]:
# One figure per activity class, with the points grouped by source dataset
for activity_code in classes:
    in_class = result['standard activity code'] == activity_code
    new_data = result[in_class].rename(columns={'DataSet': 'label'})
    activity_name = labels_activity[activity_code]

    file_name = f'Time_data_tSNE_label_{activity_name}'
    plot(
        new_data, max_x, max_y, min_x, min_y, file_name,
        title=f"t-SNE on KuHar, MotionSense, UCI-HAR and WISDM \n Time data \n Class: {activity_name}",
        labels=labels_dataset,
    )

In [52]:
# Same per-class figures as above, but hovering a point shows its user
for activity_code in classes:
    in_class = result['standard activity code'] == activity_code
    new_data = result[in_class].rename(columns={'DataSet': 'label'})
    activity_name = labels_activity[activity_code]

    file_name = f'Time_data_tSNE_label_{activity_name}_and_user'
    plot(
        new_data, max_x, max_y, min_x, min_y, file_name,
        hovertext='user',
        title=f"t-SNE on KuHar, MotionSense, UCI-HAR and WISDM \n Time data \n Class: {activity_name}",
        labels=labels_dataset,
        legend_title='DataSet',
    )

In [53]:
# One figure per source dataset: points grouped by activity, user shown on hover.
# The former per-iteration `labels_user` dict was never read (the legend uses
# labels_activity), so it is no longer rebuilt from every row on each pass.
for dataset_key, dataset_name in labels_dataset.items():
    in_dataset = result['DataSet'] == dataset_key
    new_data = result[in_dataset].rename(columns={'standard activity code': 'label'})

    file_name = f'Time_data_tSNE_label_user_on_{dataset_name}'
    plot(
        new_data, max_x, max_y, min_x, min_y, file_name,
        hovertext='user',
        title=f"t-SNE on KuHar, MotionSense, UCI-HAR and WISDM \n Time data \n DataSet: {dataset_name}",
        labels=labels_activity,
        legend_title='Activity',
    )

### Frequency

In [54]:
# Fit a 2-D t-SNE embedding on the first element of data_activity_fft[:].
# `init` and `learning_rate` are pinned to the values this notebook ran with:
# scikit-learn warns that both defaults change in 1.2, which would silently
# alter the embedding (and every figure below) after an upgrade.
model = TSNE(n_components=2, random_state=42, init="random", learning_rate=200.0)
result = pd.DataFrame(model.fit_transform(data_activity_fft[:][0]), columns=["x", "y"])

# Attach the descriptive columns; y_activity comes from the time-domain UMAP cell
result["DataSet"] = np.array(data_activity.data['DataSet'])
result["standard activity code"] = np.array(data_activity.data['standard activity code'])
result['labels'] = np.array(data_activity.data['label'])
result['standard activity code - str'] = np.array(y_activity)
result['user'] = np.array(data_activity.data['user'])

# Axis limits passed to every plot of this embedding
max_x, max_y = max(result['x']), max(result['y'])
min_x, min_y = min(result['x']), min(result['y'])


The default initialization in TSNE will change from 'random' to 'pca' in 1.2.


The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.



In [55]:
# One trace per source dataset: the 'DataSet' column becomes the grouping key
new_data = result.copy()
new_data.rename(columns={'DataSet': 'label'}, inplace=True)

# File name fixed: underscore instead of a space, matching the other
# 'Frequency_data_...' exports
file_name = 'Frequency_data_tSNE_label_DataSet'
plot(new_data, max_x, max_y, min_x, min_y, file_name,
     title="t-SNE on KuHar, MotionSense, UCI-HAR and WISDM \n FFT data",
     labels = labels_dataset)

In [56]:
# Stack the embedding with a second copy whose 'labels' are all 'all', so the
# figure gets one trace per combined label plus one trace with every point.
new_data = result.copy()
new_data2 = result.copy()
tam = new_data['labels'].shape[0]
y = np.array(['all']*tam)
new_data2['labels'] = y

new_data3 = pd.concat([new_data, new_data2], axis=0)
new_data3.rename(columns={'labels': 'label'}, inplace=True)

# File name fixed: underscore instead of a space, matching the other
# 'Frequency_data_...' exports
file_name = 'Frequency_data_tSNE_label_Activity_Dataset'
plot(new_data3, max_x, max_y, min_x, min_y, file_name,
     title="t-SNE on KuHar, MotionSense, UCI-HAR and WISDM \n FFT data",
     labels = labels)

In [57]:
# One trace per activity code (hover text falls back to plot's default column)
new_data = result.copy()
new_data.rename(columns={'standard activity code': 'label'}, inplace=True)

# File name fixed: underscore instead of a space, matching the other
# 'Frequency_data_...' exports
file_name = 'Frequency_data_tSNE_label_Activity'
plot(new_data, max_x, max_y, min_x, min_y, file_name,
     title="t-SNE on KuHar, MotionSense, UCI-HAR and WISDM \n FFT data",
     labels = labels_activity, legend_title='Activity')

In [58]:
# One figure per activity class, with the points grouped by source dataset
for activity_code in classes:
    in_class = result['standard activity code'] == activity_code
    new_data = result[in_class].rename(columns={'DataSet': 'label'})
    activity_name = labels_activity[activity_code]

    file_name = f'Frequency_data_tSNE_label_{activity_name}'
    plot(
        new_data, max_x, max_y, min_x, min_y, file_name,
        title=f"t-SNE on KuHar, MotionSense, UCI-HAR and WISDM \n FFT data \n Class: {activity_name}",
        labels=labels_dataset,
    )

In [59]:
# Same per-class figures as above, but hovering a point shows its user
for activity_code in classes:
    in_class = result['standard activity code'] == activity_code
    new_data = result[in_class].rename(columns={'DataSet': 'label'})
    activity_name = labels_activity[activity_code]

    file_name = f'Frequency_data_tSNE_label_{activity_name}_and_user'
    plot(
        new_data, max_x, max_y, min_x, min_y, file_name,
        hovertext='user',
        title=f"t-SNE on KuHar, MotionSense, UCI-HAR and WISDM \n FFT data \n Class: {activity_name}",
        labels=labels_dataset,
        legend_title='DataSet',
    )

In [60]:
# One figure per source dataset: points grouped by activity, user shown on hover.
# The former per-iteration `labels_user` dict was never read (the legend uses
# labels_activity), so it is no longer rebuilt from every row on each pass.
for dataset_key, dataset_name in labels_dataset.items():
    in_dataset = result['DataSet'] == dataset_key
    new_data = result[in_dataset].rename(columns={'standard activity code': 'label'})

    file_name = f'Frequency_data_tSNE_label_user_on_{dataset_name}'
    plot(
        new_data, max_x, max_y, min_x, min_y, file_name,
        hovertext='user',
        title=f"t-SNE on KuHar, MotionSense, UCI-HAR and WISDM \n FFT data \n DataSet: {dataset_name}",
        labels=labels_activity,
        legend_title='Activity',
    )