# Experiments on KuHar Resampled to 30Hz

This notebook will perform basic experiments on the balanced KuHar dataset with the following steps:
1. Quick load train, test and validation CSV subsets from the balanced KuHar dataset using `PandasDatasetsIO` helper
2. Subclassing the `Dataset` interface using `PandasMultiModalDataset`
3. Apply the fourier transform on the dataset
4. Train and evaluate SVM, KNN and Random Forest classification models in both time and frequency domains

The experiments will evaluate the performance of SVM, KNN and RF models on the balanced KuHar dataset in both time and frequency domains.

## Common imports and definitions

In [1]:
from pathlib import Path  # For defining dataset Paths
import sys                # For include librep package

# This must be done if librep is not installed via pip,
# as this directory (examples) is appart from librep package root
sys.path.append("..")

# Third party imports
import pandas as pd
import numpy as np

# Librep imports
from librep.utils.dataset import PandasDatasetsIO          # For quick load train, test and validation CSVs
from librep.datasets.multimodal import PandasMultiModalDataset # Wrap CSVs to librep's `Dataset` interface

2022-09-08 20:58:41.860730: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-08 20:58:41.860750: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


## Loading data
Change the path to use in other datasets

In [2]:
# Path for KuHar resampled to 30Hz view with the same activities (and labels numbers)
# It is assumed that the directory will contain (train.csv, test.csv and validation.csv)
dataset_path_kuhar = Path("../data/views/KuHar/resampled_view_30Hz")
dataset_path_motionsense = Path("../data/views/MotionSense/resampled_view_30Hz")

Once paths is defined, we can load the CSV as pandas dataframes

In [3]:
# Kuhar dataframes
train_kuhar, validation_kuhar, test_kuhar = PandasDatasetsIO(dataset_path_kuhar).load()
train_motionsense, validation_motionsense, test_motionsense = PandasDatasetsIO(dataset_path_motionsense).load()

Letś take a look in the train dataframes

In [4]:
train_kuhar.head()

Unnamed: 0.1,Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,...,gyro-z-89,accel-start-time,gyro-start-time,accel-end-time,gyro-end-time,activity code,length,serial,index,user
0,0,0.00362,-0.023688,-0.002657,0.010371,-0.005858,0.010423,-0.019593,0.000303,0.045433,...,0.003377,23.235,23.223,26.26,26.249,0,300,1,2100,1051
1,1,-0.005823,0.012494,-0.012503,-0.002116,0.025957,-0.012833,-0.025845,-0.011941,0.012807,...,0.003056,56.292,56.292,59.245,59.245,0,300,1,5700,1037
2,2,-0.039278,0.003864,0.008927,-0.024887,0.022435,0.003431,-0.038931,0.003359,0.009394,...,0.003442,27.268,27.267,30.29,30.291,0,300,1,2700,1075
3,3,-0.001728,-0.018312,0.013927,0.015426,0.007332,-0.012372,0.006893,-0.002433,0.012821,...,-0.001294,39.421,39.42,42.441,42.44,0,300,6,3900,1008
4,4,-0.022981,0.014871,-0.03631,0.033512,-0.016733,0.01993,-0.016637,0.007568,-0.002753,...,0.00456,23.703,23.703,26.656,26.656,0,300,1,2400,1038


In [5]:
train_motionsense.head()

Unnamed: 0.1,Unnamed: 0,userAcceleration.x-0,userAcceleration.x-1,userAcceleration.x-2,userAcceleration.x-3,userAcceleration.x-4,userAcceleration.x-5,userAcceleration.x-6,userAcceleration.x-7,userAcceleration.x-8,...,rotationRate.z-85,rotationRate.z-86,rotationRate.z-87,rotationRate.z-88,rotationRate.z-89,activity code,length,trial_code,index,user
0,0,-0.186833,-0.179195,-0.226435,-0.234763,-0.267824,-0.234534,-0.235421,-0.133759,-0.297125,...,0.153844,0.456858,0.898804,1.139253,0.275556,0,150,1,150,11
1,1,-0.054442,0.260099,0.022933,0.019339,0.148599,-0.036896,-0.125777,-0.110877,0.01626,...,0.848919,0.559802,0.253026,0.858864,0.799075,0,150,1,900,12
2,2,-0.007696,-0.009515,0.051284,-0.082342,0.046316,0.062557,-0.032338,-0.108787,-0.09048,...,0.789453,0.49527,-0.042529,-0.16111,0.129157,0,150,1,1050,21
3,3,-0.435023,-0.557701,-0.284523,0.142448,0.545683,0.363495,0.006622,-0.042397,-0.412486,...,-1.073667,-0.531939,-0.302297,0.086762,0.60079,0,150,2,150,17
4,4,0.098066,0.398057,0.321284,-0.024039,0.372623,0.302234,0.199685,0.285311,0.319281,...,0.884322,1.476072,1.630557,0.927239,0.672827,0,150,11,450,21


## Creating a Librep dataset from pandas dataframes

Change the features to use in other datasets

In [6]:
# Kuhar features to select
features_kuhar = [
    "accel-x",
    "accel-y",
    "accel-z",
    "gyro-x",
    "gyro-y",
    "gyro-z"
]

# Creating the datasets

# Train
train_dataset_kuhar = PandasMultiModalDataset(
    train_kuhar,
    feature_prefixes=features_kuhar,
    label_columns="activity code",
    as_array=True
)

# Validation
validation_dataset_kuhar = PandasMultiModalDataset(
    validation_kuhar,
    feature_prefixes=features_kuhar,
    label_columns="activity code",
    as_array=True
)

# Test
test_dataset_kuhar = PandasMultiModalDataset(
    test_kuhar,
    feature_prefixes=features_kuhar,
    label_columns="activity code",
    as_array=True
)

In [7]:
# MotionSense features to select
features_motionsense = [
    "userAcceleration.x",
    "userAcceleration.y",
    "userAcceleration.z",
    "rotationRate.x",
    "rotationRate.y",
    "rotationRate.z"
]

# Creating the datasets

# Train
train_dataset_motionsense = PandasMultiModalDataset(
    train_motionsense,
    feature_prefixes=features_motionsense,
    label_columns="activity code",
    as_array=True
)

# Validation
validation_dataset_motionsense = PandasMultiModalDataset(
    validation_motionsense,
    feature_prefixes=features_motionsense,
    label_columns="activity code",
    as_array=True
)

# Test
test_dataset_motionsense = PandasMultiModalDataset(
    test_motionsense,
    feature_prefixes=features_motionsense,
    label_columns="activity code",
    as_array=True
)

## Inspect sample

In [8]:
# Lets print the first sample of kh_train dataset.
# Is a tuple, with an vector of 1800 elements as first element and the label as second
x = train_dataset_kuhar[0]
print(x)

(array([ 3.62008887e-03, -2.36884079e-02, -2.65723909e-03,  1.03708716e-02,
       -5.85826359e-03,  1.04230261e-02, -1.95931668e-02,  3.02941467e-04,
        4.54334895e-02, -4.61346122e-03, -4.32958544e-02, -2.11811327e-03,
       -3.24331746e-02, -9.64189479e-04,  2.38423378e-02,  1.46571666e-02,
       -6.33010786e-02, -3.09536555e-02, -6.09642095e-03, -2.23327124e-02,
        4.73811398e-02,  8.10745709e-03, -1.46291624e-03, -3.52547565e-02,
       -3.65486640e-02,  3.89222511e-02, -5.64525903e-02,  1.71065679e-02,
       -1.88765233e-02,  1.56273664e-02,  6.68170176e-02,  1.86759574e-02,
       -4.15165877e-02,  1.55675209e-02,  4.57480490e-03, -1.56470432e-02,
        2.86984361e-02, -3.88362938e-03, -1.85153493e-02, -9.91992429e-03,
        1.05702633e-02, -5.34487311e-03,  2.19452625e-02,  2.31337120e-02,
       -7.65234924e-03, -5.43440699e-02, -2.68535392e-02,  1.13130045e-02,
        2.85217945e-02,  1.84220024e-02,  1.95164920e-02,  4.88774164e-03,
        6.26222956e-03, 

In [9]:
# Inspecting sample
print(f"The sample 0: {x[0]}")
print(f"Shape of sample 0: {x[0].shape}")
print(f"The label of sample 0: {x[1]}")

The sample 0: [ 3.62008887e-03 -2.36884079e-02 -2.65723909e-03  1.03708716e-02
 -5.85826359e-03  1.04230261e-02 -1.95931668e-02  3.02941467e-04
  4.54334895e-02 -4.61346122e-03 -4.32958544e-02 -2.11811327e-03
 -3.24331746e-02 -9.64189479e-04  2.38423378e-02  1.46571666e-02
 -6.33010786e-02 -3.09536555e-02 -6.09642095e-03 -2.23327124e-02
  4.73811398e-02  8.10745709e-03 -1.46291624e-03 -3.52547565e-02
 -3.65486640e-02  3.89222511e-02 -5.64525903e-02  1.71065679e-02
 -1.88765233e-02  1.56273664e-02  6.68170176e-02  1.86759574e-02
 -4.15165877e-02  1.55675209e-02  4.57480490e-03 -1.56470432e-02
  2.86984361e-02 -3.88362938e-03 -1.85153493e-02 -9.91992429e-03
  1.05702633e-02 -5.34487311e-03  2.19452625e-02  2.31337120e-02
 -7.65234924e-03 -5.43440699e-02 -2.68535392e-02  1.13130045e-02
  2.85217945e-02  1.84220024e-02  1.95164920e-02  4.88774164e-03
  6.26222956e-03  1.74586067e-02  4.62809500e-03 -4.00069874e-02
  1.28236155e-03  1.63634494e-02 -7.79265175e-04  3.31124115e-02
 -6.2073271

## t-SNE Transform

In [13]:
from librep.datasets.multimodal import TransformMultiModalDataset
from librep.transforms.tsne import TSNE

In [29]:
tsne_transform = TSNE(random_state=0)
transformer = TransformMultiModalDataset(transforms=[tsne_transform], new_window_name_prefix="tsne.")

In [63]:
features = []
for k in train_dataset_kuhar.feature_columns:
    features.append(k)

X = train_dataset_kuhar.data[features]
y = train_dataset_kuhar.data['activity code']

In [67]:
train_dataset_tsne_kuhar = transformer(X)

KeyError: 0

In [61]:

X

Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-80,gyro-z-81,gyro-z-82,gyro-z-83,gyro-z-84,gyro-z-85,gyro-z-86,gyro-z-87,gyro-z-88,gyro-z-89
0,0.003620,-0.023688,-0.002657,0.010371,-0.005858,0.010423,-0.019593,0.000303,0.045433,-0.004613,...,0.000707,0.001735,0.004720,0.003066,0.002235,0.002746,0.003216,0.002385,0.004016,0.003377
1,-0.005823,0.012494,-0.012503,-0.002116,0.025957,-0.012833,-0.025845,-0.011941,0.012807,0.004603,...,0.002721,0.003136,0.001823,0.001692,0.000443,-0.001092,-0.000615,-0.003112,0.002459,0.003056
2,-0.039278,0.003864,0.008927,-0.024887,0.022435,0.003431,-0.038931,0.003359,0.009394,-0.028853,...,-0.000006,0.005970,0.002829,0.001145,0.004210,0.003512,0.003344,0.004968,0.003306,0.003442
3,-0.001728,-0.018312,0.013927,0.015426,0.007332,-0.012372,0.006893,-0.002433,0.012821,-0.010669,...,0.001164,-0.000918,-0.001025,0.000604,-0.000815,-0.001659,0.000037,-0.001675,-0.002496,-0.001294
4,-0.022981,0.014871,-0.036310,0.033512,-0.016733,0.019930,-0.016637,0.007568,-0.002753,0.007763,...,0.001448,-0.001679,0.003199,0.007891,0.003509,-0.000226,0.002749,0.003864,0.002099,0.004560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3325,-2.056244,-3.629712,-5.103134,-7.984378,-7.192343,-3.935411,8.426300,24.822963,4.338489,-9.045683,...,-1.064967,-0.859027,-0.450143,-0.169548,0.161370,0.369481,0.191234,-0.061095,-0.218894,-0.051983
3326,-1.647790,-1.945329,5.476162,3.970667,0.271624,-4.150560,2.543954,-2.431491,-1.883694,-0.835626,...,-0.070705,-0.095882,-0.240973,0.234913,0.403643,0.565524,0.483941,0.499649,0.413276,0.869767
3327,-0.316901,-0.551917,-1.592117,-0.877500,-0.323541,-0.768261,1.014809,2.774861,1.561735,0.059030,...,0.896418,0.801655,0.508606,0.600514,0.503770,0.432132,0.301299,0.268875,0.147700,0.198234
3328,-1.888981,-0.294286,2.804385,1.476566,1.186552,0.976697,-0.997273,-2.450939,-2.028572,-2.295269,...,0.161268,0.249600,0.236745,0.330608,0.297170,0.416746,0.451894,0.570111,0.577753,0.643595


## Fourier Transform

In [None]:
from librep.datasets.multimodal import TransformMultiModalDataset
from librep.transforms.fft import FFT

In [None]:
fft_transform = FFT(centered = True)
transformer = TransformMultiModalDataset(transforms=[fft_transform], new_window_name_prefix="fft.")

### Use FFT in Kuhar

In [None]:
train_dataset_fft_kuhar = transformer(train_dataset_kuhar)
validation_dataset_fft_kuhar = transformer(validation_dataset_kuhar)
test_dataset_fft_kuhar = transformer(test_dataset_kuhar)

In [None]:
train_dataset_kuhar[:][0]

In [None]:
train_dataset_fft_kuhar[:][0]

### Use FFT in MotionSense

In [None]:
train_dataset_fft_motionsense = transformer(train_dataset_motionsense)
validation_dataset_fft_motionsense = transformer(validation_dataset_motionsense)
test_dataset_fft_motionsense = transformer(test_dataset_motionsense)

In [None]:
train_dataset_motionsense[:][0]

In [None]:
train_dataset_fft_motionsense[:][0]

## Train and evaluate t-

In [None]:
from librep.utils.workflow import SimpleTrainEvalWorkflow, MultiRunWorkflow
# from librep.estimators import RandomForestClassifier
from sklearn.manifold import TSNE
from librep.metrics.report import ClassificationReport
import yaml

# reporter = ClassificationReport(use_accuracy=True, use_f1_score=True, use_classification_report=False, use_confusion_matrix=False, plot_confusion_matrix=False)
# experiment = SimpleTrainEvalWorkflow(estimator=TSNE, estimator_creation_kwags ={'n_estimators':100} , do_not_instantiate=False, do_fit=True, evaluator=reporter)
# multi_run_experiment = MultiRunWorkflow(workflow=experiment, num_runs=3, debug=False)

In [None]:
combined_train_dset = PandasMultiModalDataset(
    pd.concat([train, validation]),
    feature_prefixes=features,
    label_columns="activity code",
    as_array=True
)


result = multi_run_experiment(combined_train_dset, test_dataset)
print(yaml.dump(result, sort_keys=True, indent=4))

In [None]:
combined_train_dset_fft = transformer(combined_train_dset)

result = multi_run_experiment(combined_train_dset_fft, test_dataset_fft)
print(yaml.dump(result, sort_keys=True, indent=4))

## Train and evaluate Support Vector Machine classifier

In [None]:
#from librep.estimators import SVC
from sklearn.svm import SVC

experiment = SimpleTrainEvalWorkflow(estimator=SVC, estimator_creation_kwags ={'C':3.0, 'kernel':"rbf"} , do_not_instantiate=False, do_fit=True, evaluator=reporter)
multi_run_experiment = MultiRunWorkflow(workflow=experiment, num_runs=3, debug=False)

result = multi_run_experiment(combined_train_dset, test_dataset)
print(yaml.dump(result, sort_keys=True, indent=4))

In [None]:
result = multi_run_experiment(combined_train_dset_fft, test_dataset_fft)
print(yaml.dump(result, sort_keys=True, indent=4))

## Train and evaluate K Neighbors Classifier classifier

In [None]:
#from librep.estimators import KNeighborsClassifier
from sklearn.neighbors import KNeighborsClassifier

experiment = SimpleTrainEvalWorkflow(estimator=KNeighborsClassifier, estimator_creation_kwags ={'n_neighbors' :1} , do_not_instantiate=False, do_fit=True, evaluator=reporter)
multi_run_experiment = MultiRunWorkflow(workflow=experiment, num_runs=3, debug=False)

result = multi_run_experiment(combined_train_dset, test_dataset)
print(yaml.dump(result, sort_keys=True, indent=4))

In [None]:
result = multi_run_experiment(combined_train_dset_fft, test_dataset_fft)
print(yaml.dump(result, sort_keys=True, indent=4))