# Experiments on KuHar Resampled to 20Hz

This notebook will perform basic experiments on the balanced KuHar dataset with the following steps:
1. Quick load train, test and validation CSV subsets from the balanced KuHar dataset using `PandasDatasetsIO` helper
2. Subclassing the `Dataset` interface using `PandasMultiModalDataset`
3. Apply the fourier transform on the dataset
4. Apply t-SNE in both datasets together in time and frequency domains

The experiments will evaluate the distrbution between KuHar and MotionSense on the balanced datasets in both time and frequency domains.

## Common imports and definitions

In [1]:
from pathlib import Path  # For defining dataset Paths
import sys                # For include librep package

# This must be done if librep is not installed via pip,
# as this directory (examples) is appart from librep package root
sys.path.append("..")

# Third party imports
import pandas as pd
import numpy as np

# Librep imports
from librep.utils.dataset import PandasDatasetsIO          # For quick load train, test and validation CSVs
from librep.datasets.multimodal import PandasMultiModalDataset # Wrap CSVs to librep's `Dataset` interface

2022-09-20 12:20:56.573862: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-20 12:20:56.573884: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


## Loading data
Change the path to use in other datasets

In [2]:
# Path for KuHar resampled to 30Hz view with the same activities (and labels numbers)
# It is assumed that the directory will contain (train.csv, test.csv and validation.csv)
dataset_path_extrasensory = Path("../data/views/ExtraSensory/balanced_view_resampled_20Hz")
# dataset_path_extrasensory = Path("../data/views/ExtraSensory/unbalanced_train_only_resampled_20hz")
dataset_path_kuhar = Path("../data/views/KuHar/resampled_view_20Hz")
dataset_path_motionsense = Path("../data/views/MotionSense/resampled_view_20Hz")

Once paths is defined, we can load the CSV as pandas dataframes

In [3]:
# Kuhar/Motionsense dataframes
train_extrasensory, validation_extrasensory, test_extrasensory = PandasDatasetsIO(dataset_path_extrasensory).load()
train_kuhar, validation_kuhar, test_kuhar = PandasDatasetsIO(dataset_path_kuhar).load()
train_motionsense, validation_motionsense, test_motionsense = PandasDatasetsIO(dataset_path_motionsense).load()

FileNotFoundError: [Errno 2] No such file or directory: '../data/views/ExtraSensory/balanced_view_resampled_20Hz/train.csv'

In [None]:
all_data_kuhar = pd.concat([train_kuhar, validation_kuhar, test_kuhar])
all_data_motionsense = pd.concat([train_motionsense, validation_motionsense, test_motionsense])

X = [0 for i in range(len(all_data_kuhar))]
all_data_kuhar['Dataset'] = X

X = [1 for i in range(len(all_data_motionsense))]
all_data_motionsense['Dataset'] = X

## Removing activities that don't exist in MotionSense and changing their codes.

In [None]:
activity_kuhar = {
        0: "Stand",
        1: "Sit",
        11: "Walk",
        14: "Run",
        15: "Stair-up",
        16: "Stair-down",
}

activity_motionsense = {
        3: "std",
        2: "sit",
        4: "wlk",
        5: "jog",
        1: "ups",
        0: "dws"
}

kh = list(activity_kuhar.keys())
ms = list(activity_motionsense.keys())
l = [k for k in range(6)]
kh

In [None]:
all_data_kuhar = all_data_kuhar[all_data_kuhar['activity code'].isin(kh)]

In [None]:
all_data_kuhar['activity code'].unique()

In [None]:
for k in range(6):
    all_data_kuhar['activity code'] = all_data_kuhar['activity code'].replace(kh[k], k+6)

In [None]:
all_data_kuhar['activity code'].unique()

## Creating a Librep dataset from pandas dataframes

Change the features to use in other datasets

In [None]:
features_motionsense = list(all_data_motionsense.columns[1:-6]) + ['Dataset', 'activity code']
# all_data_motionsense.head()

In [None]:
features_kuhar = list(all_data_kuhar.columns[1:-10]) + ['Dataset', 'activity code']
# all_data_kuhar

In [None]:
X1 = np.array(all_data_kuhar[features_kuhar])
X2 = np.array(all_data_motionsense[features_motionsense])

In [None]:
all_data = pd.DataFrame(np.concatenate((X1, X2)), columns=list(features_kuhar))
all_data

In [None]:
features = features_kuhar[:-2]
# features

## t-SNE Transform

In [None]:
from sklearn.manifold import TSNE

tsne_model = TSNE(random_state=42)
tsne_df = tsne_model.fit_transform(all_data[features])

In [None]:
tsne_df = pd.DataFrame(tsne_df, columns=["X", "Y"])
tsne_df
tsne_df["class"] = all_data['Dataset']

In [None]:
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(1,1,1)
ax.set_xlabel('X', fontsize=15)
ax.set_ylabel('Y', fontsize=15)
ax.set_title("T-SNE - Time domain - Chart of the datasets KuHar and MotionSense - 20Hz", fontsize=14)

labels = []
for c, ds in tsne_df.groupby(["class"]):
    ax.scatter(ds["X"], ds["Y"], marker='.')
    if c  == 0:
        labels.append('KuHar')
    else:
        labels.append('MotionSense')
#     else:
#         labels.append(f"{c}")
ax.legend(labels, bbox_to_anchor = (1.28, 1.))
ax.grid()
plt.show()

Lets show the datasets in the frequency domain and compare with the result above

## Creating a Librep dataset from pandas dataframes

Change the features to use in other datasets

In [None]:
# Kuhar and MotionSense features to select
features = [
    "accel-x",
    "accel-y",
    "accel-z",
    "gyro-x",
    "gyro-y",
    "gyro-z"
]

# Creating the datasets

# Train
all_data_multimodal = PandasMultiModalDataset(
    all_data,
    feature_prefixes=features,
    label_columns="Dataset",
    as_array=True
)

## Fourier Transform

In [None]:
from librep.datasets.multimodal import TransformMultiModalDataset
from librep.transforms.fft import FFT

In [None]:
fft_transform = FFT(centered = True)
transformer = TransformMultiModalDataset(transforms=[fft_transform], new_window_name_prefix="fft.")

### Use FFT in the data

In [None]:
all_data_multimodal_fft = transformer(all_data_multimodal)

tsne_model = TSNE(random_state=42)
tsne_df = tsne_model.fit_transform(all_data_multimodal_fft.X)

tsne_df = pd.DataFrame(tsne_df, columns=["X", "Y"])
tsne_df
tsne_df["class"] = all_data_multimodal_fft.y

fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(1,1,1)
ax.set_xlabel('X', fontsize=15)
ax.set_ylabel('Y', fontsize=15)
ax.set_title("T-SNE - Frequency domain - Chart of the datasets KuHar and MotionSense - 20Hz", fontsize=14)

labels = []
for c, ds in tsne_df.groupby(["class"]):
    ax.scatter(ds["X"], ds["Y"], marker='.')
    if c  == 0:
        labels.append('KuHar')
    else:
        labels.append('MotionSense')
#     else:
#         labels.append(f"{c}")
ax.legend(labels, bbox_to_anchor = (1.28, 1.))
ax.grid()
plt.show()

## Use t-SNE to show separete classes

In [None]:
def transformMultimodal(X):
    # Kuhar and MotionSense features to select
    features = [
        "accel-x",
        "accel-y",
        "accel-z",
        "gyro-x",
        "gyro-y",
        "gyro-z"
    ]

    # Creating the datasets

    # Train
    data_multimodal = PandasMultiModalDataset(
        X,
        feature_prefixes=features,
        label_columns="activity code",
        as_array=True
    )
    return data_multimodal

def tsne(data, num_classes, classes, transforms, title):

    X = data[data['activity code'].isin(num_classes)]
    X_new = transformMultimodal(X)

    transformer = TransformMultiModalDataset(transforms=transforms, new_window_name_prefix="transform.")
    data_multimodal_fft = transformer(X_new)
    
    tsne_model = TSNE(random_state=42)
    tsne_df = tsne_model.fit_transform(data_multimodal_fft.X)

    tsne_df = pd.DataFrame(tsne_df, columns=["X", "Y"])
    tsne_df
    tsne_df["class"] = data_multimodal_fft.y

    fig = plt.figure(figsize=(8,8))
    ax = fig.add_subplot(1,1,1)
    ax.set_xlabel('X', fontsize=15)
    ax.set_ylabel('Y', fontsize=15)
    ax.set_title(title, fontsize=14)

    labels = []
    for c, ds in tsne_df.groupby(["class"]):
        ax.scatter(ds["X"], ds["Y"], marker='.')
        if c  in num_classes:
            idx = num_classes.index(c)
            labels.append(classes[num_classes[idx]])
        # else:
        #     labels.append(classes[num_classes[1]])

    ax.legend(labels, bbox_to_anchor = (1.28, 1.))
    ax.grid()
    plt.show()
fft_transform = FFT(centered = True)

In [None]:
pares = [[0, 11], [1, 10], [2, 7], [3, 6], [4, 8], [5, 9]]

labels_ms = ['Downstairs', 'Upstairs', 'Sitting', 'Standing', 'Walking', 'Jogging']
labels_kh = ["Stand", "Sit", "Walk", "Run", "Stair-up", "Stair-down"]

classes = []
for c in labels_ms:
    classes.append(c + ' - MS')
for c in labels_kh:
    classes.append(c + ' - KH')
classes

In [None]:
transforms = [fft_transform]
title = "T-SNE - Frequency domain - Chart of the datasets KuHar and MotionSense - 20Hz"
for i in range(6):
    tsne(all_data, pares[i], classes, transforms, title)
tsne(all_data, [*pares[4], *pares[5]], classes, transforms, title)

## What happens if we use statistic features?

In [None]:
from librep.transforms.stats import StatsTransform

stats_transform = StatsTransform()
transforms = [stats_transform]
title = "T-SNE - Statistic features - Chart of the datasets KuHar and MotionSense - 20Hz"

for i in range(6):
    tsne(all_data, pares[i], classes, transforms, title)

tsne(all_data, [*pares[4], *pares[5]], classes, transforms, title)

In [None]:
transformer = TransformMultiModalDataset(transforms=[stats_transform], new_window_name_prefix="st.")
all_data_multimodal_st = transformer(all_data_multimodal)

tsne_model = TSNE(random_state=42)
tsne_df = tsne_model.fit_transform(all_data_multimodal_st.X)

tsne_df = pd.DataFrame(tsne_df, columns=["X", "Y"])
tsne_df
tsne_df["class"] = all_data_multimodal_st.y

fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(1,1,1)
ax.set_xlabel('X', fontsize=15)
ax.set_ylabel('Y', fontsize=15)
ax.set_title(title, fontsize=14)

labels = []
for c, ds in tsne_df.groupby(["class"]):
    ax.scatter(ds["X"], ds["Y"], marker='.')
    if c  == 0:
        labels.append('KuHar')
    else:
        labels.append('MotionSense')
#     else:
#         labels.append(f"{c}")
ax.legend(labels, bbox_to_anchor = (1.28, 1.))
ax.grid()
plt.show()

In [None]:
from librep.transforms.stats import StatsTransform

stats_transform = StatsTransform()
transforms = [fft_transform, stats_transform]
title = "T-SNE - FFT + Statistic features - Chart of the datasets KuHar and MotionSense - 20Hz"

for i in range(6):
    tsne(all_data, pares[i], classes, transforms, title)

tsne(all_data, [*pares[4], *pares[5]], classes, transforms, title)

In [None]:
transformer = TransformMultiModalDataset(transforms=[fft_transform, stats_transform], 
                                         new_window_name_prefix="st.")
all_data_multimodal_fft_st = transformer(all_data_multimodal)

tsne_model = TSNE(random_state=42)
tsne_df = tsne_model.fit_transform(all_data_multimodal_fft_st.X)

tsne_df = pd.DataFrame(tsne_df, columns=["X", "Y"])
tsne_df
tsne_df["class"] = all_data_multimodal_fft_st.y

fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(1,1,1)
ax.set_xlabel('X', fontsize=15)
ax.set_ylabel('Y', fontsize=15)
ax.set_title(title, fontsize=14)

labels = []
for c, ds in tsne_df.groupby(["class"]):
    ax.scatter(ds["X"], ds["Y"], marker='.')
    if c  == 0:
        labels.append('KuHar')
    else:
        labels.append('MotionSense')
#     else:
#         labels.append(f"{c}")
ax.legend(labels, bbox_to_anchor = (1.28, 1.))
ax.grid()
plt.show()