# Pre-processing the MotionSense Dataset and Generating Views

In [1]:
%load_ext autoreload
%autoreload 2

import sys

# Put "../../" on the import path (presumably the repository root that holds
# the `librep` package imported in the next cell -- confirm against the repo
# layout). The membership check keeps this cell idempotent: re-running it no
# longer stacks duplicate entries onto sys.path.
if "../../" not in sys.path:
    sys.path.append("../../")

In [2]:
from pathlib import Path
from typing import List
import hashlib
import pandas as pd

from librep.datasets.har.motionsense import (
    RawMotionSense,
    RawMotionSenseIterator,
    MotionSenseDatasetGenerator
)
from librep.utils.dataset import PandasDatasetsIO

%matplotlib inline

2022-10-21 21:13:03.649651: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-10-21 21:13:03.649671: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [3]:
# Location of the raw MotionSense "DeviceMotion" recordings, relative to this notebook.
dataset_dir = Path("../../data/datasets/MotionSense/A_DeviceMotion_data")

# download=False: no download is requested here -- presumably the files must
# already exist under dataset_dir (TODO confirm against RawMotionSense).
motionsense_dataset = RawMotionSense(
    dataset_dir,
    download=False,
)

# Bare last expression so the notebook displays the dataset repr.
motionsense_dataset

MotionSense Dataset at: '../../data/datasets/MotionSense/A_DeviceMotion_data'

In [4]:
# Look up the name of every entry in `activities` through `activity_names`,
# keeping the order in which the dataset lists them.
act_names = [
    motionsense_dataset.activity_names[activity]
    for activity in motionsense_dataset.activities
]
act_names

['dws', 'ups', 'sit', 'std', 'wlk', 'jog']

In [5]:
# Both selectors are left as None; the iterator repr then reports all
# 24 users and 6 activities, i.e. nothing is filtered out.
iterator = RawMotionSenseIterator(
    motionsense_dataset,
    users_to_select=None,
    activities_to_select=None,
)
iterator

MotionSense Iterator: users=24, activities=6

In [6]:
# Windowing configuration for the view generator.
# NOTE(review): the unit of time_window (samples vs. seconds) and the exact
# effect of add_gravity are not visible in this notebook -- confirm in librep.
motionsense_v1 = MotionSenseDatasetGenerator(
    iterator,
    time_window=60,
    window_overlap=0,
    add_gravity=True,
)
motionsense_v1

Dataset generator: time_window=60, overlap=0

In [7]:
# Options for the 70/10/20 train/validation/test split. Users are kept
# distinct across subsets and samples are balanced; the seed is fixed,
# presumably so the split (and the digests printed below) is reproducible.
split_options = dict(
    train_size=0.7,
    validation_size=0.1,
    test_size=0.2,
    ensure_distinct_users_per_dataset=True,
    balance_samples=True,
    seed=0,
)
train, validation, test = motionsense_v1.create_datasets(**split_options)

Generating full df over MotionSense View: 360it [00:18, 19.78it/s]


In [8]:
# Fingerprint each split: hash the frame with pandas, then condense the
# resulting array into a single SHA-1 hex digest. Comparing these digests
# across runs shows whether the generated subsets changed.
for split_frame in (train, validation, test):
    row_hashes = pd.util.hash_pandas_object(split_frame).values
    print(hashlib.sha1(row_hashes).hexdigest())

11bb43fa2de39298cb24ac0a0bfa97c415f4f6f7
ac8b35f17555d0f2e25470079e5beb5b8d114346
e027edb2d15b42c0ac07ac289b6f92512f18e4b5


In [9]:
# Human-readable description that is handed to `pandas_io.save` together
# with the three subsets.
# NOTE(review): the text contains typos ("each subset have", "activitiy",
# "Each subset contain", "exclusivelly"); it is left untouched here because
# it is written out with the view -- fix when the view is regenerated.
description = """# Balanced MotionSense

This view contains train, validation and test subsets in the following proportions:
- Train: 70% of samples
- Validation: 10% of samples
- Test: 20% of samples

After splits, the datasets were balanced in relation to the activity code column, that is, each subset have the same number of activitiy samples.

**NOTE**: Each subset contain samples from distinct users, that is, samples of one user belongs exclusivelly to one of three subsets.
"""

# Destination of the balanced view.
# NOTE(review): this writes under "data_2" while the raw data is read from
# "data" -- confirm that the different root is intentional.
output_path = Path("../../data_2/views/MotionSense/balanced_view")

pandas_io = PandasDatasetsIO(output_path)
pandas_io

PandasDatasetIO at '../../data_2/views/MotionSense/balanced_view'

In [10]:
# Hand the three subsets and the description text to the dataset writer.
pandas_io.save(train=train, validation=validation, test=test, description=description)