In [1]:
import os
import numpy as np
import pandas as pd
import uuid
from typing import List
from librep.datasets.kuhar import TrimmedRawKuHarDataset
from librep.dataset_views.kuhar import KuHarView, TimeSeriesKuHarView

In [2]:
def save_canonical_view(data: np.ndarray, meta_info: pd.DataFrame, root_dir: str, dataset: str, description: str, features: str, uid: str):
    path = os.path.join(root_dir, dataset, description, features, uid)
    os.makedirs(path, exist_ok=True)
    data_name = os.path.join(path, "data")
    meta_name = os.path.join(path, "meta.csv")
    np.savez(data_name, data=data)
    print(f"Data saved to '{data_name}.npz'")
    meta_info.to_csv(meta_name, index=False)
    print(f"Metadata saved to '{meta_name}'")

In [3]:
dataset_dir = os.path.join("data", "datasets", "KuHar", "2.Trimmed_interpolated_raw_data")
kuhar = TrimmedRawKuHarDataset(dataset_dir, download=False)

In [4]:
dataset = "kuhar"
root_output_dir = os.path.join("data", "dataset_views")
uid = str(uuid.uuid4())[:8]

view_descriptions = [
    {
        "features": ("accel-x", "accel-y", "accel-z", "gyro-x", "gyro-y", "gyro-z"),
        "users": kuhar.get_all_user_ids(),
        "activities": kuhar.get_all_activity_ids(),
        "description": "all"
    }, 
    {
        "features": ("accel-x", "accel-y", "accel-z"),
        "users": kuhar.get_all_user_ids(),
        "activities": kuhar.get_all_activity_ids(),
        "description": "accelerometer"
    }, 
    {
        "features": ("gyro-x", "gyro-y", "gyro-z"),
        "users": kuhar.get_all_user_ids(),
        "activities": kuhar.get_all_activity_ids(),
        "description": "gyroscope"
    } 
]

# Raw Dataset Views

In [5]:
description = "raw"
for desc in view_descriptions:
    view = KuHarView(kuhar, features=desc["features"], users=desc["users"], activities=desc["activities"])
    data, meta = view.to_canonical()
    save_canonical_view(data, meta, root_output_dir, dataset, description, desc["description"], uid)

Data saved to 'data/dataset_views/kuhar/raw/all/486c8aad/data.npz'
Metadata saved to 'data/dataset_views/kuhar/raw/all/486c8aad/meta.csv'
Data saved to 'data/dataset_views/kuhar/raw/accelerometer/486c8aad/data.npz'
Metadata saved to 'data/dataset_views/kuhar/raw/accelerometer/486c8aad/meta.csv'
Data saved to 'data/dataset_views/kuhar/raw/gyroscope/486c8aad/data.npz'
Metadata saved to 'data/dataset_views/kuhar/raw/gyroscope/486c8aad/meta.csv'


# Time Series Dataset Views

In [7]:
for window, overlap in [(300, 0)]:
    description = f"time-series-{window}-samples-{overlap}-overlap"
    for desc in view_descriptions:
        view = TimeSeriesKuHarView(kuhar=kuhar, window=window, overlap=overlap, features=desc["features"], users=desc["users"], activities=desc["activities"])
        data, meta = view.to_canonical()
        save_canonical_view(data, meta, root_output_dir, dataset, description, desc["description"], uid)

Generating time series with 300 samples:   0%|          | 0/1945 [00:00<?, ?it/s]

Generating canonical data: 100%|██████████| 19792/19792 [14:37<00:00, 22.55it/s]


Data saved to 'data/dataset_views/kuhar/time-series-300-samples-0-overlap/all/486c8aad/data.npz'
Metadata saved to 'data/dataset_views/kuhar/time-series-300-samples-0-overlap/all/486c8aad/meta.csv'


Generating time series with 300 samples:   0%|          | 0/1945 [00:00<?, ?it/s]

Generating canonical data: 100%|██████████| 19792/19792 [07:28<00:00, 44.18it/s]


Data saved to 'data/dataset_views/kuhar/time-series-300-samples-0-overlap/accelerometer/486c8aad/data.npz'
Metadata saved to 'data/dataset_views/kuhar/time-series-300-samples-0-overlap/accelerometer/486c8aad/meta.csv'


Generating time series with 300 samples:   0%|          | 0/1945 [00:00<?, ?it/s]

Generating canonical data: 100%|██████████| 19792/19792 [06:59<00:00, 47.14it/s]


Data saved to 'data/dataset_views/kuhar/time-series-300-samples-0-overlap/gyroscope/486c8aad/data.npz'
Metadata saved to 'data/dataset_views/kuhar/time-series-300-samples-0-overlap/gyroscope/486c8aad/meta.csv'


In [5]:
l = list(kuhar.get_data_iterator(activities=[17]))

In [6]:
len(l)

20

In [7]:
l[0]

Unnamed: 0,accel-x,accel-y,accel-z,gyro-x,gyro-y,gyro-z,accel-start-time,gyro-start-time,accel-end-time,gyro-end-time,class,length,serial,index,user
0,-2.18640,1.66780,3.93850,0.037188,0.36460,-0.57867,0.004,0.007,0.004,0.007,17,1,1,0,1087
1,-1.54550,2.09350,4.01900,-0.074828,0.53200,-0.69964,0.014,0.017,0.014,0.017,17,1,1,1,1087
2,-0.77139,2.64330,3.19930,-0.423030,0.58243,-0.78084,0.024,0.027,0.024,0.027,17,1,1,2,1087
3,0.17231,3.53130,1.62650,-0.787310,0.54718,-0.79823,0.034,0.037,0.034,0.037,17,1,1,3,1087
4,0.92529,4.35160,0.96227,-1.158800,0.45059,-0.78797,0.044,0.047,0.044,0.047,17,1,1,4,1087
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6167,-5.43950,1.39310,1.38480,0.789360,-0.33993,0.76222,61.674,61.677,61.674,61.677,17,1,1,6167,1087
6168,-5.26390,0.36850,0.59809,0.544930,-0.31564,0.76393,61.684,61.687,61.684,61.687,17,1,1,6168,1087
6169,-5.27210,-0.32387,0.49611,0.242610,-0.36650,0.78636,61.694,61.697,61.694,61.697,17,1,1,6169,1087
6170,-5.80580,-1.81040,-0.17049,-0.603160,-0.46991,0.84526,61.704,61.707,61.704,61.707,17,1,1,6170,1087


In [8]:
view = TimeSeriesKuHarView(kuhar=kuhar, window=300, overlap=0, features=("accel-x", "accel-y", "accel-z", "gyro-x", "gyro-y", "gyro-z"), users=None, activities=[17])

In [9]:
l[0].loc[1*300:2*300]["accel-x"]

300    3.99060
301    5.43360
302    6.10470
303    6.38730
304    6.42190
        ...   
596    7.91260
597    5.80410
598    3.33690
599    0.97603
600   -0.89103
Name: accel-x, Length: 301, dtype: float64

In [10]:
view._create_time_series(l[0])[["accel-x-0", "accel-x-299"]]

0 300 300
300 600 300
600 900 300
900 1200 300
1200 1500 300
1500 1800 300
1800 2100 300
2100 2400 300
2400 2700 300
2700 3000 300
3000 3300 300
3300 3600 300
3600 3900 300
3900 4200 300
4200 4500 300
4500 4800 300
4800 5100 300
5100 5400 300
5400 5700 300
5700 6000 300
6000 6300 172
Extracted subsamples: 20


Unnamed: 0,accel-x-0,accel-x-299
0,-2.1864,1.8125
1,3.9906,0.97603
2,-0.89103,11.087
3,7.3868,-1.1621
4,-0.4065,-1.8882
5,-3.1502,5.6165
6,5.4476,2.2353
7,2.1591,1.2608
8,-1.965,4.8421
9,6.5507,-1.8068


In [11]:
data, meta = view.to_canonical()
data.shape

0 300 300


Generating time series with 300 samples:   0%|          | 0/20 [00:00<?, ?it/s]

300 600 300
600 900 300
900 1200 300
1200 1500 300
1500 1800 300
1800 2100 300
2100 2400 300
2400 2700 300
2700 3000 300
3000 3300 300
3300 3600 300
3600 3900 300
3900 4200 300
4200 4500 300
4500 4800 300
4800 5100 300
5100 5400 300
5400 5700 300
5700 6000 300
6000 6300 172
Extracted subsamples: 20
0 300 300
300 600 300
600 900 300
900 1200 300
1200 1500 300
1500 1800 300
1800 2100 300
2100 2400 300
2400 2700 300
2700 3000 300
3000 3300 300
3300 3600 300
3600 3900 300
3900 4200 300
4200 4500 300
4500 4800 300
4800 5100 300
5100 5400 300
5400 5700 300
5700 6000 300
6000 6300 300
6300 6600 189
Extracted subsamples: 21
0 300 300
300 600 300
600 900 300
900 1200 300
1200 1500 300
1500 1800 300
1800 2100 300
2100 2400 300
2400 2700 300
2700 3000 300
3000 3300 300
3300 3600 300
3600 3900 300
3900 4200 300
4200 4500 300
4500 4800 300
4800 5100 300
5100 5400 300
5400 5700 300
5700 6000 300
6000 6300 300
6300 6600 300
6600 6900 300
6900 7200 300
7200 7500 300
7500 7800 300
7800 8100 300
8100 84

Generating canonical data:   5%|▍         | 22/442 [00:00<00:01, 213.71it/s]

Extracted subsamples: 36


Generating canonical data: 100%|██████████| 442/442 [00:02<00:00, 213.96it/s]


(442, 6, 300)

In [5]:
view = TimeSeriesKuHarView(kuhar=kuhar, window=300, overlap=0, features=("accel-x", "accel-y", "accel-z", "gyro-x", "gyro-y", "gyro-z"), users=None, activities=[17])
data, meta = view.to_canonical()

Generating time series with 300 samples:   0%|          | 0/20 [00:00<?, ?it/s]

Generating canonical data: 100%|██████████| 462/462 [00:02<00:00, 209.43it/s]


In [6]:
data.shape

(462, 6, 300)

In [29]:
it = kuhar.get_data_iterator(users=None, activities=[0])
x = list(it)

In [30]:
len(x)

91