# 1. Prepare dataset

The dataset originally comes from https://archive.ics.uci.edu/dataset/779/harth; however, a few errors were fixed by the authors, and the up-to-date dataset can be found here: https://github.com/ntnu-ai-lab/harth-ml-experiments/tree/main/harth

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np

# Integer codes of the `label` column mapped to human-readable activity names.
# `prepare_raw` applies this mapping to the per-second labels; codes that are
# not listed here map to NaN and are dropped there.
ANNOTATIONS: dict[int, str] = {
    1: 'walking',
    2: 'running',
    3: 'shuffling',
    4: 'stairs (ascending)',
    5: 'stairs (descending)',
    6: 'standing',
    7: 'sitting',
    8: 'lying',
    13: 'cycling (sit)',
    14: 'cycling (stand)',
    130: 'cycling (sit, inactive)',
    140: 'cycling (stand, inactive)',
}


def _most_frequent_label(labels: pd.Series) -> float:
    """Return the most frequent value in *labels*, or NaN when there is none."""
    # `mode()` ignores NaN and returns the candidates sorted ascending, so a
    # tie deterministically resolves to the smallest label code.
    modes = labels.mode()
    return modes.iat[0] if not modes.empty else np.nan


def _prepare_sensor(df: pd.DataFrame, prefix: str) -> pd.DataFrame:
    """Return the `<prefix>_x/_y/_z` columns as float32 `acc_x/acc_y/acc_z`, with x and y negated."""
    sensor = df[[f'{prefix}_x', f'{prefix}_y', f'{prefix}_z']].astype(np.float32)
    sensor.columns = ['acc_x', 'acc_y', 'acc_z']
    # Invert the x- and y-axes to match the expected orientation.
    sensor['acc_x'] = -sensor['acc_x']
    sensor['acc_y'] = -sensor['acc_y']
    return sensor


def prepare_raw(path: Path) -> dict[str, pd.DataFrame]:
    """Split one raw recording CSV into ground truth, thigh and trunk frames.

    The CSV is expected to contain the columns `timestamp`, `label`,
    `thigh_x/y/z` and `back_x/y/z`. `timestamp` becomes the index (renamed to
    `datetime`); the 1-second grouping below needs it to be a datetime index
    (presumably inferred by the pyarrow engine -- verify for new data sources).

    Args:
        path: Location of the raw CSV file.

    Returns:
        A dict with three frames:
        - 'ground_truth': one categorical `ground_truth` column holding the
          activity name for every 1-second window that has a known label.
        - 'thigh': float32 `acc_x`, `acc_y`, `acc_z` from the thigh columns.
        - 'trunk': float32 `acc_x`, `acc_y`, `acc_z` from the back columns.
    """
    df = pd.read_csv(path, engine='pyarrow', index_col='timestamp')
    df.index.name = 'datetime'

    # Labels are categorical codes, so each 1-second window is reduced to its
    # most frequent code. A median would be wrong here: it can produce a code
    # that never occurred in the window (e.g. half 6 / half 8 -> 7) or a
    # fractional value that is then truncated.
    per_second = (
        df['label']
        .rename('ground_truth')
        .groupby(pd.Grouper(freq='1s'))
        .agg(_most_frequent_label)
        .dropna()  # windows without any sample (gaps in the recording)
        .astype(int)
    )
    # Codes missing from ANNOTATIONS map to NaN and are dropped as well.
    ground_truth = per_second.map(ANNOTATIONS).astype('category').dropna().to_frame()

    return {
        'ground_truth': ground_truth,
        'thigh': _prepare_sensor(df, 'thigh'),
        'trunk': _prepare_sensor(df, 'back'),
    }


# Convert every raw CSV below `origin/` into three parquet files
# (ground truth, thigh, trunk) below `data/`, named after the CSV's stem.
folder = Path('origin')
files = sorted(folder.rglob('*.csv'))  # sorted for a deterministic processing order

output = Path('data')
# `parents=True` also creates `output` itself, so no separate mkdir is needed.
for subdir in ('ground_truth', 'thigh', 'trunk'):
    (output / subdir).mkdir(parents=True, exist_ok=True)

for file in files:
    # Not named `id`: that would shadow the builtin for all later cells.
    recording_id = file.stem
    data = prepare_raw(file)

    # The keys returned by `prepare_raw` double as the output sub-directories.
    for name, df in data.items():
        df.to_parquet(output / name / f'{recording_id}.parquet')

# 2. Process data

In [None]:
import pandas as pd
from acti_motus import Features, Activities
from pathlib import Path


# Run activity detection for every prepared recording, once with the thigh
# sensor alone and once with thigh + trunk, and store the predictions next to
# the ground truth.
# Sorted so the row order of the written parquet files does not depend on the
# file system's directory listing order.
files = sorted(Path('data/thigh').glob('*.parquet'))
if not files:
    raise FileNotFoundError("No parquet files found in 'data/thigh'; run the dataset preparation step first.")

features = Features()
activities = Activities(chunks=False, orientation=True)

results = []
results_trunk = []

for thigh in files:
    # Ground truth and trunk files share the thigh file's name in sibling folders.
    data_root = thigh.parent.parent
    ground_truth = pd.read_parquet(data_root / 'ground_truth' / thigh.name)

    # Thigh sensor only. The second return value of `detect` is not used here.
    thigh_features = features.extract(pd.read_parquet(thigh))
    activity, _ = activities.detect(thigh_features)
    # Keep only the ground-truth rows for which a prediction exists.
    results.append(ground_truth.join(activity, how='left').dropna(subset=['activity']))

    # Thigh + trunk sensors.
    trunk_features = features.extract(pd.read_parquet(data_root / 'trunk' / thigh.name))
    activity_trunk, _ = activities.detect(thigh_features, trunk=trunk_features)
    results_trunk.append(ground_truth.join(activity_trunk, how='left').dropna(subset=['activity']))

results = pd.concat(results)
results.to_parquet('processed_thigh.parquet', index=True)

results_trunk = pd.concat(results_trunk)
results_trunk.to_parquet('processed_trunk.parquet', index=True)

# 3. Analysis

In [None]:
import sys

sys.path.append('..')

from functions import get_confusion_matrix, get_validity_metrics
import pandas as pd

# Which processed result set to evaluate; selects `processed_<sensor>.parquet`
# and names the outputs `<sensor>.png` / `<sensor>.csv`.
sensor = 'trunk'

df = pd.read_parquet(f'processed_{sensor}.parquet')

# Ground-truth labels present in the data, before they are merged below.
annotations = df['ground_truth'].unique().tolist()

# Optional: exclude selected ground-truth classes from the evaluation.
# df = df[~df['ground_truth'].isin(["shuffling", "cycling (stand, inactive)", "cycling (stand)"])]

# Collapse the dataset's ground-truth labels onto the coarser class names used
# for the comparison with the predicted `activity` column.
rename = {
    **dict.fromkeys(('standing', 'shuffling'), 'stand'),
    'walking': 'walk',
    **dict.fromkeys(('stairs (descending)', 'stairs (ascending)'), 'stairs'),
    'sitting': 'sit',
    'lying': 'lie',
    'running': 'run',
    **dict.fromkeys(
        (
            'cycling (sit)',
            'cycling (sit, inactive)',
            'cycling (stand)',
            'cycling (stand, inactive)',
        ),
        'bicycle',
    ),
}
df['ground_truth'] = df['ground_truth'].map(rename)

# The merged ground truth has no 'move' class, so predicted 'move' counts as 'stand'.
df.loc[df['activity'] == 'move', 'activity'] = 'stand'
# Optional: also fold predicted 'stairs' into 'walk'.
# df.loc[df['activity'] == 'stairs', 'activity'] = 'walk'

plot = get_confusion_matrix(
    df['ground_truth'],
    df['activity'],
    labels=['lie', 'sit', 'stand', 'walk', 'stairs', 'run', 'bicycle'],
)
plot.write_image(f'{sensor}.png', scale=2)

results = get_validity_metrics(df['ground_truth'], df['activity'])
results.to_csv(f'{sensor}.csv')