# 1. Prepare dataset

The original dataset can be downloaded from Zenodo: https://zenodo.org/records/12704412

## 1.1 Prepare RAW files

In [None]:
from pathlib import Path
import pandas as pd


def prepare_raw(path: Path) -> pd.DataFrame:
    raw = pd.read_csv(path, engine='pyarrow')
    raw['datetime'] = raw['utc'].dt.tz_convert('Europe/Berlin')
    raw.columns = [x.strip() for x in raw.columns]
    raw = raw[['datetime', 'x', 'y', 'z']]
    raw.set_index('datetime', inplace=True)
    raw.rename(columns={'x': 'acc_x', 'y': 'acc_y', 'z': 'acc_z'}, inplace=True)

    return -raw


# Convert every raw CSV found under origin/data into one parquet file in
# data/thigh. Names are chosen so that the builtins `input` and `id` are not
# shadowed for the rest of the session.
folder = Path('origin')

raw_files = (folder / 'data').rglob('raw*.csv')
output = Path('data/thigh')
output.mkdir(exist_ok=True, parents=True)

for file in raw_files:
    # The output file is named after the directory that contains the CSV.
    recording_id = file.parent.stem
    df = prepare_raw(file)

    df.to_parquet(
        output / f'{recording_id}.parquet',
    )

## 1.2 Prepare ground-truth

In [None]:
from pathlib import Path

import pandas as pd

folder = Path('origin')

# Laboratory annotations come from a single CSV; they are indexed by timestamp
# and tagged with their recording condition.
ground_truth = (
    pd.read_csv(folder / 'data/ground_truth_lab.csv', engine='pyarrow')
    .rename(columns={'time': 'datetime', 'activity': 'ground_truth'})
    .set_index('datetime')
)
ground_truth['condition'] = 'laboratory'

# Per-second annotation files, searched recursively below origin/data.
input = (folder / 'data').rglob('annotations*seconds.csv')

# Destination directory for the per-recording ground-truth parquet files.
output = Path('data/ground_truth')
output.mkdir(parents=True, exist_ok=True)


def prepare_free_living_ground_truth(path: Path) -> pd.DataFrame:
    df = pd.read_csv(path, engine='pyarrow')
    df['time'] = df['time'].dt.tz_convert(None).dt.tz_localize('Europe/Berlin').dt.tz_convert('UTC')
    df.rename(columns={'time': 'datetime', 'activity': 'ground_truth'}, inplace=True)
    df.set_index('datetime', inplace=True)
    df['condition'] = 'free-living'

    return df


# For each free-living annotation file, append the laboratory rows with the
# same id and write the time-sorted result to the ground-truth directory.
for file in input:
    # Named `recording_id` rather than `id` so the builtin is not shadowed.
    recording_id = file.parent.stem
    df = prepare_free_living_ground_truth(file)
    gt = ground_truth.loc[ground_truth['id'] == recording_id]
    df = pd.concat([df, gt])
    df.sort_index(inplace=True)

    df.to_parquet(
        output / f'{recording_id}.parquet',
    )

# 2. Process data

In [None]:
import pandas as pd
from acti_motus import Features, Activities
from pathlib import Path


def rotate_by_90_degrees_over_x(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with the y/z acceleration axes rotated about x.

    The new ``acc_y`` is the negated ``acc_z`` and the new ``acc_z`` is the
    former ``acc_y``; every other column is carried over unchanged.

    Args:
        df: Frame with ``acc_y`` and ``acc_z`` columns. It is not modified.

    Returns:
        A new DataFrame with the rotated ``acc_y`` and ``acc_z`` columns.
    """
    # Both right-hand sides are evaluated from the untouched input before
    # assign() builds the new frame, so the swap cannot overwrite itself.
    return df.assign(acc_y=-df['acc_z'], acc_z=df['acc_y'])


# Classify every thigh recording that has a matching ground-truth file and
# collect the annotated predictions into one table.
files = Path('data/thigh').glob('*.parquet')

features = Features(chunks=False)
activities = Activities(vendor='Sens', chunks=False, orientation=False)

results = []

for file in files:
    # Ground truth lives in the sibling directory under the same file name.
    gt = file.parent.parent / 'ground_truth' / file.name

    if gt.exists():
        ground_truth = pd.read_parquet(gt)
        df = rotate_by_90_degrees_over_x(pd.read_parquet(file))

        extracted_features = features.extract(df)
        activity, references = activities.detect(extracted_features)

        # Left join: keep every annotated timestamp, attach the prediction where available.
        df = ground_truth.join(activity, how='left')
        results.append(df)
    else:
        print(f'Ground truth file does not exist for {file.name}, skipping.')

results = pd.concat(results)

# Rows of P17 annotated as 'running_fast' are excluded from the output.
excluded = (results['id'] == 'P17') & (results['ground_truth'] == 'running_fast')
results = results.loc[~excluded]
results.to_parquet('processed_data.parquet', index=True)

# 3. Analysis

## 3.1 Laboratory

In [None]:
import sys

sys.path.append('..')

from functions import get_confusion_matrix, get_validity_metrics
import pandas as pd


# Compare predicted activities with the laboratory annotations.
df = pd.read_parquet('processed_data.parquet')
# .copy() turns the filtered subset into an independent frame, so the column
# assignments below do not raise pandas' SettingWithCopyWarning.
df = df.loc[df['condition'] == 'laboratory'].copy()

# Raw annotation labels before harmonisation; not used further in this cell.
annotations = df['ground_truth'].unique().tolist()

# Map the detailed laboratory labels onto the coarse activity classes.
rename = {
    'standing': 'stand',
    'sitting': 'sit',
    'lying_f': 'lie',
    'lying_s': 'lie',
    'lying_b': 'lie',
    'walking_slow': 'walk',
    'walking_mod': 'walk',
    'walking_fast': 'walk',
    'running_slow': 'run',
    'running_mod': 'run',
    'running_fast': 'run',
    'cycling_slow': 'bicycle',
    'cycling_mod': 'bicycle',
    'cycling_fast': 'bicycle',
}
# Labels that are missing from `rename` become NaN after map().
df['ground_truth'] = df['ground_truth'].map(rename)
# 'move' and 'stairs' have no counterpart among the mapped laboratory labels,
# so the predictions are folded into 'stand' and 'walk' respectively.
df.loc[df['activity'] == 'move', 'activity'] = 'stand'
df.loc[df['activity'] == 'stairs', 'activity'] = 'walk'

df.to_parquet('laboratory.parquet')

# NOTE(review): 'stairs' stays in the label list although predicted 'stairs'
# was merged into 'walk' above — presumably to keep the same matrix layout as
# the free-living analysis; confirm.
plot = get_confusion_matrix(
    df['ground_truth'], df['activity'], labels=['lie', 'sit', 'stand', 'walk', 'stairs', 'run', 'bicycle']
)
plot.write_image('laboratory.png', scale=2)

results = get_validity_metrics(df['ground_truth'], df['activity'])
results.to_csv('laboratory.csv')

## 3.2 Free-living

In [None]:
import sys

sys.path.append('..')

from functions import get_confusion_matrix, get_validity_metrics
import pandas as pd

# Compare predicted activities with the free-living annotations.
df = pd.read_parquet('processed_data.parquet')
df = df.loc[df['condition'] == 'free-living']
# Drop marker and undefined annotations. .copy() turns the subset into an
# independent frame, so the column assignments below do not raise pandas'
# SettingWithCopyWarning.
df = df.loc[
    ~df['ground_truth'].isin(['start', 'end', 'heel_1', 'heel_2', 'heel_3', 'undefined', 'undefined_covered'])
].copy()
# Raw annotation labels before harmonisation; not used further in this cell.
annotations = df['ground_truth'].unique().tolist()

# Map the detailed free-living labels onto the coarse activity classes.
rename = {
    'standing_static': 'stand',
    'walking': 'walk',
    'standing_dynamic': 'stand',
    'running': 'run',
    'walking_stairs': 'stairs',
    'sitting': 'sit',
    'lying_down': 'lie',
    'cycling_dynamic': 'bicycle',
    'cycling_static': 'bicycle',
    'cycling_standing': 'bicycle',
}
# Labels that are missing from `rename` become NaN after map().
df['ground_truth'] = df['ground_truth'].map(rename)
# 'move' has no counterpart among the mapped labels and is folded into 'stand'.
# Predicted 'stairs' is kept as its own class here because the annotations
# contain 'walking_stairs'.
df.loc[df['activity'] == 'move', 'activity'] = 'stand'


plot = get_confusion_matrix(
    df['ground_truth'], df['activity'], labels=['lie', 'sit', 'stand', 'walk', 'stairs', 'run', 'bicycle']
)
plot.write_image('free-living.png', scale=2)

results = get_validity_metrics(df['ground_truth'], df['activity'])
results.to_csv('free-living.csv')