In [130]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from sktime.classification.kernel_based import RocketClassifier
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.classification.feature_based import Catch22Classifier

import os

In [106]:
WORK_DIR: str = '.'
DATA_DIR: str = f"{WORK_DIR}/dataset/mhealth"

SUBJECT_FILES: list[str] = os.listdir(DATA_DIR)
DATA_COLUMNS: list[str] = [
    'acceleration chest X',
    'acceleration chest Y',
    'acceleration chest Z',
    'electrocardiogram signal 1',
    'electrocardiogram signal 2',
    'acceleration left-ankle X',
    'acceleration left-ankle Y',
    'acceleration left-ankle Z',
    'gyro left-ankle X',
    'gyro left-ankle Y',
    'gyro left-ankle Z',
    'magnetometer left-ankle X',
    'magnetometer left-ankle Y',
    'magnetometer left-ankle Z',
    'acceleration right-lower-arm X',
    'acceleration right-lower-arm Y',
    'acceleration right-lower-arm Z',
    'gyro right-lower-arm X',
    'gyro right-lower-arm Y',
    'gyro right-lower-arm Z',
    'magnetometer right-lower-arm X',
    'magnetometer right-lower-arm Y',
    'magnetometer right-lower-arm Z',
    'label'
]

LABEL_NAMES: list[str] = [
    'NULL',
    'Standing still',
    'Sitting and relaxing',
    'Lying down',
    'Walking',
    'Climbing stairs',
    'Waist bends forward',
    'Frontal elevation of arms',
    'Knees bending (crouching)',
    'Cycling',
    'Jogging',
    'Running',
    'Jump front & back'
]

LABEL_KEYS: dict[str, int] = {label: id for id, label in enumerate(LABEL_NAMES)}

In [113]:
def print_activities_len(path: str) -> None:
    df: pd.DataFrame = pd.read_csv(path, sep='\t', header=None)
    df.columns = DATA_COLUMNS

    df = df.groupby(df['label'].diff().ne(0).cumsum()).aggregate(list).reset_index(drop=True)

    df['label'] = df['label'].map(lambda x: x[0])
    df = df[df['label']!=0]

    df['len'] = df['gyro right-lower-arm Z'].transform(len)
    df = df.sort_values('label')
    df['label'] = df['label'].map(lambda x: LABEL_NAMES[x])
    display(df[['label', 'len']])

print_activities_len(f'{DATA_DIR}/{SUBJECT_FILES[1]}')

Unnamed: 0,label,len
3,Standing still,3072
5,Sitting and relaxing,3072
7,Lying down,3072
9,Walking,3072
1,Climbing stairs,3072
11,Waist bends forward,2458
13,Frontal elevation of arms,2765
15,Knees bending (crouching),2867
17,Cycling,3072
19,Jogging,3072


In [None]:
def read_subject(path: str) -> pd.DataFrame:
    SECTION_LEN: int = 200

    df: pd.DataFrame = pd.read_csv(path, sep='\t', header=None)
    df.columns = DATA_COLUMNS

    # Separate each section from the full series
    # Each activity is divided in sections with 200 length
    df['keys'] = df.groupby([df['label'].diff().ne(0).cumsum(), df['label']])['label'].rank('first')
    df['keys'] = (((df['keys'] // SECTION_LEN) + 1) * df['label'] * len(df)) + df['label']
    df = df.groupby(df['keys'].diff().ne(0).cumsum()).aggregate(list).reset_index(drop=True)
    df = df.drop(columns='keys')

    # Removes the NULL class
    df = df[df['label']!=0]
    # df = df[df['label'].isin([1, 2, 3, 4, 5, 9, 10, 11])]

    # Fixes the labels
    df['label'] = df['label'].transform(lambda x: x[0])
    df = df.sort_values('label').reset_index(drop=True)

    # Removes all sections that don't have 200 length
    df['len'] = df['gyro right-lower-arm Z'].transform(len)
    df = df[df['len'] == SECTION_LEN]
    df = df.drop(columns='len')

    # Replaces the numerical label by its meaning
    df['label'] = df['label'].map(lambda x: LABEL_NAMES[x])
    return df

# Groups sections from all subjects
df: pd.DataFrame = read_subject(f'{DATA_DIR}/{SUBJECT_FILES[0]}').head(0)
for subject in SUBJECT_FILES:
    df = pd.concat([df, read_subject(f'{DATA_DIR}/{subject}')])

display(df['label'].value_counts().sort_index(key= lambda x: x.map(LABEL_KEYS)))

label
Standing still               140
Sitting and relaxing         140
Lying down                   140
Walking                      140
Climbing stairs              130
Waist bends forward          127
Frontal elevation of arms    132
Knees bending (crouching)    131
Cycling                      140
Jogging                      140
Running                      140
Jump front & back             40
Name: count, dtype: int64

In [109]:
# Separates some data for testing
train_df: pd.DataFrame
test_df: pd.DataFrame
train_df, test_df = train_test_split(df, train_size=0.8)

print(f"Train: {train_df.shape}")
display(train_df['label'].value_counts().sort_index(key= lambda x: x.map(LABEL_KEYS)))
print(f"Test: {test_df.shape}")
display(test_df['label'].value_counts().sort_index(key= lambda x: x.map(LABEL_KEYS)))

Train: (1232, 24)


label
Standing still               107
Sitting and relaxing         112
Lying down                   113
Walking                      116
Climbing stairs               92
Waist bends forward           98
Frontal elevation of arms    112
Knees bending (crouching)    111
Cycling                      109
Jogging                      116
Running                      114
Jump front & back             32
Name: count, dtype: int64

Test: (308, 24)


label
Standing still               33
Sitting and relaxing         28
Lying down                   27
Walking                      24
Climbing stairs              38
Waist bends forward          29
Frontal elevation of arms    20
Knees bending (crouching)    20
Cycling                      31
Jogging                      24
Running                      26
Jump front & back             8
Name: count, dtype: int64

In [110]:
# Passes all values to numpy arrays
print("Train:")
train_x: np.ndarray = np.array(train_df.drop(columns='label').to_numpy().tolist())
print(train_x.shape)
train_y: np.ndarray = train_df['label'].to_numpy()
print(train_y.shape)

print("\nTest:")
test_x: np.ndarray = np.array(test_df.drop(columns='label').to_numpy().tolist())
print(test_x.shape)
test_y: np.ndarray = test_df['label'].to_numpy()
print(test_y.shape)

Train:
(1232, 23, 200)
(1232,)

Test:
(308, 23, 200)
(308,)


In [121]:
# Classifications using rocket
rocket_classifier: RocketClassifier = RocketClassifier(num_kernels=10000, n_features_per_kernel=6)
rocket_classifier.fit(train_x, train_y)
test_pred = rocket_classifier.predict(test_x)

In [122]:
# Checking performance
print(classification_report(test_y, test_pred, zero_division=0, labels=LABEL_NAMES[1:]))

                           precision    recall  f1-score   support

           Standing still       0.94      0.97      0.96        33
     Sitting and relaxing       0.89      0.86      0.87        28
               Lying down       0.85      0.85      0.85        27
                  Walking       1.00      1.00      1.00        24
          Climbing stairs       1.00      1.00      1.00        38
      Waist bends forward       1.00      1.00      1.00        29
Frontal elevation of arms       1.00      1.00      1.00        20
Knees bending (crouching)       1.00      1.00      1.00        20
                  Cycling       1.00      1.00      1.00        31
                  Jogging       1.00      1.00      1.00        24
                  Running       1.00      1.00      1.00        26
        Jump front & back       1.00      1.00      1.00         8

                 accuracy                           0.97       308
                macro avg       0.97      0.97      0.97    

In [None]:
# Classifications using knn
knn_classifier: KNeighborsTimeSeriesClassifier = KNeighborsTimeSeriesClassifier()
knn_classifier.fit(train_x, train_y)
test_pred = knn_classifier.predict(test_x)

In [120]:
# Checking performance
print(classification_report(test_y, test_pred, zero_division=0, labels=LABEL_NAMES[1:]))

                           precision    recall  f1-score   support

           Standing still       1.00      1.00      1.00        33
     Sitting and relaxing       1.00      1.00      1.00        28
               Lying down       1.00      1.00      1.00        27
                  Walking       1.00      1.00      1.00        24
          Climbing stairs       0.97      0.97      0.97        38
      Waist bends forward       1.00      1.00      1.00        29
Frontal elevation of arms       1.00      1.00      1.00        20
Knees bending (crouching)       1.00      1.00      1.00        20
                  Cycling       0.97      1.00      0.98        31
                  Jogging       0.92      1.00      0.96        24
                  Running       1.00      0.92      0.96        26
        Jump front & back       1.00      0.88      0.93         8

                 accuracy                           0.99       308
                macro avg       0.99      0.98      0.98    

In [131]:
# Classifications using catch22
catch22_classifier: Catch22Classifier = Catch22Classifier()
catch22_classifier.fit(train_x, train_y)
test_pred = catch22_classifier.predict(test_x)

In [132]:
# Checking performance
print(classification_report(test_y, test_pred, zero_division=0, labels=LABEL_NAMES[1:]))

                           precision    recall  f1-score   support

           Standing still       1.00      1.00      1.00        33
     Sitting and relaxing       1.00      1.00      1.00        28
               Lying down       1.00      1.00      1.00        27
                  Walking       1.00      1.00      1.00        24
          Climbing stairs       1.00      1.00      1.00        38
      Waist bends forward       1.00      1.00      1.00        29
Frontal elevation of arms       1.00      1.00      1.00        20
Knees bending (crouching)       1.00      1.00      1.00        20
                  Cycling       1.00      1.00      1.00        31
                  Jogging       1.00      1.00      1.00        24
                  Running       1.00      1.00      1.00        26
        Jump front & back       1.00      1.00      1.00         8

                 accuracy                           1.00       308
                macro avg       1.00      1.00      1.00    