In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import os
from tsfel import time_series_features_extractor
import warnings
warnings.filterwarnings('ignore')

In [3]:
def load_data(base_path):
    """Load the nine inertial-signal channels and the labels for both splits.

    Parameters
    ----------
    base_path : str
        Directory holding the ``train`` and ``test`` sub-directories. Each
        split must contain ``Inertial Signals/<signal>_<split>.txt`` for every
        name in ``signal_types`` below, plus ``y_<split>.txt``.

    Returns
    -------
    X_train, X_test : numpy.ndarray
        Arrays of shape ``(n_rows, n_columns, 9)``: one row per line of the
        signal files, one column per value on that line, and the last axis
        ordered as in ``signal_types``.
    y_train, y_test : numpy.ndarray
        1-D label arrays, equal to the values stored on disk minus one.
    """
    # The order of this tuple fixes the channel order along the last axis.
    signal_types = (
        'body_acc_x', 'body_acc_y', 'body_acc_z',
        'body_gyro_x', 'body_gyro_y', 'body_gyro_z',
        'total_acc_x', 'total_acc_y', 'total_acc_z',
    )

    def load_signals(signal_type, data_type):
        filepath = os.path.join(
            base_path, data_type, 'Inertial Signals', f'{signal_type}_{data_type}.txt'
        )
        # sep=r'\s+' is the documented replacement for the deprecated
        # delim_whitespace=True and parses the files the same way.
        return pd.read_csv(filepath, sep=r'\s+', header=None)

    def load_split(data_type):
        # Give every channel a trailing axis, then join the channels along it.
        channels = [
            load_signals(signal_type, data_type).values[:, :, None]
            for signal_type in signal_types
        ]
        return np.concatenate(channels, axis=2)

    def load_labels(data_type):
        label_path = os.path.join(base_path, data_type, f'y_{data_type}.txt')
        # Shift the stored labels down by one.
        return pd.read_csv(label_path, header=None).values.ravel() - 1

    X_train = load_split('train')
    X_test = load_split('test')
    y_train = load_labels('train')
    y_test = load_labels('test')

    return X_train, X_test, y_train, y_test


# Dataset root, given relative to the notebook's working directory.
base_path = 'UCI-HAR Dataset'
# Load both splits: the X arrays are 3-D (rows, columns, channels), the y arrays 1-D.
X_train, X_test, y_train, y_test = load_data(base_path)
# Print every shape so a mismatch between features and labels is visible immediately.
print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: (7352, 128, 9)
Shape of X_test: (2947, 128, 9)
Shape of y_train: (7352,)
Shape of y_test: (2947,)


In [7]:
import tsfel
from pandas import DataFrame

def extract_features_tsfel(X, y, window_size=128, fs=None):
    """Extract TSFEL features from a 3-D array of multi-channel signals.

    Each channel ``X[:, :, j]`` is flattened row after row into one long
    series named ``channel_<j+1>``; TSFEL then cuts those series back into
    windows of ``window_size`` samples and computes one feature row per window.

    Parameters
    ----------
    X : numpy.ndarray
        Array indexed as ``(row, sample, channel)``.
    y : array-like
        Not used. Kept so existing calls that pass the labels keep working.
    window_size : int or None, default 128
        Window length handed to TSFEL. When given, it must divide
        ``X.shape[1]`` so that no window mixes samples from two rows of ``X``
        (assuming TSFEL's default of non-overlapping windows).
    fs : float or None, default None
        Sampling frequency forwarded to TSFEL. ``None`` leaves TSFEL to use
        its own default rate.
        NOTE(review): the feature names printed in this notebook (e.g.
        'Wavelet variance_25.0Hz') suggest that default is 100 Hz. Confirm
        the dataset's recording rate and pass it explicitly if it differs.

    Returns
    -------
    pandas.DataFrame
        The feature table returned by ``tsfel.time_series_features_extractor``.

    Raises
    ------
    ValueError
        If ``window_size`` is not positive or does not divide ``X.shape[1]``.
    """
    n_timesteps = X.shape[1]
    # A window length that does not divide the row length would make TSFEL's
    # windows straddle two rows of X, silently misaligning features and labels.
    if window_size is not None and (window_size <= 0 or n_timesteps % window_size != 0):
        raise ValueError(
            f"window_size={window_size} must be a positive divisor of the "
            f"number of samples per row ({n_timesteps})"
        )

    cfg = tsfel.get_features_by_domain()

    # One column per channel, rows of X laid end to end.
    signal_df = pd.DataFrame({
        f'channel_{j + 1}': X[:, :, j].flatten()
        for j in range(X.shape[2])
    })

    return tsfel.time_series_features_extractor(
        cfg, signal_df, fs=fs, window_size=window_size
    )


# Compute one TSFEL feature row per window for each split.
extracted_train_features_tsfel = extract_features_tsfel(X_train, y_train)
extracted_test_features_tsfel = extract_features_tsfel(X_test, y_test)

# Impute missing feature values with the TRAINING column means in both splits,
# so that no statistic computed on the test split enters the preprocessing.
train_feature_means_tsfel = extracted_train_features_tsfel.mean()
extracted_train_features_tsfel = extracted_train_features_tsfel.fillna(train_feature_means_tsfel)
extracted_test_features_tsfel = extracted_test_features_tsfel.fillna(train_feature_means_tsfel)

print("Shape of extracted_train_features_tsfel:", extracted_train_features_tsfel.shape)
print("Shape of extracted_test_features_tsfel:", extracted_test_features_tsfel.shape)


Shape of extracted_train_features_tsfel: (7352, 1404)
Shape of extracted_test_features_tsfel: (2947, 1404)


In [10]:
# Show the feature names of the train split, then of the test split, one index per line.
print(extracted_train_features_tsfel.columns, extracted_test_features_tsfel.columns, sep="\n")

Index(['channel_1_Absolute energy', 'channel_1_Area under the curve',
       'channel_1_Autocorrelation', 'channel_1_Average power',
       'channel_1_Centroid', 'channel_1_ECDF Percentile Count_0',
       'channel_1_ECDF Percentile Count_1', 'channel_1_ECDF Percentile_0',
       'channel_1_ECDF Percentile_1', 'channel_1_ECDF_0',
       ...
       'channel_9_Wavelet variance_12.5Hz',
       'channel_9_Wavelet variance_2.78Hz',
       'channel_9_Wavelet variance_25.0Hz',
       'channel_9_Wavelet variance_3.12Hz',
       'channel_9_Wavelet variance_3.57Hz',
       'channel_9_Wavelet variance_4.17Hz', 'channel_9_Wavelet variance_5.0Hz',
       'channel_9_Wavelet variance_6.25Hz',
       'channel_9_Wavelet variance_8.33Hz', 'channel_9_Zero crossing rate'],
      dtype='object', length=1404)
Index(['channel_1_Absolute energy', 'channel_1_Area under the curve',
       'channel_1_Autocorrelation', 'channel_1_Average power',
       'channel_1_Centroid', 'channel_1_ECDF Percentile Count_0',
  

In [11]:
# Store the labels next to the features as a 'label' column.
# NOTE: the column is assigned on the existing DataFrames, so cells that run
# after this one see the extra column.
extracted_train_features_tsfel['label'] = y_train
extracted_test_features_tsfel['label'] = y_test

# Print the shapes of both frames now that they carry the 'label' column.
print("Shape of extracted_train_features_tsfel:", extracted_train_features_tsfel.shape)
print("Shape of extracted_test_features_tsfel:", extracted_test_features_tsfel.shape)

Shape of extracted_train_features_tsfel: (7352, 1405)
Shape of extracted_test_features_tsfel: (2947, 1405)


In [12]:
def scale_data(df_train, df_test):
    """Separate the ``label`` column from the features and standardise the features.

    The scaler is fitted on the training features only and then applied,
    unchanged, to the test features. Neither input frame is modified.

    Parameters
    ----------
    df_train, df_test : pandas.DataFrame
        Feature tables that each contain a ``label`` column.

    Returns
    -------
    tuple
        ``(train_features_scaled, test_features_scaled, train_labels,
        test_labels, fitted_scaler)``.
    """
    target_column = 'label'

    # Pull the targets out first, then build label-free feature frames.
    labels_train = df_train[target_column].values
    labels_test = df_test[target_column].values
    features_train = df_train.drop(columns=target_column)
    features_test = df_test.drop(columns=target_column)

    standardizer = StandardScaler()
    scaled_train = standardizer.fit_transform(features_train)
    # Re-use the training statistics for the test split.
    scaled_test = standardizer.transform(features_test)

    return scaled_train, scaled_test, labels_train, labels_test, standardizer

# Standardise the TSFEL features and keep the fitted scaler for later reuse.
(
    X_train_scaled_tsfel,
    X_test_scaled_tsfel,
    y_train_tsfel,
    y_test_tsfel,
    scaler_tsfel,
) = scale_data(extracted_train_features_tsfel, extracted_test_features_tsfel)

# Report the resulting shapes of the scaled features and the label arrays.
print("Shape of X_train_scaled_tsfel:", X_train_scaled_tsfel.shape)
print("Shape of X_test_scaled_tsfel:", X_test_scaled_tsfel.shape)
print("Shape of y_train_tsfel:", y_train_tsfel.shape)
print("Shape of y_test_tsfel:", y_test_tsfel.shape)

Shape of X_train_scaled_tsfel: (7352, 1404)
Shape of X_test_scaled_tsfel: (2947, 1404)
Shape of y_train_tsfel: (7352,)
Shape of y_test_tsfel: (2947,)


In [16]:
import pandas as pd
import numpy as np
import tsfel
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import os
import warnings
warnings.filterwarnings('ignore')

def train_evaluate_model(model, X_train, X_test, y_train, y_test, model_name="Model"):
    """Fit ``model`` on the training split and print its test-split diagnostics.

    Prints, in this order: a classification-report banner and the report,
    a confusion-matrix banner and the matrix, then a blank separator.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; it is fitted in place.
    X_train, y_train : array-like
        Training features and labels.
    X_test, y_test : array-like
        Test features and labels used for the printed metrics.
    model_name : str, default "Model"
        Label inserted into the printed banners.

    Returns
    -------
    None
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    report_banner = f"--- Classification Report for {model_name} ---"
    matrix_banner = f"--- Confusion Matrix for {model_name} ---"

    # Each banner is printed immediately before its metric is computed and shown.
    for banner, metric in (
        (report_banner, classification_report),
        (matrix_banner, confusion_matrix),
    ):
        print(banner)
        print(metric(y_test, predictions))
    print("\n")

# One classifier per model family, each seeded so reruns give the same fit.
rf_tsfel = RandomForestClassifier(random_state=42)
svm_tsfel = SVC(random_state=42)
# `multi_class` is deprecated in recent scikit-learn releases. With
# solver='liblinear' the default setting already resolves to one-vs-rest,
# so dropping the argument leaves the fitted model unchanged.
lr_tsfel = LogisticRegression(random_state=42, solver='liblinear')

train_evaluate_model(rf_tsfel, X_train_scaled_tsfel, X_test_scaled_tsfel, y_train_tsfel, y_test_tsfel, model_name="Random Forest (TSFEL)")
train_evaluate_model(svm_tsfel, X_train_scaled_tsfel, X_test_scaled_tsfel, y_train_tsfel, y_test_tsfel, model_name="SVM (TSFEL)")
train_evaluate_model(lr_tsfel, X_train_scaled_tsfel, X_test_scaled_tsfel, y_train_tsfel, y_test_tsfel, model_name="Logistic Regression (TSFEL)")


--- Classification Report for Random Forest (TSFEL) ---
              precision    recall  f1-score   support

           0       0.94      0.99      0.97       496
           1       0.90      0.96      0.93       471
           2       0.96      0.83      0.89       420
           3       0.89      0.89      0.89       491
           4       0.90      0.90      0.90       532
           5       1.00      1.00      1.00       537

    accuracy                           0.93      2947
   macro avg       0.93      0.93      0.93      2947
weighted avg       0.93      0.93      0.93      2947

--- Confusion Matrix for Random Forest (TSFEL) ---
[[490   0   6   0   0   0]
 [ 10 454   7   0   0   0]
 [ 19  51 350   0   0   0]
 [  0   0   0 438  53   0]
 [  0   0   0  53 479   0]
 [  0   0   0   0   0 537]]


--- Classification Report for SVM (TSFEL) ---
              precision    recall  f1-score   support

           0       0.95      0.97      0.96       496
           1       0.98      0

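# Author-provided features comparison. Note: the cell below loads the raw inertial signals (all nine channels concatenated per row) via `load_raw_data`, not a precomputed feature file, so the "(Author)" results are a raw-signal baseline.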

In [18]:
def load_raw_data(base_path):
    """Load the nine inertial-signal channels as one flat feature matrix per split.

    Parameters
    ----------
    base_path : str
        Directory holding the ``train`` and ``test`` sub-directories. Each
        split must contain ``Inertial Signals/<signal>_<split>.txt`` for every
        name in ``signal_types`` below, plus ``y_<split>.txt``.

    Returns
    -------
    X_train, X_test : numpy.ndarray
        2-D arrays with one row per line of the signal files. The columns are
        the channels placed side by side in the order of ``signal_types``
        (all values of the first channel, then all of the second, and so on).
    y_train, y_test : numpy.ndarray
        1-D label arrays, equal to the values stored on disk minus one.
    """
    # The order of this tuple fixes the order of the column groups.
    signal_types = (
        'body_acc_x', 'body_acc_y', 'body_acc_z',
        'body_gyro_x', 'body_gyro_y', 'body_gyro_z',
        'total_acc_x', 'total_acc_y', 'total_acc_z',
    )

    def load_signals(signal_type, data_type):
        filepath = os.path.join(
            base_path, data_type, 'Inertial Signals', f'{signal_type}_{data_type}.txt'
        )
        # sep=r'\s+' is the documented replacement for the deprecated
        # delim_whitespace=True and parses the files the same way.
        return pd.read_csv(filepath, sep=r'\s+', header=None)

    def load_split(data_type):
        # Place the per-channel matrices side by side along the column axis.
        channels = [
            load_signals(signal_type, data_type).values
            for signal_type in signal_types
        ]
        return np.concatenate(channels, axis=1)

    def load_labels(data_type):
        label_path = os.path.join(base_path, data_type, f'y_{data_type}.txt')
        # Shift the stored labels down by one.
        return pd.read_csv(label_path, header=None).values.ravel() - 1

    X_train = load_split('train')
    X_test = load_split('test')
    y_train = load_labels('train')
    y_test = load_labels('test')

    return X_train, X_test, y_train, y_test

# Flat raw-signal feature matrices and labels for both splits.
X_train_author, X_test_author, y_train_author, y_test_author = load_raw_data(base_path)

# Fit the scaler on the training split only, then apply it to the test split.
scaler_author = StandardScaler()
X_train_scaled_author = scaler_author.fit_transform(X_train_author)
X_test_scaled_author = scaler_author.transform(X_test_author)

rf_author = RandomForestClassifier(random_state=42)
svm_author = SVC(random_state=42)
# `multi_class` is deprecated in recent scikit-learn releases. With
# solver='liblinear' the default setting already resolves to one-vs-rest,
# so dropping the argument leaves the fitted model unchanged.
lr_author = LogisticRegression(random_state=42, solver='liblinear')

train_evaluate_model(rf_author, X_train_scaled_author, X_test_scaled_author, y_train_author, y_test_author, model_name="Random Forest (Author)")
train_evaluate_model(svm_author, X_train_scaled_author, X_test_scaled_author, y_train_author, y_test_author, model_name="SVM (Author)")
train_evaluate_model(lr_author, X_train_scaled_author, X_test_scaled_author, y_train_author, y_test_author, model_name="Logistic Regression (Author)")

--- Classification Report for Random Forest (Author) ---
              precision    recall  f1-score   support

           0       0.79      0.88      0.83       496
           1       0.86      0.78      0.82       471
           2       0.85      0.87      0.86       420
           3       0.76      0.79      0.77       491
           4       0.81      0.77      0.79       532
           5       1.00      1.00      1.00       537

    accuracy                           0.85      2947
   macro avg       0.85      0.85      0.85      2947
weighted avg       0.85      0.85      0.85      2947

--- Confusion Matrix for Random Forest (Author) ---
[[434  25  37   0   0   0]
 [ 77 368  25   1   0   0]
 [ 33  22 365   0   0   0]
 [  2   8   0 386  95   0]
 [  1   3   0 121 407   0]
 [  0   0   0   0   0 537]]


--- Classification Report for SVM (Author) ---
              precision    recall  f1-score   support

           0       0.95      0.91      0.93       496
           1       0.97    