THIS CODE HAS BEEN COMPILED IN GOOGLE COLAB :D


In [9]:
#IMPORTING LIBRARIES

import numpy as np
import os
import pandas as pad
import matplotlib.pyplot as plt
import seaborn as sb
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
!pip install tsfel
import tsfel
import urllib.request
import zipfile

np.random.seed(42)
tf.random.set_seed(42)



In [10]:
from google.colab import files

In [12]:
import io

UPLOADING THE DATASET FROM MY DEVICE

In [13]:
uploaded = files.upload()


Saving UCI HAR Dataset.zip to UCI HAR Dataset (1).zip


EXTRACTING THE CONTENTS

In [14]:
for filename in uploaded.keys():
    if filename.endswith('.zip'):
        print(f"Extracting {filename}...")
        with zipfile.ZipFile(io.BytesIO(uploaded[filename]), 'r') as zip_ref:
            zip_ref.extractall()
        print(f"Extraction of {filename} completed")


Extracting UCI HAR Dataset (1).zip...
Extraction of UCI HAR Dataset (1).zip completed


In [15]:
def load_signals(folder):
  #READS THE NINE TYPE OF INERTIAL SIGNALS AND ADDS THEM INTO THE ARRAY
    signal_types = [
        "body_acc_x", "body_acc_y", "body_acc_z",
        "body_gyro_x", "body_gyro_y", "body_gyro_z",
        "total_acc_x", "total_acc_y", "total_acc_z"
    ]
    signals = []
    for signal in signal_types:
        file_path = os.path.join(folder, "Inertial Signals", f"{signal}_{os.path.basename(folder)}.txt")
        data = np.loadtxt(file_path)
        signals.append(data)
    return np.transpose(np.array(signals), (1, 2, 0))

def load_labels(folder):
    label_file = os.path.join(folder, f"y_{os.path.basename(folder)}.txt")
    labels = np.loadtxt(label_file).astype(int)
    return labels

train_folder = os.path.join("UCI HAR Dataset", "train")
test_folder  = os.path.join("UCI HAR Dataset", "test")

print("Loading raw sensor data...")
X_train_raw = load_signals(train_folder)
X_test_raw  = load_signals(test_folder)
y_train = load_labels(train_folder)
y_test  = load_labels(test_folder)
print("Train data shape:", X_train_raw.shape)
print("Test data shape :", X_test_raw.shape)

n_train, time_steps, n_channels = X_train_raw.shape
X_train_flat = X_train_raw.reshape(-1, n_channels)
X_test_flat  = X_test_raw.reshape(-1, n_channels)

scaler = StandardScaler() #STANDARDIZATION OF THE DATA
scaler.fit(X_train_flat)
X_train_scaled_flat = scaler.transform(X_train_flat)
X_test_scaled_flat  = scaler.transform(X_test_flat)

X_train_scaled = X_train_scaled_flat.reshape(n_train, time_steps, n_channels)
X_test_scaled  = X_test_scaled_flat.reshape(X_test_raw.shape[0], time_steps, n_channels)

n_classes = len(np.unique(y_train))
#ONE HOT ENCODING OF THE DATA
y_train_onehot = tf.keras.utils.to_categorical(y_train - 1, num_classes=n_classes)
y_test_onehot  = tf.keras.utils.to_categorical(y_test - 1, num_classes=n_classes)

#MODEL DEFINING
model = Sequential([
    LSTM(64, input_shape=(time_steps, n_channels), return_sequences=True),
    Dropout(0.5),
    LSTM(32),
    Dense(n_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
print("\nModel Summary:")
model.summary()

print("\nTraining the LSTM model...")
history = model.fit(X_train_scaled, y_train_onehot, epochs=50, batch_size=64, validation_split=0.2, verbose=3)

loss, accuracy = model.evaluate(X_test_scaled, y_test_onehot, verbose=0)
print("\nTest Accuracy on raw sensor data (LSTM): {:.4f}".format(accuracy))

#WE CAN ALSO TUNE THE HYPERPARAMETERS IF WE USE K FOLD CROSS VALIDATION TO MAKE THE MODEL MORE ROBUST AND MAKE IT GENERALISE WELL

Loading raw sensor data...
Train data shape: (7352, 128, 9)
Test data shape : (2947, 128, 9)

Model Summary:


  super().__init__(**kwargs)



Training the LSTM model...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50

Test Accuracy on raw sensor data (LSTM): 0.9019


In [17]:
cfg = tsfel.get_features_by_domain()

def extract_tsfel_features(sample, cfg):
    features_list = []
    for ch in range(sample.shape[1]):
        channel_data = sample[:, ch]
        df_channel = pad.DataFrame({f'channel_{ch}': channel_data})
        feat = tsfel.time_series_features_extractor(cfg, df_channel, fs=50, verbose=0)
        features_list.append(feat)
    features_concat = pad.concat(features_list, axis=1)
    return features_concat

print("Extracting TSFEL features from training data")
features_train_list = []
for idx in range(X_train_raw.shape[0]):
    feats = extract_tsfel_features(X_train_raw[idx], cfg)
    features_train_list.append(feats)
X_train_tsfel = pad.concat(features_train_list, ignore_index=True)
print("Training TSFEL features shape:", X_train_tsfel.shape)

print("Extracting TSFEL features from test data")
features_test_list = []
for idx in range(X_test_raw.shape[0]):
    feats = extract_tsfel_features(X_test_raw[idx], cfg)
    features_test_list.append(feats)
X_test_tsfel = pad.concat(features_test_list, ignore_index=True)
print("Test TSFEL features shape:", X_test_tsfel.shape)

X_train_tsfel.fillna(0, inplace=True)
X_test_tsfel.fillna(0, inplace=True)

scaler_tsfel = StandardScaler()
X_train_tsfel_scaled = scaler_tsfel.fit_transform(X_train_tsfel)
X_test_tsfel_scaled = scaler_tsfel.transform(X_test_tsfel)

y_train_ml = y_train - 1
y_test_ml = y_test - 1

#NOW WE WILL EVALUATE THE MODELS ON THE FEATURES GENERATED BY USING TSFEL
#RANDOM FOREST
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train_tsfel_scaled, y_train_ml)
rf_preds = rf.predict(X_test_tsfel_scaled)
print("Random Forest TSFEL Accuracy:", accuracy_score(y_test_ml, rf_preds))
print(classification_report(y_test_ml, rf_preds))

#SUPPORT VECTOR MACHINE
svm = SVC(kernel='rbf', gamma='scale', random_state=42)
svm.fit(X_train_tsfel_scaled, y_train_ml)
svm_preds = svm.predict(X_test_tsfel_scaled)
print("SVM TSFEL Accuracy:", accuracy_score(y_test_ml, svm_preds))
print(classification_report(y_test_ml, svm_preds))

#LOGISTIC REGRESSION
lr = LogisticRegression(max_iter=200, random_state=42)
lr.fit(X_train_tsfel_scaled, y_train_ml)
lr_preds = lr.predict(X_test_tsfel_scaled)
print("Logistic Regression TSFEL Accuracy:", accuracy_score(y_test_ml, lr_preds))
print(classification_report(y_test_ml, lr_preds))


Extracting TSFEL features from training data
Training TSFEL features shape: (7352, 1404)
Extracting TSFEL features from test data
Test TSFEL features shape: (2947, 1404)
Random Forest TSFEL Accuracy: 0.9290804207668816
              precision    recall  f1-score   support

           0       0.93      0.98      0.95       496
           1       0.91      0.96      0.93       471
           2       0.95      0.83      0.88       420
           3       0.89      0.89      0.89       491
           4       0.90      0.90      0.90       532
           5       1.00      1.00      1.00       537

    accuracy                           0.93      2947
   macro avg       0.93      0.93      0.93      2947
weighted avg       0.93      0.93      0.93      2947

SVM TSFEL Accuracy: 0.9548693586698337
              precision    recall  f1-score   support

           0       0.95      0.98      0.96       496
           1       0.98      0.96      0.97       471
           2       0.96      0.96   

THE PREDICTION SCORES OF THE FEATURES PROVIDED BY THE AUTHORS

In [18]:
train_features = np.loadtxt(os.path.join(train_folder, "X_train.txt"))
test_features = np.loadtxt(os.path.join(test_folder, "X_test.txt"))

scaler_prov = StandardScaler()
X_train_prov_scaled = scaler_prov.fit_transform(train_features)
X_test_prov_scaled = scaler_prov.transform(test_features)

y_train_ml = y_train - 1
y_test_ml = y_test - 1
#NOW WE WILL EVALUATE THE MODELS ON THE FEATURES PROVIDED BY THE AUTHORS
#RANDOM FOREST
rf_prov = RandomForestClassifier(n_estimators=100, random_state=42)
rf_prov.fit(X_train_prov_scaled, y_train_ml)
rf_prov_preds = rf_prov.predict(X_test_prov_scaled)
print("Random Forest (Provided Features) Accuracy:", accuracy_score(y_test_ml, rf_prov_preds))
print(classification_report(y_test_ml, rf_prov_preds))

#SUPPORT VECTOR MACHINE EVALUATION
svm_prov = SVC(kernel='rbf', gamma='scale', random_state=42)
svm_prov.fit(X_train_prov_scaled, y_train_ml)
svm_prov_preds = svm_prov.predict(X_test_prov_scaled)
print("SVM (Provided Features) Accuracy:", accuracy_score(y_test_ml, svm_prov_preds))
print(classification_report(y_test_ml, svm_prov_preds))

#LOGISTIC REGRESSION EVALAUATION
lr_prov = LogisticRegression(max_iter=200, random_state=42)
lr_prov.fit(X_train_prov_scaled, y_train_ml)
lr_prov_preds = lr_prov.predict(X_test_prov_scaled)
print("Logistic Regression (Provided Features) Accuracy:", accuracy_score(y_test_ml, lr_prov_preds))
print(classification_report(y_test_ml, lr_prov_preds))


Random Forest (Provided Features) Accuracy: 0.9260264675941635
              precision    recall  f1-score   support

           0       0.89      0.96      0.92       496
           1       0.89      0.90      0.90       471
           2       0.97      0.86      0.91       420
           3       0.91      0.89      0.90       491
           4       0.90      0.92      0.91       532
           5       1.00      1.00      1.00       537

    accuracy                           0.93      2947
   macro avg       0.93      0.92      0.92      2947
weighted avg       0.93      0.93      0.93      2947

SVM (Provided Features) Accuracy: 0.9521547336274178
              precision    recall  f1-score   support

           0       0.96      0.97      0.97       496
           1       0.93      0.97      0.95       471
           2       0.98      0.92      0.95       420
           3       0.94      0.90      0.92       491
           4       0.92      0.95      0.93       532
           5    