# Fall Detection using IMU Sensor Data
This notebook builds a classifier to distinguish falls from non-falls using data from triaxial accelerometer, gyroscope, and magnetometer sensors. It also compares feature importance across models using SHAP and Plotly.

In [6]:
import os
import numpy as np
import pandas as pd
import scipy.io
import shap
import plotly.express as px
import xgboost as xgb

import os
print("Current working directory:", os.getcwd())

for label_folder in ['falls', 'nonfalls']:
    folder_path = os.path.join("../data", label_folder)
    if os.path.exists(folder_path):
        print(f"Files in {folder_path}:")
        print(os.listdir(folder_path))
        
# DATA_FOLDER = "../data"  # Change to your folder name or absolute path
# # DATA_TRAIN_FOLDER = "../data/train"  # Change to your folder name or absolute path
# # DATA_TEST_FOLDER = "../data/test"  # Change to your folder name or absolute path
# SUBMISSION_FOLDER = "../submission"  # Change to your desired folder

# # Make sure the folder exists
# os.makedirs(DATA_FOLDER, exist_ok=True)
# os.makedirs(SUBMISSION_FOLDER, exist_ok=True)

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report

print("Files in ../data/falls:", os.listdir(os.path.join("../data", "falls")))

falls_folder = os.path.abspath(os.path.join("../data", "falls"))
print("Absolute falls folder path:", falls_folder)
print("Files in falls folder:", os.listdir(falls_folder))

print("All files in falls folder:", os.listdir(falls_folder))

# ls -l "../data/falls"

falls_folder = os.path.abspath(os.path.join("../data", "falls"))
print("Absolute falls folder path:", falls_folder)
print("Files in falls folder:", os.listdir(falls_folder))

Current working directory: /Users/marcoreis/Insync/marco.a.reis@gmail.com/Google Drive/dutfpr/recognition/assignments/recog-assig02/assignment03
Files in ../data/falls:
[]
Files in ../data/falls: []
Absolute falls folder path: /Users/marcoreis/Insync/marco.a.reis@gmail.com/Google Drive/dutfpr/recognition/assignments/recog-assig02/data/falls
Files in falls folder: []
All files in falls folder: []
Absolute falls folder path: /Users/marcoreis/Insync/marco.a.reis@gmail.com/Google Drive/dutfpr/recognition/assignments/recog-assig02/data/falls
Files in falls folder: []


In [2]:
falls_folder = os.path.join("../data", "falls")
print("All files in falls folder:", os.listdir(falls_folder))

All files in falls folder: []


In [30]:
def extract_features_from_file(filepath):
    mat = scipy.io.loadmat(filepath)
    data = mat['newData']
    imu_data = data[:, 1:10]
    means = np.mean(imu_data, axis=0)
    max_vals = np.max(imu_data, axis=0)
    std_devs = np.std(imu_data, axis=0)
    return np.concatenate([means, max_vals, std_devs])

def split_falls_by_suffix(folder, train_range, test_range):
    train_files, test_files = [], []
    for file in os.listdir(folder):
        if file.endswith('.mat'):
            if any(file.endswith(f"{i:02d}.mat") for i in train_range):
                train_files.append(os.path.join(folder, file))
            if any(file.endswith(f"{i:02d}.mat") for i in test_range):
                test_files.append(os.path.join(folder, file))
    return train_files, test_files

falls_folder = os.path.join("../data", "falls")
train_range = range(1, 19)   # 01 to 18
test_range = range(19, 23)   # 19 to 22

train_files, test_files = split_falls_by_suffix(falls_folder, train_range, test_range)

# Now extract features for train and test
X_train_falls = np.array([extract_features_from_file(f) for f in train_files])
y_train_falls = np.full(len(train_files), 2)  # Label 2 for falls

X_test_falls = np.array([extract_features_from_file(f) for f in test_files])
y_test_falls = np.full(len(test_files), 2)    # Label 2 for falls

print("Falls train shape:", X_train_falls.shape, y_train_falls.shape)
print("Falls test shape:", X_test_falls.shape, y_test_falls.shape)

Falls train shape: (0,) (0,)
Falls test shape: (0,) (0,)


In [31]:
falls_folder = os.path.join("../data", "falls")
print("All files in falls folder:", os.listdir(falls_folder))

All files in falls folder: []


In [29]:
def split_falls_by_suffix(folder, train_range, test_range):
    train_files, test_files = [], []
    for file in os.listdir(folder):
        if file.endswith('.mat'):
            # Train: suffix 01 to 18
            if any(file.endswith(f"{i:02d}.mat") for i in train_range):
                train_files.append(os.path.join(folder, file))
            # Test: suffix 19 to 22
            if any(file.endswith(f"{i:02d}.mat") for i in test_range):
                test_files.append(os.path.join(folder, file))
    return train_files, test_files

falls_folder = os.path.join("../data", "falls")
os.makedirs(falls_folder, exist_ok=True)  # Ensure the folder exists

train_range = range(1, 19)   # 01 to 18
test_range = range(19, 23)   # 19 to 22

train_files, test_files = split_falls_by_suffix(falls_folder, train_range, test_range)

print("Train files:", train_files)
print("Test files:", test_files)

Train files: []
Test files: []


In [23]:
# Suppose you already have test files loaded from your folder
# data_suffixes_group1 = [f"{i:02d}" for i in range(1, 19)]  # e.g., 19, 20
# data_suffixes_group2 = [f"{i:02d}" for i in range(19, 28)]  # e.g., 21, 22
test_suffixes_group1 = [f"{i:02d}" for i in range(19, 21)]  # 19, 20
test_suffixes_group2 = [f"{i:02d}" for i in range(21, 23)]  # 21, 22

def process_dataset_by_suffix(data_folder, suffixes):
    features, labels = [], []
    for label_folder, label in [('falls', 2), ('nonfalls', 1)]:
        folder_path = os.path.join(data_folder, label_folder)
        if not os.path.exists(folder_path):
            continue
        for file in os.listdir(folder_path):
            if file.endswith('.mat') and any(file.endswith(f"{s}.mat") for s in suffixes):
                path = os.path.join(folder_path, file)
                features.append(extract_features_from_file(path))
                labels.append(label)
    return np.array(features), np.array(labels)

DATA_FOLDER = "../data"

X_test_group1, y_test_group1 = process_dataset_by_suffix(DATA_FOLDER, test_suffixes_group1)
X_test_group2, y_test_group2 = process_dataset_by_suffix(DATA_FOLDER, test_suffixes_group2)

print(X_test_group1.shape, y_train.shape)
print(X_test_group2.shape, y_test.shape)

(0,) (0,)
(0,) (0,)


In [13]:
def process_dataset_by_suffix(data_folder, suffixes):
    features, labels = [], []
    for label_folder, label in [('falls', 2), ('nonfalls', 1)]:
        folder_path = os.path.join(data_folder, label_folder)
        if not os.path.exists(folder_path):
            continue
        for file in os.listdir(folder_path):
            if file.endswith('.mat') and any(file.endswith(f"{s}.mat") for s in suffixes):
                path = os.path.join(folder_path, file)
                features.append(extract_features_from_file(path))
                labels.append(label)
    return np.array(features), np.array(labels)

# Use suffixes for train and test
train_suffixes = [f"{i:02d}" for i in range(1, 19)]
test_suffixes = [f"{i:02d}" for i in range(19, 23)]

DATA_FOLDER = "../data"  # Make sure this matches your folder structure

X_train, y_train = process_dataset_by_suffix(DATA_FOLDER, train_suffixes)
X_test, y_test = process_dataset_by_suffix(DATA_FOLDER, test_suffixes)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(0,) (0,)
(0,) (0,)


In [None]:
def process_dataset(folders):
    features, labels = [], []
    for folder in folders:
        for label_folder, label in [('falls', 2), ('nonfalls', 1)]:
            folder_path = os.path.join(folder, label_folder)
            if not os.path.exists(folder_path):
                continue
            for file in os.listdir(folder_path):
                if file.endswith('.mat'):
                    path = os.path.join(folder_path, file)
                    features.append(extract_features_from_file(path))
                    labels.append(label)
    return np.array(features), np.array(labels)

# Define train and test folders
train_folders = [os.path.join(DATA_FOLDER, f"{i:02d}") for i in range(1, 19)]
test_folders = [os.path.join(DATA_FOLDER, f"{i:02d}") for i in range(19, 23)]

X_train, y_train = process_dataset(train_folders)
X_test, y_test = process_dataset(test_folders)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(0,) (0,)
(0,) (0,)


In [14]:
def extract_features_from_file(filepath):
    mat = scipy.io.loadmat(filepath)
    data = mat['newData']
    imu_data = data[:, 1:10]
    means = np.mean(imu_data, axis=0)
    max_vals = np.max(imu_data, axis=0)
    std_devs = np.std(imu_data, axis=0)
    return np.concatenate([means, max_vals, std_devs])


In [15]:
def process_dataset(root_folder):
    features, labels = [], []
    for label_folder, label in [('falls', 2), ('nonfalls', 1)]:
        folder_path = os.path.join(root_folder, label_folder)
        for file in os.listdir(folder_path):
            if file.endswith('.mat'):
                path = os.path.join(folder_path, file)
                features.append(extract_features_from_file(path))
                labels.append(label)
    return np.array(features), np.array(labels)


In [None]:
# X_train, y_train = process_dataset("path_to_training_folder")
# X_test, y_test = process_dataset("path_to_testing_folder")

# Use the correct folder split for train and test
# train_folders = [os.path.join(DATA_FOLDER, f"{i:02d}") for i in range(1, 19)]
# test_folders = [os.path.join(DATA_FOLDER, f"{i:02d}") for i in range(19, 23)]

# # Use the process_dataset function that accepts a list of folders (from cell 2)
# X_train, y_train = process_dataset(train_folders)
# X_test, y_test = process_dataset(test_folders)


TypeError: expected str, bytes or os.PathLike object, not list

In [9]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(0,) (0,)
(0,) (0,)


In [16]:
rf = RandomForestClassifier().fit(X_train, y_train)
gb = GradientBoostingClassifier().fit(X_train, y_train)
svm = SVC(probability=True).fit(X_train, y_train)
xgb_model = xgb.XGBClassifier().fit(X_train, y_train)


ValueError: Expected 2D array, got 1D array instead:
array=[].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [None]:
explainer_rf = shap.Explainer(rf, X_train)
explainer_gb = shap.Explainer(gb, X_train)
explainer_xgb = shap.Explainer(xgb_model, X_train)
explainer_svm = shap.KernelExplainer(svm.predict_proba, X_train)

shap_rf = explainer_rf(X_test)
shap_gb = explainer_gb(X_test)
shap_xgb = explainer_xgb(X_test)
shap_svm = explainer_svm.shap_values(X_test)[1]


In [None]:
feature_names = [f"{stat}_{sensor}_{axis}" for stat in ['mean','max','std']
                 for sensor in ['acc','gyro','mag'] for axis in ['x','y','z']]

def top_shap(shap_vals, model_name, feature_names):
    mean_abs = np.abs(shap_vals).mean(axis=0)
    return pd.DataFrame({'Feature': feature_names, model_name: mean_abs})

df_rf = top_shap(shap_rf.values, 'Random Forest', feature_names)
df_gb = top_shap(shap_gb.values, 'Gradient Boosting', feature_names)
df_xgb = top_shap(shap_xgb.values, 'XGBoost', feature_names)
df_svm = top_shap(shap_svm, 'SVM', feature_names)

df_merge = df_rf.merge(df_gb, on='Feature', how='outer') \
                .merge(df_xgb, on='Feature', how='outer') \
                .merge(df_svm, on='Feature', how='outer')


In [None]:
df_melt = df_merge.melt(id_vars='Feature', var_name='Model', value_name='SHAP Value')

fig = px.bar(
    df_melt, x='SHAP Value', y='Feature', color='Model',
    orientation='h', barmode='group',
    title='🎯 SHAP Feature Importance Battle Royale',
    labels={'SHAP Value': 'Mean |SHAP| Value'}
)
fig.update_layout(template='plotly_dark')
fig.show()


In [None]:
y_pred = rf.predict(X_test)
print("🔍 Random Forest Classification Report:\n")
print(classification_report(y_test, y_pred))
