In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from pathlib import Path
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from scipy.stats import kurtosis, skew
import os

# Klasyfikacja danych przy użyciu algorytmów SVM i Random Forest

1. Pobieranie danych

Funkcja do ładowania danych z folderów

In [2]:
def load_data(folder_path, label):
    """Load every CSV file in a folder as one flattened, labelled sample.

    Each ``.csv`` file directly inside ``folder_path`` is read with
    ``pd.read_csv``, its values are flattened row by row into a 1-D vector,
    and ``label`` is appended as the last element.

    Args:
        folder_path: Directory (str or path-like) that holds the CSV files.
        label: Class label appended to every sample from this folder.

    Returns:
        np.ndarray with one row per CSV file (features followed by the
        label), or an empty array when the folder contains no CSV files.
    """
    # Path.iterdir() yields entries in arbitrary, filesystem-dependent order.
    # Sorting fixes the row order so that the seeded train/test split built
    # from these rows is reproducible across machines and runs.
    csv_files = sorted(
        entry for entry in Path(folder_path).iterdir() if entry.suffix == ".csv"
    )
    samples = [
        np.append(pd.read_csv(csv_file).values.flatten(), label)
        for csv_file in csv_files
    ]
    return np.array(samples)

In [3]:
# Class label -> folder holding that activity's CSV recordings.
folders = {
    0: "data/idle",
    1: "data/running",
    2: "data/stairs",
    3: "data/walking",
}

# Per-class arrays are collected under their own name so that `data` only ever
# refers to the final stacked array.
class_arrays = []
if not os.path.exists("data"):
    print("No data folder available. Check your data folders and CSV files.")
else:
    for label, path in folders.items():
        # A missing class folder is reported like an empty one; calling
        # load_data on it would raise from Path.iterdir() instead.
        folder_data = load_data(path, label) if os.path.isdir(path) else np.array([])
        if folder_data.size > 0:
            class_arrays.append(folder_data)
            print(f"{path} - Data available")
        else:
            print(f"{path} - No data available. Check your data folders and CSV files")

# Always bind `data` to a 2-D array. np.vstack raises ValueError on an empty
# list, so fall back to an empty array with a single (label) column when
# nothing was loaded; `data.size` checks and `data[:, -1]` slicing stay valid.
data = np.vstack(class_arrays) if class_arrays else np.empty((0, 1))

data/idle - Data available
data/running - Data available
data/stairs - Data available
data/walking - Data available


2. Przygotowanie danych i ich podział na treningowy i testowy

In [4]:
# Validate before slicing/splitting: on an empty array the split below fails
# with a less obvious error, so a size check placed after it can never help.
if data.size == 0:
    raise ValueError("No data loaded. Check your data folders and CSV files.")

# The last column holds the class label; every column before it is a feature.
X = data[:, :-1]
y = data[:, -1]

# Hold out 20% of the samples for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=92)
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (5169, 90)
X_test shape: (1293, 90)
y_train shape: (5169,)
y_test shape: (1293,)


3. Modele i ich ewaluacja

SVM Model

In [5]:
# Train a support-vector classifier with scikit-learn's default settings and
# evaluate it on the held-out test split.
svm_model = SVC().fit(X_train, y_train)  # fit() returns the fitted estimator itself
y_pred_svm = svm_model.predict(X_test)

print("SVM Classifier Report:")
print(classification_report(y_true=y_test, y_pred=y_pred_svm))
print("SVM Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_svm))

SVM Classifier Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       185
         1.0       1.00      1.00      1.00       678
         2.0       0.86      0.53      0.65        34
         3.0       0.96      0.99      0.98       396

    accuracy                           0.99      1293
   macro avg       0.95      0.88      0.91      1293
weighted avg       0.98      0.99      0.98      1293

SVM Accuracy: 0.9853054911059551


Random Forest Model

In [6]:
# Seed the forest so its bootstrap samples and feature draws, and therefore the
# reported metrics, are reproducible; 92 matches the seed of the train/test split.
rf_model = RandomForestClassifier(n_estimators=100, random_state=92)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
print("Random Forest Classifier Report:")
# zero_division=0 reports 0.0 for a class that is never predicted without
# emitting UndefinedMetricWarning; the reported numbers are unchanged.
print(classification_report(y_test, y_pred_rf, zero_division=0))
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))

Random Forest Classifier Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       185
         1.0       1.00      1.00      1.00       678
         2.0       0.00      0.00      0.00        34
         3.0       0.92      1.00      0.96       396

    accuracy                           0.97      1293
   macro avg       0.73      0.75      0.74      1293
weighted avg       0.95      0.97      0.96      1293

Random Forest Accuracy: 0.9737045630317092


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# __Time domain features__

We extract 17 time domain features from each window for each axis x, y and z. Guided by previous work, the individual features for each axis involve statistical attributes such as __mean, variance, standard deviation and envelope metrics, i.e. median, range, maximum and minimum value, root mean square metric__. Furthermore, we use signal magnitude area, indexes of minimum and maximum value, power, energy, entropy, __skewness, kurtosis__, interquartile range, and mean absolute deviation of the signal. To see the cross-relational effects of different motion axes, we also use cross correlation of binary combinations of x, y, and z. Note: the implementation below computes only a subset of these features — mean, median, standard deviation, variance, kurtosis, skewness, minimum, maximum, root mean square and energy — i.e. 10 features per axis (30 per window).

1. Pobieranie danych

Funkcja do obliczania cech domeny czasowej

In [7]:
def extract_features(file_path):
    """Compute time-domain statistics for the three accelerometer axes of one CSV.

    The file is read with ``pd.read_csv`` and must contain the columns
    ``accelerometer_X``, ``accelerometer_Y`` and ``accelerometer_Z``.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        1-D np.ndarray of 30 values: for each axis, in X, Y, Z order, the
        mean, median, standard deviation, variance, kurtosis, skewness,
        minimum, maximum, root mean square and sum of squares (energy).
    """
    frame = pd.read_csv(file_path)

    def _axis_stats(signal):
        # Ten descriptors of a single axis, in the order listed in the docstring.
        squared = signal ** 2
        return [
            signal.mean(),
            signal.median(),
            signal.std(),
            signal.var(),
            kurtosis(signal),
            skew(signal),
            signal.min(),
            signal.max(),
            np.sqrt(np.mean(squared)),  # root mean square
            np.sum(squared),  # signal energy
        ]

    axis_columns = ("accelerometer_X", "accelerometer_Y", "accelerometer_Z")
    per_axis = [_axis_stats(frame[column]) for column in axis_columns]
    # Concatenate the per-axis lists into one flat feature vector.
    return np.array([value for stats in per_axis for value in stats])

Funkcja do ładowania danych z folderów

In [8]:
def load_data(folder_path, label):
    """Build one labelled feature row per CSV file found in a folder.

    Every file directly inside ``folder_path`` whose name ends with ``.csv``
    is passed to ``extract_features``; ``label`` is appended to the returned
    feature vector as its last element.

    Args:
        folder_path: Directory (str or path-like) that holds the CSV files.
        label: Class label appended to every sample from this folder.

    Returns:
        np.ndarray with one row per CSV file (features followed by the
        label), or an empty array when the folder contains no CSV files.
    """
    # Path.iterdir() yields entries in arbitrary, filesystem-dependent order.
    # Sorting fixes the row order so that the seeded train/test split built
    # from these rows is reproducible across machines and runs.
    csv_files = sorted(
        entry for entry in Path(folder_path).iterdir() if entry.name.endswith(".csv")
    )
    samples = [np.append(extract_features(csv_file), label) for csv_file in csv_files]
    return np.array(samples)

In [9]:
# Class label -> folder holding that activity's CSV recordings.
folders = {
    0: "data/idle",
    1: "data/running",
    2: "data/stairs",
    3: "data/walking",
}

# Per-class arrays are collected under their own name so that `data` only ever
# refers to the final stacked array.
class_arrays = []
if not os.path.exists("data"):
    print("No data folder available. Check your data folders and CSV files.")
else:
    for label, path in folders.items():
        # A missing class folder is reported like an empty one; calling
        # load_data on it would raise from Path.iterdir() instead.
        folder_data = load_data(path, label) if os.path.isdir(path) else np.array([])
        if folder_data.size > 0:
            class_arrays.append(folder_data)
            print(f"{path} - Data available")
        else:
            print(f"{path} - No data available. Check your data folders and CSV files")

# Always bind `data` to a 2-D array. np.vstack raises ValueError on an empty
# list, so fall back to an empty array with a single (label) column when
# nothing was loaded; `data.size` checks and `data[:, -1]` slicing stay valid.
data = np.vstack(class_arrays) if class_arrays else np.empty((0, 1))

data/idle - Data available
data/running - Data available
data/stairs - Data available
data/walking - Data available


2. Przygotowanie danych i ich podział na treningowy i testowy

In [10]:
# Validate before slicing/splitting: on an empty array the split below fails
# with a less obvious error, so a size check placed after it can never help.
if data.size == 0:
    raise ValueError("No data loaded. Check your data folders and CSV files.")

# The last column holds the class label; every column before it is a feature.
X = data[:, :-1]
y = data[:, -1]

# Hold out 20% of the samples for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=92)
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (5169, 30)
X_test shape: (1293, 30)
y_train shape: (5169,)
y_test shape: (1293,)


3. Modele i ich ewaluacja

SVM Model

In [11]:
# Train a default-parameter support-vector classifier on the time-domain
# features and evaluate it on the held-out test split.
# NOTE(review): the features are fed in unscaled and span very different
# magnitudes (e.g. sums of squares vs. skewness); SVMs are sensitive to feature
# scale, so standardizing them first is worth trying - TODO confirm the effect.
svm_model = SVC()
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)
print("SVM Classifier Report:")
# zero_division=0 reports 0.0 for a class that is never predicted without
# emitting UndefinedMetricWarning; the reported numbers are unchanged.
print(classification_report(y_test, y_pred_svm, zero_division=0))
print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))

SVM Classifier Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       185
         1.0       0.99      0.97      0.98       678
         2.0       0.00      0.00      0.00        34
         3.0       0.88      0.98      0.93       396

    accuracy                           0.95      1293
   macro avg       0.72      0.74      0.73      1293
weighted avg       0.93      0.95      0.94      1293

SVM Accuracy: 0.9528228924980665


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest Model

In [12]:
# Seed the forest so its bootstrap samples and feature draws, and therefore the
# reported metrics, are reproducible; 92 matches the seed of the train/test split.
rf_model = RandomForestClassifier(n_estimators=100, random_state=92)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
print("Random Forest Classifier Report:")
# zero_division=0 reports 0.0 for a class that is never predicted without
# emitting UndefinedMetricWarning; the reported numbers are unchanged.
print(classification_report(y_test, y_pred_rf, zero_division=0))
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))

Random Forest Classifier Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       185
         1.0       1.00      1.00      1.00       678
         2.0       1.00      0.94      0.97        34
         3.0       0.99      1.00      1.00       396

    accuracy                           1.00      1293
   macro avg       1.00      0.99      0.99      1293
weighted avg       1.00      1.00      1.00      1293

Random Forest Accuracy: 0.9984532095901005
