In [1]:
import os
import numpy as np
import pandas as pd
from scipy.stats import entropy, iqr, skew
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [13]:
def load_data(data_dir):
    segments = []
    labels = []

    for activity_folder in os.listdir(data_dir):
        activity_path = os.path.join(data_dir, activity_folder)
        
        for csv_file in os.listdir(activity_path):
            csv_path = os.path.join(activity_path, csv_file)
            data = pd.read_csv(csv_path)
            segment = data.values
            segments.append(segment)
            labels.append(activity_folder)

    return np.asarray(segments), np.asarray(labels)


def calculate_time_domain_features(data):
    features = []
    
    # Time domain features for axis x
    features.append(np.max(data[:, 0]))
    features.append(np.min(data[:, 0]))
    # features.append(entropy(data[:, 0]))
    features.append(iqr(data[:, 0]))
    
    # Time domain features for axis y
    features.append(np.max(data[:, 1]))
    features.append(np.argmin(data[:, 1]))
    features.append(np.mean(np.abs(data[:, 1] - np.mean(data[:, 1]))))
    features.append(np.median(data[:, 1]))
    features.append(skew(data[:, 1]))
    features.append(np.std(data[:, 1]))
    features.append(np.sqrt(np.mean(np.square(data[:, 1]))))
    
    # Time domain features for axis z
    features.append(skew(data[:, 2]))
    
    return features
    

def train_model(X_train, y_train):
    svm_model = SVC(kernel='rbf', gamma='scale')
    svm_model.fit(X_train, y_train)

    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)

    return svm_model, rf_model


def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    return accuracy, report


def main():
    data_dir = os.path.join(os.getcwd(), "data")
    segments, labels = load_data(data_dir)
    X = np.asarray([calculate_time_domain_features(segment) for segment in segments])
    y = np.asarray(labels)
    
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
    svm_model, rf_model = train_model(X_train, y_train)
    
    print("SVM Model Evaluation:")
    svm_accuracy, svm_report = evaluate_model(svm_model, X_test, y_test)
    print("Accuracy:", svm_accuracy)
    print(svm_report)
    
    print("\nRandom Forest Model Evaluation:")
    rf_accuracy, rf_report = evaluate_model(rf_model, X_test, y_test)
    print("Accuracy:", rf_accuracy)
    print(rf_report)


if __name__ == "__main__":
    main()

SVM Model Evaluation:
Accuracy: 0.982985305491106
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       220
     running       1.00      1.00      1.00       689
      stairs       0.90      0.30      0.45        30
     walking       0.94      1.00      0.97       354

    accuracy                           0.98      1293
   macro avg       0.96      0.82      0.85      1293
weighted avg       0.98      0.98      0.98      1293


Random Forest Model Evaluation:
Accuracy: 0.9984532095901005
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       220
     running       1.00      1.00      1.00       689
      stairs       1.00      0.93      0.97        30
     walking       0.99      1.00      1.00       354

    accuracy                           1.00      1293
   macro avg       1.00      0.98      0.99      1293
weighted avg       1.00      1.00      1.00      1293



Both models achieved high accuracy, with the Random Forest model slightly outperforming the SVM model.
The precision, recall, and F1-score metrics provide detailed insights into the performance of the models for each class. The Random Forest model generally shows better performance, especially in terms of recall for the "stairs" class.