In [3]:
import pandas as pd
import numpy as np
import os
import warnings
from scipy.fft import fft
from scipy.signal import welch
from scipy.stats import entropy, gmean
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# Suppress known numerical warnings (e.g., divide by zero, log of zero)
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# --- Parameters ---
fs = 50
segment_size = 250
axes = ['back_x', 'back_y', 'back_z', 'thigh_x', 'thigh_y', 'thigh_z']

# --- Load and process all CSV files from harth/ ---
folder_path = "./harth"
features_list = []
labels = []

def spectral_centroid(freqs, psd):
    return np.sum(freqs * psd) / np.sum(psd) if np.sum(psd) > 0 else 0

for filename in os.listdir(folder_path):
    if filename.endswith(".csv"):
        df = pd.read_csv(os.path.join(folder_path, filename))
        if 'label' not in df.columns:
            continue
        for start in range(0, len(df) - segment_size, segment_size):
            end = start + segment_size
            segment = df.iloc[start:end]
            features = {}
            for axis in axes:
                signal = segment[axis].to_numpy()
                fft_vals = fft(signal)
                fft_mag = np.abs(fft_vals[:segment_size // 2])
                freqs, psd = welch(signal, fs=fs)
                psd_sum = np.sum(psd)
                psd_norm = psd / psd_sum if psd_sum > 0 else np.zeros_like(psd)

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    features[f'{axis}_spectral_energy'] = np.sum(fft_mag ** 2)
                    features[f'{axis}_dominant_freq'] = freqs[np.argmax(np.abs(fft_vals))]
                    features[f'{axis}_frequency_variance'] = np.var(psd)
                    features[f'{axis}_spectral_centroid'] = spectral_centroid(freqs, psd)
                    features[f'{axis}_spectral_entropy'] = entropy(psd_norm)
                    features[f'{axis}_spectral_flatness'] = gmean(psd + 1e-12) / (np.mean(psd) + 1e-12)
                    features[f'{axis}_peak_freq'] = freqs[np.argmax(psd)]
                    features[f'{axis}_bandwidth'] = (
                        np.sqrt(np.sum(psd * (freqs - spectral_centroid(freqs, psd))**2) / psd_sum)
                        if psd_sum > 0 else 0
                    )

                for i in range(5):
                    features[f'{axis}_fft_coef_{i}'] = fft_mag[i] if i < len(fft_mag) else 0

            features_list.append(features)
            labels.append(segment['label'].mode()[0])

# --- Build dataset ---
X = pd.DataFrame(features_list)
y = pd.Series(labels)

# --- Impute + Split ---
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y, test_size=0.3, random_state=42)

# --- Define and evaluate models ---
models = {
    "Decision Tree": DecisionTreeClassifier(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Machine": SVC(),
    "Random Forest": RandomForestClassifier()
}

results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    
    print(f"\n=== {name} ===")
    print("Accuracy:", round(acc, 4))
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        print("Classification Report:\n", classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

    results.append((name, acc))

# --- Print Accuracy Summary ---
results_df = pd.DataFrame(results, columns=["Model", "Test Accuracy"]).sort_values(by="Test Accuracy", ascending=False)
print("\n=== Model Comparison ===")
print(results_df.to_string(index=False))



=== Decision Tree ===
Accuracy: 0.9129
Classification Report:
               precision    recall  f1-score   support

           1       0.89      0.87      0.88      1449
           2       0.96      0.97      0.96       359
           3       0.40      0.37      0.38       232
           4       0.62      0.61      0.62       105
           5       0.57      0.56      0.56        97
           6       0.83      0.86      0.85       929
           7       0.99      0.99      0.99      3450
           8       0.99      0.99      0.99       525
          13       0.91      0.92      0.91       480
          14       0.75      0.77      0.76        65
         130       0.46      0.56      0.50        48
         140       0.00      0.00      0.00        11

    accuracy                           0.91      7750
   macro avg       0.70      0.71      0.70      7750
weighted avg       0.91      0.91      0.91      7750

Confusion Matrix:
 [[1258   12   49   19   32   53    9    2    7    