In [None]:
import pandas as pd
import numpy as np
import os
import warnings
from scipy.fft import fft
from scipy.signal import welch
from scipy.stats import entropy, gmean
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# Suppress known numerical warnings (e.g., divide by zero, log of zero)
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# --- Parameters ---
fs = 50
segment_size = 250
axes = ['back_x', 'back_y', 'back_z', 'thigh_x', 'thigh_y', 'thigh_z']

# --- Load and process all CSV files from harth/ ---
folder_path = "./harth"
features_list = []
labels = []

def spectral_centroid(freqs, psd):
    return np.sum(freqs * psd) / np.sum(psd) if np.sum(psd) > 0 else 0

for filename in os.listdir(folder_path):
    if filename.endswith(".csv"):
        df = pd.read_csv(os.path.join(folder_path, filename))
        if 'label' not in df.columns:
            continue
        for start in range(0, len(df) - segment_size, segment_size):
            end = start + segment_size
            segment = df.iloc[start:end]
            features = {}
            for axis in axes:
                signal = segment[axis].to_numpy()
                fft_vals = fft(signal)
                fft_mag = np.abs(fft_vals[:segment_size // 2])
                freqs, psd = welch(signal, fs=fs)
                psd_sum = np.sum(psd)
                psd_norm = psd / psd_sum if psd_sum > 0 else np.zeros_like(psd)

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    features[f'{axis}_spectral_energy'] = np.sum(fft_mag ** 2)
                    features[f'{axis}_dominant_freq'] = freqs[np.argmax(np.abs(fft_vals))]
                    features[f'{axis}_frequency_variance'] = np.var(psd)
                    features[f'{axis}_spectral_centroid'] = spectral_centroid(freqs, psd)
                    features[f'{axis}_spectral_entropy'] = entropy(psd_norm)
                    features[f'{axis}_spectral_flatness'] = gmean(psd + 1e-12) / (np.mean(psd) + 1e-12)
                    features[f'{axis}_peak_freq'] = freqs[np.argmax(psd)]
                    features[f'{axis}_bandwidth'] = (
                        np.sqrt(np.sum(psd * (freqs - spectral_centroid(freqs, psd))**2) / psd_sum)
                        if psd_sum > 0 else 0
                    )

                for i in range(5):
                    features[f'{axis}_fft_coef_{i}'] = fft_mag[i] if i < len(fft_mag) else 0

            features_list.append(features)
            labels.append(segment['label'].mode()[0])

# --- Build dataset ---
X = pd.DataFrame(features_list)
y = pd.Series(labels)

# --- Impute + Split ---
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y, test_size=0.3, random_state=42)

# --- Define and evaluate models ---
models = {
    "Decision Tree": DecisionTreeClassifier(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Machine": SVC(),
    "Random Forest": RandomForestClassifier()
}

results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    
    print(f"\n=== {name} ===")
    print("Accuracy:", round(acc, 4))
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        print("Classification Report:\n", classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

    results.append((name, acc))

# --- Print Accuracy Summary ---
results_df = pd.DataFrame(results, columns=["Model", "Test Accuracy"]).sort_values(by="Test Accuracy", ascending=False)
print("\n=== Model Comparison ===")
print(results_df.to_string(index=False))


In [1]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from sklearn.model_selection import cross_val_score
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

macro_f1_scores = []
micro_f1_scores = []

print("\n=== Part 3: Model Evaluation on Test Data ===")

for name, model in models.items():
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    macro_f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
    micro_f1 = f1_score(y_test, y_pred, average='micro', zero_division=0)
    macro_f1_scores.append((name, macro_f1))
    micro_f1_scores.append((name, micro_f1))

    print(f"\n=== {name} ===")
    print(f"Test Accuracy: {acc:.4f}")
    print(f"Macro F1 Score: {macro_f1:.4f}")
    print(f"Micro F1 Score: {micro_f1:.4f}")

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f"{name} – Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.tight_layout()
    plt.show()

    # Classification Report
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        print("Classification Report:")
        print(classification_report(y_test, y_pred, zero_division=0))

# --- Plot Macro and Micro F1 Scores ---
macro_df = pd.DataFrame(macro_f1_scores, columns=["Model", "Macro F1"])
micro_df = pd.DataFrame(micro_f1_scores, columns=["Model", "Micro F1"])

plt.figure(figsize=(10, 5))
plt.bar(macro_df["Model"], macro_df["Macro F1"], label='Macro F1', alpha=0.7)
plt.bar(micro_df["Model"], micro_df["Micro F1"], label='Micro F1', alpha=0.7)
plt.ylim(0, 1.05)
plt.ylabel("F1 Score")
plt.title("Macro vs. Micro F1 Score by Model")
plt.legend()
plt.grid(axis='y', linestyle='--', alpha=0.3)
plt.tight_layout()
plt.show()

# --- Optional: K-Fold Cross Validation on Entire Dataset ---
print("\n=== Optional: 5-Fold Cross-Validation Accuracy ===")
for name, model in models.items():
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        scores = cross_val_score(model, X_imputed, y, cv=5, scoring='accuracy')
        print(f"{name}: Mean Accuracy = {scores.mean():.4f}, Std = {scores.std():.4f}")



=== Part 3: Model Evaluation on Test Data ===


NameError: name 'models' is not defined