In [None]:
import os, json, joblib
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from collections import Counter
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline


In [6]:
def load_dataset(base_folder, motion):
    X, y = [], []

    for stroke in os.listdir(base_folder):  # e.g., freestyle, backstroke
        stroke_path = os.path.join(base_folder, stroke)
        motion_path = os.path.join(stroke_path, motion)  # e.g., freestyle/kick/

        if not os.path.isdir(motion_path):
            continue

        for file in os.listdir(motion_path):
            if not file.endswith(".json"):
                continue

            filepath = os.path.join(motion_path, file)
            with open(filepath, 'r') as f:
                data = json.load(f)

            # Ensure this file matches the motion we're training for
            if data.get("motion") != motion:
                continue

            # Get features and label from JSON
            features = data.get("features")
            label = data.get("label")

            if features is None or label is None:
                continue  # Skip incomplete data

            X.append(list(features.values()))
            y.append(label)

    return X, y


In [12]:
def train_and_evaluate(X, y, motion):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    models = {
    "SVM": make_pipeline(StandardScaler(), SVC()),
    "RandomForest": RandomForestClassifier(),
    "MLP": make_pipeline(StandardScaler(), MLPClassifier(max_iter=1000))
    }


    for name, model in models.items():
        print(f"\nTraining {name} on {motion}...")
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        print(classification_report(y_test, y_pred))
        os.makedirs("models", exist_ok=True)
        joblib.dump(model, f"models/{motion}_{name}.pkl")


In [14]:
motions = ["kick", "breathing", "hand_entry", "overall_posture"]
base_path = "segmented_videos"

for motion in motions:
    print(f"\n==== {motion.upper()} ====")
    X, y = load_dataset(base_path, motion)
    print("Label distribution:", Counter(y))
    train_and_evaluate(X, y, motion)



==== KICK ====
Label distribution: Counter({'inconsistent_kick': 1414, 'consistent_kick': 950})

Training SVM on kick...
                   precision    recall  f1-score   support

  consistent_kick       0.72      0.60      0.65       199
inconsistent_kick       0.74      0.83      0.78       274

         accuracy                           0.73       473
        macro avg       0.73      0.72      0.72       473
     weighted avg       0.73      0.73      0.73       473


Training RandomForest on kick...
                   precision    recall  f1-score   support

  consistent_kick       1.00      1.00      1.00       199
inconsistent_kick       1.00      1.00      1.00       274

         accuracy                           1.00       473
        macro avg       1.00      1.00      1.00       473
     weighted avg       1.00      1.00      1.00       473


Training MLP on kick...
                   precision    recall  f1-score   support

  consistent_kick       0.98      0.99      0