In [2]:
import sys
import os

project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from src.pipeline import run_process

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

Load all datasets

In [4]:
# Force Plate
df_fp_open = pd.read_csv("../data/processed/ML_datasets/oa_averages_open_fp.csv")
df_fp_closed = pd.read_csv("../data/processed/ML_datasets/oa_averages_closed_fp.csv")

# Zed
df_zed_open = pd.read_csv("../data/processed/ML_datasets/oa_averages_open_zed.csv")
df_zed_closed = pd.read_csv("../data/processed/ML_datasets/oa_averages_closed_zed.csv")

# Front Phone
df_front_open = pd.read_csv("../data/processed/ML_datasets/oa_averages_open_front.csv")
df_front_closed = pd.read_csv("../data/processed/ML_datasets/oa_averages_closed_front.csv")

# Back Phone
df_back_open = pd.read_csv("../data/processed/ML_datasets/oa_averages_open_back.csv")
df_back_closed = pd.read_csv("../data/processed/ML_datasets/oa_averages_closed_back.csv")


In [4]:
df_fp_closed.head(2)

Unnamed: 0,participant name,gender,age,height,weight,foot length,foot width,faller,low stability,AP MAD,AP Max abs dev,AP RMS,AP Range,Ellipse area,ML MAD,ML Max abs dev,ML RMS,ML Range,Sway RMS
0,dida,female,79,156.0,67.0,28.0,11.5,0,0,0.541914,2.413038,0.701916,4.026925,9.240993,0.536572,2.280691,0.677683,3.820725,0.979758
1,ronald,male,80,176.0,89.0,32.0,12.0,0,0,0.685373,2.944936,0.872391,4.9028,11.356841,0.557594,2.062567,0.705783,3.5597,1.13331


Remove demographic features

In [5]:
columns_to_remove = ["participant name", "age", "gender", "height", "weight", "foot length", "foot width"]

# Force Plate
df_fp_open = df_fp_open.drop(columns=columns_to_remove)
df_fp_closed = df_fp_closed.drop(columns=columns_to_remove)

# Zed
df_zed_open = df_zed_open.drop(columns=columns_to_remove)
df_zed_closed = df_zed_closed.drop(columns=columns_to_remove)

# Front Phone
df_front_open = df_front_open.drop(columns=columns_to_remove)
df_front_closed = df_front_closed.drop(columns=columns_to_remove)

# Back Phone
df_back_open = df_back_open.drop(columns=columns_to_remove)
df_back_closed = df_back_closed.drop(columns=columns_to_remove)

In [11]:
PROCESSES = [
    ("ForcePlate-Open",   df_fp_open),
    ("ForcePlate-Closed", df_fp_closed),
    ("ZED-Open",          df_zed_open),
    ("ZED-Closed",        df_zed_closed),
    ("Front-Open",        df_front_open),
    ("Front-Closed",      df_front_closed),
    ("Back-Open",         df_back_open),
    ("Back-Closed",       df_back_closed)
]

### Low stability - f1 score

In [None]:
all_results = {}
for name, df_proc in PROCESSES:
    print("="*70)
    print(f"Running: {name}")
    print("="*70)

     # Drop faller column just for this run
    df_proc_tmp = df_proc.drop(columns=["faller"], errors="ignore")

    all_results[name] = run_process(name, df_proc_tmp, target_col="low stability", plots_dir="plots_low_stability_f1")

Running: ForcePlate - Open

[ForcePlate - Open] LogReg
  F1: 0.320 | Precision: 0.267 | Recall: 0.400
  Most chosen LR params (top 5):
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l1', 'corr': CorrelationFilter(threshold=0.85)} → 30 folds
    {'clf__C': np.float64(0.01), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 4 folds
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 3 folds
    {'clf__C': np.float64(1.0), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 1 folds
  Saved final tuned model → c:\fall_risk_analysis\machine_learning\machine_learning\trained_models\ForcePlate_-_Open_LogReg_f1.joblib

[ForcePlate - Open] DecisionTree
  F1: 0.636 | Precision: 0.583 | Recall: 0.700
  Most chosen DT params (top 5):
    {'clf__criterion': 'gini', 'clf__max_depth': 3, 'clf__min_samples_leaf': 2} → 26 folds
    

In [None]:
# Assuming all_results is your nested dict
rows = []

for dataset, models in all_results.items():
    for model_name, metrics in models.items():
        rows.append({
            "dataset": dataset,   # e.g. "ZED - Open"
            "model": model_name,  # e.g. "LogReg" / "DecisionTree"
            "f1": float(metrics["f1"]),
            "precision": float(metrics["precision"]),
            "recall": float(metrics["recall"]),
            "AP": float(metrics["AP"])
        })

df_results = pd.DataFrame(rows).round(3)
df_results

Unnamed: 0,dataset,model,f1,precision,recall,AP
0,ForcePlate - Open,LogReg,0.32,0.266667,0.4,0.220882
1,ForcePlate - Open,DecisionTree,0.636364,0.583333,0.7,0.459372
2,ForcePlate - Closed,LogReg,0.45,0.290323,1.0,0.307787
3,ForcePlate - Closed,DecisionTree,0.416667,0.333333,0.555556,0.43139
4,ZED - Open,LogReg,0.307692,0.25,0.4,0.227394
5,ZED - Open,DecisionTree,0.357143,0.277778,0.5,0.404503
6,ZED - Closed,LogReg,0.466667,0.333333,0.777778,0.425932
7,ZED - Closed,DecisionTree,0.4,0.3125,0.555556,0.514944
8,Front - Open,LogReg,0.25,0.214286,0.3,0.219301
9,Front - Open,DecisionTree,0.533333,0.4,0.8,0.332632


#### Low stability F2 score

In [14]:
all_results_f2 = {}
for name, df_proc in PROCESSES:
    print("="*70)
    print(f"Running: {name}")
    print("="*70)

     # Drop faller column just for this run
    df_proc_tmp = df_proc.drop(columns=["faller"], errors="ignore")

    all_results_f2[name] = run_process(name, df_proc_tmp, target_col="low stability", plots_dir="plots_low_stability_f2", score_metric="f2")

Running: ForcePlate-Open

[ForcePlate-Open] LogReg
  F1: 0.320 | Precision: 0.267 | Recall: 0.400 | F2: 0.364
  Most chosen LR params (top 5):
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l1', 'corr': CorrelationFilter(threshold=0.85)} → 30 folds
    {'clf__C': np.float64(0.01), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 4 folds
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 3 folds
    {'clf__C': np.float64(1.0), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 1 folds
  Saved final tuned model → c:\fall_risk_analysis\machine_learning\machine_learning\trained_models\ForcePlate-Open_LogReg_f2.joblib

[ForcePlate-Open] DecisionTree
  F1: 0.636 | Precision: 0.583 | Recall: 0.700 | F2: 0.673
  Most chosen DT params (top 5):
    {'clf__criterion': 'gini', 'clf__max_depth': 3, 'clf__min_samples_leaf': 2}

In [17]:
# Assuming all_results is your nested dict
rows = []

for dataset, models in all_results_f2.items():
    for model_name, metrics in models.items():
        rows.append({
            "dataset": dataset,   # e.g. "ZED - Open"
            "model": model_name,  # e.g. "LogReg" / "DecisionTree"
            "f2": float(metrics["F2"]),
            "precision": float(metrics["precision"]),
            "recall": float(metrics["recall"]),
            "AP": float(metrics["AP"])
        })

df_results_f2 = pd.DataFrame(rows)
df_results_f2

Unnamed: 0,dataset,model,f2,precision,recall,AP
0,ForcePlate-Open,LogReg,0.363636,0.266667,0.4,0.220882
1,ForcePlate-Open,DecisionTree,0.673077,0.583333,0.7,0.459372
2,ForcePlate-Closed,LogReg,0.671642,0.290323,1.0,0.307787
3,ForcePlate-Closed,DecisionTree,0.490196,0.333333,0.555556,0.43139
4,ZED-Open,LogReg,0.357143,0.25,0.4,0.227394
5,ZED-Open,DecisionTree,0.431034,0.277778,0.5,0.404503
6,ZED-Closed,LogReg,0.614035,0.333333,0.777778,0.425932
7,ZED-Closed,DecisionTree,0.480769,0.3125,0.555556,0.514944
8,Front-Open,LogReg,0.277778,0.214286,0.3,0.219301
9,Front-Open,DecisionTree,0.666667,0.4,0.8,0.332632


#### Using combined eyes open and eyes closed data

In [6]:
def _combine_states_wide(df_open: pd.DataFrame, df_closed: pd.DataFrame) -> pd.DataFrame:
    """
    Inner-join on 'participant name'.
    Targets: ['faller', 'low stability'] kept once.
    All other non-key columns become features with _open/_closed suffixes.
    """
    key = "participant name"
    targets = ["faller", "low stability"]

    feats_open   = [c for c in df_open.columns   if c not in [key] + targets]
    feats_closed = [c for c in df_closed.columns if c not in [key] + targets]

    o = df_open[[key] + targets + feats_open].rename(columns={c: f"{c}_open" for c in feats_open})
    c = df_closed[[key] + targets + feats_closed].rename(columns={c: f"{c}_closed" for c in feats_closed})

    merged = pd.merge(o, c, on=key, how="inner")

    # collapse duplicate targets (take from open side)
    for t in targets:
        tx, ty = f"{t}_x", f"{t}_y"
        if tx in merged.columns or ty in merged.columns:
            if tx in merged.columns:
                merged.rename(columns={tx: t}, inplace=True)
            if ty in merged.columns:
                merged.drop(columns=[ty], inplace=True, errors="ignore")

    open_cols   = [c for c in merged.columns if c.endswith("_open")]
    closed_cols = [c for c in merged.columns if c.endswith("_closed")]
    return merged[[key] + targets + open_cols + closed_cols]


def load_and_combine_all_devices(base_dir: str = "../data/processed/ML_datasets/"):
    """
    Loads all device CSVs from base_dir, drops DROP_COLS, combines open+closed
    per device (wide), and returns 4 DataFrames: (fp, zed, front, back).
    """
    DROP_COLS = ["age", "gender", "height", "weight", "foot length", "foot width"]
    # Force Plate
    df_fp_open   = pd.read_csv(f"{base_dir}oa_averages_open_fp.csv").drop(columns=DROP_COLS, errors="ignore")
    df_fp_closed = pd.read_csv(f"{base_dir}oa_averages_closed_fp.csv").drop(columns=DROP_COLS, errors="ignore")

    # ZED
    df_zed_open   = pd.read_csv(f"{base_dir}oa_averages_open_zed.csv").drop(columns=DROP_COLS, errors="ignore")
    df_zed_closed = pd.read_csv(f"{base_dir}oa_averages_closed_zed.csv").drop(columns=DROP_COLS, errors="ignore")

    # Front Phone
    df_front_open   = pd.read_csv(f"{base_dir}oa_averages_open_front.csv").drop(columns=DROP_COLS, errors="ignore")
    df_front_closed = pd.read_csv(f"{base_dir}oa_averages_closed_front.csv").drop(columns=DROP_COLS, errors="ignore")

    # Back Phone
    df_back_open   = pd.read_csv(f"{base_dir}oa_averages_open_back.csv").drop(columns=DROP_COLS, errors="ignore")
    df_back_closed = pd.read_csv(f"{base_dir}oa_averages_closed_back.csv").drop(columns=DROP_COLS, errors="ignore")

    # Combine wide per device
    df_fp_both    = _combine_states_wide(df_fp_open,   df_fp_closed)
    df_zed_both   = _combine_states_wide(df_zed_open,  df_zed_closed)
    df_front_both = _combine_states_wide(df_front_open,df_front_closed)
    df_back_both  = _combine_states_wide(df_back_open, df_back_closed)

    return df_fp_both, df_zed_both, df_front_both, df_back_both

In [7]:
df_fp_both, df_zed_both, df_front_both, df_back_both = load_and_combine_all_devices()

for df in [df_fp_both, df_zed_both, df_front_both, df_back_both]:
    df.drop(columns=["participant name"], inplace=True)

PROCESSES_COMBINED = [
    ("ForcePlate-Both",   df_fp_both),
    ("ZED-Both",          df_zed_both),
    ("Front-Both",        df_front_both),
    ("Back-Both",         df_back_both)
]

#### Combined - Low stability - f1 score

In [None]:
all_results_both = {}
for name, df_proc in PROCESSES_COMBINED:
    print("="*70)
    print(f"Running: {name}")
    print("="*70)

     # Drop faller column just for this run
    df_proc_tmp = df_proc.drop(columns=["faller"], errors="ignore")

    all_results_both[name] = run_process(name, df_proc_tmp, target_col="low stability", plots_dir="plots_low_stability_both_f1")

Running: ForcePlate-Both

[ForcePlate-Both] LogReg
  F1: 0.457 | Precision: 0.308 | Recall: 0.889
  Most chosen LR params (top 5):
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 13 folds
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l1', 'corr': CorrelationFilter(threshold=0.85)} → 11 folds
    {'clf__C': np.float64(0.01), 'clf__l1_ratio': 0.5, 'clf__penalty': 'elasticnet', 'corr': CorrelationFilter(threshold=0.85)} → 7 folds
    {'clf__C': np.float64(0.01), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 4 folds
    {'clf__C': np.float64(0.01), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l1', 'corr': CorrelationFilter(threshold=0.85)} → 1 folds
  Saved final tuned model → c:\fall_risk_analysis\machine_learning\machine_learning\trained_models\ForcePlate-Both_LogReg_f1.joblib

[ForcePlate-Both] DecisionTree
  F1: 0.385 | Precision: 0.294 | Recall: 0.

In [39]:
# Assuming all_results is your nested dict
rows = []

for dataset, models in all_results_both.items():
    for model_name, metrics in models.items():
        rows.append({
            "dataset": dataset,   # e.g. "ZED - Open"
            "model": model_name,  # e.g. "LogReg" / "DecisionTree"
            "f1": float(metrics["f1"]),
            "precision": float(metrics["precision"]),
            "recall": float(metrics["recall"]),
            "AP": float(metrics["AP"])
        })

df_results_both = pd.DataFrame(rows).round(3)
df_results_both

Unnamed: 0,dataset,model,f1,precision,recall,AP
0,ForcePlate-Both,LogReg,0.457,0.308,0.889,0.265
1,ForcePlate-Both,DecisionTree,0.385,0.294,0.556,0.402
2,ZED-Both,LogReg,0.424,0.292,0.778,0.594
3,ZED-Both,DecisionTree,0.312,0.217,0.556,0.489
4,Front-Both,LogReg,0.343,0.231,0.667,0.214
5,Front-Both,DecisionTree,0.417,0.333,0.556,0.361
6,Back-Both,LogReg,0.4,0.269,0.778,0.255
7,Back-Both,DecisionTree,0.444,0.444,0.444,0.347


#### Combined - Low stability - f2 score

In [14]:
all_results_both_f2 = {}
for name, df_proc in PROCESSES_COMBINED:
    print("="*70)
    print(f"Running: {name}")
    print("="*70)

     # Drop faller column just for this run
    df_proc_tmp = df_proc.drop(columns=["faller"], errors="ignore")

    all_results_both_f2[name] = run_process(name, df_proc_tmp, target_col="low stability", plots_dir="plots_low_stability_both_f2", score_metric="f2")

Running: ForcePlate-Both

[ForcePlate-Both] LogReg
  F1: 0.457 | Precision: 0.308 | Recall: 0.889 | F2: 0.645
  Most chosen LR params (top 5):
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 13 folds
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l1', 'corr': CorrelationFilter(threshold=0.85)} → 11 folds
    {'clf__C': np.float64(0.01), 'clf__l1_ratio': 0.5, 'clf__penalty': 'elasticnet', 'corr': CorrelationFilter(threshold=0.85)} → 7 folds
    {'clf__C': np.float64(0.01), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 4 folds
    {'clf__C': np.float64(0.01), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l1', 'corr': CorrelationFilter(threshold=0.85)} → 1 folds
  Saved final tuned model → c:\fall_risk_analysis\machine_learning\machine_learning\trained_models\ForcePlate-Both_LogReg_f2.joblib

[ForcePlate-Both] DecisionTree
  F1: 0.385 | Precision: 0.294 

In [23]:
# Assuming all_results is your nested dict
rows = []

for dataset, models in all_results_both_f2.items():
    for model_name, metrics in models.items():
        rows.append({
            "dataset": dataset,   # e.g. "ZED - Open"
            "model": model_name,  # e.g. "LogReg" / "DecisionTree"
            "f2": float(metrics["F2"]),
            "precision": float(metrics["precision"]),
            "recall": float(metrics["recall"]),
            "AP": float(metrics["AP"])
        })

all_results_both_f2 = pd.DataFrame(rows)
all_results_both_f2

TypeError: string indices must be integers, not 'str'

In [24]:
all_results_both_f2

Unnamed: 0,dataset,model,f2,precision,recall,AP
0,ForcePlate-Both,LogReg,0.645,0.308,0.889,0.265
1,ForcePlate-Both,DecisionTree,0.472,0.294,0.556,0.402
2,ZED-Both,LogReg,0.583,0.292,0.778,0.594
3,ZED-Both,DecisionTree,0.424,0.217,0.556,0.489
4,Front-Both,LogReg,0.484,0.231,0.667,0.214
5,Front-Both,DecisionTree,0.49,0.333,0.556,0.361
6,Back-Both,LogReg,0.565,0.269,0.778,0.255
7,Back-Both,DecisionTree,0.444,0.444,0.444,0.347


#### Combined - Faller - f1 score

In [None]:
all_results_both_faller = {}
for name, df_proc in PROCESSES_COMBINED:
    print("="*70)
    print(f"Running: {name}")
    print("="*70)

     # Drop faller column just for this run
    df_proc_tmp = df_proc.drop(columns=["low stability"], errors="ignore")

    all_results_both_faller[name] = run_process(name, df_proc_tmp, target_col="faller", plots_dir="plots_faller_both_f1")

Running: ForcePlate-Both

[ForcePlate-Both] LogReg
  F1: 0.000 | Precision: 0.000 | Recall: 0.000
  Most chosen LR params (top 5):
    {'clf__C': np.float64(0.01), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l1', 'corr': CorrelationFilter(threshold=0.85)} → 26 folds
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 8 folds
    {'clf__C': np.float64(0.01), 'clf__l1_ratio': 0.5, 'clf__penalty': 'elasticnet', 'corr': CorrelationFilter()} → 2 folds
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l1', 'corr': CorrelationFilter(threshold=0.85)} → 1 folds
  Saved final tuned model → c:\fall_risk_analysis\machine_learning\machine_learning\trained_models\ForcePlate-Both_LogReg_f1.joblib

[ForcePlate-Both] DecisionTree
  F1: 0.133 | Precision: 0.091 | Recall: 0.250
  Most chosen DT params (top 5):
    {'clf__criterion': 'gini', 'clf__max_depth': 2, 'clf__min_samples_leaf': 4} → 34 folds
    {'clf__criter

In [8]:
rows = []

for dataset, models in all_results_both_faller.items():
    for model_name, metrics in models.items():
        rows.append({
            "dataset": dataset,   # e.g. "ZED - Open"
            "model": model_name,  # e.g. "LogReg" / "DecisionTree"
            "f1": float(metrics["f1"]),
            "precision": float(metrics["precision"]),
            "recall": float(metrics["recall"]),
            "AP": float(metrics["AP"])
        })

df_results_both = pd.DataFrame(rows).round(3)
df_results_both

Unnamed: 0,dataset,model,f1,precision,recall,AP
0,ForcePlate-Both,LogReg,0.0,0.0,0.0,0.073
1,ForcePlate-Both,DecisionTree,0.133,0.091,0.25,0.331
2,ZED-Both,LogReg,0.143,0.083,0.5,0.088
3,ZED-Both,DecisionTree,0.222,0.2,0.25,0.206
4,Front-Both,LogReg,0.0,0.0,0.0,0.078
5,Front-Both,DecisionTree,0.333,0.25,0.5,0.197
6,Back-Both,LogReg,0.0,0.0,0.0,0.076
7,Back-Both,DecisionTree,0.0,0.0,0.0,0.108


#### Combined - Faller - f2 score

In [8]:
all_results_both_faller_f2 = {}
for name, df_proc in PROCESSES_COMBINED:
    print("="*70)
    print(f"Running: {name}")
    print("="*70)

     # Drop faller column just for this run
    df_proc_tmp = df_proc.drop(columns=["low stability"], errors="ignore")

    all_results_both_faller_f2[name] = run_process(name, df_proc_tmp, target_col="faller", plots_dir="plots_faller_both_f2", score_metric="f2")

Running: ForcePlate-Both

[ForcePlate-Both] LogReg
  F1: 0.000 | Precision: 0.000 | Recall: 0.000 | F2: 0.000
  Most chosen LR params (top 5):
    {'clf__C': np.float64(0.01), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l1', 'corr': CorrelationFilter(threshold=0.85)} → 26 folds
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 8 folds
    {'clf__C': np.float64(0.01), 'clf__l1_ratio': 0.5, 'clf__penalty': 'elasticnet', 'corr': CorrelationFilter()} → 2 folds
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l1', 'corr': CorrelationFilter(threshold=0.85)} → 1 folds
  Saved final tuned model → c:\fall_risk_analysis\machine_learning\machine_learning\trained_models\ForcePlate-Both_LogReg_f2.joblib

[ForcePlate-Both] DecisionTree
  F1: 0.133 | Precision: 0.091 | Recall: 0.250 | F2: 0.185
  Most chosen DT params (top 5):
    {'clf__criterion': 'gini', 'clf__max_depth': 2, 'clf__min_samples_leaf': 4} → 34

In [9]:
rows = []

for dataset, models in all_results_both_faller_f2.items():
    for model_name, metrics in models.items():
        rows.append({
            "dataset": dataset,   # e.g. "ZED - Open"
            "model": model_name,  # e.g. "LogReg" / "DecisionTree"
            "f2": float(metrics["F2"]),
            "precision": float(metrics["precision"]),
            "recall": float(metrics["recall"]),
            "AP": float(metrics["AP"])
        })

all_results_both_faller_f2 = pd.DataFrame(rows).round(3)
all_results_both_faller_f2

Unnamed: 0,dataset,model,f2,precision,recall,AP
0,ForcePlate-Both,LogReg,0.0,0.0,0.0,0.073
1,ForcePlate-Both,DecisionTree,0.185,0.091,0.25,0.331
2,ZED-Both,LogReg,0.25,0.083,0.5,0.088
3,ZED-Both,DecisionTree,0.238,0.2,0.25,0.206
4,Front-Both,LogReg,0.0,0.0,0.0,0.078
5,Front-Both,DecisionTree,0.417,0.25,0.5,0.197
6,Back-Both,LogReg,0.0,0.0,0.0,0.076
7,Back-Both,DecisionTree,0.0,0.0,0.0,0.108


#### Faller - f1 score

In [None]:
all_results_fallers = {}
for name, df_proc in PROCESSES:
    print("="*70)
    print(f"Running: {name}")
    print("="*70)

    # Drop low stability column just for this run
    df_proc_tmp = df_proc.drop(columns=["low stability"], errors="ignore")
    all_results_fallers[name] = run_process(name, df_proc_tmp, target_col="faller", plots_dir="plots_faller_f1")

Running: ForcePlate-Open

[ForcePlate-Open] LogReg
  F1: 0.667 | Precision: 0.571 | Recall: 0.800
  Most chosen LR params (top 5):
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l1', 'corr': CorrelationFilter(threshold=0.85)} → 24 folds
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 8 folds
    {'clf__C': np.float64(0.1), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 5 folds
    {'clf__C': np.float64(1.0), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l1', 'corr': CorrelationFilter(threshold=0.85)} → 1 folds
  Saved final tuned model → c:\fall_risk_analysis\machine_learning\machine_learning\trained_models\ForcePlate-Open_LogReg_f1.joblib

[ForcePlate-Open] DecisionTree
  F1: 0.353 | Precision: 0.250 | Recall: 0.600
  Most chosen DT params (top 5):
    {'clf__criterion': 'gini', 'clf__max_depth': 2, 'clf__min_samples_leaf': 1} → 33 folds
    {'clf__cr

In [13]:
# Assuming all_results is your nested dict
rows = []

for dataset, models in all_results_fallers.items():
    for model_name, metrics in models.items():
        rows.append({
            "dataset": dataset,   # e.g. "ZED - Open"
            "model": model_name,  # e.g. "LogReg" / "DecisionTree"
            "f1": float(metrics["f1"]),
            "precision": float(metrics["precision"]),
            "recall": float(metrics["recall"]),
            "AP": float(metrics["AP"])
        })

df_results_faller = pd.DataFrame(rows)
df_results_faller

Unnamed: 0,dataset,model,f1,precision,recall,AP
0,ForcePlate-Open,LogReg,0.666667,0.571429,0.8,0.479649
1,ForcePlate-Open,DecisionTree,0.352941,0.25,0.6,0.352632
2,ForcePlate-Closed,LogReg,0.0,0.0,0.0,0.072801
3,ForcePlate-Closed,DecisionTree,0.266667,0.181818,0.5,0.304054
4,ZED-Open,LogReg,0.615385,0.5,0.8,0.351306
5,ZED-Open,DecisionTree,0.222222,0.153846,0.4,0.238947
6,ZED-Closed,LogReg,0.121212,0.068966,0.5,0.093301
7,ZED-Closed,DecisionTree,0.2,0.166667,0.25,0.164414
8,Front-Open,LogReg,0.75,1.0,0.6,0.72735
9,Front-Open,DecisionTree,0.6,0.6,0.6,0.412632


#### Faller - f2 score

In [18]:
all_results_fallers_f2 = {}
for name, df_proc in PROCESSES:
    print("="*70)
    print(f"Running: {name}")
    print("="*70)

    # Drop low stability column just for this run
    df_proc_tmp = df_proc.drop(columns=["low stability"], errors="ignore")
    all_results_fallers_f2[name] = run_process(name, df_proc_tmp, target_col="faller", plots_dir="plots_faller_f2", score_metric="f2")

Running: ForcePlate-Open

[ForcePlate-Open] LogReg
  F1: 0.667 | Precision: 0.571 | Recall: 0.800 | F2: 0.741
  Most chosen LR params (top 5):
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l1', 'corr': CorrelationFilter(threshold=0.85)} → 24 folds
    {'clf__C': np.float64(0.001), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 8 folds
    {'clf__C': np.float64(0.1), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l2', 'corr': CorrelationFilter(threshold=0.85)} → 5 folds
    {'clf__C': np.float64(1.0), 'clf__l1_ratio': 0.5, 'clf__penalty': 'l1', 'corr': CorrelationFilter(threshold=0.85)} → 1 folds
  Saved final tuned model → c:\fall_risk_analysis\machine_learning\machine_learning\trained_models\ForcePlate-Open_LogReg_f2.joblib

[ForcePlate-Open] DecisionTree
  F1: 0.353 | Precision: 0.250 | Recall: 0.600 | F2: 0.469
  Most chosen DT params (top 5):
    {'clf__criterion': 'gini', 'clf__max_depth': 2, 'clf__min_samples_leaf': 1} 

In [19]:
# Assuming all_results is your nested dict
rows = []

for dataset, models in all_results_fallers_f2.items():
    for model_name, metrics in models.items():
        rows.append({
            "dataset": dataset,   # e.g. "ZED - Open"
            "model": model_name,  # e.g. "LogReg" / "DecisionTree"
            "f2": float(metrics["F2"]),
            "precision": float(metrics["precision"]),
            "recall": float(metrics["recall"]),
            "AP": float(metrics["AP"])
        })

all_results_fallers_f2 = pd.DataFrame(rows)
all_results_fallers_f2

Unnamed: 0,dataset,model,f2,precision,recall,AP
0,ForcePlate-Open,LogReg,0.740741,0.571429,0.8,0.479649
1,ForcePlate-Open,DecisionTree,0.46875,0.25,0.6,0.352632
2,ForcePlate-Closed,LogReg,0.0,0.0,0.0,0.072801
3,ForcePlate-Closed,DecisionTree,0.37037,0.181818,0.5,0.304054
4,ZED-Open,LogReg,0.714286,0.5,0.8,0.351306
5,ZED-Open,DecisionTree,0.30303,0.153846,0.4,0.238947
6,ZED-Closed,LogReg,0.222222,0.068966,0.5,0.093301
7,ZED-Closed,DecisionTree,0.227273,0.166667,0.25,0.164414
8,Front-Open,LogReg,0.652174,1.0,0.6,0.72735
9,Front-Open,DecisionTree,0.6,0.6,0.6,0.412632
