In [None]:
# A quick look at performance on each subject's features in isolation.
# This gives us a sense of whether any subject is an extreme outlier.

In [52]:
from com_hom_emg.data import get_per_subj_data
from sklearn.linear_model import LogisticRegression as LogR
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from com_hom_emg.parallel_models import ParallelA
from sklearn.model_selection import cross_validate, GroupKFold
from sklearn.metrics import make_scorer
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [53]:
def acc_2d(y_true, y_pred):
    """Exact-match accuracy for labels with more than one column.

    A sample counts as correct only when every entry along the last axis of
    ``y_pred`` equals the corresponding entry of ``y_true``.

    Args:
        y_true: Array-like of ground-truth labels (nested lists are accepted).
        y_pred: Array-like of predicted labels, expected to have the same
            shape as ``y_true``.

    Returns:
        The fraction of samples for which all label entries match.
    """
    # Coerce first: comparing two plain Python lists with ``==`` produces a
    # single bool, which has no ``.all`` method.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return (y_true == y_pred).all(-1).mean()


# Wrap acc_2d so it can be passed as ``scoring=`` to cross_validate;
# greater_is_better=True marks larger values as better scores.
scorer = make_scorer(acc_2d, greater_is_better=True)


def shuffle_together(*arrays, rng=None):
    """Shuffle several equal-length arrays with one shared permutation.

    Row ``i`` of every returned array comes from the same original position,
    so paired data (e.g. features and labels) stay aligned.

    Args:
        *arrays: Arrays that support ``len`` and integer-array indexing.
        rng: Optional object exposing ``permutation(n)``, such as a
            ``numpy.random.Generator`` or ``RandomState``. When ``None`` the
            global ``np.random`` state is used, as before.

    Returns:
        A list with one shuffled array per input, in input order. An empty
        list when called without arrays.

    Raises:
        AssertionError: If the arrays do not all have the same length.
    """
    if not arrays:
        # Nothing to shuffle; previously this raised IndexError on arrays[0].
        return []
    n_items = len(arrays[0])
    assert all(len(x) == n_items for x in arrays), "all arrays must have the same length"
    permute = np.random.permutation if rng is None else rng.permutation
    p = permute(n_items)
    return [x[p] for x in arrays]

In [54]:
# Load the per-subject data. The evaluation loop further down iterates this
# as a mapping: subject -> dict with "calibration", "held_singles",
# "pulsed_singles" and "doubles" entries, each holding "features" and "2d_labels".
per_subj_data = get_per_subj_data()

In [64]:
# Per-subject baseline table. Every candidate model is cross-validated on one
# subject's data alone and reported as "mean ± std" of the exact-match score.
#
# NOTE: fold membership comes from shuffle_together, which draws from the
# global NumPy RNG. Call np.random.seed(...) before running this cell if the
# printed numbers need to be reproducible.

# Data parts pooled for every subject, in concatenation order.
part_names = ["calibration", "held_singles", "pulsed_singles", "doubles"]

# Column title -> zero-argument factory returning a fresh, unfitted estimator.
# ParallelA is given two estimators, so each factory is called twice per
# evaluation (presumably one estimator per label column; confirm in
# com_hom_emg.parallel_models).
model_factories = {
    "LDA raw": lambda: LDA(),
    "LDA scaled": lambda: make_pipeline(StandardScaler(), LDA()),
    "LogR raw": lambda: LogR(class_weight="balanced", n_jobs=-1),
    "LogR scaled": lambda: make_pipeline(StandardScaler(), LogR(class_weight="balanced", n_jobs=-1)),
}

n_folds = 5


def format_row(cells):
    """Left-justify every cell to 20 characters and join the cells with tabs."""
    return "\t".join(format(cell, "20s") for cell in cells)


def build_cv_arrays(conts, n_folds):
    """Pool one subject's parts into (features, labels, groups) arrays.

    Each part is shuffled on its own and its samples are then dealt
    round-robin into ``n_folds`` groups, so every fold receives an even share
    of every part.
    """
    features, labels, groups = [], [], []
    for part in [conts[name] for name in part_names]:
        x, y = shuffle_together(part["features"], part["2d_labels"])
        features.append(x)
        labels.append(y)
        groups.append(np.arange(len(x)) % n_folds)
    return np.concatenate(features), np.concatenate(labels), np.concatenate(groups)


def cv_summary(make_model, features, labels, groups, n_folds):
    """Cross-validate ParallelA built from two fresh estimators; return 'mean ± std'."""
    model = ParallelA(make_model(), make_model())
    test_scores = cross_validate(
        model, X=features, y=labels, scoring=scorer, cv=GroupKFold(n_splits=n_folds), groups=groups, n_jobs=-1
    )["test_score"]
    return f"{test_scores.mean():.2f} ± {test_scores.std():.2f}"


header = format_row(["Subject", *model_factories])
# Not used in this cell; kept because cells outside this view may rely on it
# and because drawing it advances the global RNG before the shuffles below.
P = np.random.randn(7696, 64)
print(header)
for subj, conts in per_subj_data.items():
    features, labels, groups = build_cv_arrays(conts, n_folds)
    results = [
        cv_summary(make_model, features, labels, groups, n_folds) for make_model in model_factories.values()
    ]
    print(format_row([subj, *results]))

Subject             	LDA raw             	LDA scaled          	LogR raw            	LogR scaled         
Sprint7_Subj_001    	0.67 ± 0.05         	0.67 ± 0.05         	0.49 ± 0.03         	0.77 ± 0.03         
Sprint7_Subj_002    	0.64 ± 0.04         	0.64 ± 0.04         	0.41 ± 0.02         	0.66 ± 0.05         
Sprint7_Subj_003    	0.50 ± 0.03         	0.50 ± 0.03         	0.38 ± 0.03         	0.57 ± 0.02         
Sprint7_Subj_004    	0.51 ± 0.04         	0.51 ± 0.04         	0.31 ± 0.02         	0.57 ± 0.04         
Sprint7_Subj_005    	0.58 ± 0.01         	0.58 ± 0.01         	0.44 ± 0.03         	0.67 ± 0.02         
Sprint7_Subj_007    	0.69 ± 0.02         	0.69 ± 0.02         	0.61 ± 0.04         	0.78 ± 0.01         
Sprint7_Subj_008    	0.66 ± 0.03         	0.66 ± 0.03         	0.50 ± 0.03         	0.76 ± 0.02         
Sprint7_Subj_009    	0.57 ± 0.03         	0.57 ± 0.03         	0.36 ± 0.01         	0.66 ± 0.03         
Sprint7_Subj_010    	0.46 ± 0.01         	0.46 ± 0.01  