In [None]:
import numpy as np
import pandas as pd
import pickle

import cebra

from data_utils.data_utils import AUDIO_BLOCKS

# Load extracted features and user labels
This example uses EEG + ECG features.

In [None]:
# Read the behavioural labels pkl file.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only point these paths at files you trust.
with open("./data/replace_me.pkl", "rb") as fp:
    behavioral_labels = pickle.load(fp)

# Pull the label collections and thresholds out of the loaded object by key.
valence_labels = behavioral_labels["valence_labels"]
arousal_labels = behavioral_labels["arousal_labels"]
label_thresholds = behavioral_labels["label_thresholds"]

# Each feature pickle is indexed by key; keep only the feature entry.
with open('./data/eeg_features.pkl', 'rb') as fp:
    eeg_features = pickle.load(fp)['eeg_features']

with open('./data/ecg_features.pkl', 'rb') as fp:
    ecg_features = pickle.load(fp)['ecg_features']

# Model training

## Training dataset prep

In [None]:
from training_utils.dataset import get_consecutive_validation_indexes
from training_utils.dataset import DatasetBuilder

# Cross-validation layout knobs.
n_trial_per_block = 13
n_step_trial = 3

# The number of labels of the first entry is reused for every size argument below.
n_samples = len(valence_labels[0])

# One validation index group per start offset 1, 1 + n_step_trial, ...
# NOTE(review): the meaning of the literal `1` passed as the third argument is
# defined by get_consecutive_validation_indexes — confirm against that helper.
fold_starts = range(1, n_trial_per_block, n_step_trial)
val_indexes = [
    get_consecutive_validation_indexes(
        n_samples, len(AUDIO_BLOCKS), 1, start, n_step_trial
    )
    for start in fold_starts
]
print(len(val_indexes), val_indexes)

dataset_builder = DatasetBuilder(n_samples, val_indexes_group=val_indexes)
n_samples

## Training helper functions

In [None]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import StandardScaler
from training_utils.embedding import get_embeddings
from sklearn.ensemble import VotingClassifier

class EEGClassifier(BaseEstimator, ClassifierMixin):
    """KNN classifier that only uses the leading ``output_dim`` columns of X.

    It is meant to receive a feature matrix in which the EEG embedding comes
    first and another modality's embedding is stacked after it; everything
    past column ``output_dim`` is ignored.

    Parameters
    ----------
    output_dim : int
        Number of leading columns of X handed to the decoder.
    n_neighbors : int
        Passed to ``cebra.KNNDecoder`` (cosine metric).
    """

    def __init__(self, output_dim, n_neighbors):
        self.output_dim = output_dim
        self.n_neighbors = n_neighbors
        self.decoder = cebra.KNNDecoder(n_neighbors=n_neighbors, metric="cosine")

    def _leading_columns(self, X):
        """Return the first ``output_dim`` columns of X."""
        return X[:, : self.output_dim]

    def fit(self, X, y):
        """Fit the decoder on the leading columns of X and return ``self``."""
        leading = self._leading_columns(X)
        self.decoder.fit(leading, y)
        return self

    def predict(self, X):
        """Predict labels from the leading columns of X."""
        leading = self._leading_columns(X)
        return self.decoder.predict(leading)

    def predict_proba(self, X):
        """Return class probabilities from the decoder's underlying ``knn`` model."""
        leading = self._leading_columns(X)
        return self.decoder.knn.predict_proba(leading)


class ECGClassifier(BaseEstimator, ClassifierMixin):
    """KNN classifier that skips the first ``eeg_output_dim`` columns of X.

    Counterpart of ``EEGClassifier``: it is meant to receive the same stacked
    feature matrix and uses only the columns that follow the EEG embedding.

    Parameters
    ----------
    eeg_output_dim : int
        Number of leading (EEG) columns to skip. Note this is the width of the
        EEG part, not of the part this classifier actually consumes.
    n_neighbors : int
        Passed to ``cebra.KNNDecoder`` (cosine metric).
    """

    def __init__(self, eeg_output_dim, n_neighbors):
        self.eeg_output_dim = eeg_output_dim
        self.n_neighbors = n_neighbors
        self.decoder = cebra.KNNDecoder(n_neighbors=n_neighbors, metric="cosine")

    def _trailing_columns(self, X):
        """Return every column of X from index ``eeg_output_dim`` onwards."""
        return X[:, self.eeg_output_dim :]

    def fit(self, X, y):
        """Fit the decoder on the trailing columns of X and return ``self``."""
        trailing = self._trailing_columns(X)
        self.decoder.fit(trailing, y)
        return self

    def predict(self, X):
        """Predict labels from the trailing columns of X."""
        trailing = self._trailing_columns(X)
        return self.decoder.predict(trailing)

    def predict_proba(self, X):
        """Return class probabilities from the decoder's underlying ``knn`` model."""
        trailing = self._trailing_columns(X)
        return self.decoder.knn.predict_proba(trailing)


def _extract_embedding_features(
    train_data, val_data, train_labels, method, output_dim, max_iterations
):
    """Standardise one modality's features and optionally reduce their dimension.

    The scaler is fitted on ``train_data`` only and then applied to
    ``val_data``, so no validation statistics leak into training.

    Parameters
    ----------
    train_data, val_data
        Feature matrices accepted by ``StandardScaler``.
    train_labels
        Forwarded to ``get_embeddings`` as ``train_labels``.
    method : str
        ``"NONE"`` returns the standardised features unchanged. ``"PCA"`` calls
        ``get_embeddings`` with ``use_pca=True``; any other value calls it with
        ``use_pca=False`` (presumably CEBRA — confirm in ``get_embeddings``).
    output_dim
        Forwarded to ``get_embeddings`` as ``out_dim``.
    max_iterations
        Forwarded to ``get_embeddings`` as ``max_iterations``.

    Returns
    -------
    tuple
        ``(train_embedding, val_embedding)``.
    """
    scaler = StandardScaler()
    scaled_train = scaler.fit_transform(train_data)
    scaled_val = scaler.transform(val_data)

    # No reduction requested: the standardised features are the embedding.
    if method == "NONE":
        return scaled_train, scaled_val

    reduced_train, reduced_val = get_embeddings(
        train_data=scaled_train,
        val_data=scaled_val,
        train_labels=train_labels,
        use_pca=(method == "PCA"),
        out_dim=output_dim,
        num_hidden_units=256,
        max_iterations=max_iterations,
    )
    return reduced_train, reduced_val


def run_knn_classifier(
    dataset,
    modality_method,
    threshold,
    param_config,
    modality,
    voting_weights=None,
    max_iteration=10,
):
    """Fit a soft-voting pair of KNN classifiers on every fold and predict.

    For each fold the EEG features (index 0) and the second modality's features
    (index 1) are embedded separately, stacked column-wise, and fed to a
    ``VotingClassifier`` whose two members each read their own column range.

    Parameters
    ----------
    dataset
        Iterable of ``(train_data, train_labels, val_data, val_labels)`` folds;
        ``train_data`` / ``val_data`` are indexable with 0 (EEG) and 1 (other
        modality). ``val_labels`` is not used here.
    modality_method : str
        Reduction method passed to ``_extract_embedding_features`` for both
        modalities.
    threshold
        Training labels below this value become class 0, all others class 1.
    param_config : dict
        Must contain the keys ``"EEG"`` and ``modality``, each mapping to a dict
        with ``"dim"`` and ``"n_neighbors"``.
    modality : str
        Key of the second modality in ``param_config``; also used as the name
        of the second voting estimator.
    voting_weights
        Forwarded to ``VotingClassifier(weights=...)``.
    max_iteration : int
        Forwarded to ``_extract_embedding_features``.

    Returns
    -------
    tuple
        ``(y_pred, y_pred_cat)``. ``y_pred_cat`` holds one array of predicted
        classes per fold. NOTE(review): ``y_pred`` is never appended to and is
        always returned as an empty list.
    """
    eeg_param_config = param_config["EEG"]
    modality_param_config = param_config[modality]

    y_pred, y_pred_cat = [], []
    for train_data, train_labels, val_data, _ in dataset:
        # 1. Embed each modality independently.
        eeg_train, eeg_val = _extract_embedding_features(
            train_data[0],
            val_data[0],
            train_labels,
            modality_method,
            eeg_param_config["dim"],
            max_iteration,
        )
        phy_train, phy_val = _extract_embedding_features(
            train_data[1],
            val_data[1],
            train_labels,
            modality_method,
            modality_param_config["dim"],
            max_iteration,
        )

        # 2. Stack EEG columns first, then the other modality's columns.
        fused_train = np.hstack((eeg_train, phy_train))
        fused_val = np.hstack((eeg_val, phy_val))

        # 3. Binarise the training labels around the threshold.
        binary_train_labels = np.array(
            [0 if value < threshold else 1 for value in train_labels]
        )

        # 4. Train the decoder on training embedding and labels.
        # Both members are told the EEG width: the first keeps the columns
        # before it, the second keeps the columns from it onwards.
        eeg_width = eeg_train.shape[-1]
        eeg_member = EEGClassifier(eeg_width, eeg_param_config["n_neighbors"])
        modality_member = ECGClassifier(
            eeg_width, modality_param_config["n_neighbors"]
        )
        decoder = VotingClassifier(
            estimators=[("EEG", eeg_member), (modality, modality_member)],
            voting="soft",
            weights=voting_weights,
        )
        decoder.fit(fused_train, binary_train_labels)

        y_pred_cat.append(decoder.predict(fused_val))

    return y_pred, y_pred_cat

## Run cross validation with decision fusion
This section runs a soft-voting KNN classifier on EEG and ECG features.
See eeg_feature_analysis.ipynb for an evaluation example.

In [None]:
from features.labels import get_label_category

# Per-(subject, label type) results, collected column-wise and turned into a
# DataFrame at the end of the cell.
subject_accuracy_summary = {
    "subject": [],
    "label_type": [],
    #'feature_fusion_n_neighbor': [],
    "eeg_weight": [],
    "ecg_weight": [],
    "cv_mean_score": [],
}

### CHANGE ME
modality_method = "PCA"
modality = "ECG"
# run_knn_classifier looks up param_config["EEG"], so the key must be spelled
# exactly "EEG" (it was "EGG", which raised KeyError).
all_modalities = ["EEG", modality]

# All parameters below can be fine-tuned for better performance
esimator_param = {
    k: {
        "dim": 12,  # dim after PCA/CEBRA reduction
        "n_neighbors": 2,  # number of neighbors in KNN
    }
    for k in all_modalities
}
# Voting weight of each modality, in (EEG, <modality>) order.
voting_weights = (1, 1)
#########

for idx in range(len(ecg_features)):
    print("decoding process...", idx)

    v_thred, a_thred = label_thresholds[idx]
    eegf, physiof = eeg_features[idx], ecg_features[idx]
    for lt in ["valence", "arousal"]:
        labels = valence_labels[idx] if lt == "valence" else arousal_labels[idx]
        thred = v_thred if lt == "valence" else a_thred

        # The folds are iterated twice below (once for the true labels, once
        # inside run_knn_classifier); materialise them so a generator is not
        # silently exhausted by the first pass.
        # NOTE(review): dataset_builder is passed to its own method as the
        # third argument — confirm this matches DatasetBuilder.train_test_split.
        dataset = list(
            dataset_builder.train_test_split([eegf, physiof], labels, dataset_builder)
        )

        val_true_cat = [
            get_label_category(val_labels, lt, v_thred, a_thred)
            for _, _, _, val_labels in dataset
        ]

        # run_knn_classifier takes (dataset, method, threshold, param_config,
        # modality, ...) and returns (y_pred, y_pred_cat); the ground truth is
        # not an argument, so accuracy is computed here.
        _, val_pred_cat = run_knn_classifier(
            dataset,
            modality_method,
            thred,
            esimator_param,
            modality,
            voting_weights=voting_weights,
        )
        assert len(val_pred_cat) == len(val_true_cat), "fold count mismatch"

        # Per-fold accuracy, then the mean over folds.
        # Assumes get_label_category returns 0/1 categories aligned with the
        # predictions of each fold — TODO confirm.
        fold_acc = [
            np.mean(np.asarray(pred) == np.asarray(true))
            for pred, true in zip(val_pred_cat, val_true_cat)
        ]
        mean_acc = float(np.mean(fold_acc)) if fold_acc else float("nan")

        # No weight search is performed in this notebook: the recorded weights
        # are simply the configured voting_weights.
        best_param = {"weights": voting_weights}

        print(mean_acc, best_param)
        subject_accuracy_summary["subject"].append(idx)
        subject_accuracy_summary["cv_mean_score"].append(mean_acc)
        subject_accuracy_summary["label_type"].append(lt)
        subject_accuracy_summary["eeg_weight"].append(best_param["weights"][0])
        subject_accuracy_summary["ecg_weight"].append(best_param["weights"][1])

subject_accuracy_summary = pd.DataFrame(subject_accuracy_summary)
subject_accuracy_summary["subject"] = subject_accuracy_summary["subject"].astype(int)