## Comparison of results

This notebook compares a classifier built from pretrained HuBERT features and
regularized logistic regression with the results reported in the paper `Transfer Learning with Real-World Nonverbal Vocalizations from Minimally Speaking Individuals`. We subsample our data to match
the methodology of the paper, then run our classifier and report the cross-validated unweighted F1 score for each participant. For participants `P01` and `P16`, the model here performs substantially better than the models in the paper.
For the other participants, the results are comparable.

In [2]:
import os
from pathlib import Path
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from skopt import BayesSearchCV
from tqdm.notebook import tqdm
import torch

# Project helpers live in a "helpers" directory next to the current working
# directory's parent; append it to sys.path so the import below resolves.
# NOTE(review): the path is derived from os.getcwd(), so this presumably
# expects the notebook to be launched from its own folder — confirm.
helper_dir = Path(os.getcwd()).parent / "helpers"
sys.path.append(str(helper_dir))
from helpers import get_hubert_features, unweighted_f1

In [3]:
# Labels to classify for each participant. The list for a participant is passed
# as `label_list` to `get_hubert_features`, and the dict's insertion order is
# the order in which participants are processed.
classes = dict(
    [
        ("P01", ["delighted", "dysregulated", "request", "frustrated"]),
        ("P02", ["delighted", "social", "frustrated", "selftalk"]),
        ("P03", ["dysregulated", "request", "frustrated", "selftalk"]),
        ("P05", ["dysregulated", "delighted", "frustrated", "selftalk"]),
        ("P06", ["delighted", "request", "selftalk", "frustrated", "yes"]),
        ("P08", ["frustrated", "delighted", "social", "selftalk", "request"]),
        ("P11", ["delighted", "frustrated", "selftalk", "social"]),
        ("P16", ["delighted", "frustrated", "selftalk", "social"]),
    ]
)

In [12]:
# Settings shared by every participant's hyperparameter search and evaluation.
N_SPLITS = 10  # folds for both the tuning CV and the evaluation CV
N_SEARCH_ITER = 50  # number of C values tried by the Bayesian search
SEARCH_SEED = 12345  # seeds the tuning folds AND the Bayesian optimizer
EVAL_SEED = 1234  # separate shuffle for the out-of-sample folds
C_SEARCH_SPACE = (1e-4, 5, "log-uniform")

# Maps participant id -> unweighted F1 of the cross-validated predictions.
res = {}
for participant, participant_labels in classes.items():
    print(participant)
    hubert_features = get_hubert_features(
        participant, label_list=participant_labels, subsample=True
    )
    X_tr, X_te = hubert_features["X_tr"], hubert_features["X_te"]
    y_tr, y_te = hubert_features["y_tr"], hubert_features["y_te"]
    label_list = hubert_features["label_list"]

    # The helper's train/test split is not used here: both parts are pooled and
    # re-split by K-fold cross-validation below.
    X_subset = torch.cat([X_tr, X_te])
    y_subset = torch.cat([y_tr, y_te])
    print(pd.Series([label_list[n] for n in y_subset]).value_counts())

    # Tune the regularization strength C with a seeded Bayesian search so the
    # selected value is reproducible from run to run.
    est = make_pipeline(StandardScaler(), LogisticRegression(max_iter=10**6))
    opt = BayesSearchCV(
        est,
        {
            "logisticregression__C": C_SEARCH_SPACE,
        },
        n_iter=N_SEARCH_ITER,
        cv=KFold(n_splits=N_SPLITS, shuffle=True, random_state=SEARCH_SEED),
        scoring="f1_macro",
        random_state=SEARCH_SEED,
    )
    opt.fit(X_subset, y_subset)
    C = opt.best_params_["logisticregression__C"]
    print(f"{C=}")

    # Out-of-sample performance
    # NOTE(review): C was selected using every sample that is scored below, so
    # this is not a nested cross-validation and the estimate may be optimistic.
    est = make_pipeline(
        StandardScaler(),
        LogisticRegression(C=C, max_iter=10**6),
    )
    oos_pred_prob = cross_val_predict(
        est,
        X_subset,
        y_subset,
        cv=KFold(
            n_splits=N_SPLITS,
            shuffle=True,
            random_state=EVAL_SEED,
        ),
        method="predict_proba",
    )
    # The column index is used directly as the predicted label; this assumes the
    # labels in y_subset are the contiguous integers 0..k-1 — TODO confirm.
    oos_pred = oos_pred_prob.argmax(1)
    score = unweighted_f1(y_subset, oos_pred)
    print("Out-of-sample f1 score:", score)
    res[participant] = score

P01


  0%|          | 0/124 [00:00<?, ?it/s]

delighted       31
dysregulated    31
frustrated      31
request         31
Name: count, dtype: int64
C=0.057336309409118366
Out-of-sample f1 score: 0.7522
P02


  0%|          | 0/136 [00:00<?, ?it/s]

delighted     34
frustrated    34
selftalk      34
social        34
Name: count, dtype: int64
C=0.011337786404440388
Out-of-sample f1 score: 0.4772
P03


  0%|          | 0/100 [00:00<?, ?it/s]

dysregulated    25
frustrated      25
request         25
selftalk        25
Name: count, dtype: int64
C=0.004744372218162122
Out-of-sample f1 score: 0.6072
P05


  0%|          | 0/248 [00:00<?, ?it/s]

delighted       62
dysregulated    62
frustrated      62
selftalk        62
Name: count, dtype: int64
C=0.0013303630764645157
Out-of-sample f1 score: 0.5729
P06


  0%|          | 0/110 [00:00<?, ?it/s]

delighted     22
frustrated    22
request       22
selftalk      22
yes           22
Name: count, dtype: int64
C=0.0010494097133419778
Out-of-sample f1 score: 0.3214
P08


  0%|          | 0/165 [00:00<?, ?it/s]

delighted     33
frustrated    33
request       33
selftalk      33
social        33
Name: count, dtype: int64
C=0.04456512088719514
Out-of-sample f1 score: 0.3558
P11


  0%|          | 0/108 [00:00<?, ?it/s]

delighted     27
frustrated    27
selftalk      27
social        27
Name: count, dtype: int64
C=0.011981987662126424
Out-of-sample f1 score: 0.4815
P16


  0%|          | 0/188 [00:00<?, ?it/s]

delighted     47
frustrated    47
selftalk      47
social        47
Name: count, dtype: int64
C=0.014083129952371255
Out-of-sample f1 score: 0.7217


In [13]:
# Per-participant out-of-sample F1 scores collected in `res`, shown as a table.
f1_by_participant = pd.Series(res)
print(f1_by_participant)

P01    0.7522
P02    0.4772
P03    0.6072
P05    0.5729
P06    0.3214
P08    0.3558
P11    0.4815
P16    0.7217
dtype: float64
