In [1]:
import pandas as pd
import numpy as np

# 1. load
df = pd.read_csv('signal.csv')

# 2. integer‐divide the row index by 48 000 to get a group ID
group_id = np.arange(len(df)) // 48000

# 3. group & aggregate
#    – numeric columns (e.g. Tracheal, Mic) will be averaged
#    – if you have binary labels (nasal, resp) you probably want max()
agg = df.groupby(group_id).agg({
    'Tracheal': 'mean',
    'Mic':      'mean',
    'nasal':    'max',
    'resp':     'max',
})

# 4. (optional) reset the index so it’s back to 0,1,2…
agg = agg.reset_index(drop=True)

# 5. save
agg.to_csv('signal_compressed.csv', index=False)


In [3]:
from sklearn.multioutput import MultiOutputClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import xgboost as xgb
import pandas as pd

data = pd.read_csv("signal_compressed.csv")
X = data.drop(['nasal','resp'], axis=1)
y = data[['nasal','resp']]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

base_clf = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')
model = MultiOutputClassifier(base_clf)

model.fit(X_train, y_train)

# predict
y_pred      = model.predict(X_test)        # shape (n_samples, 2)
y_pred_prob = model.predict_proba(X_test)  # list of two (n_samples,2) arrays

# split predictions
y_n_pred      = y_pred[:,0]
y_n_pred_prob = y_pred_prob[0][:,1]
y_r_pred      = y_pred[:,1]
y_r_pred_prob = y_pred_prob[1][:,1]

# metrics
for name, y_true, y_p, y_pp in [
    ('Nasal',  y_test['nasal'], y_n_pred, y_n_pred_prob),
    ('Respir', y_test['resp'],  y_r_pred, y_r_pred_prob),
]:
    print(f"\n=== {name} ===")
    print("Accuracy: ", accuracy_score(y_true, y_p))
    print("Precision:", precision_score(y_true, y_p))
    print("Recall:   ", recall_score(y_true, y_p))
    print("F1:       ", f1_score(y_true, y_p))
    print("ROC AUC:  ", roc_auc_score(y_true, y_pp))



=== Nasal ===
Accuracy:  0.8652777777777778
Precision: 0.3448275862068966
Recall:    0.11363636363636363
F1:        0.17094017094017094
ROC AUC:   0.60814334004603

=== Respir ===
Accuracy:  0.8416666666666667
Precision: 0.3076923076923077
Recall:    0.07692307692307693
F1:        0.12307692307692308
ROC AUC:   0.5443150599400599


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

