In [None]:
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, cohen_kappa_score
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
import warnings

In [2]:
# Read the feature/label table from the CSV file
data = pd.read_csv('binary-features-labels.csv')

# Feature matrix: every column except the first three and the last one.
# Target vector: the 'Labels' column.
X, y = data.iloc[:, 3:-1], data['Labels']

# Participant Independent Model

## Data Splitting

We have a total of 40 subjects. To measure how well the model generalizes to participants it has never seen, we hold out 6 randomly chosen subjects as the test set and train on the remaining 34.

In [3]:
# Randomly hold out 6 of the 40 subjects (IDs 1-40) as the test set
np.random.seed(42)  # For reproducibility
test_subjects = np.random.choice(range(1, 41), 6, replace=False)
print('Subjects in the test set:', test_subjects)

# Regex matching the file names of the held-out subjects.
# The trailing underscore is required: a bare 'sub_1' would also match
# 'sub_10' ... 'sub_19', silently pulling extra subjects into the test set.
test_pattern = '|'.join(f'sub_{i}_' for i in test_subjects)
is_test_row = data['file'].str.contains(test_pattern)

# Split rows by subject: matching rows form the test set, all others the train set
test_set = data[is_test_row]
train_set = data[~is_test_row]

# Display the sizes of the datasets
print(f"Dataset size: {data.shape}")
print(f"Train set size: {train_set.shape}")
print(f"Test set size: {test_set.shape}")

Subjects in the test set: [20 17 16 27  5 13]
Dataset size: (15360, 20)
Train set size: (13056, 20)
Test set size: (2304, 20)


In [4]:
# Training subjects: features (all columns except the first three and the last)
# and the 'Labels' target
X_train, y_train = train_set.iloc[:, 3:-1], train_set['Labels']

# Held-out test subjects: same column selection
X_test, y_test = test_set.iloc[:, 3:-1], test_set['Labels']

## Nominal classification via Logistic Regression

In [5]:
# Fit a logistic regression classifier on the training subjects
model = LogisticRegression(max_iter=1000)

# All warnings raised while fitting are suppressed inside this context
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    model.fit(X_train, y_train)

# Predict labels for the held-out test subjects
y_test_pred = model.predict(X_test)

# Score the predictions: accuracy, class-weighted F1, and Cohen's kappa
combined_accuracy, combined_f1, combined_kappa = (
    accuracy_score(y_test, y_test_pred),
    f1_score(y_test, y_test_pred, average='weighted'),
    cohen_kappa_score(y_test, y_test_pred),
)

print(f"Test Accuracy: {combined_accuracy}")
print(f"Test F1 Score: {combined_f1}")
print(f"Test Cohen's Kappa: {combined_kappa}")


Test Accuracy: 0.7456597222222222
Test F1 Score: 0.6542085401800191
Test Cohen's Kappa: 0.02170283806343909


## Gradient Boosting

In [6]:
# Gradient Boosting classifier: 100 trees, learning rate 0.1, fixed seed
gb_model = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
)
gb_model.fit(X_train, y_train)

# Predict labels for the held-out test subjects
y_test_pred_gb = gb_model.predict(X_test)

# Score the predictions: accuracy, class-weighted F1, and Cohen's kappa
gb_accuracy, gb_f1, gb_kappa = (
    accuracy_score(y_test, y_test_pred_gb),
    f1_score(y_test, y_test_pred_gb, average='weighted'),
    cohen_kappa_score(y_test, y_test_pred_gb),
)

print(f"Gradient Boosting Test Accuracy: {gb_accuracy}")
print(f"Gradient Boosting Test F1 Score: {gb_f1}")
print(f"Gradient Boosting Test Cohen's Kappa: {gb_kappa}")

Gradient Boosting Test Accuracy: 0.7621527777777778
Gradient Boosting Test F1 Score: 0.6792214321626087
Gradient Boosting Test Cohen's Kappa: 0.09121061359867333


## XGBoost

In [7]:
# XGBoost classifier configured with a two-class softprob objective and a fixed seed
xgb_model = xgb.XGBClassifier(objective='multi:softprob', num_class=2, random_state=42)

# Fit on the training subjects, then predict the held-out test subjects
xgb_model.fit(X_train, y_train)
y_test_pred_xgb = xgb_model.predict(X_test)

# If predict() handed back a 2-D array (one column per class),
# collapse it to the index of the highest-scoring class per row
if y_test_pred_xgb.ndim == 2:
    y_test_pred_xgb = np.argmax(y_test_pred_xgb, axis=1)

# Score the predictions: accuracy, class-weighted F1, and Cohen's kappa
xgb_accuracy, xgb_f1, xgb_kappa = (
    accuracy_score(y_test, y_test_pred_xgb),
    f1_score(y_test, y_test_pred_xgb, average='weighted'),
    cohen_kappa_score(y_test, y_test_pred_xgb),
)

print(f"XGBoost Test Accuracy: {xgb_accuracy}")
print(f"XGBoost Test F1 Score: {xgb_f1}")
print(f"XGBoost Test Cohen's Kappa: {xgb_kappa}")

XGBoost Test Accuracy: 0.74609375
XGBoost Test F1 Score: 0.711702099428195
XGBoost Test Cohen's Kappa: 0.1812456263121064


## Discriminative Power of the Selected Features

In [8]:
# Feature subset whose discriminative power is being evaluated
selected_features = [
    'rel_gamma_power',
    'avg_clustering_coeff',
    'degree_entropy',
    'am_mean',
    'abs_alpha_power',
]

# Restrict the train and test feature matrices to the selected columns
X_train_selected = X_train[selected_features]
X_test_selected = X_test[selected_features]

# Same XGBoost configuration as the full-feature model, trained on the subset
xgb_model_selected = xgb.XGBClassifier(objective='multi:softprob', num_class=2, random_state=42)
xgb_model_selected.fit(X_train_selected, y_train)

# Predict the held-out test subjects from the selected features only
y_test_pred_xgb_selected = xgb_model_selected.predict(X_test_selected)

# If predict() handed back a 2-D array (one column per class),
# collapse it to the index of the highest-scoring class per row
if y_test_pred_xgb_selected.ndim == 2:
    y_test_pred_xgb_selected = np.argmax(y_test_pred_xgb_selected, axis=1)

# Score the predictions: accuracy, class-weighted F1, and Cohen's kappa
xgb_accuracy_selected, xgb_f1_selected, xgb_kappa_selected = (
    accuracy_score(y_test, y_test_pred_xgb_selected),
    f1_score(y_test, y_test_pred_xgb_selected, average='weighted'),
    cohen_kappa_score(y_test, y_test_pred_xgb_selected),
)

print(f"XGBoost Test Accuracy with Selected Features: {xgb_accuracy_selected}")
print(f"XGBoost Test F1 Score with Selected Features: {xgb_f1_selected}")
print(f"XGBoost Test Cohen's Kappa with Selected Features: {xgb_kappa_selected}")

XGBoost Test Accuracy with Selected Features: 0.7183159722222222
XGBoost Test F1 Score with Selected Features: 0.6695841191418528
XGBoost Test Cohen's Kappa with Selected Features: 0.057371096586782855


# Participant-Dependent Model

So far, we trained the model using data from 34 participants and evaluated its predictive performance on the remaining 6 participants. Now, let's build a participant-dependent model, where we train a separate model for each participant using data from their first two trials and test it on the remaining trial for each activity.

## Logistic Regression

In [9]:
# Per-participant metrics; one entry is appended per subject
accuracies = []
f1_scores = []
kappas = []

# Train and evaluate one logistic regression model per participant.
# Loop-local data and model use subject-scoped names (subj_*) so that the
# participant-independent X_train / y_train / X_test / y_test / model defined
# earlier in the notebook are not overwritten; otherwise re-running an earlier
# cell afterwards would silently operate on the last subject's data only.
for subject in range(1, 41):
    print(f"{subject}", end=' ')
    # Rows of the current subject; the trailing underscore keeps
    # 'sub_1_' from matching 'sub_10_', 'sub_11_', ...
    subject_data = data[data['file'].str.contains(f'sub_{subject}_')]

    # Trials 1 and 2 are used for training, trial 3 for testing
    train_data = subject_data[subject_data['file'].str.contains('trial1|trial2')]
    test_data = subject_data[subject_data['file'].str.contains('trial3')]

    # Features (all columns except the first three and the last) and labels
    subj_X_train = train_data.iloc[:, 3:-1]
    subj_y_train = train_data['Labels']
    subj_X_test = test_data.iloc[:, 3:-1]
    subj_y_test = test_data['Labels']

    # Fit the subject-specific model; all warnings raised while fitting are suppressed
    subj_lr_model = LogisticRegression(max_iter=5000)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        subj_lr_model.fit(subj_X_train, subj_y_train)

    # Predict the labels of this subject's test trial
    subj_y_pred = subj_lr_model.predict(subj_X_test)

    # Accuracy, class-weighted F1, and Cohen's kappa for this subject
    accuracy = accuracy_score(subj_y_test, subj_y_pred)
    f1 = f1_score(subj_y_test, subj_y_pred, average='weighted')
    kappa = cohen_kappa_score(subj_y_test, subj_y_pred)

    # Append metrics to the lists
    accuracies.append(accuracy)
    f1_scores.append(f1)
    kappas.append(kappa)

print() # Print a newline

# Aggregate metrics: unweighted mean over participants
mean_accuracy = np.mean(accuracies)
mean_f1 = np.mean(f1_scores)
mean_kappa = np.mean(kappas)

print(f"Mean Accuracy: {mean_accuracy}")
print(f"Mean F1 Score: {mean_f1}")
print(f"Mean Cohen's Kappa: {mean_kappa}")


1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 
Mean Accuracy: 0.758203125
Mean F1 Score: 0.7364961640066325
Mean Cohen's Kappa: 0.3430289490966902


## Discriminative power of the features selected

In [10]:
# Per-participant metrics; one entry is appended per subject
accuracies_xgb_selected = []
f1_scores_xgb_selected = []
kappas_xgb_selected = []

# Train and evaluate one XGBoost model per participant on the selected features.
# Loop-local data and model use subject-scoped names (subj_*) so that the
# participant-independent X_train_selected / X_test_selected / y_train / y_test /
# xgb_model_selected defined earlier in the notebook are not overwritten.
for subject in range(1, 41):
    print(f"{subject}", end=' ')
    # Rows of the current subject; the trailing underscore keeps
    # 'sub_1_' from matching 'sub_10_', 'sub_11_', ...
    subject_data = data[data['file'].str.contains(f'sub_{subject}_')]

    # Trials 1 and 2 are used for training, trial 3 for testing
    train_data = subject_data[subject_data['file'].str.contains('trial1|trial2')]
    test_data = subject_data[subject_data['file'].str.contains('trial3')]

    # Selected feature columns and labels for this subject
    subj_X_train_selected = train_data[selected_features]
    subj_y_train = train_data['Labels']
    subj_X_test_selected = test_data[selected_features]
    subj_y_test = test_data['Labels']

    # Subject-specific XGBoost model with a two-class softprob objective
    subj_xgb_selected = xgb.XGBClassifier(objective='multi:softprob', num_class=2, random_state=42)
    subj_xgb_selected.fit(subj_X_train_selected, subj_y_train)

    # Predict the labels of this subject's test trial
    subj_y_pred_selected = subj_xgb_selected.predict(subj_X_test_selected)

    # If predict() returned a 2-D array (one column per class),
    # reduce it to the index of the highest-scoring class per row
    if subj_y_pred_selected.ndim == 2:
        subj_y_pred_selected = np.argmax(subj_y_pred_selected, axis=1)

    # Accuracy, class-weighted F1, and Cohen's kappa for this subject
    accuracy_xgb_selected = accuracy_score(subj_y_test, subj_y_pred_selected)
    f1_xgb_selected = f1_score(subj_y_test, subj_y_pred_selected, average='weighted')
    kappa_xgb_selected = cohen_kappa_score(subj_y_test, subj_y_pred_selected)

    # Append metrics to the lists
    accuracies_xgb_selected.append(accuracy_xgb_selected)
    f1_scores_xgb_selected.append(f1_xgb_selected)
    kappas_xgb_selected.append(kappa_xgb_selected)

print()  # Print a newline
# Aggregate metrics: unweighted mean over participants
mean_accuracy_xgb_selected = np.mean(accuracies_xgb_selected)
mean_f1_xgb_selected = np.mean(f1_scores_xgb_selected)
mean_kappa_xgb_selected = np.mean(kappas_xgb_selected)

print(f"Mean Accuracy (XGBoost with Selected Features): {mean_accuracy_xgb_selected}")
print(f"Mean F1 Score (XGBoost with Selected Features): {mean_f1_xgb_selected}")
print(f"Mean Cohen's Kappa (XGBoost with Selected Features): {mean_kappa_xgb_selected}")

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 
Mean Accuracy (XGBoost with Selected Features): 0.729296875
Mean F1 Score (XGBoost with Selected Features): 0.7146568083776572
Mean Cohen's Kappa (XGBoost with Selected Features): 0.2396492589265286


## XGBoost

In [11]:
# Per-participant metrics; one entry is appended per subject
accuracies_xgb = []
f1_scores_xgb = []
kappas_xgb = []

# Train and evaluate one XGBoost model per participant on all features.
# Loop-local data and model use subject-scoped names (subj_*) so that the
# participant-independent X_train / y_train / X_test / y_test / xgb_model
# defined earlier in the notebook are not overwritten.
for subject in range(1, 41):
    print(f"{subject}", end=' ')
    # Rows of the current subject; the trailing underscore keeps
    # 'sub_1_' from matching 'sub_10_', 'sub_11_', ...
    subject_data = data[data['file'].str.contains(f'sub_{subject}_')]

    # Trials 1 and 2 are used for training, trial 3 for testing
    train_data = subject_data[subject_data['file'].str.contains('trial1|trial2')]
    test_data = subject_data[subject_data['file'].str.contains('trial3')]

    # Features (all columns except the first three and the last) and labels
    subj_X_train = train_data.iloc[:, 3:-1]
    subj_y_train = train_data['Labels']
    subj_X_test = test_data.iloc[:, 3:-1]
    subj_y_test = test_data['Labels']

    # Subject-specific XGBoost model with a two-class softprob objective
    subj_xgb_model = xgb.XGBClassifier(objective='multi:softprob', num_class=2, random_state=42)
    subj_xgb_model.fit(subj_X_train, subj_y_train)

    # Predict the labels of this subject's test trial
    subj_y_pred_xgb = subj_xgb_model.predict(subj_X_test)

    # If predict() returned a 2-D array (one column per class),
    # reduce it to the index of the highest-scoring class per row
    if subj_y_pred_xgb.ndim == 2:
        subj_y_pred_xgb = np.argmax(subj_y_pred_xgb, axis=1)

    # Accuracy, class-weighted F1, and Cohen's kappa for this subject
    accuracy_xgb = accuracy_score(subj_y_test, subj_y_pred_xgb)
    f1_xgb = f1_score(subj_y_test, subj_y_pred_xgb, average='weighted')
    kappa_xgb = cohen_kappa_score(subj_y_test, subj_y_pred_xgb)

    # Append metrics to the lists
    accuracies_xgb.append(accuracy_xgb)
    f1_scores_xgb.append(f1_xgb)
    kappas_xgb.append(kappa_xgb)

print()  # Print a newline
# Aggregate metrics: unweighted mean over participants
mean_accuracy_xgb = np.mean(accuracies_xgb)
mean_f1_xgb = np.mean(f1_scores_xgb)
mean_kappa_xgb = np.mean(kappas_xgb)

print(f"Mean Accuracy (XGBoost): {mean_accuracy_xgb}")
print(f"Mean F1 Score (XGBoost): {mean_f1_xgb}")
print(f"Mean Cohen's Kappa (XGBoost): {mean_kappa_xgb}")

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 
Mean Accuracy (XGBoost): 0.7447265625
Mean F1 Score (XGBoost): 0.7187288440154825
Mean Cohen's Kappa (XGBoost): 0.24045863450459723
