In [1]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [2]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
import pandas as pd
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import (
    RandomForestClassifier,
    GradientBoostingClassifier,
    AdaBoostClassifier,
    ExtraTreesClassifier
)

In [3]:
file_path = '/content/participant_wise_combined.csv'
df = pd.read_csv(file_path)

print(df.head())


   unnamed: 0  travel_time subject_id environment    id  trait_anx gender  \
0           2            3         p2          e1  p2e1         40      F   
1           3            3         p2          e2  p2e2         40      F   
2           4            3         p3          e1  p3e1         45      M   
3           5            3         p3          e2  p3e2         45      M   
4           6            3         p4          e1  p4e1         58      M   

  trait_anx_level    pre_saa   post_saa  ...   SD_Ch20   SD_Ch21   SD_Ch22  \
0             low  67.926691  33.242639  ...  2.205121  2.323949  2.448438   
1             low  67.926691  33.242639  ...  2.122518  2.236502  2.356954   
2            high   2.617566   3.516061  ...  1.968179  2.075824  2.174238   
3            high   2.617566   3.516061  ...  2.039863  2.154719  2.251570   
4            high   3.652220   4.352831  ...  1.761801  1.860421  1.987060   

    SD_Ch23   SD_Ch24   SD_Ch25   SD_Ch26  F3_F4_theta  F3_Fz_theta 

In [6]:
X = df[['F3_F4_theta',	'F3_Fz_theta',	'F4_Fz_theta']]
y = df['trait_anx_level'].map({'low': 0, 'high': 1})

Train-test split

In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

Train a classifier & evaluate

In [8]:
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC(),
    "Naive Bayes": GaussianNB(),
    "KNN": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "AdaBoost": AdaBoostClassifier(random_state=42),
    "Extra Trees": ExtraTreesClassifier(random_state=42),
    "MLP Neural Net": MLPClassifier(max_iter=1000, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    "LightGBM": LGBMClassifier(),
    "CatBoost": CatBoostClassifier(verbose=0)
}

accuracy_list = []

for name, model in models.items():
    print(f"\n🔍 Model: {name}")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(classification_report(y_test, y_pred))
    scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
    mean_acc = scores.mean()
    print(f"📊 Cross-validated accuracy: {mean_acc:.4f}")
    accuracy_list.append((name, mean_acc))



🔍 Model: Logistic Regression
              precision    recall  f1-score   support

           0       0.55      1.00      0.71        11
           1       0.00      0.00      0.00         9

    accuracy                           0.55        20
   macro avg       0.28      0.50      0.35        20
weighted avg       0.30      0.55      0.39        20

📊 Cross-validated accuracy: 0.5600

🔍 Model: Random Forest


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.57      0.73      0.64        11
           1       0.50      0.33      0.40         9

    accuracy                           0.55        20
   macro avg       0.54      0.53      0.52        20
weighted avg       0.54      0.55      0.53        20

📊 Cross-validated accuracy: 0.4900

🔍 Model: SVM
              precision    recall  f1-score   support

           0       0.61      1.00      0.76        11
           1       1.00      0.22      0.36         9

    accuracy                           0.65        20
   macro avg       0.81      0.61      0.56        20
weighted avg       0.79      0.65      0.58        20

📊 Cross-validated accuracy: 0.4900

🔍 Model: Naive Bayes
              precision    recall  f1-score   support

           0       0.62      0.91      0.74        11
           1       0.75      0.33      0.46         9

    accuracy                           0.65        20
   macro avg       0.6

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.



              precision    recall  f1-score   support

           0       0.45      0.45      0.45        11
           1       0.33      0.33      0.33         9

    accuracy                           0.40        20
   macro avg       0.39      0.39      0.39        20
weighted avg       0.40      0.40      0.40        20

📊 Cross-validated accuracy: 0.3800

🔍 Model: LightGBM


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Number of positive: 35, number of negative: 45
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000295 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.437500 -> initscore=-0.251314
[LightGBM] [Info] Start training from score -0.251314
              precision    recall  f1-score   support

           0       0.64      0.82      0.72        11
           1       0.67      0.44      0.53         9

    accuracy                           0.65        20
   macro avg       0.65      0.63      0.63        20
weighted avg       0.65      0.65      0.64        20

[LightGBM] [Info] Number of positive: 36, number of negative: 44
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000019 seconds.
You can set `force_col

In [9]:
summary_df = pd.DataFrame(accuracy_list, columns=['Model', 'Cross-Validated Accuracy'])
print("\n✅ Summary of Model Accuracies:")
print(summary_df.sort_values(by='Cross-Validated Accuracy', ascending=False).reset_index(drop=True))


✅ Summary of Model Accuracies:
                  Model  Cross-Validated Accuracy
0   Logistic Regression                      0.56
1                   KNN                      0.56
2              LightGBM                      0.56
3        MLP Neural Net                      0.56
4         Decision Tree                      0.51
5              CatBoost                      0.51
6              AdaBoost                      0.50
7         Random Forest                      0.49
8                   SVM                      0.49
9           Extra Trees                      0.49
10    Gradient Boosting                      0.49
11          Naive Bayes                      0.48
12              XGBoost                      0.38
