### All EEG + Behavior Features for AI1 Suggestion

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.preprocessing import LabelEncoder, label_binarize
from imblearn.over_sampling import SMOTE
from sklearn.impute import SimpleImputer

# --- LOAD YOUR DATA ---
# Two feature tables are read, one per event ("image shown" and "decision
# making"). They are combined column-wise by row position further down.
# NOTE(review): this assumes both CSVs list the same trials in the same row
# order -- confirm against the feature-extraction step.
eeg_file_ai_image_shown = 'EEG_features/EEG_features_AI_image_1_shown_full_eeg_behavior.csv'
eeg_file_ai_decision_making = 'EEG_features/EEG_features_AI_image_1_decision_making_full_eeg_behavior.csv'

df_eeg_ai_image_shown = pd.read_csv(eeg_file_ai_image_shown)
df_eeg_ai_decision_making = pd.read_csv(eeg_file_ai_decision_making)

# A positional concat of tables with different lengths would pad the shorter
# one with NaN rows, which the mean imputer below would then silently fill.
# Fail loudly instead.
if len(df_eeg_ai_image_shown) != len(df_eeg_ai_decision_making):
    raise ValueError(
        "Row count mismatch between event files: "
        f"{len(df_eeg_ai_image_shown)} (image shown) vs "
        f"{len(df_eeg_ai_decision_making)} (decision making); "
        "trials cannot be aligned by position."
    )

# --- SELECT BEHAVIORAL FEATURES ---
behavior_features = ['SecondsTarget1', 'Correct_1', 'AI1 Correct']

# Ensure all feature columns are numeric
# Every column that is neither the target nor a behavioral feature is treated
# as an EEG feature; unparseable entries become NaN and are imputed later.
non_eeg_columns = ['ConfidenceLabel'] + behavior_features
eeg_features_ai_image_shown = df_eeg_ai_image_shown.columns.difference(non_eeg_columns)
eeg_features_ai_decision_making = df_eeg_ai_decision_making.columns.difference(non_eeg_columns)

df_eeg_ai_image_shown[eeg_features_ai_image_shown] = df_eeg_ai_image_shown[eeg_features_ai_image_shown].apply(pd.to_numeric, errors='coerce')
df_eeg_ai_decision_making[eeg_features_ai_decision_making] = df_eeg_ai_decision_making[eeg_features_ai_decision_making].apply(pd.to_numeric, errors='coerce')

# --- COMBINE EEG FEATURES FROM BOTH EVENTS + BEHAVIOR ---
# Each event's EEG columns get an event suffix: if both files use the same
# EEG column names (not visible from here), an unsuffixed concat would yield
# duplicate column labels. Column order and values are unchanged.
combined_features = pd.concat([
    df_eeg_ai_image_shown[eeg_features_ai_image_shown].add_suffix('_image_shown').reset_index(drop=True),
    df_eeg_ai_decision_making[eeg_features_ai_decision_making].add_suffix('_decision_making').reset_index(drop=True),
    df_eeg_ai_image_shown[behavior_features].reset_index(drop=True),
], axis=1)

# Use 'ConfidenceLabel' from "AI Image 1 Shown" as target
y = df_eeg_ai_image_shown['ConfidenceLabel'].reset_index(drop=True)

# Impute missing values
# SimpleImputer discards columns that are entirely NaN, which would make the
# output narrower than `combined_features.columns` and break the DataFrame
# rebuild below, so such columns are removed up front.
combined_features = combined_features.dropna(axis=1, how='all')

# NOTE(review): the imputer is fitted on all rows before the cross-validation
# split, so rows later used as test folds contribute to the imputation means.
# Fitting the imputer inside each training fold would remove this leakage.
imputer = SimpleImputer(strategy='mean')
X = pd.DataFrame(imputer.fit_transform(combined_features), columns=combined_features.columns)

# --- CATEGORIZE CONFIDENCE (edit as needed for your binning) ---
def categorize_confidence(confidence_level):
    """Bin a raw confidence rating into 'Low', 'Medium' or 'High'.

    Ratings 1-2 map to 'Low', 3-4 to 'Medium' and 5-6 to 'High'. Any other
    value (including NaN) yields ``np.nan`` so the caller can filter it out.
    Adjust the bins if the rating scale differs.
    """
    bins = (
        ((1, 2), 'Low'),
        ((3, 4), 'Medium'),
        ((5, 6), 'High'),
    )
    for levels, label in bins:
        if confidence_level in levels:
            return label
    return np.nan
# Bin every raw rating; ratings outside the known bins come back as NaN.
y_grouped = y.apply(categorize_confidence)

# Drop missing
# The same boolean mask filters features and labels so rows stay aligned.
mask = ~y_grouped.isna()
X = X.loc[mask].reset_index(drop=True)
y_grouped = y_grouped.loc[mask].reset_index(drop=True)

# Label encode
# Codes follow the ordinal order Low=0, Medium=1, High=2. They are assigned
# with an explicit mapping rather than LabelEncoder.transform on a hand-set,
# unsorted `classes_`, which depends on how transform is implemented
# internally instead of on the fitted-encoder contract.
confidence_order = ['Low', 'Medium', 'High']
confidence_codes = {label: code for code, label in enumerate(confidence_order)}
y_encoded = y_grouped.map(confidence_codes).to_numpy()

# `le` is still defined, with the same class order, in case code outside this
# section uses it to translate codes back to labels.
le = LabelEncoder()
le.classes_ = np.array(confidence_order)

# --- NESTED CROSS-VALIDATION (4 outer, 3 inner) ---
# The outer folds measure performance on held-out data; the inner folds pick
# SVC hyperparameters using the outer-training portion only.
# NOTE(review): features are passed to SMOTE and SVC without scaling in this
# section; both are distance based, so confirm the inputs are already on
# comparable scales.
outer_cv = StratifiedKFold(n_splits=4, shuffle=True, random_state=42)
outer_accuracies = []
outer_aucs = []

# Search setup is identical for every outer fold, so it is built once.
# An integer random_state makes each inner_cv.split() call reproduce the same
# shuffling, so reusing one splitter object does not change the folds.
inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
param_grid = [
    # 'gamma' has no effect with the linear kernel; it is listed so that every
    # candidate has the same keys.
    {'C': 0.1, 'kernel': 'linear', 'gamma': 'scale'},
    {'C': 1, 'kernel': 'linear', 'gamma': 'scale'},
    {'C': 10, 'kernel': 'linear', 'gamma': 'scale'},
    {'C': 0.1, 'kernel': 'rbf', 'gamma': 'scale'},
    {'C': 1, 'kernel': 'rbf', 'gamma': 'scale'},
    {'C': 10, 'kernel': 'rbf', 'gamma': 'scale'},
    {'C': 0.1, 'kernel': 'rbf', 'gamma': 'auto'},
    {'C': 1, 'kernel': 'rbf', 'gamma': 'auto'},
    {'C': 10, 'kernel': 'rbf', 'gamma': 'auto'},
]

for outer_train_idx, outer_test_idx in outer_cv.split(X, y_encoded):
    X_outer_train, X_outer_test = X.iloc[outer_train_idx], X.iloc[outer_test_idx]
    y_outer_train, y_outer_test = y_encoded[outer_train_idx], y_encoded[outer_test_idx]

    # --- Inner cross-validation for hyperparameter tuning ---
    best_score = -np.inf
    best_params = None

    for params in param_grid:
        val_scores = []
        for inner_train_idx, inner_val_idx in inner_cv.split(X_outer_train, y_outer_train):
            X_inner_train, X_inner_val = X_outer_train.iloc[inner_train_idx], X_outer_train.iloc[inner_val_idx]
            y_inner_train, y_inner_val = y_outer_train[inner_train_idx], y_outer_train[inner_val_idx]
            # === SMOTE only on inner training set ===
            # Oversampling never touches the validation rows.
            smote = SMOTE(random_state=42)
            X_inner_train_res, y_inner_train_res = smote.fit_resample(X_inner_train, y_inner_train)
            # Model selection uses hard predictions only, so probability
            # estimation (an extra internal cross-validation on every fit) is
            # left off here; it is enabled for the final model below.
            model = SVC(**params, random_state=42)
            model.fit(X_inner_train_res, y_inner_train_res)
            y_val_pred = model.predict(X_inner_val)
            acc = accuracy_score(y_inner_val, y_val_pred)
            val_scores.append(acc)
        avg_val_score = np.mean(val_scores)
        # Strict '>' keeps the earliest grid entry when scores tie.
        if avg_val_score > best_score:
            best_score = avg_val_score
            best_params = params

    # --- Retrain on full outer train set with best params, SMOTE again only on train ---
    smote = SMOTE(random_state=42)
    X_outer_train_res, y_outer_train_res = smote.fit_resample(X_outer_train, y_outer_train)
    final_model = SVC(**best_params, probability=True, random_state=42)
    final_model.fit(X_outer_train_res, y_outer_train_res)

    # --- Test on outer test fold ---
    y_outer_pred = final_model.predict(X_outer_test)
    acc = accuracy_score(y_outer_test, y_outer_pred)
    outer_accuracies.append(acc)

    # Macro AUC
    # One-vs-rest: labels are binarized per class and scored against the
    # matching predict_proba column, then averaged over the three classes.
    y_test_bin = label_binarize(y_outer_test, classes=[0, 1, 2])
    y_proba = final_model.predict_proba(X_outer_test)
    auc = roc_auc_score(y_test_bin, y_proba, average="macro", multi_class="ovr")
    outer_aucs.append(auc)

    print(f"Outer fold accuracy: {acc:.4f}, AUC: {auc:.4f}, Params: {best_params}")

print(f"\nMean accuracy across outer folds: {np.mean(outer_accuracies):.4f} ± {np.std(outer_accuracies):.4f}")
print(f"Mean macro AUC across outer folds: {np.mean(outer_aucs):.4f} ± {np.std(outer_aucs):.4f}")


### EEG Feature AI1 suggestion + Behavior AI1 suggestion

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.preprocessing import LabelEncoder, label_binarize
from imblearn.over_sampling import SMOTE

# Read the combined EEG + behavior feature table for this section.
# NOTE(review): the first section reads its CSVs from an 'EEG_features/'
# folder while this path has no folder prefix -- confirm which is intended.
eeg_file_path = 'EEG_features_AI_image_1_decision_making_full_eeg_behavior.csv'
df_eeg = pd.read_csv(eeg_file_path)

# Step 1: every column except the target is treated as a feature. Values that
# cannot be parsed as numbers become NaN, and any row holding a NaN (in a
# feature or in the target) is then removed.
eeg_features = df_eeg.columns.difference(['ConfidenceLabel'])
df_eeg[eeg_features] = df_eeg[eeg_features].apply(
    lambda column: pd.to_numeric(column, errors='coerce')
)
df_eeg = df_eeg.dropna(axis=0, how='any')

# Step 2: split the cleaned table into the feature matrix and the raw labels.
X = df_eeg.loc[:, eeg_features]
y = df_eeg.loc[:, 'ConfidenceLabel']

# --- Categorize confidence (edit as needed for your Likert scale) ---
def categorize_confidence(confidence_level):
    """Return the confidence bin for a raw rating.

    1-2 -> 'Low', 3-4 -> 'Medium', 5-6 -> 'High'; anything else (NaN
    included) -> ``np.nan`` so that the row can be masked out afterwards.
    """
    bins = (
        ((1, 2), 'Low'),
        ((3, 4), 'Medium'),
        ((5, 6), 'High'),
    )
    for levels, label in bins:
        if confidence_level in levels:
            return label
    return np.nan

# Bin every raw rating; ratings outside the known bins come back as NaN.
y_grouped = y.apply(categorize_confidence)

# Drop the un-binned trials; one mask filters features and labels together
# so rows stay aligned.
mask = ~y_grouped.isna()
X = X.loc[mask].reset_index(drop=True)
y_grouped = y_grouped.loc[mask].reset_index(drop=True)

# Codes follow the ordinal order Low=0, Medium=1, High=2. They are assigned
# with an explicit mapping rather than LabelEncoder.transform on a hand-set,
# unsorted `classes_`, which depends on how transform is implemented
# internally instead of on the fitted-encoder contract.
confidence_order = ['Low', 'Medium', 'High']
confidence_codes = {label: code for code, label in enumerate(confidence_order)}
y_encoded = y_grouped.map(confidence_codes).to_numpy()

# `le` is still defined, with the same class order, in case code outside this
# section uses it to translate codes back to labels.
le = LabelEncoder()
le.classes_ = np.array(confidence_order)

# --- NESTED CROSS-VALIDATION (4 outer, 3 inner) ---
# The outer folds measure performance on held-out data; the inner folds pick
# SVC hyperparameters using the outer-training portion only.
# NOTE(review): features are passed to SMOTE and SVC without scaling in this
# section; both are distance based, so confirm the inputs are already on
# comparable scales.
outer_cv = StratifiedKFold(n_splits=4, shuffle=True, random_state=42)
outer_accuracies = []
outer_aucs = []

# Search setup is identical for every outer fold, so it is built once.
# An integer random_state makes each inner_cv.split() call reproduce the same
# shuffling, so reusing one splitter object does not change the folds.
inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
param_grid = [
    # 'gamma' has no effect with the linear kernel; it is listed so that every
    # candidate has the same keys.
    {'C': 0.1, 'kernel': 'linear', 'gamma': 'scale'},
    {'C': 1, 'kernel': 'linear', 'gamma': 'scale'},
    {'C': 10, 'kernel': 'linear', 'gamma': 'scale'},
    {'C': 0.1, 'kernel': 'rbf', 'gamma': 'scale'},
    {'C': 1, 'kernel': 'rbf', 'gamma': 'scale'},
    {'C': 10, 'kernel': 'rbf', 'gamma': 'scale'},
    {'C': 0.1, 'kernel': 'rbf', 'gamma': 'auto'},
    {'C': 1, 'kernel': 'rbf', 'gamma': 'auto'},
    {'C': 10, 'kernel': 'rbf', 'gamma': 'auto'},
]

for outer_train_idx, outer_test_idx in outer_cv.split(X, y_encoded):
    X_outer_train, X_outer_test = X.iloc[outer_train_idx], X.iloc[outer_test_idx]
    y_outer_train, y_outer_test = y_encoded[outer_train_idx], y_encoded[outer_test_idx]

    # --- Inner CV for hyperparameter tuning ---
    best_score = -np.inf
    best_params = None

    for params in param_grid:
        val_scores = []
        for inner_train_idx, inner_val_idx in inner_cv.split(X_outer_train, y_outer_train):
            X_inner_train, X_inner_val = X_outer_train.iloc[inner_train_idx], X_outer_train.iloc[inner_val_idx]
            y_inner_train, y_inner_val = y_outer_train[inner_train_idx], y_outer_train[inner_val_idx]
            # SMOTE only on inner train: oversampling never touches the
            # validation rows.
            smote = SMOTE(random_state=42)
            X_inner_train_res, y_inner_train_res = smote.fit_resample(X_inner_train, y_inner_train)
            # Model selection uses hard predictions only, so probability
            # estimation (an extra internal cross-validation on every fit) is
            # left off here; it is enabled for the final model below.
            model = SVC(**params, random_state=42)
            model.fit(X_inner_train_res, y_inner_train_res)
            y_val_pred = model.predict(X_inner_val)
            acc = accuracy_score(y_inner_val, y_val_pred)
            val_scores.append(acc)
        avg_val_score = np.mean(val_scores)
        # Strict '>' keeps the earliest grid entry when scores tie.
        if avg_val_score > best_score:
            best_score = avg_val_score
            best_params = params

    # Retrain on full outer train with best params, SMOTE only on train
    smote = SMOTE(random_state=42)
    X_outer_train_res, y_outer_train_res = smote.fit_resample(X_outer_train, y_outer_train)
    final_model = SVC(**best_params, probability=True, random_state=42)
    final_model.fit(X_outer_train_res, y_outer_train_res)

    # Test on outer test fold
    y_outer_pred = final_model.predict(X_outer_test)
    acc = accuracy_score(y_outer_test, y_outer_pred)
    outer_accuracies.append(acc)

    # Macro AUC
    # One-vs-rest: labels are binarized per class and scored against the
    # matching predict_proba column, then averaged over the three classes.
    y_test_bin = label_binarize(y_outer_test, classes=[0, 1, 2])
    y_proba = final_model.predict_proba(X_outer_test)
    auc = roc_auc_score(y_test_bin, y_proba, average="macro", multi_class="ovr")
    outer_aucs.append(auc)

    print(f"Outer fold accuracy: {acc:.4f}, AUC: {auc:.4f}, Params: {best_params}")

print(f"\nMean accuracy across outer folds: {np.mean(outer_accuracies):.4f} ± {np.std(outer_accuracies):.4f}")
print(f"Mean macro AUC across outer folds: {np.mean(outer_aucs):.4f} ± {np.std(outer_aucs):.4f}")


### EEG Feature AI2 suggestion + Behavior AI2 suggestion

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.preprocessing import LabelEncoder, label_binarize
from imblearn.over_sampling import SMOTE

# Load the EEG features and metadata CSV file
# NOTE(review): this section is headed "AI2 suggestion", but the original line
# loaded the AI_image_1 file -- the same file as the AI1 section -- so both
# sections ran the identical analysis on identical data. The path below
# follows the AI1 file's naming pattern; confirm the actual AI2 file name.
eeg_file_path = 'EEG_features_AI_image_2_decision_making_full_eeg_behavior.csv'
df_eeg = pd.read_csv(eeg_file_path)

# Step 1: Ensure all feature columns are numeric
# Every column except the target is treated as a feature. Values that cannot
# be parsed as numbers become NaN, and any row holding a NaN (in a feature or
# in the target) is then removed.
eeg_features = df_eeg.columns.difference(['ConfidenceLabel'])
df_eeg[eeg_features] = df_eeg[eeg_features].apply(pd.to_numeric, errors='coerce')
df_eeg = df_eeg.dropna()

# Step 2: Prepare Data (Features and Labels)
X = df_eeg[eeg_features]
y = df_eeg['ConfidenceLabel']

# --- Categorize confidence (edit as needed for your Likert scale) ---
def categorize_confidence(confidence_level):
    """Translate a raw confidence rating into its coarse bin.

    Ratings 1 and 2 give 'Low', 3 and 4 give 'Medium', 5 and 6 give 'High'.
    Every other input, NaN included, gives ``np.nan``.
    """
    bins = (
        ((1, 2), 'Low'),
        ((3, 4), 'Medium'),
        ((5, 6), 'High'),
    )
    for levels, label in bins:
        if confidence_level in levels:
            return label
    return np.nan

# Bin every raw rating; ratings outside the known bins come back as NaN.
y_grouped = y.apply(categorize_confidence)

# Drop the un-binned trials; one mask filters features and labels together
# so rows stay aligned.
mask = ~y_grouped.isna()
X = X.loc[mask].reset_index(drop=True)
y_grouped = y_grouped.loc[mask].reset_index(drop=True)

# Codes follow the ordinal order Low=0, Medium=1, High=2. They are assigned
# with an explicit mapping rather than LabelEncoder.transform on a hand-set,
# unsorted `classes_`, which depends on how transform is implemented
# internally instead of on the fitted-encoder contract.
confidence_order = ['Low', 'Medium', 'High']
confidence_codes = {label: code for code, label in enumerate(confidence_order)}
y_encoded = y_grouped.map(confidence_codes).to_numpy()

# `le` is still defined, with the same class order, in case code outside this
# section uses it to translate codes back to labels.
le = LabelEncoder()
le.classes_ = np.array(confidence_order)

# --- NESTED CROSS-VALIDATION (4 outer, 3 inner) ---
# The outer folds measure performance on held-out data; the inner folds pick
# SVC hyperparameters using the outer-training portion only.
# NOTE(review): features are passed to SMOTE and SVC without scaling in this
# section; both are distance based, so confirm the inputs are already on
# comparable scales.
outer_cv = StratifiedKFold(n_splits=4, shuffle=True, random_state=42)
outer_accuracies = []
outer_aucs = []

# Search setup is identical for every outer fold, so it is built once.
# An integer random_state makes each inner_cv.split() call reproduce the same
# shuffling, so reusing one splitter object does not change the folds.
inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
param_grid = [
    # 'gamma' has no effect with the linear kernel; it is listed so that every
    # candidate has the same keys.
    {'C': 0.1, 'kernel': 'linear', 'gamma': 'scale'},
    {'C': 1, 'kernel': 'linear', 'gamma': 'scale'},
    {'C': 10, 'kernel': 'linear', 'gamma': 'scale'},
    {'C': 0.1, 'kernel': 'rbf', 'gamma': 'scale'},
    {'C': 1, 'kernel': 'rbf', 'gamma': 'scale'},
    {'C': 10, 'kernel': 'rbf', 'gamma': 'scale'},
    {'C': 0.1, 'kernel': 'rbf', 'gamma': 'auto'},
    {'C': 1, 'kernel': 'rbf', 'gamma': 'auto'},
    {'C': 10, 'kernel': 'rbf', 'gamma': 'auto'},
]

for outer_train_idx, outer_test_idx in outer_cv.split(X, y_encoded):
    X_outer_train, X_outer_test = X.iloc[outer_train_idx], X.iloc[outer_test_idx]
    y_outer_train, y_outer_test = y_encoded[outer_train_idx], y_encoded[outer_test_idx]

    # --- Inner CV for hyperparameter tuning ---
    best_score = -np.inf
    best_params = None

    for params in param_grid:
        val_scores = []
        for inner_train_idx, inner_val_idx in inner_cv.split(X_outer_train, y_outer_train):
            X_inner_train, X_inner_val = X_outer_train.iloc[inner_train_idx], X_outer_train.iloc[inner_val_idx]
            y_inner_train, y_inner_val = y_outer_train[inner_train_idx], y_outer_train[inner_val_idx]
            # SMOTE only on inner train: oversampling never touches the
            # validation rows.
            smote = SMOTE(random_state=42)
            X_inner_train_res, y_inner_train_res = smote.fit_resample(X_inner_train, y_inner_train)
            # Model selection uses hard predictions only, so probability
            # estimation (an extra internal cross-validation on every fit) is
            # left off here; it is enabled for the final model below.
            model = SVC(**params, random_state=42)
            model.fit(X_inner_train_res, y_inner_train_res)
            y_val_pred = model.predict(X_inner_val)
            acc = accuracy_score(y_inner_val, y_val_pred)
            val_scores.append(acc)
        avg_val_score = np.mean(val_scores)
        # Strict '>' keeps the earliest grid entry when scores tie.
        if avg_val_score > best_score:
            best_score = avg_val_score
            best_params = params

    # Retrain on full outer train with best params, SMOTE only on train
    smote = SMOTE(random_state=42)
    X_outer_train_res, y_outer_train_res = smote.fit_resample(X_outer_train, y_outer_train)
    final_model = SVC(**best_params, probability=True, random_state=42)
    final_model.fit(X_outer_train_res, y_outer_train_res)

    # Test on outer test fold
    y_outer_pred = final_model.predict(X_outer_test)
    acc = accuracy_score(y_outer_test, y_outer_pred)
    outer_accuracies.append(acc)

    # Macro AUC
    # One-vs-rest: labels are binarized per class and scored against the
    # matching predict_proba column, then averaged over the three classes.
    y_test_bin = label_binarize(y_outer_test, classes=[0, 1, 2])
    y_proba = final_model.predict_proba(X_outer_test)
    auc = roc_auc_score(y_test_bin, y_proba, average="macro", multi_class="ovr")
    outer_aucs.append(auc)

    print(f"Outer fold accuracy: {acc:.4f}, AUC: {auc:.4f}, Params: {best_params}")

print(f"\nMean accuracy across outer folds: {np.mean(outer_accuracies):.4f} ± {np.std(outer_accuracies):.4f}")
print(f"Mean macro AUC across outer folds: {np.mean(outer_aucs):.4f} ± {np.std(outer_aucs):.4f}")
