In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file.
input_file_paths = (
    os.path.join(folder, entry)
    for folder, _, entries in os.walk('/kaggle/input')
    for entry in entries
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/preprocessed-raw-mat-csv/mat-csv-actual/mat-csv-actual/csv_files_from_mat2/02010030rest 20160324 1054..csv
/kaggle/input/preprocessed-raw-mat-csv/mat-csv-actual/mat-csv-actual/csv_files_from_mat2/02020025rest 20150713 1519..csv
/kaggle/input/preprocessed-raw-mat-csv/mat-csv-actual/mat-csv-actual/csv_files_from_mat2/02010013rest 20150703 1333..csv
/kaggle/input/preprocessed-raw-mat-csv/mat-csv-actual/mat-csv-actual/csv_files_from_mat2/02020016rest 20150701 1040..csv
/kaggle/input/preprocessed-raw-mat-csv/mat-csv-actual/mat-csv-actual/csv_files_from_mat2/02020015_rest 20150630 1527.csv
/kaggle/input/preprocessed-raw-mat-csv/mat-csv-actual/mat-csv-actual/csv_files_from_mat2/02010022restnew 20150724 14.csv
/kaggle/input/preprocessed-raw-mat-csv/mat-csv-actual/mat-csv-actual/csv_files_from_mat2/02020027rest 20150713 1049..csv
/kaggle/input/preprocessed-raw-mat-csv/mat-csv-actual/mat-csv-actual/csv_files_from_mat2/02010008_rest 20150619 1653.csv
/kaggle/input/preprocessed-raw-m

# Import Libraries 

In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, roc_curve, roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Load and Preprocess Data

In [3]:
# Load every EEG CSV in `directory_path`, build one DataFrame, and derive the
# feature matrix X and the binary target y from the `epoch` column.

# Set path to your EEG CSV files
directory_path = '/kaggle/input/preprocessed-raw-mat-csv/mat-csv-actual/mat-csv-actual/csv_files_from_mat2'

# Sort the file names: os.listdir returns entries in arbitrary order, and the
# row order of the concatenated frame determines which rows the seeded
# train/test splits select later on.
csv_filenames = sorted(
    name for name in os.listdir(directory_path) if name.endswith(".csv")
)
if not csv_filenames:
    # Fail here with a clear message instead of a KeyError on 'epoch' below.
    raise FileNotFoundError(f"No .csv files found in {directory_path}")

# Read each file once and concatenate in a single call; calling pd.concat
# inside the loop re-copies all previously loaded rows on every iteration.
raw_data = pd.concat(
    (pd.read_csv(os.path.join(directory_path, name)) for name in csv_filenames),
    ignore_index=True,
)

# Build the modelling frame without in-place mutation so that re-running this
# cell always yields the same result:
#   * drop 'time' / 'condition' when present (errors='ignore' skips absent ones)
#   * map epoch 1 -> 0 and epoch 2 -> 1; any other epoch value becomes NaN
#   * discard rows whose epoch could not be mapped
#   * store the label as int (the NaNs introduced by map force a float dtype)
combined_data = (
    raw_data
    .drop(columns=['time', 'condition'], errors='ignore')
    .assign(epoch=lambda frame: frame['epoch'].map({1: 0, 2: 1}))
    .dropna(subset=['epoch'])
    .astype({'epoch': int})
)

# Features and labels
X = combined_data.drop(columns=['epoch'])
y = combined_data['epoch']

# A classifier (and the stratified split) needs both classes to be present.
if y.nunique() <= 1:
    raise ValueError("Only one class present in the target.")

# Define models and custom split ratios

In [4]:
# Define models and train-test split ratios.
#
# Every estimator that accepts a seed gets random_state=42 so that a fresh
# "Restart & Run All" reproduces the same metrics and plots.
models = {
    'KNN': KNeighborsClassifier(n_neighbors=5),
    # probability=True fits an internal cross-validated calibration that
    # shuffles the data, hence the seed.
    'SVM': SVC(probability=True, random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss',
                             random_state=42),
    # The default iteration cap was hit on this data ("TOTAL NO. OF ITERATIONS
    # REACHED LIMIT"), so give the solver more iterations to converge.
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'LDA': LinearDiscriminantAnalysis(),
    # L1-penalised logistic regression (liblinear supports the l1 penalty).
    'Lasso': LogisticRegression(penalty='l1', solver='liblinear', random_state=42)
}

# Fraction of the data held out as the test set, per model.
split_ratios = {
    'KNN': 0.1,
    'SVM': 0.2,
    'Random Forest': 0.2,
    'Decision Tree': 0.1,
    'XGBoost': 0.2,
    'Logistic Regression': 0.3,
    'LDA': 0.2,
    'Lasso': 0.1
}

# Later cells look up split_ratios[model_name] for every model; catch a
# missing or misspelled key here rather than midway through training.
assert models.keys() == split_ratios.keys(), "models and split_ratios must share the same keys"

# Train, evaluate, and plot each model

In [5]:
# Train and evaluate every model on its own stratified split, then save:
#   * one PNG per model with its classification report,
#   * one PNG with all confusion matrices,
#   * one PNG with all ROC curves.
colors = ['red', 'blue', 'green', 'orange', 'purple', 'brown', 'cyan', 'magenta']

# ROC figure. Keep explicit figure/axes handles: this cell opens and closes
# several other figures, so drawing through plt.plot would target whichever
# figure pyplot currently considers active rather than this one.
fig_roc, ax_roc = plt.subplots(figsize=(10, 8))

# Confusion matrix setup: three columns and as many rows as the number of
# models requires (ceiling division), instead of a fixed 3x3 grid.
n_cm_cols = 3
n_cm_rows = max(1, -(-len(models) // n_cm_cols))
fig_cm, axes_cm = plt.subplots(n_cm_rows, n_cm_cols,
                               figsize=(18, 14 * n_cm_rows / 3), squeeze=False)
axes_cm = axes_cm.ravel()

# Directory to save outputs
output_dir = "/kaggle/working"
os.makedirs(output_dir, exist_ok=True)

for i, (model_name, model) in enumerate(models.items()):
    test_size = split_ratios[model_name]

    # Train-test split (stratified so both classes appear in each part)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=42)

    # Standardization: fit on the training part only, then apply to both
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Train model
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Second column of predict_proba = probability of the positive class
    y_proba = model.predict_proba(X_test)[:, 1]

    acc = accuracy_score(y_test, y_pred)
    report_text = classification_report(y_test, y_pred)

    # ROC Curve, drawn on the dedicated ROC axes
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    roc_auc = roc_auc_score(y_test, y_proba)
    ax_roc.plot(fpr, tpr, label=f'{model_name} (AUC = {roc_auc:.2f})',
                color=colors[i % len(colors)])

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Non-Depressed', 'Depressed'],
                yticklabels=['Non-Depressed', 'Depressed'],
                ax=axes_cm[i])
    axes_cm[i].set_title(f'{model_name} (Acc: {acc:.2f})')
    axes_cm[i].set_xlabel('Predicted')
    axes_cm[i].set_ylabel('Actual')

    # Save classification report as image
    fig_report, ax_report = plt.subplots(figsize=(6, 4))
    ax_report.axis('off')
    ax_report.text(0, 1, f'{model_name} Classification Report\n\n{report_text}',
                   fontsize=10, va='top', ha='left', family='monospace')
    report_path = os.path.join(output_dir, f"{model_name}_classification_report.png")
    fig_report.tight_layout()
    fig_report.savefig(report_path)
    plt.close(fig_report)  # release the figure; one is created per model

    print(f"Saved classification report for {model_name} to: {report_path}")

# Hide grid panels that have no model assigned to them
for spare_ax in axes_cm[len(models):]:
    spare_ax.set_visible(False)

# Save confusion matrix image
fig_cm.suptitle("Confusion Matrices", fontsize=16)
fig_cm.tight_layout()
conf_matrix_path = os.path.join(output_dir, "confusion_matrices_custom_split.png")
fig_cm.savefig(conf_matrix_path)
plt.close(fig_cm)

# Save ROC curve plot
ax_roc.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
ax_roc.set_title('ROC Curves with Custom Train-Test Splits')
ax_roc.set_xlabel('False Positive Rate')
ax_roc.set_ylabel('True Positive Rate')
ax_roc.legend(loc='lower right')
ax_roc.grid(True)
roc_curve_path = os.path.join(output_dir, "roc_curves_custom_split.png")
fig_roc.savefig(roc_curve_path)
plt.close(fig_roc)

# Final confirmation
print(f"\n All plots saved to: {output_dir}")
print(f"Confusion Matrix PNG: {conf_matrix_path}")
print(f"ROC Curve PNG: {roc_curve_path}")

Saved classification report for KNN to: /kaggle/working/KNN_classification_report.png
Saved classification report for SVM to: /kaggle/working/SVM_classification_report.png
Saved classification report for Random Forest to: /kaggle/working/Random Forest_classification_report.png
Saved classification report for Decision Tree to: /kaggle/working/Decision Tree_classification_report.png
Saved classification report for XGBoost to: /kaggle/working/XGBoost_classification_report.png


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Saved classification report for Logistic Regression to: /kaggle/working/Logistic Regression_classification_report.png
Saved classification report for LDA to: /kaggle/working/LDA_classification_report.png
Saved classification report for Lasso to: /kaggle/working/Lasso_classification_report.png

 All plots saved to: /kaggle/working
Confusion Matrix PNG: /kaggle/working/confusion_matrices_custom_split.png
ROC Curve PNG: /kaggle/working/roc_curves_custom_split.png


# ROC curves for all models

In [6]:
# Combined ROC figure: refit each model on its own stratified split and draw
# all curves, plus the chance-level diagonal, on a single set of axes.
fig_roc, ax_roc = plt.subplots(figsize=(10, 8))

for i, (model_name, model) in enumerate(models.items()):
    # Per-model hold-out fraction
    test_size = split_ratios[model_name]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=test_size,
        stratify=y,
        random_state=42,
    )

    # Scale with statistics learned from the training part only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Fit, then score the positive class to obtain the ROC points and AUC
    model.fit(X_train, y_train)
    y_proba = model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    roc_auc = roc_auc_score(y_test, y_proba)

    curve_label = f'{model_name} (AUC = {roc_auc:.2f})'
    curve_color = colors[i % len(colors)]
    ax_roc.plot(fpr, tpr, color=curve_color, label=curve_label, linewidth=2)

# Reference line for a classifier that guesses at random
ax_roc.plot([0, 1], [0, 1], 'k--', label='Random Guess')

# Titles, axis labels, legend and grid
ax_roc.set_title('ROC Curves for All Models', fontsize=14)
ax_roc.set_xlabel('False Positive Rate')
ax_roc.set_ylabel('True Positive Rate')
ax_roc.legend(loc='lower right')
ax_roc.grid(True)

# Write the figure to disk and release it
roc_curve_path = os.path.join(output_dir, "roc_curves_custom_split_actual.png")
fig_roc.tight_layout()
fig_roc.savefig(roc_curve_path)
plt.close(fig_roc)

print(f"ROC Curve PNG: {roc_curve_path}")


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


ROC Curve PNG: /kaggle/working/roc_curves_custom_split_actual.png
