In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings


warnings.filterwarnings('ignore')

from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.model_selection import learning_curve


In [None]:
# Colab-only step: opens the browser file picker and stores the chosen file(s)
# in the runtime's working directory. The read_csv call further down expects
# the uploaded file to be named 'df_cleann.csv'.
from google.colab import files
uploaded = files.upload()

Saving df_cleann.csv to df_cleann.csv


In [None]:
# Load the dataset from the CSV in the working directory.
df = pd.read_csv('df_cleann.csv')

# Quick sanity check: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27868 entries, 0 to 27867
Data columns (total 12 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Gender                                 27868 non-null  int64  
 1   Age                                    27868 non-null  float64
 2   Academic Pressure                      27868 non-null  float64
 3   CGPA                                   27868 non-null  float64
 4   Study Satisfaction                     27868 non-null  float64
 5   Sleep Duration                         27868 non-null  float64
 6   Dietary Habits                         27868 non-null  int64  
 7   Have you ever had suicidal thoughts ?  27868 non-null  int64  
 8   Work/Study Hours                       27868 non-null  float64
 9   Financial Stress                       27868 non-null  float64
 10  Family History of Mental Illness       27868 non-null  int64  
 11  De

In [None]:
# Separate the predictors from the binary target column "Depression".
X = df.drop(columns=["Depression"])
y = df["Depression"]

# Hold out 20% of the rows as a final test set.
# stratify=y keeps the class proportions of "Depression" the same in both
# partitions, consistent with the StratifiedKFold used for cross-validation
# later in the notebook; random_state fixes the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Glue each feature matrix back onto its target column (aligned on the row
# index) so that the training and hold-out partitions are each a single table.
Train_df = pd.concat((X_train, y_train), axis="columns")
Test_df = pd.concat((X_test, y_test), axis="columns")

# Persist both partitions as CSV files, leaving the row index out.
Test_df.to_csv(path_or_buf='Test_df.csv', index=False)
Train_df.to_csv(path_or_buf='Train_df.csv', index=False)

In [None]:
# Baseline pipeline: standardise features, oversample with SMOTE, then fit an
# MLP with default architecture.
# NOTE(review): not used by any cell visible here — presumably the base
# estimator for a later hyperparameter search; confirm.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    # Seeded so the synthetic samples are the same on every run.
    ('smote', SMOTE(random_state=42)),
    ('model', MLPClassifier(
        max_iter=800,
        # Stop training once the internal validation score has not improved
        # for n_iter_no_change consecutive epochs.
        early_stopping=True,
        n_iter_no_change=20,
        # Seeded so weight initialisation, batch shuffling and the internal
        # early-stopping split are reproducible.
        random_state=42,
    )),
])

MLP1: hidden layers = (128, 64, 32), activation = logistic

In [None]:
# Features/target are rebuilt from Train_df (the training partition), so the
# cross-validation that uses X and y does not touch the rows kept in Test_df.
X = Train_df.drop(columns=["Depression"])
y = Train_df["Depression"]

# MLP1: three hidden layers (128, 64, 32) with logistic activation.
pipelineMLP1 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    # SMOTE lives inside the pipeline so oversampling is applied only to the
    # data passed to fit(); seeded so the synthetic samples are reproducible.
    ('smote', SMOTE(random_state=42)),
    ('model', MLPClassifier(
        hidden_layer_sizes=(128, 64, 32),
        batch_size=64,
        max_iter=800,
        alpha=0.01,
        activation='logistic',
        learning_rate_init=0.0001,
        # Stop once the internal validation score has not improved for
        # n_iter_no_change consecutive epochs.
        early_stopping=True,
        n_iter_no_change=20,
        # Seeded so weight initialisation, batch shuffling and the internal
        # early-stopping split are reproducible between runs and folds.
        random_state=42,
    )),
])

In [None]:
# 10-fold stratified cross-validation of pipelineMLP1 on the training data (X, y).
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One entry per fold, appended in fold order.
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []

for train_index, test_index in kfold.split(X, y):
    # Fold-local names on purpose: rebinding X_train/X_test/y_train/y_test here
    # would overwrite the hold-out split created by train_test_split earlier.
    X_fold_train, X_fold_val = X.iloc[train_index], X.iloc[test_index]
    y_fold_train, y_fold_val = y.iloc[train_index], y.iloc[test_index]

    # fit() refits scaler, SMOTE and model on this fold's training part only;
    # the validation part is scaled with those statistics and never oversampled.
    pipelineMLP1.fit(X_fold_train, y_fold_train)
    predictions = pipelineMLP1.predict(X_fold_val)

    # Macro averaging weights both classes equally regardless of their frequency.
    accuracy_scores.append(accuracy_score(y_fold_val, predictions))
    precision_scores.append(precision_score(y_fold_val, predictions, average='macro'))
    recall_scores.append(recall_score(y_fold_val, predictions, average='macro'))
    f1_scores.append(f1_score(y_fold_val, predictions, average='macro'))

print("Resultados das métricas por fold:")
print("-----------------------------------")

# Iterate over the collected scores instead of a hard-coded range(10) so the
# report always matches the number of folds actually evaluated.
fold_metrics = zip(accuracy_scores, precision_scores, recall_scores, f1_scores)
for fold_number, (fold_accuracy, fold_precision, fold_recall, fold_f1) in enumerate(fold_metrics, start=1):
    print(f"Fold {fold_number}:")
    print(f"  Accuracy : {fold_accuracy:.4f}")
    print(f"  Precision: {fold_precision:.4f}")
    print(f"  Recall   : {fold_recall:.4f}")
    print(f"  F1-Score : {fold_f1:.4f}")
    print("-----------------------------------")

# Averages across all folds.
print("\nMédias gerais:")
print(f"Accuracy médio : {np.mean(accuracy_scores):.4f}")
print(f"Precision média: {np.mean(precision_scores):.4f}")
print(f"Recall médio   : {np.mean(recall_scores):.4f}")
print(f"F1 médio       : {np.mean(f1_scores):.4f}")

Resultados das métricas por fold:
-----------------------------------
Fold 1:
  Accuracy : 0.8386
  Precision: 0.8330
  Recall   : 0.8371
  F1-Score : 0.8347
-----------------------------------
Fold 2:
  Accuracy : 0.8395
  Precision: 0.8339
  Recall   : 0.8379
  F1-Score : 0.8356
-----------------------------------
Fold 3:
  Accuracy : 0.8502
  Precision: 0.8452
  Recall   : 0.8469
  F1-Score : 0.8460
-----------------------------------
Fold 4:
  Accuracy : 0.8493
  Precision: 0.8441
  Recall   : 0.8470
  F1-Score : 0.8454
-----------------------------------
Fold 5:
  Accuracy : 0.8506
  Precision: 0.8453
  Recall   : 0.8491
  F1-Score : 0.8469
-----------------------------------
Fold 6:
  Accuracy : 0.8484
  Precision: 0.8432
  Recall   : 0.8506
  F1-Score : 0.8456
-----------------------------------
Fold 7:
  Accuracy : 0.8336
  Precision: 0.8281
  Recall   : 0.8343
  F1-Score : 0.8302
-----------------------------------
Fold 8:
  Accuracy : 0.8349
  Precision: 0.8293
  Recall   : 0

MLP2: hidden layers = (64, 32), activation = tanh

In [None]:
# Features/target are rebuilt from Train_df (the training partition), so the
# cross-validation that uses X and y does not touch the rows kept in Test_df.
X = Train_df.drop(columns=["Depression"])
y = Train_df["Depression"]

# MLP2: two hidden layers (64, 32) with tanh activation.
pipelineMLP2 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    # SMOTE lives inside the pipeline so oversampling is applied only to the
    # data passed to fit(); seeded so the synthetic samples are reproducible.
    ('smote', SMOTE(random_state=42)),
    ('model', MLPClassifier(
        hidden_layer_sizes=(64, 32),
        batch_size=64,
        max_iter=800,
        alpha=0.01,
        activation='tanh',
        learning_rate_init=0.0001,
        # Stop once the internal validation score has not improved for
        # n_iter_no_change consecutive epochs.
        early_stopping=True,
        n_iter_no_change=20,
        # Seeded so weight initialisation, batch shuffling and the internal
        # early-stopping split are reproducible between runs and folds.
        random_state=42,
    )),
])

In [None]:
# 10-fold stratified cross-validation of pipelineMLP2 on the training data (X, y).
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One entry per fold, appended in fold order.
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []

for train_index, test_index in kfold.split(X, y):
    # Fold-local names on purpose: rebinding X_train/X_test/y_train/y_test here
    # would overwrite the hold-out split created by train_test_split earlier.
    X_fold_train, X_fold_val = X.iloc[train_index], X.iloc[test_index]
    y_fold_train, y_fold_val = y.iloc[train_index], y.iloc[test_index]

    # fit() refits scaler, SMOTE and model on this fold's training part only;
    # the validation part is scaled with those statistics and never oversampled.
    pipelineMLP2.fit(X_fold_train, y_fold_train)
    predictions = pipelineMLP2.predict(X_fold_val)

    # Macro averaging weights both classes equally regardless of their frequency.
    accuracy_scores.append(accuracy_score(y_fold_val, predictions))
    precision_scores.append(precision_score(y_fold_val, predictions, average='macro'))
    recall_scores.append(recall_score(y_fold_val, predictions, average='macro'))
    f1_scores.append(f1_score(y_fold_val, predictions, average='macro'))

print("Resultados das métricas por fold:")
print("-----------------------------------")

# Iterate over the collected scores instead of a hard-coded range(10) so the
# report always matches the number of folds actually evaluated.
fold_metrics = zip(accuracy_scores, precision_scores, recall_scores, f1_scores)
for fold_number, (fold_accuracy, fold_precision, fold_recall, fold_f1) in enumerate(fold_metrics, start=1):
    print(f"Fold {fold_number}:")
    print(f"  Accuracy : {fold_accuracy:.4f}")
    print(f"  Precision: {fold_precision:.4f}")
    print(f"  Recall   : {fold_recall:.4f}")
    print(f"  F1-Score : {fold_f1:.4f}")
    print("-----------------------------------")

# Averages across all folds.
print("\nMédias gerais:")
print(f"Accuracy médio : {np.mean(accuracy_scores):.4f}")
print(f"Precision média: {np.mean(precision_scores):.4f}")
print(f"Recall médio   : {np.mean(recall_scores):.4f}")
print(f"F1 médio       : {np.mean(f1_scores):.4f}")

Resultados das métricas por fold:
-----------------------------------
Fold 1:
  Accuracy : 0.8444
  Precision: 0.8393
  Recall   : 0.8408
  F1-Score : 0.8400
-----------------------------------
Fold 2:
  Accuracy : 0.8417
  Precision: 0.8362
  Recall   : 0.8419
  F1-Score : 0.8384
-----------------------------------
Fold 3:
  Accuracy : 0.8489
  Precision: 0.8435
  Recall   : 0.8478
  F1-Score : 0.8453
-----------------------------------
Fold 4:
  Accuracy : 0.8511
  Precision: 0.8461
  Recall   : 0.8482
  F1-Score : 0.8470
-----------------------------------
Fold 5:
  Accuracy : 0.8475
  Precision: 0.8421
  Recall   : 0.8453
  F1-Score : 0.8435
-----------------------------------
Fold 6:
  Accuracy : 0.8484
  Precision: 0.8429
  Recall   : 0.8483
  F1-Score : 0.8450
-----------------------------------
Fold 7:
  Accuracy : 0.8380
  Precision: 0.8325
  Recall   : 0.8359
  F1-Score : 0.8339
-----------------------------------
Fold 8:
  Accuracy : 0.8345
  Precision: 0.8288
  Recall   : 0

MLP3: hidden layers = (32, 16), activation = ReLU


In [None]:
# Features/target are rebuilt from Train_df (the training partition), so the
# cross-validation that uses X and y does not touch the rows kept in Test_df.
X = Train_df.drop(columns=["Depression"])
y = Train_df["Depression"]

# MLP3: two hidden layers (32, 16) with ReLU activation.
pipelineMLP3 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    # SMOTE lives inside the pipeline so oversampling is applied only to the
    # data passed to fit(); seeded so the synthetic samples are reproducible.
    ('smote', SMOTE(random_state=42)),
    ('model', MLPClassifier(
        hidden_layer_sizes=(32, 16),
        batch_size=64,
        max_iter=800,
        alpha=0.01,
        activation='relu',
        learning_rate_init=0.0001,
        # Stop once the internal validation score has not improved for
        # n_iter_no_change consecutive epochs.
        early_stopping=True,
        n_iter_no_change=20,
        # Seeded so weight initialisation, batch shuffling and the internal
        # early-stopping split are reproducible between runs and folds.
        random_state=42,
    )),
])

In [None]:
# 10-fold stratified cross-validation of pipelineMLP3 on the training data (X, y).
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# One entry per fold, appended in fold order.
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []

for train_index, test_index in kfold.split(X, y):
    # Fold-local names on purpose: rebinding X_train/X_test/y_train/y_test here
    # would overwrite the hold-out split created by train_test_split earlier.
    X_fold_train, X_fold_val = X.iloc[train_index], X.iloc[test_index]
    y_fold_train, y_fold_val = y.iloc[train_index], y.iloc[test_index]

    # fit() refits scaler, SMOTE and model on this fold's training part only;
    # the validation part is scaled with those statistics and never oversampled.
    pipelineMLP3.fit(X_fold_train, y_fold_train)
    predictions = pipelineMLP3.predict(X_fold_val)

    # Macro averaging weights both classes equally regardless of their frequency.
    accuracy_scores.append(accuracy_score(y_fold_val, predictions))
    precision_scores.append(precision_score(y_fold_val, predictions, average='macro'))
    recall_scores.append(recall_score(y_fold_val, predictions, average='macro'))
    f1_scores.append(f1_score(y_fold_val, predictions, average='macro'))

print("Resultados das métricas por fold:")
print("-----------------------------------")

# Iterate over the collected scores instead of a hard-coded range(10) so the
# report always matches the number of folds actually evaluated.
fold_metrics = zip(accuracy_scores, precision_scores, recall_scores, f1_scores)
for fold_number, (fold_accuracy, fold_precision, fold_recall, fold_f1) in enumerate(fold_metrics, start=1):
    print(f"Fold {fold_number}:")
    print(f"  Accuracy : {fold_accuracy:.4f}")
    print(f"  Precision: {fold_precision:.4f}")
    print(f"  Recall   : {fold_recall:.4f}")
    print(f"  F1-Score : {fold_f1:.4f}")
    print("-----------------------------------")

# Averages across all folds.
print("\nMédias gerais:")
print(f"Accuracy médio : {np.mean(accuracy_scores):.4f}")
print(f"Precision média: {np.mean(precision_scores):.4f}")
print(f"Recall médio   : {np.mean(recall_scores):.4f}")
print(f"F1 médio       : {np.mean(f1_scores):.4f}")

Resultados das métricas por fold:
-----------------------------------
Fold 1:
  Accuracy : 0.8439
  Precision: 0.8386
  Recall   : 0.8412
  F1-Score : 0.8398
-----------------------------------
Fold 2:
  Accuracy : 0.8363
  Precision: 0.8308
  Recall   : 0.8363
  F1-Score : 0.8329
-----------------------------------
Fold 3:
  Accuracy : 0.8462
  Precision: 0.8408
  Recall   : 0.8446
  F1-Score : 0.8424
-----------------------------------
Fold 4:
  Accuracy : 0.8457
  Precision: 0.8406
  Recall   : 0.8425
  F1-Score : 0.8415
-----------------------------------
Fold 5:
  Accuracy : 0.8493
  Precision: 0.8440
  Recall   : 0.8467
  F1-Score : 0.8452
-----------------------------------
Fold 6:
  Accuracy : 0.8484
  Precision: 0.8430
  Recall   : 0.8487
  F1-Score : 0.8451
-----------------------------------
Fold 7:
  Accuracy : 0.8354
  Precision: 0.8297
  Recall   : 0.8342
  F1-Score : 0.8315
-----------------------------------
Fold 8:
  Accuracy : 0.8371
  Precision: 0.8315
  Recall   : 0