In [11]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, Flatten, LSTM, GRU
from tensorflow.keras.optimizers import Adam
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score


In [12]:
import pandas as pd

# Load dataset
df = pd.read_csv("mental_health_dataset.csv")
df.head()


Unnamed: 0,Patient_ID,Age,Gender,Sleep_Hours,Stress_Level,Anxiety_Score,Depression_Score,Heart_Rate,Suicidal_Thoughts,Diagnosis
0,P0001,56,Male,5.2,9,9,4,76,0,Anxiety
1,P0002,46,Female,5.3,3,2,6,97,0,
2,P0003,32,Male,5.8,1,8,5,73,0,
3,P0004,60,Other,8.0,7,5,7,82,0,Stress
4,P0005,25,Male,4.4,10,2,10,84,0,Stress


In [13]:
## Encode categorical variables ##
label_encoder = LabelEncoder()
df["Gender"] = label_encoder.fit_transform(df["Gender"])
df["Diagnosis"] = label_encoder.fit_transform(df["Diagnosis"])
df.head()


Unnamed: 0,Patient_ID,Age,Gender,Sleep_Hours,Stress_Level,Anxiety_Score,Depression_Score,Heart_Rate,Suicidal_Thoughts,Diagnosis
0,P0001,56,1,5.2,9,9,4,76,0,0
1,P0002,46,0,5.3,3,2,6,97,0,4
2,P0003,32,1,5.8,1,8,5,73,0,4
3,P0004,60,2,8.0,7,5,7,82,0,3
4,P0005,25,1,4.4,10,2,10,84,0,3


In [14]:
# Split features and target
X = df.drop(columns=["Patient_ID", "Diagnosis"])
y = df["Diagnosis"]
X.head()


Unnamed: 0,Age,Gender,Sleep_Hours,Stress_Level,Anxiety_Score,Depression_Score,Heart_Rate,Suicidal_Thoughts
0,56,1,5.2,9,9,4,76,0
1,46,0,5.3,3,2,6,97,0
2,32,1,5.8,1,8,5,73,0
3,60,2,8.0,7,5,7,82,0
4,25,1,4.4,10,2,10,84,0


In [15]:
# Scale numerical features
scaler = StandardScaler()
X = scaler.fit_transform(X)
print(X)

[[ 1.11288175e+00 -1.22812780e-03 -4.41103880e-01 ... -5.38763882e-01
  -5.60035503e-01 -4.11813845e-01]
 [ 3.71652397e-01 -1.22935593e+00 -3.82710462e-01 ...  1.72006411e-01
   9.21166709e-01 -4.11813845e-01]
 [-6.66068696e-01 -1.22812780e-03 -9.07433716e-02 ... -1.83378736e-01
  -7.71635819e-01 -4.11813845e-01]
 ...
 [-1.03668337e+00  1.22689968e+00  1.19391182e+00 ... -1.60491932e+00
   6.39032954e-01  2.42828164e+00]
 [ 1.48349643e+00 -1.22935593e+00  1.71945259e+00 ... -1.24953417e+00
  -1.33590333e+00  2.42828164e+00]
 [-1.62966685e+00 -1.22935593e+00  1.13551841e+00 ...  5.27391557e-01
   1.76756797e+00 -4.11813845e-01]]


In [16]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(800, 8) (200, 8) (800,) (200,)


In [18]:

# Function to evaluate model
def evaluate_model(model, X_train, y_train, X_test, y_test, is_nn=False):
    if is_nn:
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        model.fit(X_train, y_train, epochs=20, batch_size=16, validation_data=(X_test, y_test), verbose=0)
        loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
    
    return accuracy

# Define models
models = {
    "MLP": Sequential([
        Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(len(np.unique(y_train)), activation='softmax')
    ]),
    "CNN": Sequential([
        Conv1D(32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(len(np.unique(y_train)), activation='softmax')
    ]),
    "LSTM": Sequential([
        LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
        LSTM(50),
        Dense(64, activation='relu'),
        Dense(len(np.unique(y_train)), activation='softmax')
    ]),
    "GRU": Sequential([
        GRU(50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
        GRU(50),
        Dense(64, activation='relu'),
        Dense(len(np.unique(y_train)), activation='softmax')
    ]),
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(n_estimators=100),
    "XGBoost": XGBClassifier(eval_metric='mlogloss')
}

# Evaluate each model
accuracies = {}
for name, model in models.items():
    print(f"Training {name}...")
    is_nn = isinstance(model, Sequential)
    accuracies[name] = evaluate_model(model, X_train, y_train, X_test, y_test, is_nn)

# Print accuracies
for model_name, acc in accuracies.items():
    print(f"{model_name}: Test Accuracy = {acc * 100:.2f}%")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)


Training MLP...
Training CNN...
Training LSTM...
Training GRU...
Training SVM...
Training Random Forest...
Training XGBoost...
MLP: Test Accuracy = 89.00%
CNN: Test Accuracy = 97.50%
LSTM: Test Accuracy = 92.00%
GRU: Test Accuracy = 98.00%
SVM: Test Accuracy = 88.50%
Random Forest: Test Accuracy = 100.00%
XGBoost: Test Accuracy = 100.00%
