<a href="https://colab.research.google.com/github/sumodha/git/blob/main/Mental_Health_Disorder_Data_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## APPROACH 1: MULTICLASS CLASSIFICATION



# Preprocessing Data

In [None]:
import pandas as pd
import numpy as np


In [None]:
# Integer class id for each `main.disorder` category; the id is the category's
# position in this list, so "Healthy control" is class 0.
disorder_mapping = {
    disorder: class_id
    for class_id, disorder in enumerate(
        [
            "Healthy control",
            "Mood disorder",
            "Schizophrenia",
            "Obsessive compulsive disorder",
            "Anxiety disorder",
            "Trauma and stress related disorder",
            "Addictive disorder",
        ]
    )
}

In [None]:
# Load the EEG table and discard the columns that are not used in the model.
df = pd.read_csv("train_EEG.csv").drop(
    columns=['ID', 'sex', 'age', 'eeg.date', 'IQ', 'education', 'specific.disorder', 'Unnamed: 122']
)

# Frequency of each disorder in the given population, rarest class first.
df['main.disorder'].value_counts(ascending=True)


Unnamed: 0_level_0,count
main.disorder,Unnamed: 1_level_1
Obsessive compulsive disorder,38
Healthy control,87
Anxiety disorder,91
Trauma and stress related disorder,106
Schizophrenia,109
Addictive disorder,171
Mood disorder,250


In [None]:
# Encode the disorder names as integer class ids.
# The numeric-dtype guard keeps this cell safe to re-run: pushing already-encoded
# integers through `disorder_mapping` a second time would turn every label into NaN.
if not pd.api.types.is_numeric_dtype(df['main.disorder']):
    encoded = df['main.disorder'].map(disorder_mapping)
    if encoded.isna().any():
        # .map() yields NaN for any name absent from the mapping; fail loudly
        # instead of letting NaN labels reach the models.
        unknown = sorted(df.loc[encoded.isna(), 'main.disorder'].astype(str).unique())
        raise ValueError(f"Labels missing from disorder_mapping: {unknown}")
    df['main.disorder'] = encoded.astype("int64")

# Preview only: the label column (position 0) plus the feature columns at
# positions 21-76. The selection is displayed but not assigned, so `df` keeps
# every feature column afterwards.
df.iloc[:, [0] + list(range(21, 77))]

Unnamed: 0,main.disorder,AB.B.theta.b.FP2,AB.B.theta.c.F7,AB.B.theta.d.F3,AB.B.theta.e.Fz,AB.B.theta.f.F4,AB.B.theta.g.F8,AB.B.theta.h.T3,AB.B.theta.i.C3,AB.B.theta.j.Cz,...,AB.D.beta.j.Cz,AB.D.beta.k.C4,AB.D.beta.l.T4,AB.D.beta.m.T5,AB.D.beta.n.P3,AB.D.beta.o.Pz,AB.D.beta.p.P4,AB.D.beta.q.T6,AB.D.beta.r.O1,AB.D.beta.s.O2
0,1,18.303788,13.633757,22.041501,25.790113,25.823262,13.680359,10.190899,20.473551,30.154422,...,44.951035,46.139032,19.652424,21.509790,39.690256,47.537016,49.421018,32.762813,38.909935,34.682660
1,0,10.925079,6.613441,13.709309,15.116406,13.940799,9.737385,4.203904,10.596295,17.227815,...,12.561847,10.707997,6.391980,2.105308,6.449862,10.164398,10.185170,7.879386,3.574329,10.248237
2,2,23.856276,17.192334,27.821940,23.748798,28.188047,17.034603,11.359047,21.223044,26.626620,...,20.179951,15.792660,9.079362,15.583569,23.866946,14.442586,21.395784,22.512629,18.103919,18.985400
3,3,6.238748,5.580928,7.778670,8.253590,6.514957,4.101726,4.261923,5.598839,7.597268,...,9.583891,8.876966,7.714012,6.592033,13.158414,14.713435,11.898775,6.647382,8.314214,10.286457
4,0,20.500971,18.979308,27.082688,27.486381,20.751768,14.354886,13.017812,21.256323,25.306429,...,11.511984,11.211131,7.407756,7.071076,9.746076,10.957198,10.030780,6.566334,7.986665,6.538834
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
847,2,13.571850,7.966899,13.132145,17.836374,14.810302,9.291721,4.580396,8.576046,10.707775,...,10.593145,10.674908,7.756236,5.812431,12.415957,11.073957,13.638661,10.202449,19.303015,8.765116
848,5,12.308585,7.845604,16.138906,14.159804,18.262581,10.427668,7.107656,11.541728,13.789083,...,16.543733,17.078695,12.201750,4.277006,12.914835,11.304075,18.044994,12.226328,8.589709,10.948265
849,1,6.452553,4.626529,7.723620,10.294436,8.058488,3.346537,2.926677,8.548068,9.972517,...,13.541422,11.324239,5.565310,8.397118,15.908713,17.039693,16.081483,18.541701,23.400039,11.554367
850,1,29.545353,24.762565,33.244126,37.213638,39.148094,30.127781,23.975158,26.316786,31.819805,...,28.395532,29.196254,14.508960,16.294836,41.331719,61.375093,49.499607,23.333163,29.212448,29.097592


In [None]:
# Per-column count of missing values in the frame.
df.isnull().sum()

Unnamed: 0,0
main.disorder,0
AB.A.delta.a.FP1,0
AB.A.delta.b.FP2,0
AB.A.delta.c.F7,0
AB.A.delta.d.F3,0
...,...
COH.F.gamma.p.P4.r.O1,0
COH.F.gamma.p.P4.s.O2,0
COH.F.gamma.q.T6.r.O1,0
COH.F.gamma.q.T6.s.O2,0


# Splitting and Standardizing Data



In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [None]:
# Features: every column except the label. Target: the label column as a flat array.
x = df.drop(columns="main.disorder")
y = df["main.disorder"].to_numpy().ravel()

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
## STANDARDIZING X_TRAIN AND X_TEST
# The scaler is fitted on the training split only and then applied to both splits.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Support Vector Machine

In [None]:
from sklearn.svm import SVC

In [None]:
# Support vector classifier with a degree-4 polynomial kernel.
svc_model = SVC(
    kernel='poly',
    degree=4,
)

In [None]:
# Train on the standardized training split, then label the standardized test split.
# fit() returns the estimator itself, so the two steps can be chained.
y_pred = svc_model.fit(x_train_scaled, y_train).predict(x_test_scaled)

Determining Accuracy

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", cm)

Confusion Matrix:
 [[ 1 21  1  0  1  0  1]
 [ 2 47  1  0  0  1  0]
 [ 0 17  0  0  1  0  0]
 [ 0  4  0  0  0  1  0]
 [ 1 13  0  0  0  0  0]
 [ 1 17  0  0  0  0  1]
 [ 0 35  1  1  0  1  1]]


In [None]:
# Accuracy = correctly classified samples (the diagonal) / all samples in the matrix.
accuracy = cm.diagonal().sum() / cm.sum()
print("Accuracy:", accuracy)

Accuracy: 0.28654970760233917


In [None]:
# This is the first cell that calls classification_report, so it imports the
# function itself; without the import a fresh-kernel run fails with NameError.
from sklearn.metrics import classification_report

# zero_division=0 reports 0 for classes that were never predicted.
print(classification_report(y_test, y_pred, zero_division=0))

              precision    recall  f1-score   support

           0       0.20      0.04      0.07        25
           1       0.31      0.92      0.46        51
           2       0.00      0.00      0.00        18
           3       0.00      0.00      0.00         5
           4       0.00      0.00      0.00        14
           5       0.00      0.00      0.00        19
           6       0.33      0.03      0.05        39

    accuracy                           0.29       171
   macro avg       0.12      0.14      0.08       171
weighted avg       0.20      0.29      0.16       171



# Decision Tree Model

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Depth-limited decision tree. random_state pins the estimator's internal
# randomness so repeated runs give the same tree; 0 matches the seed already
# used for train_test_split.
dt_model = DecisionTreeClassifier(max_depth=25, random_state=0)

In [None]:

# The tree is trained and evaluated on the unscaled features.
y_pred = dt_model.fit(x_train, y_train).predict(x_test)

Determining Accuracy

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", cm)

Confusion Matrix:
 [[ 2  6  1  1  5  3  7]
 [ 5 15  5  2  5  8 11]
 [ 2  4  3  1  2  1  5]
 [ 0  2  0  0  0  0  3]
 [ 0  1  1  3  2  3  4]
 [ 1  9  2  0  2  2  3]
 [ 3 13  8  1  3  2  9]]


In [None]:
# Accuracy = correctly classified samples (the diagonal) / all samples in the matrix.
accuracy = cm.diagonal().sum() / cm.sum()
print("Accuracy:", accuracy)

Accuracy: 0.19298245614035087


In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1; zero_division=0 reports 0 for never-predicted classes.
print(classification_report(y_true=y_test, y_pred=y_pred, zero_division=0))

              precision    recall  f1-score   support

           0       0.15      0.08      0.11        25
           1       0.30      0.29      0.30        51
           2       0.15      0.17      0.16        18
           3       0.00      0.00      0.00         5
           4       0.11      0.14      0.12        14
           5       0.11      0.11      0.11        19
           6       0.21      0.23      0.22        39

    accuracy                           0.19       171
   macro avg       0.15      0.15      0.14       171
weighted avg       0.20      0.19      0.19       171



# Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# 100-tree random forest with depth-limited trees. random_state fixes the
# bootstrap sampling and feature sub-sampling so results repeat across runs;
# 0 matches the seed already used for train_test_split.
rfc_model = RandomForestClassifier(n_estimators=100, max_depth=25, random_state=0)

In [None]:
# The forest is trained and evaluated on the unscaled features.
y_pred = rfc_model.fit(x_train, y_train).predict(x_test)

Determining Accuracy

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", cm)

Confusion Matrix:
 [[ 1 14  2  0  2  0  6]
 [ 0 35  0  0  0  1 15]
 [ 0 12  1  0  1  0  4]
 [ 0  2  0  0  0  0  3]
 [ 0 11  1  0  1  0  1]
 [ 1 17  0  0  0  0  1]
 [ 0 29  2  0  0  0  8]]


In [None]:
# Accuracy = correctly classified samples (the diagonal) / all samples in the matrix.
accuracy = cm.diagonal().sum() / cm.sum()
print("Accuracy:", accuracy)

Accuracy: 0.26900584795321636


In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1; zero_division=0 reports 0 for never-predicted classes.
print(classification_report(y_true=y_test, y_pred=y_pred, zero_division=0))

              precision    recall  f1-score   support

           0       0.50      0.04      0.07        25
           1       0.29      0.69      0.41        51
           2       0.17      0.06      0.08        18
           3       0.00      0.00      0.00         5
           4       0.25      0.07      0.11        14
           5       0.00      0.00      0.00        19
           6       0.21      0.21      0.21        39

    accuracy                           0.27       171
   macro avg       0.20      0.15      0.13       171
weighted avg       0.25      0.27      0.20       171



# Neural Network

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [None]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch shuffling
# start from the same state on every run.
# NOTE(review): some GPU kernels may still be non-deterministic — confirm if
# bit-exact repeatability is required.
tf.keras.utils.set_random_seed(0)

# Fully connected classifier: one input per standardized feature, two ReLU
# hidden layers, and a 7-way softmax output (one unit per disorder class).
model = tf.keras.Sequential([
    # Input layer
    tf.keras.layers.InputLayer(shape=(x_train_scaled.shape[1],)),
    # Hidden layers
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    # Output layer: multiclass classification (7 classes)
    tf.keras.layers.Dense(7, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',  # labels are integer class ids, not one-hot vectors
    metrics=['accuracy'],
)


In [None]:
# Fit the network on the standardized training split; Keras sets aside 20% of
# that split for per-epoch validation.
history = model.fit(
    x=x_train_scaled,
    y=y_train,
    batch_size=32,
    epochs=20,
    validation_split=0.2,
    verbose=1,
)


Epoch 1/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - accuracy: 0.2067 - loss: 2.2440 - val_accuracy: 0.2117 - val_loss: 2.1673
Epoch 2/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.3389 - loss: 1.7159 - val_accuracy: 0.2701 - val_loss: 2.2001
Epoch 3/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.4589 - loss: 1.4798 - val_accuracy: 0.2409 - val_loss: 2.2071
Epoch 4/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5940 - loss: 1.1998 - val_accuracy: 0.2336 - val_loss: 2.2044
Epoch 5/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6120 - loss: 1.1085 - val_accuracy: 0.2409 - val_loss: 2.2849
Epoch 6/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6647 - loss: 1.0292 - val_accuracy: 0.2628 - val_loss: 2.4167
Epoch 7/20
[1m17/17[0m [32m━━━━━━

In [None]:
# Predicted class = index of the largest softmax output for each test sample.
y_pred = model.predict(x_test_scaled).argmax(axis=-1)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", cm)

Confusion Matrix:
 [[ 3  8  2  1  3  3  5]
 [ 4 15  8  3  9  5  7]
 [ 1  3  7  2  1  0  4]
 [ 0  3  0  0  0  0  2]
 [ 1  7  1  0  4  0  1]
 [ 2 10  1  0  3  2  1]
 [ 2 14  6  2  5  6  4]]


In [None]:
# Accuracy = correctly classified samples (the diagonal) / all samples in the matrix.
accuracy = cm.diagonal().sum() / cm.sum()
print("Accuracy:", accuracy)

Accuracy: 0.2046783625730994


In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1; zero_division=0 reports 0 for never-predicted classes.
print(classification_report(y_true=y_test, y_pred=y_pred, zero_division=0))


              precision    recall  f1-score   support

           0       0.23      0.12      0.16        25
           1       0.25      0.29      0.27        51
           2       0.28      0.39      0.33        18
           3       0.00      0.00      0.00         5
           4       0.16      0.29      0.21        14
           5       0.12      0.11      0.11        19
           6       0.17      0.10      0.13        39

    accuracy                           0.20       171
   macro avg       0.17      0.19      0.17       171
weighted avg       0.20      0.20      0.20       171



# Convolutional Neural Network

In [None]:
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
import numpy as np
from tensorflow.keras.regularizers import l2


In [None]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch shuffling
# start from the same state on every run.
# NOTE(review): some GPU kernels may still be non-deterministic — confirm if
# bit-exact repeatability is required.
tf.keras.utils.set_random_seed(0)

# 1D CNN that treats each sample's feature vector as a length-n_features
# sequence with a single channel.
model = tf.keras.Sequential([
    # Explicit input layer instead of passing `input_shape=` to the first Conv1D,
    # which Keras warns against for Sequential models.
    tf.keras.layers.InputLayer(shape=(x_train_scaled.shape[1], 1)),

    # First 1D convolutional layer
    tf.keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling1D(pool_size=2),

    # Second 1D convolutional layer
    tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling1D(pool_size=2),

    # Third 1D convolutional layer
    tf.keras.layers.Conv1D(filters=128, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling1D(pool_size=2),

    tf.keras.layers.Flatten(),

    # Fully connected layer with L2 weight regularization
    tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=l2(0.01)),

    # Output layer: one softmax unit per disorder class
    tf.keras.layers.Dense(7, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',  # sparse variant: labels are integer class ids, not one-hot
    metrics=['accuracy'],
)

# Training.
# NOTE(review): the test split doubles as validation data here, so the per-epoch
# val_* numbers are computed on the same rows as the final test evaluation.
# Consider validation_split on the training data to keep the test set untouched.
history = model.fit(
    x_train_scaled,
    y_train,
    epochs=20,
    batch_size=32,
    validation_data=(x_test_scaled, y_test),
)


Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 225ms/step - accuracy: 0.1866 - loss: 3.9539 - val_accuracy: 0.2982 - val_loss: 2.4930
Epoch 2/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 126ms/step - accuracy: 0.2803 - loss: 2.3817 - val_accuracy: 0.2982 - val_loss: 2.0705
Epoch 3/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 132ms/step - accuracy: 0.2849 - loss: 2.0243 - val_accuracy: 0.2982 - val_loss: 1.9678
Epoch 4/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 175ms/step - accuracy: 0.2711 - loss: 1.9092 - val_accuracy: 0.2982 - val_loss: 1.9636
Epoch 5/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 171ms/step - accuracy: 0.2846 - loss: 1.8523 - val_accuracy: 0.2690 - val_loss: 1.8842
Epoch 6/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 126ms/step - accuracy: 0.3579 - loss: 1.7838 - val_accuracy: 0.3041 - val_loss: 1.9496
Epoch 7/20
[1m22/22[0m [32m━━━━━━━━━

In [None]:
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
test_loss, test_accuracy = model.evaluate(x=x_test_scaled, y=y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.1686 - loss: 4.7093
Test Accuracy: 18.71%


# APPROACH 2: BINARY CLASSIFICATION (ONE DISORDER VS. HEALTHY CONTROL)

# Anxiety Disorder

Preprocessing Data

In [None]:
import pandas as pd
import numpy as np

# Reload the raw table and discard the columns that are not used in the model.
anxiety_df = pd.read_csv("train_EEG.csv").drop(
    columns=['ID', 'sex', 'age', 'eeg.date', 'IQ', 'education', 'specific.disorder', 'Unnamed: 122']
)

# Keep only healthy controls and anxiety patients. The explicit .copy() makes the
# filtered frame independent of the full table, so the column assignment below
# does not trigger pandas' SettingWithCopyWarning.
anxiety_df = anxiety_df.loc[
    anxiety_df['main.disorder'].isin(['Healthy control', 'Anxiety disorder'])
].copy()

# Binary target: 1 = anxiety disorder, 0 = healthy control.
anxiety_df["main.disorder"] = (anxiety_df['main.disorder'] == 'Anxiety disorder').astype(int)



Splitting and Standardizing Data

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Features: every column except the label. Target: the binary label as a flat array.
x = anxiety_df.drop(columns="main.disorder")
y = anxiety_df["main.disorder"].to_numpy().ravel()

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)

## STANDARDIZING X_TRAIN AND X_TEST
# The scaler is fitted on the training split only and then applied to both splits.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

Support Vector Machine

In [None]:
from sklearn.svm import SVC

# Degree-4 polynomial-kernel SVM, trained on the standardized training split
# and used to label the standardized test split.
svc_model = SVC(
    kernel='poly',
    degree=4,
)
y_pred = svc_model.fit(x_train_scaled, y_train).predict(x_test_scaled)


SVM Accuracy

In [None]:
from sklearn.metrics import confusion_matrix

# Rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", cm)

# Accuracy = correctly classified samples (the diagonal) / all samples in the matrix.
accuracy = cm.diagonal().sum() / cm.sum()
print("Accuracy:", accuracy)


Confusion Matrix:
 [[ 4 26]
 [ 4 20]]
Accuracy: 0.4444444444444444


Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Depth-limited decision tree on the unscaled features. random_state pins the
# estimator's internal randomness so repeated runs give the same tree; 0 matches
# the seed already used for train_test_split.
dt_model = DecisionTreeClassifier(max_depth=25, random_state=0)

dt_model.fit(x_train, y_train)
y_pred = dt_model.predict(x_test)

Decision Tree Accuracy

In [None]:
from sklearn.metrics import confusion_matrix

# Rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", cm)

# Accuracy = correctly classified samples (the diagonal) / all samples in the matrix.
accuracy = cm.diagonal().sum() / cm.sum()
print("Accuracy:", accuracy)


Confusion Matrix:
 [[16 14]
 [12 12]]
Accuracy: 0.5185185185185185
