In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [11]:
train_df = pd.read_csv("data/train.csv")
target_column = "main.disorder"
features = [col for col in train_df.columns if col.startswith("AB.")]
train_df = train_df[features + [target_column]]
train_df

Unnamed: 0,AB.A.delta.a.FP1,AB.A.delta.b.FP2,AB.A.delta.c.F7,AB.A.delta.d.F3,AB.A.delta.e.Fz,AB.A.delta.f.F4,AB.A.delta.g.F8,AB.A.delta.h.T3,AB.A.delta.i.C3,AB.A.delta.j.Cz,...,AB.F.gamma.k.C4,AB.F.gamma.l.T4,AB.F.gamma.m.T5,AB.F.gamma.n.P3,AB.F.gamma.o.Pz,AB.F.gamma.p.P4,AB.F.gamma.q.T6,AB.F.gamma.r.O1,AB.F.gamma.s.O2,main.disorder
0,48.843890,46.533704,25.924618,30.221841,32.925321,37.285136,24.448520,14.944974,24.660186,37.710757,...,3.894055,2.269100,3.179024,3.375610,3.396393,3.583319,2.388181,4.271034,4.093793,Mood disorder
1,34.108015,22.838567,20.646824,18.203362,17.361846,16.311194,23.092874,8.016081,13.411826,18.135517,...,4.932718,2.630569,0.597911,1.625700,2.268950,3.123711,2.507125,1.333279,7.415794,Healthy control
2,31.084064,28.212342,30.467865,48.643000,28.101162,39.882668,25.202585,22.667850,26.969479,37.164146,...,2.252665,2.052866,1.912678,2.727597,2.840801,2.968606,6.313069,2.739807,7.644500,Schizophrenia
3,14.310468,13.947459,10.761600,13.844880,14.644941,12.350391,8.722063,7.969303,10.513884,13.064346,...,6.116184,8.282201,5.601934,5.800018,5.847705,5.805174,5.484827,5.648344,5.717446,Obsessive compulsive disorder
4,21.763093,23.938428,19.517805,24.859077,24.922886,22.290161,19.428280,18.424021,21.690679,24.637127,...,1.839297,1.533745,1.461961,1.657828,1.687908,1.787018,1.440013,1.483373,1.406914,Healthy control
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
847,22.432700,22.799740,16.881976,17.067949,20.126371,19.186354,17.597424,11.902542,12.181277,14.505690,...,3.057784,3.636604,2.827548,3.866637,3.512524,3.443553,4.168884,16.519171,5.993952,Schizophrenia
848,12.887332,14.295926,13.959261,24.067048,21.583108,21.555084,11.928572,15.523025,13.422621,14.280448,...,5.027578,2.954189,1.069369,2.999649,2.747144,3.617672,2.172734,1.884339,2.157979,Trauma and stress related disorder
849,20.055370,11.734445,15.719485,11.606580,14.575470,10.810190,5.809250,5.802912,12.896609,14.800809,...,1.169481,0.650115,1.169574,1.845590,1.124135,1.089021,1.180694,1.100077,0.741887,Mood disorder
850,26.430570,20.721589,25.486586,23.016673,23.685829,27.051576,25.842116,14.512679,20.501558,23.496235,...,1.040173,0.783690,0.902926,1.148628,1.318362,1.221849,0.976765,1.362347,1.168174,Mood disorder


In [12]:
label_encoder = LabelEncoder()
train_df[target_column] = label_encoder.fit_transform(train_df[target_column])
train_df.fillna(train_df.median(), inplace=True)

scaler = StandardScaler()
X = scaler.fit_transform(train_df[features])
y = train_df[target_column]

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train.shape, X_val.shape, y_train.shape, y_val.shape

((681, 114), (171, 114), (681,), (171,))

In [13]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
rf_model.fit(X_train, y_train)

y_pred = rf_model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_val, y_pred, target_names=label_encoder.classes_))

Validation Accuracy: 0.30409356725146197
Classification Report:
                                     precision    recall  f1-score   support

                Addictive disorder       0.24      0.32      0.28        34
                  Anxiety disorder       0.00      0.00      0.00        20
                   Healthy control       0.40      0.10      0.16        20
                     Mood disorder       0.35      0.68      0.46        53
     Obsessive compulsive disorder       0.00      0.00      0.00         5
                     Schizophrenia       0.22      0.10      0.13        21
Trauma and stress related disorder       0.20      0.06      0.09        18

                          accuracy                           0.30       171
                         macro avg       0.20      0.18      0.16       171
                      weighted avg       0.25      0.30      0.24       171

