In [6]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import common as common
import pandas as pd

In [7]:
train_df = common.get_train_df()
train_df = common.over_sample_df(train_df)
target_column, feature_column = common.get_column_names(train_df)

label_encoder = LabelEncoder()
scaler = StandardScaler()

y = label_encoder.fit_transform(train_df[target_column])
X = scaler.fit_transform(train_df[feature_column])

In [8]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Train XGBoost model
xgb_model = xgb.XGBClassifier(
    n_estimators=200,
    learning_rate=0.0005,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    use_label_encoder=False,
    eval_metric="mlogloss"
)
xgb_model.fit(X_train, y_train)

# Evaluate on validation set
y_pred = xgb_model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_val, y_pred, target_names=label_encoder.classes_))

Parameters: { "use_label_encoder" } are not used.



Validation Accuracy: 0.6085714285714285
Classification Report:
                                     precision    recall  f1-score   support

                Addictive disorder       0.37      0.38      0.37        50
                  Anxiety disorder       0.70      0.74      0.72        50
                   Healthy control       0.71      0.75      0.73        59
                     Mood disorder       0.42      0.22      0.29        51
     Obsessive compulsive disorder       0.62      0.94      0.74        34
                     Schizophrenia       0.58      0.78      0.67        45
Trauma and stress related disorder       0.78      0.57      0.66        61

                          accuracy                           0.61       350
                         macro avg       0.60      0.62      0.60       350
                      weighted avg       0.60      0.61      0.59       350



In [9]:
test_df = pd.read_csv("data.csv")
X_test = scaler.transform(test_df[feature_column])
y_test = label_encoder.fit_transform(test_df[target_column])
y_pred = xgb_model.predict(X_test)
y_pred = label_encoder.inverse_transform(y_pred)
y_test = label_encoder.inverse_transform(y_test)

output_df = pd.DataFrame({"ID": test_df["ID"], "y_pred": y_pred, "y_true": y_test})
output_df

Unnamed: 0,ID,y_pred,y_true
0,17,Addictive disorder,Schizophrenia
1,28,Healthy control,Addictive disorder
2,33,Schizophrenia,Anxiety disorder
3,41,Schizophrenia,Schizophrenia
4,48,Healthy control,Schizophrenia
...,...,...,...
88,885,Mood disorder,Healthy control
89,896,Healthy control,Obsessive compulsive disorder
90,917,Addictive disorder,Anxiety disorder
91,918,Mood disorder,Anxiety disorder


In [10]:
common.get_accuracy(output_df)

Accuracy: 13.98%
