## **XGBoost**

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_validate, StratifiedKFold

In [2]:
# Load the three CSV tables used by the notebook: training set, test set and
# the control training set (read in that order).
data_train, data_test, data_control = (
    pd.read_csv(csv_path)
    for csv_path in (
        'train_radiomics_hipocamp_mod.csv',
        'test_radiomics_hipocamp_mod.csv',
        'train_radiomics_occipital_CONTROL_mod.csv',
    )
)

In [3]:
# Split each table into its target column ('Transition') and the remaining
# columns, which are used as features.
y = data_train['Transition']
X = data_train.drop(columns='Transition')

y_control = data_control['Transition']
X_control = data_control.drop(columns='Transition')

In [4]:
# Standardize the features to mean 0 and standard deviation 1.
# `scaler` is fitted on the training features only and reused for the test set,
# so both go through exactly the same transformation.
scaler = StandardScaler()

X_scaled = scaler.fit_transform(X)
data_test_scaled = scaler.transform(data_test)

# The control features get their own scaler. Re-fitting `scaler` here would
# overwrite the training statistics, and any later `scaler.transform(...)`
# call would silently use the control mean/variance instead.
scaler_control = StandardScaler()
X_scaled_control = scaler_control.fit_transform(X_control)

In [5]:
# Encode the target as integer labels. The encoder learns its classes from the
# training target and is then applied to both the training and control targets.
le = LabelEncoder().fit(y)

y_encoded = le.transform(y)
y_encoded_control = le.transform(y_control)

In [6]:
# Model definition: tree-booster XGBoost classifier with a fixed seed
xgb = XGBClassifier(
    booster='gbtree',
    num_class=5,
    eval_metric='mlogloss',
    random_state=2024,
)

# 5-fold stratified splitter, shuffled with a fixed seed
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2024)

# Cross-validation on the scaled training data, scored with macro-averaged F1
scores = cross_validate(
    xgb,
    X_scaled,
    y_encoded,
    cv=stratified_kfold,
    scoring='f1_macro',
)

# Fit the model on the full (scaled) training set
xgb.fit(X_scaled, y_encoded)

# Keep only the per-fold macro F1 values, plus their mean and standard deviation
f1_macro_scores = scores['test_score']
f1_macro_mean, f1_macro_std = f1_macro_scores.mean(), f1_macro_scores.std()

print("F1 Macro Scores:", f1_macro_scores)
print(f"Média do F1 Macro: {f1_macro_mean:.4f}")
print(f"Desvio Padrão do F1 Macro: {f1_macro_std:.4f}")

F1 Macro Scores: [0.2641628  0.24462254 0.35477716 0.26022792 0.31411594]
Média do F1 Macro: 0.2876
Desvio Padrão do F1 Macro: 0.0409


In [7]:
# Predict the labels for the test dataset.
# The model was fitted on the standardized training features (X_scaled), so the
# test features must be the ones transformed by the same scaler
# (data_test_scaled); feeding the raw `data_test` would compare unscaled values
# against split thresholds learned on scaled data.
predictions = xgb.predict(data_test_scaled)

# Decode the integer labels back to the original categories
predictions_decoded = le.inverse_transform(predictions)

In [8]:
# Save the predictions to a CSV file.
# 'Result' holds the decoded labels (the original categories of the target)
# rather than the integer codes produced by the LabelEncoder; 'RowId' is the
# test DataFrame index shifted by one.
output = pd.DataFrame({'RowId': data_test.index + 1, 'Result': predictions_decoded})
output.to_csv('Subs/xgb_test_predictions.csv', index=False)