## **Decision Tree Classifier**

In [1]:
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, cross_validate, StratifiedKFold

In [2]:
# Load the training, test and control datasets from their CSV files
# (paths are relative to the notebook's working directory).
data_train, data_test, data_control = (
    pd.read_csv(csv_path)
    for csv_path in (
        'train_radiomics_hipocamp_mod.csv',
        'test_radiomics_hipocamp_mod.csv',
        'train_radiomics_occipital_CONTROL_mod.csv',
    )
)

In [3]:
# Split each labelled dataset into a target vector and a feature matrix.
# 'Transition' is the label column; every remaining column is kept as a feature.
y = data_train['Transition']
X = data_train.drop(columns='Transition')

y_control = data_control['Transition']
X_control = data_control.drop(columns='Transition')

In [4]:
# Define the model: a decision tree with a fixed seed so runs are reproducible
tree_params = {'random_state': 2024}
clf = DecisionTreeClassifier(**tree_params)

## Hold-Out Validation ##

In [5]:
# Hold-out validation on the control dataset (X_control, y_control):
# 80% train / 20% test with a fixed seed for reproducibility.
# Stratify on y_control -- the labels of the data actually being split -- so
# the class proportions of this dataset are preserved in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X_control, y_control, test_size=0.2, random_state=2024, stratify=y_control
)

# Fit the model on the training partition
clf.fit(X_train, y_train)

# Predict on the held-out partition
y_pred = clf.predict(X_test)

# Macro-averaged F1 score on the held-out partition
f1_macro = f1_score(y_test, y_pred, average='macro')
print("F1-Score (macro):", f1_macro)

F1-Score (macro): 0.22317266067266067


In [5]:
# Hold-out validation on the main training data (X, y): keep 20% aside as a
# test partition, stratified on the labels, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=2024,
)

# Train on the 80% partition and predict the held-out 20% in one chained call
# (fit returns the fitted estimator itself)
y_pred = clf.fit(X_train, y_train).predict(X_test)

# Report the macro-averaged F1 score
f1_macro = f1_score(y_true=y_test, y_pred=y_pred, average='macro')
print("F1-Score (macro):", f1_macro)

F1-Score (macro): 0.31374613003095975


## Stratified K-Fold Cross-Validation ##

In [5]:
# 5-fold stratified cross-validation, shuffled with a fixed seed
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2024)

# Score the model on every fold using macro-averaged F1
scores = cross_validate(
    estimator=clf,
    X=X,
    y=y,
    scoring='f1_macro',
    cv=stratified_kfold,
)

# Refit the model on the full training data
clf.fit(X, y)

# With a single scoring metric the per-fold results are stored under 'test_score'
f1_macro_scores = scores['test_score']
print("F1 Macro Scores:", f1_macro_scores)

# Mean and standard deviation of the per-fold scores
# (np.std defaults to ddof=0, i.e. the population standard deviation)
f1_macro_mean = np.mean(f1_macro_scores)
f1_macro_std = np.std(f1_macro_scores)
print(f"Média do F1 Macro: {f1_macro_mean:.4f}")
print(f"Desvio Padrão do F1 Macro: {f1_macro_std:.4f}")

F1 Macro Scores: [0.30179724 0.26426317 0.21182807 0.27723315 0.23785425]
Média do F1 Macro: 0.2586
Desvio Padrão do F1 Macro: 0.0312


In [6]:
# Predict the labels for the test dataset.
# Select the training feature columns explicitly so the model receives the
# same columns, in the same order, as the X it is fitted on; a missing
# feature column fails immediately with a KeyError.
predictions = clf.predict(data_test[X.columns])

In [7]:
# Salvar as previsões num ficheiro csv
output = pd.DataFrame({'RowId': data_test.index + 1, 'Result': predictions})
output.to_csv('Subs/clf_test_predictions.csv', index=False)