In [1]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import f1_score
from sklearn.metrics import fbeta_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import root_mean_squared_error
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn import preprocessing
import sklearn as skl

from sklearn.model_selection import cross_val_score

In [2]:
df = pd.read_csv("../datasets/train_radiomics_hipocamp.csv")

In [3]:
df.columns
df.dtypes

ID                                   object
Image                                object
Mask                                 object
diagnostics_Versions_PyRadiomics     object
diagnostics_Versions_Numpy           object
                                     ...   
lbp-3D-k_ngtdm_Contrast             float64
lbp-3D-k_ngtdm_Strength             float64
Sex                                   int64
Age                                 float64
Transition                           object
Length: 2181, dtype: object

In [4]:
df['Transition']

0        CN-CN
1        CN-CN
2        AD-AD
3       CN-MCI
4        CN-CN
        ...   
300      CN-CN
301      CN-CN
302     MCI-AD
303    MCI-MCI
304      CN-CN
Name: Transition, Length: 305, dtype: object

In [3]:
X = df.drop(['Transition'], axis=1)     
y = df['Transition'].to_frame()         

In [4]:
df_numerico = X.select_dtypes(include=[np.number])

In [13]:
from sklearn.model_selection import KFold
clf = DecisionTreeClassifier(criterion='gini', max_depth=10, random_state=2021)
scores = []
f1_scores = []
kf = KFold(n_splits=5)

for train, test in kf.split(df_numerico):
    clf.fit(df_numerico.loc[train,:], y.loc[train,:])
    score = clf.score(df_numerico.loc[train,:], y.loc[train,:])
    scores.append(score)
    y_predicted = clf.predict(df_numerico.loc[test,:])
    print("Confusion Matrix:")
    print(confusion_matrix(y.loc[test,:], y_predicted))
    f1 = f1_score(y.loc[test], y_predicted, average='weighted')  # Use 'weighted' para classes desbalanceadas
    f1_scores.append(f1)
    print(f"F1-Score: {f1:.2f}")
    print(score)

print("RESULT: %0.2f accuracy with a std deviation of %0.2f" % (np.mean(scores), np.std(scores)))
print("F1-Score: %0.2f with a std deviation of %0.2f" % (np.mean(f1_scores), np.std(f1_scores)))

Confusion Matrix:
[[6 2 0 2 5]
 [3 9 0 4 6]
 [0 0 0 0 2]
 [2 0 1 3 6]
 [1 4 1 1 3]]
F1-Score: 0.37
1.0
Confusion Matrix:
[[ 5  2  0  5  1]
 [ 1 10  0  1  1]
 [ 0  1  0  1  1]
 [ 5  2  0  2  1]
 [ 5  8  1  6  2]]
F1-Score: 0.27
1.0
Confusion Matrix:
[[ 3  4  0  4  0]
 [ 1 10  1  2  4]
 [ 0  0  0  1  1]
 [ 3  3  0  4  4]
 [ 3  4  1  1  7]]
F1-Score: 0.39
1.0
Confusion Matrix:
[[ 4  1  0  6  1]
 [ 2 13  0  2  5]
 [ 0  2  0  0  0]
 [ 6  4  0  4  4]
 [ 2  2  0  2  1]]
F1-Score: 0.36
1.0
Confusion Matrix:
[[5 0 0 1 3]
 [3 9 2 3 4]
 [0 1 0 0 0]
 [5 1 0 5 3]
 [2 2 1 4 7]]
F1-Score: 0.44
1.0
RESULT: 1.00 accuracy with a std deviation of 0.00
F1-Score: 0.37 with a std deviation of 0.06


In [7]:

clf = DecisionTreeClassifier(random_state=2021)
clf.fit(df_numerico, y)
print(clf.get_depth())

10


In [8]:
X_test = pd.read_csv("../datasets/test_radiomics_hipocamp.csv")
X_test = X_test.select_dtypes(include=[np.number])

In [9]:
predictions = clf.predict(X_test)

In [10]:
predictions

array(['MCI-AD', 'MCI-MCI', 'CN-CN', 'CN-CN', 'MCI-MCI', 'MCI-AD',
       'AD-AD', 'CN-CN', 'AD-AD', 'MCI-MCI', 'CN-CN', 'CN-CN', 'CN-CN',
       'CN-CN', 'CN-CN', 'MCI-AD', 'CN-CN', 'MCI-AD', 'MCI-AD', 'MCI-AD',
       'MCI-AD', 'MCI-MCI', 'CN-CN', 'MCI-MCI', 'MCI-MCI', 'CN-CN',
       'AD-AD', 'CN-CN', 'CN-CN', 'CN-CN', 'CN-MCI', 'MCI-MCI', 'CN-CN',
       'MCI-MCI', 'MCI-MCI', 'MCI-MCI', 'AD-AD', 'MCI-MCI', 'CN-CN',
       'CN-CN', 'MCI-MCI', 'CN-CN', 'AD-AD', 'CN-CN', 'CN-CN', 'CN-MCI',
       'MCI-MCI', 'MCI-MCI', 'AD-AD', 'AD-AD', 'MCI-AD', 'MCI-AD',
       'MCI-MCI', 'MCI-MCI', 'CN-CN', 'MCI-MCI', 'CN-CN', 'MCI-MCI',
       'MCI-AD', 'MCI-AD', 'CN-CN', 'MCI-MCI', 'CN-CN', 'MCI-AD', 'CN-CN',
       'CN-CN', 'CN-CN', 'CN-CN', 'MCI-AD', 'CN-CN', 'MCI-AD', 'AD-AD',
       'CN-CN', 'MCI-AD', 'AD-AD', 'MCI-AD', 'MCI-AD', 'MCI-MCI', 'CN-CN',
       'MCI-AD', 'AD-AD', 'MCI-AD', 'AD-AD', 'CN-CN', 'AD-AD', 'MCI-MCI',
       'AD-AD', 'MCI-AD', 'MCI-MCI', 'CN-CN', 'AD-AD', 'MCI-MCI',
      

In [11]:
data = pd.DataFrame({
    'RowId': np.arange(1, len(predictions) + 1), 
    'Result': predictions
})

#data.to_csv('decision_tree_predicts_maxdepth6.csv', index=False)