In [None]:
## DECISION TREE FOR ALL FEATURES

# IMPORTING LIBRARIES
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, roc_auc_score, plot_roc_curve
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import auc
import numpy as np

# Base decision tree over all features, with a fixed seed for reproducibility.
# It is intentionally left unfitted here: the GridSearchCV below fits its own
# clones of this estimator, so a separate fit at this point would be discarded.
dt = DecisionTreeClassifier(random_state=42)

# HYPERPARAMETER SEARCH FOR THE DECISION TREE

# Candidate values per hyperparameter (3 x 3 x 3 x 2 = 54 combinations)
param_grid = dict(
    max_depth=[5, 10, 15],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    criterion=['gini', 'entropy'],
)

# Exhaustive search: 5-fold cross-validation, ranked by accuracy, run with n_jobs=-1
dt_grid_search = GridSearchCV(
    dt,
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

# Run the search on the training data
dt_grid_search.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated accuracy
print("Best hyperparameters: ", dt_grid_search.best_params_)
print("Best accuracy: {:.2f}".format(dt_grid_search.best_score_))

# Predict on X_val with the refit best estimator held by the search object
y_pred_dt = dt_grid_search.predict(X_val)

# Confusion matrix of the grid-search predictions on X_val
# (rows: true labels, columns: predicted labels)
cm = confusion_matrix(y_val, y_pred_dt)
# fmt='d' prints the integer counts in full; seaborn's default '.2g' would
# abbreviate counts of three or more digits to scientific notation (e.g. 1.2e+03).
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()

# ROC curve and AUC of the grid-search model, evaluated on X_val / y_val
val_proba = dt_grid_search.predict_proba(X_val)
y_prob = val_proba[:, 1]  # second predict_proba column is used as the score
fpr, tpr, thresholds = roc_curve(y_val, y_prob)
roc_auc = auc(fpr, tpr)

# Model curve first, then the dashed diagonal reference line from (0, 0) to (1, 1)
roc_label = 'ROC curve (AUC = %0.2f)' % roc_auc
plt.plot(fpr, tpr, label=roc_label, color='darkorange', lw=2)
plt.plot([0, 1], [0, 1], linestyle='--', color='navy', lw=2)

plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()

# Predict using the best hyperparameters found from the grid search.
# GridSearchCV (default refit=True) has already refit a clone of the base tree
# (random_state=42) with best_params_ on X_train, so reuse that fitted model
# instead of constructing and training an identical tree a second time.
dt = dt_grid_search.best_estimator_
y_pred_dt = dt.predict(X_val)

# Print the classification report for the predictions on X_val
print(classification_report(y_val, y_pred_dt))

# Confusion matrix heatmap (rows: true labels, columns: predicted labels)
cm = confusion_matrix(y_val, y_pred_dt)
# fmt='d' prints the integer counts in full; seaborn's default '.2g' would
# abbreviate counts of three or more digits to scientific notation (e.g. 1.2e+03).
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()

# ROC curve and AUC of the tuned tree `dt`, evaluated on X_val / y_val
val_proba = dt.predict_proba(X_val)
y_prob = val_proba[:, 1]  # second predict_proba column is used as the score
fpr, tpr, thresholds = roc_curve(y_val, y_prob)
roc_auc = auc(fpr, tpr)

# Model curve first, then the dashed diagonal reference line from (0, 0) to (1, 1)
roc_label = 'ROC curve (AUC = %0.2f)' % roc_auc
plt.plot(fpr, tpr, label=roc_label, color='darkorange', lw=2)
plt.plot([0, 1], [0, 1], linestyle='--', color='navy', lw=2)

plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()
