## 1. Load data

In [1]:
# Project-local helpers from the `process` module; everything this notebook
# needs from it is imported here, although only `load_data` and
# `DatasetDirectory` are used in this cell.
from process import load_data, DatasetDirectory, prepare_dataset_v2, data_preprocessing

# Load the dataset identified by DatasetDirectory.BANK.
# NOTE(review): the type/shape of `data` is determined by `load_data` and is
# not visible from this notebook — confirm against process.load_data.
data = load_data(DatasetDirectory.BANK)

## 2. Prepare datasets

In [2]:
# The return value of data_preprocessing is discarded, so it presumably
# transforms `data` in place (or persists its results elsewhere) — TODO confirm.
# If it does mutate `data`, re-running this cell without re-running the load
# cell would preprocess already-preprocessed data.
data_preprocessing(data=data, dataset_directory=DatasetDirectory.BANK)
# Produce train/test features and labels; the ratios 0.8 and 0.2 sum to 1.0,
# i.e. presumably an 80/20 split with no separate validation set — verify
# against process.prepare_dataset_v2.
feature_train, label_train, feature_test, label_test = prepare_dataset_v2(data, train_ratio=0.8, test_ratio=0.2,
                                                                          dataset_directory=DatasetDirectory.BANK)

## 3. Build the decision tree classifier

### 3.1 Build model

In [3]:
from sklearn.tree import DecisionTreeClassifier

# Entropy-criterion tree capped at depth 4; random_state is pinned so that
# refitting on the same data gives the same tree.
clf = DecisionTreeClassifier(max_depth=4, random_state=42, criterion="entropy")

# fit() returns the fitted estimator itself, so training and inference on the
# held-out features can be chained; `clf` remains the fitted model for the
# cells that follow.
label_pred = clf.fit(feature_train, label_train).predict(feature_test)

# NOTE(review): both prints emit the complete label collections, which makes
# for a very long cell output on larger test sets.
print("Predicted labels:", label_pred)
print("Actual labels:", label_test)

Predicted labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0
 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 

### 3.2 Visualize the decision tree

In [None]:
# Draw the fitted decision tree inline with matplotlib.
#
# An alternative is to export the tree with sklearn.tree.export_graphviz and
# render it to a PNG file via graphviz.Source(...).render(...); that path needs
# the external graphviz package and is intentionally not used here.
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

fig, ax = plt.subplots(figsize=(20, 10))
plot_tree(
    clf,
    # Features are labelled by their column position (0, 1, 2, ...).
    feature_names=[str(i) for i in range(feature_train.shape[1])],
    # clf.classes_ holds the sorted unique labels seen during fit — the same
    # values np.unique(label_train) would give — and avoids depending on
    # numpy, which this notebook never imports (the previous code raised
    # NameError on a fresh kernel).
    class_names=[str(label) for label in clf.classes_],
    filled=True,
    rounded=True,
    ax=ax,
)
ax.set_title("Decision tree classifier")
plt.show()

## 4. Evaluate the decision tree classifier

In [4]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Overall fraction of test labels predicted correctly, shown to two decimals.
accuracy = accuracy_score(label_test, label_pred)
print(f"Accuracy: {accuracy:.2f}")

# The remaining metrics share one pattern: print a heading, then the metric
# computed from the true test labels and the predictions.
for heading, metric_fn in (
    ("Classification Report:", classification_report),
    ("Confusion Matrix:", confusion_matrix),
):
    print(heading)
    print(metric_fn(label_test, label_pred))

Accuracy: 0.90
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.97      0.94       807
           1       0.53      0.32      0.40        98

    accuracy                           0.90       905
   macro avg       0.73      0.64      0.67       905
weighted avg       0.88      0.90      0.88       905

Confusion Matrix:
[[780  27]
 [ 67  31]]
