In [44]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [45]:
df = pd.read_csv('Cardiovascular_Disease_Dataset.csv')
df.head()

Unnamed: 0,patientid,age,gender,chestpain,restingBP,serumcholestrol,fastingbloodsugar,restingrelectro,maxheartrate,exerciseangia,oldpeak,slope,noofmajorvessels,target
0,103368,53,1,2,171,0,0,1,147,0,5.3,3,3,1
1,119250,40,1,0,94,229,0,1,115,0,3.7,1,1,0
2,119372,49,1,2,133,142,0,0,202,1,5.0,1,0,0
3,132514,43,1,0,138,295,1,1,153,0,3.2,2,2,1
4,146211,31,1,1,199,0,0,2,136,0,5.3,3,2,1


In [46]:
# DECISION TREE CLASS
class Node:
    def __init__(self, gini, num_samples, num_samples_per_class, predicted_class):
        self.gini = gini
        self.num_samples = num_samples
        self.num_samples_per_class = num_samples_per_class
        self.predicted_class = predicted_class
        self.feature_index = 0
        self.threshold = 0
        self.left = None
        self.right = None

class Classifier:
    def __init__(self, max_depth=None):
        self.max_depth = max_depth

    def fit(self, X, y):
        self.n_classes_ = len(np.unique(y))
        self.n_features_ = X.shape[1]
        self.tree_ = self._grow_tree(X, y)

    def predict(self, X):
        return [self._predict(inputs) for inputs in X]

    def _predict(self, inputs):
        node = self.tree_
        while node.left:
            if inputs[node.feature_index] < node.threshold:
                node = node.left
            else:
                node = node.right
        return node.predicted_class

    def _grow_tree(self, X, y, depth=0):
        num_samples_per_class = [np.sum(y == i) for i in range(self.n_classes_)]
        predicted_class = np.argmax(num_samples_per_class)
        node = DecisionTreeNode(
            gini=self._gini(y),
            num_samples=len(y),
            num_samples_per_class=num_samples_per_class,
            predicted_class=predicted_class,
        )

        if depth < self.max_depth:
            idx, thr = self._best_split(X, y)
            if idx is not None:
                indices_left = X[:, idx] < thr
                X_left, y_left = X[indices_left], y[indices_left]
                X_right, y_right = X[~indices_left], y[~indices_left]
                node.feature_index = idx
                node.threshold = thr
                node.left = self._grow_tree(X_left, y_left, depth + 1)
                node.right = self._grow_tree(X_right, y_right, depth + 1)
        return node

    def _gini(self, y):
        m = len(y)
        return 1.0 - sum((np.sum(y == c) / m) ** 2 for c in range(self.n_classes_))

    # chatgpt assisted with this function
    def _best_split(self, X, y):
        m, n = X.shape
        if m <= 1:
            return None, None

        num_parent = [np.sum(y == c) for c in range(self.n_classes_)]

        best_gini = 1.0 - sum((num / m) ** 2 for num in num_parent)
        best_idx, best_thr = None, None

        for idx in range(n):
            thresholds, classes = zip(*sorted(zip(X[:, idx], y)))
            num_left = [0] * self.n_classes_
            num_right = num_parent.copy()
            for i in range(1, m):
                c = classes[i - 1]
                num_left[c] += 1
                num_right[c] -= 1
                gini_left = 1.0 - sum(
                    (num_left[x] / i) ** 2 for x in range(self.n_classes_)
                )
                gini_right = 1.0 - sum(
                    (num_right[x] / (m - i)) ** 2 for x in range(self.n_classes_)
                )
                gini = (i * gini_left + (m - i) * gini_right) / m
                if thresholds[i] == thresholds[i - 1]:
                    continue
                if gini < best_gini:
                    best_gini = gini
                    best_idx = idx
                    best_thr = (thresholds[i] + thresholds[i - 1]) / 2
                    
        return best_idx, best_thr

In [47]:
X = df.drop(columns=['target']).values
y = df['target'].values

In [48]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [49]:
clf = Classifier(max_depth=5)
clf.fit(X_train, y_train)

In [50]:
y_pred = clf.predict(X_test)

In [51]:
from sklearn.metrics import accuracy_score, f1_score
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("F1 Score:", f1)

Accuracy: 0.955
F1 Score: 0.9613733905579399


In [43]:
# manually printing decision tree
# formatting was done by chatgpt
def print_tree(node, feature_names, class_names, depth=0):
    indent = "  " * depth
    if node is not None:
        if node.feature_index != -1:
            feature_name = feature_names[node.feature_index]
            threshold = node.threshold
            print(f"{indent}if {feature_name} <= {threshold}:")
            print_tree(node.left, feature_names, class_names, depth + 1)
            print(f"{indent}else:")
            print_tree(node.right, feature_names, class_names, depth + 1)
        else:
            predicted_class = class_names[node.predicted_class]
            print(f"{indent}class: {predicted_class}")

print_tree(clf.tree_, df.columns[:-1], ['Heart Disease not Present', 'Heart Disease Present'])

if slope <= 1.5:
  if restingBP <= 159.0:
    if serumcholestrol <= 467.0:
      if serumcholestrol <= 66.0:
        if patientid <= 0:
        else:
      else:
        if chestpain <= 1.5:
          if patientid <= 0:
          else:
        else:
          if patientid <= 0:
          else:
    else:
      if patientid <= 0:
      else:
  else:
    if oldpeak <= 2.45:
      if slope <= 0.5:
        if patientid <= 0:
        else:
      else:
        if gender <= 0.5:
          if patientid <= 0:
          else:
        else:
          if patientid <= 0:
          else:
    else:
      if patientid <= 0:
      else:
else:
  if maxheartrate <= 85.0:
    if patientid <= 0:
    else:
  else:
    if noofmajorvessels <= 0.5:
      if gender <= 0.5:
        if age <= 78.0:
          if patientid <= 0:
          else:
        else:
          if patientid <= 0:
          else:
      else:
        if patientid <= 0:
        else:
    else:
      if restingBP <= 199.5:
        if restingBP <=