In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the dataset
df = pd.read_csv('/content/heart_disease_data (1).csv')

# Display the first few rows of the dataset
print(df.head())

# Separate features and target
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Define the TreeNode class
class TreeNode:
    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        self.feature = feature
        self.threshold = threshold
        self.left = left
        self.right = right
        self.value = value

# Define the DecisionTree class
class DecisionTree:
    def __init__(self, min_samples_split=2, max_depth=float('inf')):
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.root = None

    def fit(self, X, y):
        self.root = self._build_tree(X, y)

    def _build_tree(self, X, y, depth=0):
        num_samples, num_features = X.shape
        if num_samples >= self.min_samples_split and depth <= self.max_depth:
            best_split = self._find_best_split(X, y, num_features)
            if best_split:
                left_X, left_y, right_X, right_y = self._split_dataset(X, y, best_split['feature'], best_split['threshold'])
                left_subtree = self._build_tree(left_X, left_y, depth + 1)
                right_subtree = self._build_tree(right_X, right_y, depth + 1)
                return TreeNode(best_split['feature'], best_split['threshold'], left_subtree, right_subtree)

        leaf_value = self._calculate_leaf_value(y)
        return TreeNode(value=leaf_value)

    def _find_best_split(self, X, y, num_features):
        best_split = {}
        best_impurity = float('inf')
        for feature in range(num_features):
            thresholds = np.unique(X[:, feature])
            for threshold in thresholds:
                left_X, left_y, right_X, right_y = self._split_dataset(X, y, feature, threshold)
                if len(left_y) > 0 and len(right_y) > 0:
                    impurity = self._calculate_impurity(left_y, right_y)
                    if impurity < best_impurity:
                        best_impurity = impurity
                        best_split = {"feature": feature, "threshold": threshold}
        return best_split if best_split else None

    def _calculate_impurity(self, left_y, right_y):
        m = len(left_y) + len(right_y)
        left_impurity = self._gini_impurity(left_y)
        right_impurity = self._gini_impurity(right_y)
        weighted_impurity = (len(left_y) / m) * left_impurity + (len(right_y) / m) * right_impurity
        return weighted_impurity

    def _gini_impurity(self, y):
        classes = np.unique(y)
        n = len(y)
        gini = 1.0
        for c in classes:
            gini -= (np.sum(y == c) / n) ** 2
        return gini

    def _calculate_leaf_value(self, y):
        return np.bincount(y).argmax()

    def _split_dataset(self, X, y, feature, threshold):
        left_indices = X[:, feature] < threshold
        right_indices = X[:, feature] >= threshold
        left_X, left_y = X[left_indices], y[left_indices]
        right_X, right_y = X[right_indices], y[right_indices]
        return left_X, left_y, right_X, right_y

    def predict(self, X):
        return np.array([self._predict(inputs) for inputs in X])

    def _predict(self, inputs):
        node = self.root
        while node.value is None:
            if inputs[node.feature] < node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value

# Create and train the decision tree
tree = DecisionTree(min_samples_split=3, max_depth=3)
tree.fit(X, y)

# Make predictions
predictions = tree.predict(X)
print(predictions)

# Calculate accuracy
accuracy = np.sum(predictions == y) / len(y)
print(f'Accuracy: {accuracy:.4f}')


   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  
[1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 0
 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 1 1 0 0 0 0 1 0 0 0 0 1 0 0
 0 0 0