In [None]:
import numpy as np


In [None]:
import numpy as np
class CustomDecesionTree:
    """
    Binary decision-tree classifier that picks splits by entropy-based information gain.

    The fitted tree is stored in ``self.tree`` as nested dictionaries:
    a leaf is ``{'class': label}``; an internal node is
    ``{'feature_idx', 'threshold', 'left_tree', 'right_tree'}`` where samples with
    ``x[feature_idx] <= threshold`` are routed to ``left_tree`` and all others to
    ``right_tree``.

    Parameters:
    max_depth (int or None): Maximum depth of the tree. ``None`` grows the tree until
        every leaf is pure or no further split exists; ``0`` yields a single leaf.
    """

    def __init__(self, max_depth=None):
        self.max_depth = max_depth
        self.tree = None

    def fit(self, X, y):
        """
        Builds the decision tree from the training data and stores it in ``self.tree``.
        Parameters:
        X (array-like): Feature matrix (n_samples, n_features).
        y (array-like): Target labels (n_samples,). Any labels np.unique can sort.
        Raises:
        ValueError: If X is not 2-D, y is not 1-D, their lengths differ, or X is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError("y must be 1-dimensional with one label per row of X")
        if X.shape[0] == 0:
            raise ValueError("cannot fit a decision tree on zero samples")
        self.tree = self._build_tree(X, y)

    def _build_tree(self, X, y, depth=0):
        """
        Recursively builds the subtree for the samples (X, y) located at ``depth``.
        Returns:
        dict: A leaf ``{'class': label}`` or an internal node with the keys
        'feature_idx', 'threshold', 'left_tree' and 'right_tree'.
        """
        num_samples, num_features = X.shape
        classes, counts = np.unique(y, return_counts=True)
        if len(classes) == 0:
            raise ValueError("cannot build a tree node from zero samples")

        # Pure node: nothing left to separate.
        if len(classes) == 1:
            return {'class': classes[0]}

        # Ties resolve to the smallest label, as np.bincount(y).argmax() did.
        majority_class = classes[np.argmax(counts)]

        # `is not None` so that max_depth=0 means "root only", not "unlimited".
        if self.max_depth is not None and depth >= self.max_depth:
            return {'class': majority_class}

        best_info_gain = -float('inf')
        best_split = None
        for feature_idx in range(num_features):
            column = X[:, feature_idx]
            for threshold in np.unique(column):
                left_mask = column <= threshold
                num_left = np.count_nonzero(left_mask)
                # A split that leaves one side empty separates nothing and would
                # recurse on an unchanged node; skipping it guarantees termination.
                if num_left == 0 or num_left == num_samples:
                    continue
                info_gain = self._information_gain(y, y[left_mask], y[~left_mask])
                if info_gain > best_info_gain:
                    best_info_gain = info_gain
                    best_split = (feature_idx, threshold, left_mask)

        # Every feature is constant across the remaining samples: no split possible.
        if best_split is None:
            return {'class': majority_class}

        feature_idx, threshold, left_mask = best_split
        right_mask = ~left_mask
        # Children are selected with the boolean masks so that each subtree sees
        # exactly the rows (and labels) that fall on its side of the threshold.
        left_tree = self._build_tree(X[left_mask], y[left_mask], depth + 1)
        right_tree = self._build_tree(X[right_mask], y[right_mask], depth + 1)
        return {
            'feature_idx': feature_idx,
            'threshold': threshold,
            'left_tree': left_tree,
            'right_tree': right_tree,
        }

    def _information_gain(self, parent, left, right):
        """
        Returns the entropy of ``parent`` minus the size-weighted entropy of its children.
        Parameters:
        parent (array-like): Labels before the split.
        left (array-like): Labels routed to the left child.
        right (array-like): Labels routed to the right child.
        Returns:
        float: The information gain; 0.0 when ``parent`` is empty.
        """
        total = len(parent)
        if total == 0:
            return 0.0
        weighted_avg_entropy = (
            (len(left) / total) * self._entropy(left)
            + (len(right) / total) * self._entropy(right)
        )
        return self._entropy(parent) - weighted_avg_entropy

    def _entropy(self, y):
        """
        Returns the Shannon entropy (in bits) of the label array ``y``; 0.0 when empty.
        """
        y = np.asarray(y)
        if y.size == 0:
            return 0.0
        # np.unique only reports classes that occur, so every probability is > 0
        # and log2 needs no epsilon fudge.
        _, counts = np.unique(y, return_counts=True)
        class_probs = counts / counts.sum()
        return float(-np.sum(class_probs * np.log2(class_probs)))

    def predict(self, X):
        """
        Predicts the target labels for the given test data based on the trained decision tree.
        Parameters:
        X (array-like): Feature matrix (n_samples, n_features) for prediction.
        Returns:
        list: A list of predicted target labels (n_samples,).
        Raises:
        RuntimeError: If called before fit().
        """
        if self.tree is None:
            raise RuntimeError("This CustomDecesionTree is not fitted yet; call fit() first")
        return [self._predict_single(x, self.tree) for x in X]

    def _predict_single(self, x, tree):
        """
        Walks ``tree`` from the given node down to a leaf for one sample ``x``
        and returns that leaf's class label.
        """
        node = tree
        # Iterative descent: no recursion-depth limit on very deep trees.
        while 'class' not in node:
            if x[node['feature_idx']] <= node['threshold']:
                node = node['left_tree']
            else:
                node = node['right_tree']
        return node['class']




In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Dataset: Iris, split 80/20 with a fixed seed so the numbers are repeatable.
data = load_iris()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Hand-written tree: train, predict on the held-out rows, score.
custom_tree = CustomDecesionTree(max_depth=3)
custom_tree.fit(X_train, y_train)
y_pred_custom = custom_tree.predict(X_test)
accuracy_custom = accuracy_score(y_test, y_pred_custom)
print(f"Custom Decision Tree Accuracy: {accuracy_custom:.4f}")

# scikit-learn reference tree with the same depth cap (fit() returns the estimator).
sklearn_tree = DecisionTreeClassifier(max_depth=3, random_state=42).fit(X_train, y_train)
y_pred_sklearn = sklearn_tree.predict(X_test)
accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn)
print(f"Scikit-learn Decision Tree Accuracy: {accuracy_sklearn:.4f}")

# Side-by-side recap of both scores.
print("Accuracy Comparison:")
print(f"Custom Decision Tree: {accuracy_custom:.4f}")
print(f"Scikit-learn Decision Tree: {accuracy_sklearn:.4f}")

Custom Decision Tree Accuracy: 0.8000
Scikit-learn Decision Tree Accuracy: 1.0000
Accuracy Comparison:
Custom Decision Tree: 0.8000
Scikit-learn Decision Tree: 1.0000


In [None]:
# Necessary Imports
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
# Load the Iris dataset
data = load_iris()
X = data.data
y = data.target
# Split into training and test sets (80% training, 20% test); the fixed
# random_state keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Train the custom decision tree
custom_tree = CustomDecesionTree(max_depth=3)
custom_tree.fit(X_train, y_train)
# Predict on the test set
y_pred_custom = custom_tree.predict(X_test)
# Calculate accuracy
accuracy_custom = accuracy_score(y_test, y_pred_custom)
print(f"Custom Decision Tree Accuracy: {accuracy_custom:.4f}")

# Train the Scikit-learn decision tree with the same depth limit for a fair comparison
sklearn_tree = DecisionTreeClassifier(max_depth=3, random_state=42)
sklearn_tree.fit(X_train, y_train)
# Predict on the test set
y_pred_sklearn = sklearn_tree.predict(X_test)
# Calculate accuracy
accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn)
print(f"Scikit-learn Decision Tree Accuracy: {accuracy_sklearn:.4f}")


# Plain string: there is nothing to interpolate in the header line.
print("Accuracy Comparison:")
print(f"Custom Decision Tree: {accuracy_custom:.4f}")
print(f"Scikit-learn Decision Tree: {accuracy_sklearn:.4f}")

Custom Decision Tree Accuracy: 0.8000
Scikit-learn Decision Tree Accuracy: 1.0000
Accuracy Comparison:
Custom Decision Tree: 0.8000
Scikit-learn Decision Tree: 1.0000
