## Decision Tree using CART Algorithm

In [1]:
# Decision Tree using CART (Gini Impurity)

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

# Fetch the iris bunch and pull out the feature matrix and label vector
data = load_iris()
X = data.data
y = data.target

# Hold out 30% of the samples for evaluation (fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# --- CART Implementation ---
class DecisionTreeCART:
    """Minimal CART-style classification tree using weighted Gini impurity.

    The fitted tree is stored in ``tree_`` as nested dicts. A leaf is
    ``{'leaf': True, 'class': label}``; an internal node is
    ``{'leaf': False, 'feature_index': i, 'threshold': t, 'left': ..., 'right': ...}``
    where samples with ``x[i] <= t`` go left and the rest go right.

    Parameters
    ----------
    max_depth : int or None, default None
        Maximum number of split levels. ``None`` means grow until every leaf
        is pure or no valid split exists. ``0`` yields a single majority leaf.
    """

    def __init__(self, max_depth=None):
        self.max_depth = max_depth

    def gini(self, y):
        """Return the Gini impurity ``1 - sum(p_c ** 2)`` of the labels ``y``."""
        n = len(y)
        # One pass over y gives every class count, instead of one scan per class.
        _, counts = np.unique(y, return_counts=True)
        return 1 - sum((counts / n) ** 2)

    def _majority_class(self, y):
        """Return the most frequent label in ``y`` (smallest label wins ties)."""
        # np.unique returns sorted labels, so argmax picks the smallest label
        # among ties -- the same choice np.bincount(y).argmax() makes for
        # non-negative ints, but this also works for arbitrary label types.
        labels, counts = np.unique(y, return_counts=True)
        return labels[np.argmax(counts)]

    def best_split(self, X, y):
        """Find the (feature index, threshold) with the lowest weighted Gini.

        Every unique value of every feature is tried as a threshold. Splits
        that leave one side empty are skipped.

        Returns
        -------
        tuple
            ``(best_idx, best_thresh)``; both are ``None`` when no valid
            split exists.
        """
        best_gini = 1.0
        best_idx, best_thresh = None, None
        n_samples = len(y)

        for i in range(X.shape[1]):
            column = X[:, i]
            for t in np.unique(column):
                left_idx = column <= t
                right_idx = column > t
                n_left = np.count_nonzero(left_idx)
                n_right = np.count_nonzero(right_idx)
                if n_left == 0 or n_right == 0:
                    continue
                # Impurity of the split is the size-weighted mean of both sides.
                gini_total = (
                    n_left * self.gini(y[left_idx])
                    + n_right * self.gini(y[right_idx])
                ) / n_samples
                # Strict '<' keeps the first split found among equal scores.
                if gini_total < best_gini:
                    best_gini = gini_total
                    best_idx = i
                    best_thresh = t
        return best_idx, best_thresh

    def build_tree(self, X, y, depth=0):
        """Recursively build the tree for ``X``/``y`` starting at ``depth``."""
        if len(np.unique(y)) == 1:
            return {'leaf': True, 'class': y[0]}
        # 'is not None' matters: a plain truthiness test would treat
        # max_depth=0 as "unlimited" and grow a full tree.
        if self.max_depth is not None and depth >= self.max_depth:
            return {'leaf': True, 'class': self._majority_class(y)}

        idx, thresh = self.best_split(X, y)
        if idx is None:
            # No feature separates the samples; fall back to the majority.
            return {'leaf': True, 'class': self._majority_class(y)}

        left_idx = X[:, idx] <= thresh
        right_idx = X[:, idx] > thresh

        return {
            'leaf': False,
            'feature_index': idx,
            'threshold': thresh,
            'left': self.build_tree(X[left_idx], y[left_idx], depth + 1),
            'right': self.build_tree(X[right_idx], y[right_idx], depth + 1)
        }

    def fit(self, X, y):
        """Build the tree from training data and store it in ``tree_``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        DecisionTreeCART
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``y`` is empty or ``X`` and ``y`` differ in length.
        """
        # Coerce so lists and DataFrames/Series work with the positional
        # slicing used throughout the tree builder.
        X = np.asarray(X)
        y = np.asarray(y)
        if len(y) == 0:
            raise ValueError("Cannot fit a decision tree on an empty dataset")
        if len(X) != len(y):
            raise ValueError("X and y must contain the same number of samples")
        self.tree_ = self.build_tree(X, y)
        return self

    def predict_one(self, x, tree=None):
        """Predict the class of a single sample ``x`` by walking the tree.

        ``tree`` is the subtree to start from; it defaults to the fitted root.
        """
        if tree is None:
            tree = self.tree_
        if tree['leaf']:
            return tree['class']
        if x[tree['feature_index']] <= tree['threshold']:
            return self.predict_one(x, tree['left'])
        else:
            return self.predict_one(x, tree['right'])

    def predict(self, X):
        """Predict a class for every row of ``X`` and return them as an array."""
        # np.asarray guarantees row-wise iteration (a DataFrame would
        # otherwise iterate over its column names).
        return np.array([self.predict_one(x) for x in np.asarray(X)])


# --- Fit a depth-limited tree and predict the held-out samples ---
model = DecisionTreeCART(max_depth=3)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Accuracy is the fraction of test samples whose prediction matches the label
accuracy = np.mean(y_pred == y_test)
print(f"Accuracy: {accuracy:.2f}")


Accuracy: 0.96


## Rule-Based Classification using OneR Algorithm

In [4]:
# OneR Algorithm Implementation

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load iris into pandas containers so features can be addressed by column name
iris = load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
y = pd.Series(data=iris.target)

# 70/30 train/test split with a fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

class OneR:
    """One Rule (OneR) classifier.

    For every feature, each distinct value is mapped to the most frequent
    class among the training rows carrying that value. The feature whose
    rule set makes the fewest training errors is kept.

    Attributes set by ``fit``
    -------------------------
    best_feature : column label of the selected feature
    best_rules : dict mapping feature value -> predicted class
    default_class_ : most frequent training class, used for feature values
        that were never seen during training
    """

    def fit(self, X, y):
        """Learn the single best feature and its value -> class rules.

        Parameters
        ----------
        X : pandas.DataFrame
            Training features; every distinct value is treated as a category.
        y : pandas.Series or array-like
            Training labels. A non-Series is wrapped positionally on
            ``X.index``.

        Returns
        -------
        OneR
            ``self``, so calls can be chained.
        """
        if not isinstance(y, pd.Series):
            y = pd.Series(y, index=X.index)

        # The fallback class is learned from the training labels so that
        # prediction never depends on the composition of the batch being scored.
        overall_mode = y.mode()
        self.default_class_ = overall_mode.iloc[0] if len(overall_mode) else None
        self._label_dtype = y.dtype

        best_feature = None
        best_rules = None
        best_error = float('inf')

        for col in X.columns:
            column = X[col]
            rules = {}
            for val in column.unique():
                subset = y[column == val]
                if len(subset) == 0:
                    # Happens for NaN, which never compares equal to itself.
                    continue
                rules[val] = subset.mode().iloc[0]

            # Training error count for this feature's rule set.
            error = (column.map(rules) != y).sum()

            # Strict '<' keeps the earliest column among equally good ones.
            if error < best_error:
                best_error = error
                best_feature = col
                best_rules = rules

        self.best_feature = best_feature
        self.best_rules = best_rules
        return self

    def predict(self, X):
        """Predict a class for every row of ``X`` using the learned rules.

        Feature values that were not seen during training are assigned
        ``default_class_`` (the training majority class).

        Returns
        -------
        pandas.Series
            Predictions indexed like ``X``.
        """
        preds = X[self.best_feature].map(self.best_rules)
        if self.default_class_ is not None:
            preds = preds.fillna(self.default_class_)
        if preds.notna().all():
            # map() upcasts integer labels to float when it produces NaN;
            # restore the label dtype seen during training.
            preds = preds.astype(self._label_dtype)
        return preds


# --- Fit OneR on the training split and predict the held-out rows ---
model = OneR()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Accuracy: number of matching predictions over the size of the test set
n_correct = (y_pred == y_test).sum()
accuracy = n_correct / len(y_test)
print(f"Best feature: {model.best_feature}")
print(f"Rules: {model.best_rules}")
print(f"Accuracy: {accuracy:.2f}")


Best feature: petal length (cm)
Rules: {np.float64(3.7): np.int64(1), np.float64(5.1): np.int64(2), np.float64(5.5): np.int64(2), np.float64(4.4): np.int64(1), np.float64(6.1): np.int64(2), np.float64(4.2): np.int64(1), np.float64(6.6): np.int64(2), np.float64(4.5): np.int64(1), np.float64(1.4): np.int64(0), np.float64(6.7): np.int64(2), np.float64(4.1): np.int64(1), np.float64(1.3): np.int64(0), np.float64(1.9): np.int64(0), np.float64(3.5): np.int64(1), np.float64(4.9): np.int64(1), np.float64(1.6): np.int64(0), np.float64(1.7): np.int64(0), np.float64(1.5): np.int64(0), np.float64(4.3): np.int64(1), np.float64(5.0): np.int64(2), np.float64(4.8): np.int64(2), np.float64(4.0): np.int64(1), np.float64(5.4): np.int64(2), np.float64(3.3): np.int64(1), np.float64(5.3): np.int64(2), np.float64(1.2): np.int64(0), np.float64(3.8): np.int64(1), np.float64(3.0): np.int64(1), np.float64(5.7): np.int64(2), np.float64(5.6): np.int64(2), np.float64(1.1): np.int64(0), np.float64(6.0): np.int64(2), 