<a href="https://colab.research.google.com/github/mlan18/ML-AND-DS-ASSIGNMENT1/blob/main/Cart_Algo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

import numpy as np
import pandas as pd


# --- Step 1: Define the CART Class ---
class CARTDecisionTreeClassifier:
    """Decision-tree classifier built from binary equality splits and Gini impurity.

    Every internal node tests ``x[feature] == value``: samples that match go
    to the left subtree, all others go to the right one. At each node the
    (feature, value) pair with the lowest weighted Gini impurity of the two
    children is chosen.

    After :meth:`fit`, ``self.tree`` holds either a bare class label (a leaf)
    or a nested dict with the keys ``"feature"``, ``"value"``, ``"left"``,
    ``"right"`` and ``"_majority"``.
    """

    def __init__(self, max_depth=None):
        """Create an unfitted classifier.

        Args:
            max_depth: Maximum number of split levels; ``None`` means the tree
                grows until nodes are pure or cannot be split further.
        """
        self.max_depth = max_depth
        self.tree = None

    def gini_index(self, y):
        """Return the Gini impurity ``1 - sum(p_k ** 2)`` of the labels *y*."""
        _, counts = np.unique(y, return_counts=True)
        probs = counts / counts.sum()
        return 1 - np.sum(probs ** 2)

    def gini_split(self, X_column, y):
        """Find the best ``column == value`` split for a single feature.

        Args:
            X_column: 1-D array with one feature value per sample.
            y: 1-D array of class labels aligned with *X_column*.

        Returns:
            ``(best_gini, best_split)`` where *best_split* is
            ``(value, left_mask, right_mask)``, or ``None`` when no value
            separates the samples into two non-empty groups.
        """
        n_samples = len(y)
        best_gini = 1.0
        best_split = None

        for v in np.unique(X_column):
            left_mask = (X_column == v)
            right_mask = ~left_mask
            n_left = left_mask.sum()
            n_right = right_mask.sum()
            # A split that leaves one side empty carries no information.
            if n_left == 0 or n_right == 0:
                continue

            weighted_gini = (n_left / n_samples) * self.gini_index(y[left_mask]) + \
                            (n_right / n_samples) * self.gini_index(y[right_mask])

            # Strict comparison: on ties the first candidate value is kept.
            if weighted_gini < best_gini:
                best_gini = weighted_gini
                best_split = (v, left_mask, right_mask)

        return best_gini, best_split

    def best_attribute(self, X, y, features):
        """Pick the feature whose best split has the lowest weighted Gini.

        Args:
            X: 2-D array of shape ``(n_samples, n_features)``.
            y: 1-D array of class labels aligned with the rows of *X*.
            features: Iterable of column indices to consider.

        Returns:
            ``(best_feature, best_split)``; both are ``None`` when no feature
            admits a valid split.
        """
        best_gini = 1.0
        best_feature = None
        best_split = None

        for feature in features:
            gini, split = self.gini_split(X[:, feature], y)
            # Strict comparison: on ties the earliest feature is kept.
            if split is not None and gini < best_gini:
                best_gini = gini
                best_feature = feature
                best_split = split

        return best_feature, best_split

    def build_tree(self, X, y, features, depth=0):
        """Recursively grow the tree for the samples ``(X, y)``.

        Args:
            X: 2-D array of samples reaching this node.
            y: 1-D array of their class labels (must be non-empty).
            features: Column indices that may be used for splitting. The same
                list is passed to both children, so a feature can be reused
                with a different value further down the tree.
            depth: Depth of the current node (the root is 0).

        Returns:
            A class label for a leaf, otherwise a node dict as described in
            the class docstring.
        """
        classes, counts = np.unique(y, return_counts=True)
        majority_class = classes[np.argmax(counts)]

        # Stopping conditions: pure node, nothing to split on, or depth limit.
        if len(classes) == 1:
            return classes[0]
        if len(features) == 0 or (self.max_depth is not None and depth >= self.max_depth):
            return majority_class

        best_feat, best_split = self.best_attribute(X, y, features)
        if best_feat is None:
            # Samples are indistinguishable on every feature.
            return majority_class

        v, left_mask, right_mask = best_split

        return {
            "feature": best_feat,
            "value": v,
            "left": self.build_tree(X[left_mask], y[left_mask], features, depth + 1),
            "right": self.build_tree(X[right_mask], y[right_mask], features, depth + 1),
            # Stored for inspection only; prediction does not consult it.
            "_majority": majority_class
        }

    def fit(self, X, y):
        """Build the tree from training data.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``; NumPy arrays,
                nested lists and pandas DataFrames are accepted.
            y: Array-like of ``n_samples`` class labels.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If the training set is empty.
        """
        # Normalise to ndarrays so column slicing and boolean masks work for
        # lists and DataFrames as well.
        X = np.asarray(X)
        y = np.asarray(y)

        features = list(range(X.shape[1]))
        if len(y) == 0:
            raise ValueError("Cannot fit a decision tree on an empty training set.")

        self.tree = self.build_tree(X, y, features)
        return self

    def _predict_single(self, x, tree):
        """Walk *tree* for one sample *x* and return the label of the leaf reached."""
        node = tree
        # Leaves are bare labels; only internal nodes are dicts.
        while isinstance(node, dict):
            if x[node["feature"]] == node["value"]:
                node = node["left"]
            else:
                node = node["right"]
        return node

    def predict(self, X):
        """Predict a class label for every row of *X*.

        Args:
            X: Iterable of samples, each indexable by feature position. A
                pandas DataFrame is converted to its row values first.

        Returns:
            NumPy array with one predicted label per sample.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if self.tree is None:
            raise RuntimeError("This classifier is not fitted yet; call fit() before predict().")
        if hasattr(X, "to_numpy"):
            # Iterating a DataFrame yields column labels, not rows.
            X = X.to_numpy()
        return np.array([self._predict_single(x, self.tree) for x in X])




# --- Step 2: Example Dataset ---
# One record per customer; the last column ('Buy') is the class label.
columns = ['Age', 'Income', 'Student', 'Credit', 'Buy']
records = [
    ('Youth',  'High',   'No',  'Fair',      'No'),
    ('Youth',  'High',   'No',  'Excellent', 'No'),
    ('Middle', 'High',   'No',  'Fair',      'Yes'),
    ('Senior', 'Medium', 'No',  'Fair',      'Yes'),
    ('Senior', 'Low',    'Yes', 'Fair',      'Yes'),
    ('Senior', 'Low',    'Yes', 'Excellent', 'No'),
    ('Middle', 'Low',    'Yes', 'Excellent', 'Yes'),
    ('Youth',  'Medium', 'No',  'Fair',      'No'),
    ('Youth',  'Low',    'Yes', 'Fair',      'Yes'),
    ('Senior', 'Medium', 'Yes', 'Fair',      'Yes'),
    ('Youth',  'Medium', 'Yes', 'Excellent', 'Yes'),
    ('Middle', 'Medium', 'No',  'Excellent', 'Yes'),
    ('Middle', 'High',   'Yes', 'Fair',      'Yes'),
    ('Senior', 'Medium', 'No',  'Excellent', 'No'),
]

# Transpose the records into a column-name -> list-of-values mapping.
data = {name: list(column) for name, column in zip(columns, zip(*records))}
df = pd.DataFrame(data)


# --- Step 3: Train the Model ---
# Every column except the label is a feature.
X = df.drop(columns='Buy').to_numpy()
y = df['Buy'].to_numpy()

dt = CARTDecisionTreeClassifier()
dt.fit(X, y)


# --- Step 4: Display the Tree ---
print("CART Decision Tree Structure:", dt.tree, sep="\n")


# --- Step 5: Make Predictions ---
# Feature order matches the training columns: Age, Income, Student, Credit.
test_samples = np.array([
    ['Middle', 'Medium', 'Yes', 'Fair'],
    ['Senior', 'Low', 'No', 'Excellent'],
])

predictions = dt.predict(test_samples)
print("\nPredictions:")
for sample_no, label in enumerate(predictions, start=1):
    print(f"Sample {sample_no}: {label}")


CART Decision Tree Structure:
{'feature': 0, 'value': 'Middle', 'left': 'Yes', 'right': {'feature': 2, 'value': 'No', 'left': {'feature': 0, 'value': 'Senior', 'left': {'feature': 3, 'value': 'Excellent', 'left': 'No', 'right': 'Yes', '_majority': 'No'}, 'right': 'No', '_majority': 'No'}, 'right': {'feature': 3, 'value': 'Excellent', 'left': {'feature': 0, 'value': 'Senior', 'left': 'No', 'right': 'Yes', '_majority': 'No'}, 'right': 'Yes', '_majority': 'Yes'}, '_majority': 'No'}, '_majority': 'Yes'}

Predictions:
Sample 1: Yes
Sample 2: No
