## Loading Irish dataset

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from collections import Counter

In [None]:
# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target



In [None]:
# Split dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
# Gini Impurity function
def gini_impurity(y):
    class_counts = Counter(y)
    impurity = 1
    total_count = len(y)
    for count in class_counts.values():
        prob = count / total_count
        impurity -= prob ** 2
    return impurity

In [None]:
# Information Gain function (uses Gini Impurity)
def information_gain(y, y_left, y_right):
    # Calculate the impurity for the left and right splits
    p_left = len(y_left) / len(y)
    p_right = len(y_right) / len(y)
    return gini_impurity(y) - p_left * gini_impurity(y_left) - p_right * gini_impurity(y_right)

## Creting Decision Tree Node Class

In [None]:

class DecisionTreeNode:
    def __init__(self, gini=None, num_samples=None, num_samples_per_class=None, predicted_class=None, feature_index=None, threshold=None):
        self.gini = gini
        self.num_samples = num_samples
        self.num_samples_per_class = num_samples_per_class
        self.predicted_class = predicted_class
        self.feature_index = feature_index
        self.threshold = threshold
        self.left = None
        self.right = None

## Creating DecisionTreeClassifier class to include all operations

In [None]:
# Decision Tree Classifier Class
class DecisionTreeClassifierFromScratch:
    def __init__(self, max_depth=None):
        self.max_depth = max_depth
        self.tree = None

    # Recursive function to build the tree
    def _build_tree(self, X, y, depth=0):
        num_samples = len(y)
        num_samples_per_class = [np.sum(y == i) for i in np.unique(y)]
        predicted_class = np.argmax(num_samples_per_class)

        node = DecisionTreeNode(
            gini=gini_impurity(y),
            num_samples=num_samples,
            num_samples_per_class=num_samples_per_class,
            predicted_class=predicted_class
        )

        # Stopping criteria: max depth or pure node
        if depth >= self.max_depth or node.gini == 0:
            return node

        # Find the best split
        best_gain = -1
        best_split = None
        for feature_index in range(X.shape[1]):
            # Get all unique values of the feature to split
            thresholds = np.unique(X[:, feature_index])
            for threshold in thresholds:
                # Split the dataset
                left_mask = X[:, feature_index] <= threshold
                right_mask = ~left_mask
                y_left, y_right = y[left_mask], y[right_mask]

                # If there are no samples in the split, skip it
                if len(y_left) == 0 or len(y_right) == 0:
                    continue

                # Compute information gain
                gain = information_gain(y, y_left, y_right)
                if gain > best_gain:
                    best_gain = gain
                    best_split = (feature_index, threshold)

        if best_gain == -1:
            return node

        feature_index, threshold = best_split
        left_mask = X[:, feature_index] <= threshold
        right_mask = ~left_mask
        y_left, y_right = y[left_mask], y[right_mask]

        # Build left and right subtrees recursively
        node.feature_index = feature_index
        node.threshold = threshold
        node.left = self._build_tree(X[left_mask], y_left, depth + 1)
        node.right = self._build_tree(X[right_mask], y_right, depth + 1)

        return node

    # Fit the model to the training data
    def fit(self, X, y):
        self.tree = self._build_tree(X, y)

    # Predict for a single sample
    def _predict_one(self, x, node):
        if node.left is None and node.right is None:
            return node.predicted_class
        if x[node.feature_index] <= node.threshold:
            return self._predict_one(x, node.left)
        else:
            return self._predict_one(x, node.right)

    # Predict for all samples
    def predict(self, X):
        return [self._predict_one(x, self.tree) for x in X]

In [None]:
# Train the decision tree model
model = DecisionTreeClassifierFromScratch(max_depth=3)
model.fit(X_train, y_train)

## Performing Evaluation of model

In [None]:
# Evaluate the model on the test set
y_pred = model.predict(X_test)
accuracy = np.mean(y_pred == y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 42.22%


## This scratch approach gives 42% accuracy