<a href="https://colab.research.google.com/github/theperiperi/Machine-Learning-Sem-IV/blob/main/decision_trees.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Function-based decision tree

In [None]:
# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Fetch the Iris dataset bundled with scikit-learn: features in X, labels in y
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Construct the decision tree classifier and fit it on the training portion
# (fit() returns the fitted estimator itself, so the two steps can be chained)
clf = DecisionTreeClassifier().fit(X_train, y_train)

# Predict labels for the held-out samples
y_pred = clf.predict(X_test)

# Compare the predictions with the true labels and report the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


Accuracy: 1.0


# mathematical decision tree

In [None]:
import numpy as np

class Node:
    """A single node of a binary decision tree.

    Internal nodes carry a ``feature`` index and a ``threshold`` together
    with ``left`` and ``right`` subtrees. Leaf nodes carry a class label in
    ``value``. Every attribute defaults to ``None``.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        # Split rule: which feature index to test and the cut-off value
        self.feature, self.threshold = feature, threshold
        # Child subtrees
        self.left, self.right = left, right
        # Class label (for leaf nodes)
        self.value = value

def entropy(y):
    """Return the Shannon entropy (in bits) of the labels in ``y``.

    Args:
        y: Sequence or 1-D array of class labels.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the classes present in ``y``.
        An empty ``y`` has entropy ``0.0``.
    """
    if len(y) == 0:
        # Nothing to be uncertain about; also avoids dividing by zero below.
        return 0.0

    _, counts = np.unique(y, return_counts=True)
    probabilities = counts / len(y)
    # np.unique only reports classes that actually occur, so every
    # probability is strictly positive and log2 needs no smoothing term.
    # (Adding an epsilon would bias the result and make pure sets negative.)
    # Subtracting from 0.0 rather than negating keeps a pure set at +0.0.
    return 0.0 - np.sum(probabilities * np.log2(probabilities))

def information_gain(X, y, feature, threshold):
    """Return the entropy reduction from splitting on ``feature`` at ``threshold``.

    Rows with ``X[:, feature] <= threshold`` form the left part and the
    remaining rows the right part. The gain is the entropy of ``y`` minus
    the size-weighted entropies of the two parts.
    """
    goes_left = X[:, feature] <= threshold
    goes_right = ~goes_left
    n_total = len(y)

    # Each part's entropy is weighted by the fraction of samples it receives
    weighted_children = (
        entropy(y[goes_left]) * np.sum(goes_left) / n_total
        + entropy(y[goes_right]) * np.sum(goes_right) / n_total
    )
    return entropy(y) - weighted_children

def find_best_split(X, y):
    """Find the feature index and threshold with the highest information gain.

    Every unique value of every feature column, except the column's largest
    value, is tried as a threshold for the rule ``X[:, feature] <= threshold``.

    Args:
        X: 2-D array of shape (n_samples, n_features).
        y: 1-D array of class labels, one per row of ``X``.

    Returns:
        A ``(feature, threshold)`` tuple for the best split, or
        ``(None, None)`` when no candidate has a gain greater than zero.
    """
    best_gain = 0
    best_feature = None
    best_threshold = None

    for feature in range(X.shape[1]):
        # np.unique returns the values sorted. The largest one is dropped
        # because `<= max` sends every sample left and leaves the right side
        # empty; its gain is zero only up to rounding error, so it could
        # otherwise be picked and yield an empty child node.
        thresholds = np.unique(X[:, feature])[:-1]
        for threshold in thresholds:
            gain = information_gain(X, y, feature, threshold)
            # Strict comparison: ties keep the first candidate encountered
            if gain > best_gain:
                best_gain = gain
                best_feature = feature
                best_threshold = threshold

    return best_feature, best_threshold

def build_decision_tree(X, y, depth=0, max_depth=None):
    """Recursively grow a decision tree and return its root ``Node``.

    A leaf holding the most frequent label in ``y`` is produced when
    ``depth`` equals ``max_depth``, when ``y`` contains a single class, or
    when ``find_best_split`` reports no useful split. Otherwise the samples
    are partitioned by the chosen rule and each part is grown one level deeper.
    """
    # Only search for a split when neither stopping condition applies
    stop_here = depth == max_depth or len(np.unique(y)) == 1
    feature, threshold = (None, None) if stop_here else find_best_split(X, y)

    if feature is None:
        # Leaf: predict the majority class of the samples that reached here
        return Node(value=np.argmax(np.bincount(y)))

    goes_left = X[:, feature] <= threshold
    goes_right = ~goes_left
    next_depth = depth + 1

    return Node(
        feature=feature,
        threshold=threshold,
        left=build_decision_tree(X[goes_left], y[goes_left], next_depth, max_depth),
        right=build_decision_tree(X[goes_right], y[goes_right], next_depth, max_depth),
    )

def predict_tree(node, instance):
    """Return the class label the tree rooted at ``node`` assigns to ``instance``.

    Starting from ``node``, descend left when the tested feature of
    ``instance`` is less than or equal to the node's threshold and right
    otherwise, until a node with a non-``None`` ``value`` is reached.
    """
    current = node
    # Walk down the tree until a leaf (a node holding a value) is reached
    while current.value is None:
        goes_left = instance[current.feature] <= current.threshold
        current = current.left if goes_left else current.right
    return current.value

# Demo on a tiny, simplified Iris-style dataset
# Columns: sepal length, sepal width
# Classes: 0 - Setosa, 1 - Versicolor
X_iris = np.array([
    [5.1, 3.5], [4.9, 3.0], [5.8, 2.6], [6.7, 3.0],
    [5.6, 2.5], [5.5, 2.4], [6.1, 3.0], [6.2, 3.4],
])
y_iris = np.array([0, 0, 1, 1, 0, 0, 1, 1])

# Grow a tree of depth at most 2 on the toy data
iris_tree = build_decision_tree(X_iris, y_iris, max_depth=2)

# Classify one previously unseen sample
new_instance = np.array([5.0, 3.2])
prediction = predict_tree(iris_tree, new_instance)
print("Predicted class:", prediction)


Predicted class: 0
