In [124]:
import numpy as np
import pandas as pd

In [125]:
# Read the play-tennis table (the first CSV column is used as the row index),
# then separate the feature columns from the 'play' target column.
dataset = pd.read_csv('play_tennis.csv', index_col=0)
X = dataset.drop(columns='play')
y = dataset['play']

In [126]:
# Build the label vector directly from `dataset` rather than from `y` itself:
# an ndarray has no `.to_numpy()`, so `y = y.to_numpy()` raises AttributeError
# when this cell is executed a second time. Deriving it from the frame keeps
# the cell idempotent and yields the same array.
y = dataset['play'].to_numpy()
y

array(['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes',
       'Yes', 'Yes', 'Yes', 'No'], dtype=object)

In [127]:
# Build the feature matrix directly from `dataset` rather than from `X` itself:
# an ndarray has no `.to_numpy()`, so `X = X.to_numpy()` raises AttributeError
# when this cell is executed a second time. Deriving it from the frame keeps
# the cell idempotent and yields the same array.
X = dataset.drop('play', axis=1).to_numpy()
X

array([['Sunny', 'Hot', 'High', 'Weak'],
       ['Sunny', 'Hot', 'High', 'Strong'],
       ['Overcast', 'Hot', 'High', 'Weak'],
       ['Rain', 'Mild', 'High', 'Weak'],
       ['Rain', 'Cool', 'Normal', 'Weak'],
       ['Rain', 'Cool', 'Normal', 'Strong'],
       ['Overcast', 'Cool', 'Normal', 'Strong'],
       ['Sunny', 'Mild', 'High', 'Weak'],
       ['Sunny', 'Cool', 'Normal', 'Weak'],
       ['Rain', 'Mild', 'Normal', 'Weak'],
       ['Sunny', 'Mild', 'Normal', 'Strong'],
       ['Overcast', 'Mild', 'High', 'Strong'],
       ['Overcast', 'Hot', 'Normal', 'Weak'],
       ['Rain', 'Mild', 'High', 'Strong']], dtype=object)

In [218]:
class Node:
    """
    One vertex of the decision tree.

    Attributes
    ----------
    decision : object or None
        Value stored for this node; ``None`` unless one is passed in.
    attribute : object or None
        Attribute associated with this node; ``None`` unless one is passed in.
    children : dict
        Mapping to child nodes. Always starts out empty and is a fresh
        dictionary for every node.
    """

    def __init__(self, decision=None, attribute=None):
        """
        Store the given decision and attribute and start with no children.
        """
        self.children = dict()
        self.attribute = attribute
        self.decision = decision

In [249]:
class ID3DecisionTree:
    """
    ID3 decision tree classifier for categorical features.

    The tree is grown by repeatedly splitting on the attribute with the
    highest information gain. Each internal node has one child per distinct
    value of the chosen attribute, and an attribute is used at most once on
    any root-to-leaf path.
    """

    def __init__(self, max_depth=3, min_samples_split=0):
        """
        Initialize the decision tree.

        Parameters
        ----------
        max_depth : int
            The maximum depth of the tree.
        min_samples_split : int
            The minimum number of samples (rows) required to split an
            internal node.
        """
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.tree = None

    def _entropy(self, y):
        """
        Returns the Shannon entropy (in bits) of a given set of labels.
        """
        _, counts = np.unique(y, return_counts=True)
        p = counts / len(y)
        # Every class returned by np.unique has count >= 1, so log2 never sees 0.
        return -np.sum(p * np.log2(p))

    def _information_gain(self, X, y, attribute):
        """
        Returns the information gain of splitting on a given attribute.

        The gain is the entropy of ``y`` minus the weighted average entropy
        of the label subsets obtained by grouping the samples on each
        distinct value found in column ``attribute`` of ``X``.
        """
        column = X[:, attribute]
        values, counts = np.unique(column, return_counts=True)
        weights = counts / len(column)
        # Weight the entropy of EVERY value's subset by its share of samples.
        conditional_entropy = sum(
            weight * self._entropy(y[column == value])
            for value, weight in zip(values, weights)
        )
        return self._entropy(y) - conditional_entropy

    def _best_attribute(self, X, y, attributes):
        """
        Returns the best attribute to split on.

        Returns
        -------
        tuple
            ``(position, attribute)`` where ``position`` is the index of the
            winner inside ``attributes`` and ``attribute`` is the column index
            itself. Ties are resolved in favour of the earliest attribute.
        """
        gains = [self._information_gain(X, y, attribute) for attribute in attributes]
        position = np.argmax(gains)
        return (position, attributes[position])

    def _major_class(self, y):
        """
        Returns the most common class in a given set of labels.
        """
        unique, counts = np.unique(y, return_counts=True)
        return unique[np.argmax(counts)]

    def fit(self, X, y, features):
        """
        Build decision tree classifier.

        Parameters
        ----------
        X : array-like, two-dimensional
            Training samples, one row per sample and one column per attribute.
        y : array-like, one-dimensional
            Class label of every row of ``X``.
        features : array-like of int
            Column indices of ``X`` that may be used for splitting.

        Raises
        ------
        ValueError
            If no samples are given or ``X`` and ``y`` disagree in length.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        features = np.asarray(features)

        if len(y) == 0:
            raise ValueError("Cannot fit a decision tree on an empty data set.")
        if X.shape[0] != len(y):
            raise ValueError("X and y must contain the same number of samples.")

        self.tree = self._build_tree(X, y, features)

    def _build_tree(self, X, y, features, depth=0):
        """
        Recursively build the decision tree.

        Returns a leaf when the labels are pure, when no features remain, when
        ``max_depth`` has been reached, or when the node holds fewer than
        ``min_samples_split`` samples; otherwise returns an internal node with
        one child per distinct value of the best attribute.
        """
        if np.unique(y).size == 1:
            return Node(y[0])

        if features.size == 0:
            return Node(self._major_class(y))

        # Compare the number of samples (rows), not the number of array cells.
        if depth >= self.max_depth or len(y) < self.min_samples_split:
            return Node(self._major_class(y))

        position, attribute = self._best_attribute(X, y, features)
        node = Node(None, attribute)

        # The chosen attribute is not reused further down this branch.
        remaining = np.delete(features, position)
        column = X[:, attribute]

        # Values come from the chosen column only, so every subset is non-empty.
        for value in np.unique(column):
            mask = column == value
            node.children[value] = self._build_tree(
                X[mask], y[mask], remaining, depth=depth + 1
            )

        return node

    def _predict(self, x, node):
        """
        Predict the class of a given sample, starting the descent at ``node``.

        Raises
        ------
        KeyError
            If the sample has an attribute value for which the node reached
            has no child (a value not seen at that node during training).
        """
        # A node whose decision is set is a leaf; otherwise follow the child
        # that matches the sample's value for the node's attribute.
        while node.decision is None:
            node = node.children[x[node.attribute]]
        return node.decision

    def predict(self, X):
        """
        Predict classes for X samples.

        Returns a numpy array holding one predicted label per row of ``X``.
        """
        return np.array([self._predict(x, self.tree) for x in X])

    def evaluate(self, X, y):
        """
        Evaluate the accuracy of the decision tree.

        Returns the fraction of samples in ``X`` whose predicted label
        equals the corresponding entry of ``y``.
        """
        return np.sum(self.predict(X) == y) / len(y)
    
  

In [250]:
# Fit on the full data set with every column of X as a candidate feature,
# then report the accuracy on that same training data.
feature_indices = np.arange(X.shape[1])
clf = ID3DecisionTree(max_depth=9)
clf.fit(X, y, feature_indices)
clf.evaluate(X, y)

1.0