In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris



In [2]:
dataset=load_iris()
x=dataset.data
y=dataset.target

In [3]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

In [4]:
# making a function to calculate the the entropy
def calculate_entropy(y):
  classes=np.unique(y)
  entropy=0
  for i in classes:
    p=np.sum(y==i)/ len(y)
    entropy+=-p*np.log2(p)
  return entropy

In [5]:
# Now calculating the information gain
def calculate_information_gain(x,y,feature,threshold):
  parent_entropy=calculate_entropy(y)
  left_indices=x[:,feature]<=threshold
  right_indices=x[:,feature]>threshold
  left_entropy=calculate_entropy(y[left_indices])
  right_entropy=calculate_entropy(y[right_indices])
  left_weight=len(y[left_indices])/len(y)
  right_weight=len(y[right_indices])/len(y)
  return parent_entropy-(left_weight*left_entropy+right_weight*right_entropy)
  
  

In [6]:
class Decision_tree:
  def __init__(self,max_depth=None):
    self.max_depth=max_depth
    self.tree=None
  def fit(self,x,y):
    self.tree=self.build_tree(x,y,depth=0)
  def build_tree(self,x,y,depth):
    if depth==self.max_depth or len(np.unique(y))==1:
      return np.bincount(y).argmax()
    best_feature=None
    best_threshold=None
    best_information_gain=-np.inf
    for feature in range(x.shape[1]):
      thresholds=np.unique(x[:,feature])
      for threshold in thresholds:
        information_gain=calculate_information_gain(x,y,feature,threshold)
        if information_gain>best_information_gain:
          best_feature=feature
          best_threshold=threshold
          best_information_gain=information_gain
    left_indices=x[:,best_feature]<=best_threshold
    right_indices=x[:,best_feature]>best_threshold
    if best_information_gain > 0 and (self.max_depth is None or depth < self.max_depth):
      left_subtree=self.build_tree(x[left_indices],y[left_indices],depth+1)
      right_subtree=self.build_tree(x[right_indices],y[right_indices],depth+1)
      return {'feature':best_feature,'threshold':best_threshold,'left':left_subtree,'right':right_subtree}
    else:
      return np.bincount(y).argmax()
  def predict(self,x):
    return np.array([self.traverse_tree(x_i,self.tree) for x_i in x])
  def traverse_tree(self,inputs,node):
    while isinstance(node, dict) and 'feature' in node:
      if inputs[node['feature']]<=node['threshold']:
        node=node['left']
      else:
        node=node['right']
    return node

In [7]:
# training and evaluating decision Tree
tree=Decision_tree()
tree.fit(x_train, y_train)
y_pred=tree.predict(x_test)
accuracy=np.sum(y_pred == y_test)/len(y_test)
print("Accuracy:", accuracy)

Accuracy: 0.9
