In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

- `X_train`: each row is one example described by 3 binary features:
    - Ear Shape (1 if pointy, 0 otherwise)
    - Face Shape (1 if round, 0 otherwise)
    - Whiskers (1 if present, 0 otherwise)

- `y_train`: the label of each example, i.e. whether the animal is a cat:
    - 1 if the animal is a cat
    - 0 otherwise

In [72]:
# One row per animal; columns are (ear shape, face shape, whiskers),
# each encoded as 1 / 0 as described in the markdown cell above.
X_train = np.array([
    [1, 1, 1],
    [0, 0, 1],
    [0, 1, 0],
    [1, 0, 1],
    [1, 1, 1],
    [1, 1, 0],
    [0, 0, 0],
    [1, 1, 0],
    [0, 1, 0],
    [0, 1, 0],
])

# Label for each row of X_train: 1 = cat, 0 = not a cat.
y_train = np.array([1, 1, 0, 0, 1, 1, 0, 1, 0, 0])

In [73]:
def entropy(p):
    """
    Binary entropy (in bits) of a node whose fraction of positive examples is p.

    Computes -p*log2(p) - (1-p)*log2(1-p). The two pure cases, p == 0 and
    p == 1, are short-circuited to 0 so that log2(0) is never evaluated.
    """
    # Guard clause: a pure node has no uncertainty.
    if p in (0, 1):
        return 0

    q = 1 - p  # fraction of negative examples
    return -p * np.log2(p) - q * np.log2(q)

print(entropy(0.5))

1.0


In [85]:
def split_features(X, index_features):
    """
    Partition the rows of X according to the value of one feature column.

    Rows whose value in column `index_features` equals 1 go to the left
    node; rows whose value equals 0 go to the right node.

    Column meaning for the dataset used in this notebook:
        index_features = 0 => ear shape
        index_features = 1 => face shape
        index_features = 2 => whiskers

    Returns a pair (left, right) of arrays holding the row indices of X
    that belong to each node.
    """
    column = X[:, index_features]

    # np.nonzero returns a tuple with one index array per dimension;
    # element [0] is the array of row positions.
    has_feature = np.nonzero(column == 1)[0]
    lacks_feature = np.nonzero(column == 0)[0]

    return has_feature, lacks_feature

In [86]:
def weighted_entropy(X, y, left_indices, right_indices):
    """
    Weighted average entropy of the two branches produced by a split.

    For each branch, the fraction of positive labels among its rows is fed
    to `entropy`, and the result is weighted by the share of the node's rows
    that ended up in that branch.

    Args:
        X: the examples in the node being split; only len(X) is used.
        y: labels for the node, indexed by the two index arrays. Assumed to
           be 0/1 so that sum / count is the positive fraction.
        left_indices: indices of the rows sent to the left branch.
        right_indices: indices of the rows sent to the right branch.

    Returns:
        w_left * entropy(p_left) + w_right * entropy(p_right).
        A branch with no rows contributes 0 (its weight is 0), and 0.0 is
        returned when the node itself is empty, instead of dividing by zero.
    """
    n_node = len(X)
    if n_node == 0:
        # Nothing to split: avoid 0/0 when computing the branch weights.
        return 0.0

    total = 0.0
    for branch in (left_indices, right_indices):
        n_branch = len(branch)
        if n_branch == 0:
            # A split can send every row to one side; the empty side has
            # weight 0 and no defined class fraction, so skip it.
            continue
        p_branch = np.sum(y[branch]) / n_branch
        total += (n_branch / n_node) * entropy(p_branch)
    return total

In [87]:
# Split the root node on feature 0 (ear shape) and show the weighted entropy
# of the two resulting branches.
left_indices, right_indices = split_features(X_train, 0)
weighted_entropy(X_train, y_train, left_indices, right_indices)

0.7219280948873623

In [95]:
def information_gain(X, y, left_indices, right_indices):
    """
    Reduction in entropy obtained by splitting a node into two branches.

    X holds the examples in the node and y their respective classes. The
    gain is the entropy of the node itself (from its fraction of positive
    labels) minus the weighted entropy of the left and right branches.
    """
    positive_fraction = np.sum(y) / len(y)
    return entropy(positive_fraction) - weighted_entropy(
        X, y, left_indices, right_indices
    )

In [96]:
# Information gain at the root node for the split currently held in
# left_indices / right_indices.
information_gain(X_train, y_train, left_indices, right_indices)

0.2780719051126377

In [104]:
# Try each feature as the split for the root node and record the information
# gain it would produce. `gains[i]` corresponds to `features_set[i]`.
gains = []
features_set = ['Ear Shape', 'Face Shape', 'Whiskers']
for i, feature_name in enumerate(features_set):
    # Use split_features, the helper defined in this notebook; `split_indices`
    # is not defined anywhere and raises NameError on a fresh kernel.
    left_indices, right_indices = split_features(X_train, i)
    i_gain = information_gain(X_train, y_train, left_indices, right_indices)
    gains.append(i_gain)
    print(f"Feature: {feature_name}, information gain if we split the root node using this feature: {i_gain:.2f}")
    

Feature: Ear Shape, information gain if we split the root node using this feature: 0.28
Feature: Face Shape, information gain if we split the root node using this feature: 0.03
Feature: Whiskers, information gain if we split the root node using this feature: 0.12


In [105]:
def select_node(list_gains, feature_names=None):
    """
    Pick the feature with the largest information gain.

    Args:
        list_gains: sequence of information gains, one per feature, in the
            same order as the feature names.
        feature_names: optional sequence of feature names aligned with
            `list_gains`. When omitted, the notebook-level `features_set`
            is used.

    Returns:
        A two-element list [feature_name, gain] for the best feature. On
        ties, np.argmax selects the first maximum.

    Raises:
        ValueError: from np.argmax if `list_gains` is empty.
    """
    if feature_names is None:
        feature_names = features_set

    # Index into the argument that was passed in, not the global `gains`,
    # so the result always reflects the caller's data.
    best_index = int(np.argmax(np.asarray(list_gains)))
    return [feature_names[best_index], list_gains[best_index]]

In [106]:
# Show the feature chosen for the root node together with its information gain.
select_node(gains)

['Ear Shape', 0.2780719051126377]