In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils import *

|                                                     |   Ear Shape | Face Shape | Whiskers |   Cat  |
|:---------------------------------------------------:|:---------:|:-----------:|:---------:|:------:|
| <img src="images/0.png" alt="drawing" width="50"/> |   Pointy   |   Round     |  Present  |    1   |
| <img src="images/1.png" alt="drawing" width="50"/> |   Floppy   |  Not Round  |  Present  |    1   |
| <img src="images/2.png" alt="drawing" width="50"/> |   Floppy   |  Round      |  Absent   |    0   |
| <img src="images/3.png" alt="drawing" width="50"/> |   Pointy   |  Not Round  |  Present  |    0   |
| <img src="images/4.png" alt="drawing" width="50"/> |   Pointy   |   Round     |  Present  |    1   |
| <img src="images/5.png" alt="drawing" width="50"/> |   Pointy   |   Round     |  Absent   |    1   |
| <img src="images/6.png" alt="drawing" width="50"/> |   Floppy   |  Not Round  |  Absent   |    0   |
| <img src="images/7.png" alt="drawing" width="50"/> |   Pointy   |  Round      |  Absent   |    1   |
| <img src="images/8.png" alt="drawing" width="50"/> |    Floppy  |   Round     |  Absent   |    0   |
| <img src="images/9.png" alt="drawing" width="50"/> |   Floppy   |  Round      |  Absent   |    0   |

In [7]:
# Binary encoding of the table above, one row per animal.
# Columns: ear shape (1 = pointy), face shape (1 = round), whiskers (1 = present).
x_train = np.array([
    [1, 1, 1],  # 0: pointy, round,     present
    [0, 0, 1],  # 1: floppy, not round, present
    [0, 1, 0],  # 2: floppy, round,     absent
    [1, 0, 1],  # 3: pointy, not round, present
    [1, 1, 1],  # 4: pointy, round,     present
    [1, 1, 0],  # 5: pointy, round,     absent
    [0, 0, 0],  # 6: floppy, not round, absent
    [1, 1, 0],  # 7: pointy, round,     absent
    [0, 1, 0],  # 8: floppy, round,     absent
    [0, 1, 0],  # 9: floppy, round,     absent
])

# Labels in the same row order: 1 = cat, 0 = not a cat.
y_train = np.array([1, 1, 0, 0, 1, 1, 0, 1, 0, 0])

In [8]:
# Display the first training example (row 0 of the feature matrix).
x_train[0]

array([1, 1, 1])

$$\text{Information Gain} = H(p_1^\text{node})- \left(w^{\text{left}}H\left(p_1^\text{left}\right) + w^{\text{right}}H\left(p_1^\text{right}\right)\right),$$

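where $p_1^\text{node}$, $p_1^\text{left}$ and $p_1^\text{right}$ are the fractions of positive examples (cats) at the node and in its left and right branches, $w^{\text{left}}$ and $w^{\text{right}}$ are the fractions of the node's examples that go to each branch, and $H$ is the entropy, defined as

$$H(p_1) = -p_1 \log_2(p_1) - (1- p_1) \log_2(1- p_1),$$

with $H(0) = H(1) = 0$ by convention.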

In [9]:
def entropy(p):
    """Return the binary entropy, in bits, of a probability ``p``.

    Computes ``-p*log2(p) - (1-p)*log2(1-p)``. When ``p`` is exactly 0 or 1
    the result is 0, which also avoids evaluating ``log2(0)``.
    """
    if p != 0 and p != 1:
        q = 1 - p  # probability of the complementary class
        return -p * np.log2(p) - q * np.log2(q)
    return 0
print(entropy(0.5))

1.0


In [10]:
# Helpers used to compute the information gain of a split
def split_indices(x, index_feature):
    """Partition sample positions according to one feature.

    A sample goes to the left branch when its ``index_feature`` entry equals 1
    and to the right branch otherwise.

    Args:
        x: Iterable of samples, each indexable by ``index_feature``.
        index_feature: Position of the feature to split on.

    Returns:
        A tuple ``(right, left)`` of index lists. Note the order: the
        right-branch indices come first.
    """
    left, right = [], []
    for position, sample in enumerate(x):
        branch = left if sample[index_feature] == 1 else right
        branch.append(position)
    return right, left

In [34]:
# Split on feature 0. The result is ordered (right, left): rows where the feature is not 1, then rows where it is 1.
split_indices(x_train, 0)

([1, 2, 6, 8, 9], [0, 3, 4, 5, 7])

In [25]:
def weighted_entropy(x, y, right, left):
    """Return the weighted average entropy of the two branches of a split.

    Each branch contributes ``(len(branch) / len(x)) * entropy(p_branch)``,
    where ``p_branch`` is the mean of ``y`` over that branch's indices (the
    fraction of 1-labels when ``y`` holds 0/1 values).

    Args:
        x: Samples at the node; only ``len(x)`` is used.
        y: Labels. Indexed as ``y[indices]`` with a list of positions, so it
            must support that form of indexing (e.g. a NumPy array).
        right: Indices of the samples in the right branch.
        left: Indices of the samples in the left branch.

    Returns:
        The weighted entropy of the split. An empty branch contributes 0
        rather than raising ``ZeroDivisionError``.
    """
    n_samples = len(x)

    def branch_term(indices):
        n_branch = len(indices)
        if n_branch == 0:
            # One-sided split: the empty branch has weight 0 and no class
            # fraction to compute, so it adds nothing to the total.
            return 0.0
        p_branch = sum(y[indices]) / n_branch
        return (n_branch / n_samples) * entropy(p_branch)

    return branch_term(left) + branch_term(right)

In [35]:
# split_indices returns its lists in (right, left) order, so unpack in that same order.
right, left = split_indices(x_train, 0)
weighted_entropy(x_train, y_train, right, left)

0.7219280948873623

In [27]:
def information_gain(x,y, right, left):
    """Return the reduction in entropy obtained by splitting a node.

    The entropy of the node (computed from the mean of ``y``) minus the
    weighted entropy of the ``right`` / ``left`` branches.

    Args:
        x: Samples at the node, forwarded to ``weighted_entropy``.
        y: Labels of the samples at the node.
        right: Indices of the samples in the right branch.
        left: Indices of the samples in the left branch.
    """
    entropy_before_split = entropy(sum(y) / len(y))
    entropy_after_split = weighted_entropy(x, y, right, left)
    return entropy_before_split - entropy_after_split

In [28]:
# Relies on the notebook-level `right` / `left` index lists from the split on feature 0 in the preceding cell.
information_gain(x_train, y_train, right, left)

0.2780719051126377

In [42]:
# Compare the candidate root splits: compute the information gain for each feature column in turn.
# Note: this rebinds the notebook-level `right` and `left`; after the loop they hold the last feature's split.
for i, feature_name in enumerate(['Ear Shape', 'Face Shape', 'Whiskers']):
    # split_indices returns (right, left), in that order
    right, left = split_indices(x_train, i)
    i_gain = information_gain(x_train, y_train, right, left)
    print(f"Feature: {feature_name}, information gain if we split the root node using this feature: {i_gain:.2f}")

Feature: Ear Shape, information gain if we split the root node using this feature: 0.28
Feature: Face Shape, information gain if we split the root node using this feature: 0.03
Feature: Whiskers, information gain if we split the root node using this feature: 0.12
