* d_utv - Dataset's Unique Target Values
* d_utvf - Dataset's Unique Target Value Frequencies
* d_mftv - Dataset's Most Frequent Target Value


Inside ID3 Function:
* mftv - Most Frequent Target Value of the current data (passed to recursive calls as the parent's value)
* utv - Data's Unique Target Values
* utvf - Data's Unique Target Value Frequencies
* ig - Information Gains
* sf - sub-feature

Inside get_weighted_entropy function:
* uf - Unique Feature Values
* uff - Unique Feature Value Frequencies

In [290]:

import numpy as np
import pandas as pd
from pprint import pprint

# Load the training table from data.csv; `.iloc[:]` selects every row.
dataset = pd.read_csv("data.csv").iloc[:]
# Every column except the last one is treated as a candidate split feature.
# NOTE(review): presumably the last column of data.csv is the 'Target'
# column named below -- confirm against the data file.
features = dataset.columns[:-1].tolist()
# Name of the label column read by ID3 and get_weighted_entropy.
target = 'Target'
# Most frequent target value of the whole dataset; assigned by main().
d_mftv = None

In [291]:
def main():
    """Build the ID3 tree for the module-level dataset and pretty-print it.

    As a side effect, stores the dataset-wide most frequent target value in
    the module-level ``d_mftv``.
    """
    global d_mftv
    target_summary = get_uniqueValues_and_frequencies_and_mostFrequentUniqueValue(
        dataset[target]
    )
    d_utv, d_utvf, d_mftv = target_summary

    decision_tree = ID3(dataset, features, d_mftv)
    pprint(decision_tree)

In [292]:
def ID3(data, features, most_freq_target_value):
    """Recursively build an ID3 decision tree as nested dictionaries.

    Args:
        data: DataFrame with the rows that reach this node. It must contain
            the module-level ``target`` column and every column named in
            ``features``.
        features: Names of the columns still available for splitting. The
            list is left unmodified, so sibling branches and the caller all
            keep their own view of the available features.
        most_freq_target_value: Most frequent target value of the parent
            node's data; used as the leaf label when ``data`` is empty.

    Returns:
        Either a leaf label (a target value) or a dict of the form
        ``{best_feature: {feature_value: subtree, ...}}``.
    """
    # Empty data has to be handled before anything else: the helper below
    # calls np.argmax, which raises on an empty column.
    if len(data) == 0:
        return most_freq_target_value

    utv, _, mftv = get_uniqueValues_and_frequencies_and_mostFrequentUniqueValue(data[target])

    # Pure node: every remaining row carries the same target value.
    if len(utv) == 1:
        return utv[0]

    # Nothing left to split on: label the leaf with this node's own majority.
    if len(features) == 0:
        return mftv

    # Information gain of each candidate feature. The node entropy does not
    # depend on the feature, so it is computed once.
    total_entropy = entropy(data[target])
    ig = [total_entropy - get_weighted_entropy(data, feature) for feature in features]

    best_feature = features[np.argmax(ig)]
    # Build a new list instead of calling features.remove(): removing in
    # place would hide the feature from sibling branches and from the caller.
    remaining_features = [feature for feature in features if feature != best_feature]

    tree = {best_feature: {}}
    for sf in np.unique(data[best_feature]):
        # Boolean indexing keeps exactly the rows whose best_feature equals
        # sf, without dropping rows that have missing values in other columns
        # and without changing column dtypes.
        sub_table = data[data[best_feature] == sf]
        tree[best_feature][sf] = ID3(sub_table, remaining_features, mftv)

    return tree

In [293]:
def get_uniqueValues_and_frequencies_and_mostFrequentUniqueValue(column):
    """Summarise the distinct values of *column*.

    Returns:
        A three-item list: the sorted unique values, their occurrence
        counts, and the unique value with the highest count (the first one
        in sorted order when several share the highest count).
    """
    unique_values, counts = np.unique(column, return_counts=True)
    most_frequent_position = counts.argmax()
    most_frequent_value = unique_values[most_frequent_position]
    return [unique_values, counts, most_frequent_value]

In [294]:
def entropy(column):
    """Return the Shannon entropy, in bits, of the values in *column*.

    Each distinct value contributes ``-p * log2(p)``, where ``p`` is its
    relative frequency. An empty column yields 0.
    """
    _, counts = np.unique(column, return_counts=True)
    total_count = np.sum(counts)
    bits = 0
    for count in counts:
        probability = count / total_count
        bits += (-probability) * np.log2(probability)
    return bits

In [295]:
def get_weighted_entropy(data, feature):
    """Return the target entropy that remains after splitting on *feature*.

    For every distinct value of ``data[feature]``, the entropy of the
    module-level ``target`` column over the matching rows is weighted by the
    fraction of rows holding that value, and the weighted terms are summed.

    Args:
        data: DataFrame containing ``feature`` and the ``target`` column.
        feature: Name of the column to split on.

    Returns:
        The weighted sum of the per-value target entropies.
    """
    uf, uff = np.unique(data[feature], return_counts=True)
    total_rows = np.sum(uff)  # loop-invariant, so computed once
    weighted_entropy = 0
    for value, frequency in zip(uf, uff):
        # Boolean indexing selects exactly the rows counted in `frequency`.
        # The earlier where().dropna() form also discarded rows with a
        # missing value in any other column, so the weight and the subset
        # could disagree.
        table = data[data[feature] == value]
        weighted_entropy += (frequency / total_rows) * entropy(table[target])
    return weighted_entropy

In [296]:
# Build and print the decision tree only when this file is run as a script.
if __name__ == '__main__':
    main()

{'Outlook': {'overcast': 'yes',
             'rain': {'Wind': {'strong': 'no', 'weak': 'yes'}},
             'sunny': {'Humidity': {'high': 'no', 'normal': 'yes'}}}}
