# **Library Imports**

In [1]:
import pandas as pd
import numpy as np


## **Dataset Creation**

In [2]:
# Toy weather dataset: one list per column, seven observations each.
data = dict(
    Outlook=['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast'],
    Temperature=['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Mild'],
    Humidity=['High', 'High', 'High', 'High', 'Normal', 'Normal', 'High'],
    Wind=['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong'],
    # Target column: the label to be predicted from the other four.
    PlayTennis=['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes'],
)

# Column order follows the keyword order above.
df = pd.DataFrame(data)
df


Unnamed: 0,Outlook,Temperature,Humidity,Wind,PlayTennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Mild,High,Strong,Yes


# **Entropy Function**

In [3]:
def entropy(col):
    """Return the Shannon entropy, in bits, of the values in ``col``.

    Every distinct value with relative frequency ``p`` contributes
    ``-p * log2(p)`` to the total. A column with a single distinct value
    has entropy 0, and an empty column also yields 0.

    Parameters
    ----------
    col : array-like
        Sequence of labels accepted by ``np.unique``.

    Returns
    -------
    The accumulated entropy (0 when ``col`` is empty).
    """
    _, counts = np.unique(col, return_counts=True)
    total = np.sum(counts)  # same for every term, so computed once

    result = 0
    for count in counts:
        prob = count / total
        result -= prob * np.log2(prob)
    return result


# **Information Gain**

In [4]:
def information_gain(data, feature, target):
    """Return the information gain from splitting ``data`` on ``feature``.

    The gain is the entropy of the ``target`` column minus the weighted
    average of the target entropies of the subsets produced by each
    distinct value of ``feature``; each subset is weighted by its share
    of the rows.

    Parameters
    ----------
    data : DataFrame containing both ``feature`` and ``target`` columns.
    feature : name of the column to split on.
    target : name of the label column.
    """
    baseline = entropy(data[target])
    values, counts = np.unique(data[feature], return_counts=True)
    n_rows = np.sum(counts)

    remainder = 0
    for value, count in zip(values, counts):
        branch = data[data[feature] == value]
        remainder += (count / n_rows) * entropy(branch[target])

    return baseline - remainder


# **Best Feature Selection**

In [5]:
def best_feature(data, target):
    """Return the name of the non-target column with the highest information gain.

    Every column of ``data`` except ``target`` is scored with
    ``information_gain``. On a tie, the column that appears first in
    ``data.columns`` wins, because ``max`` keeps the first maximal key.
    """
    gains = {
        col: information_gain(data, col, target)
        for col in data.columns
        if col != target
    }
    return max(gains, key=gains.get)


# **ID3 Tree Builder**

In [6]:
def id3(data, target):
    """Recursively build an ID3 decision tree for ``target`` from ``data``.

    Returns either a single label (leaf) or a nested dict of the form
    ``{feature: {value: subtree_or_label, ...}}``, where ``feature`` is the
    column chosen by ``best_feature`` and each ``value`` is one of its
    distinct values in ``data``.

    Parameters
    ----------
    data : DataFrame holding the feature columns and the ``target`` column.
    target : name of the label column.
    """
    labels = np.unique(data[target])

    # Pure node: every row carries the same label, so it becomes a leaf.
    if len(labels) == 1:
        return labels[0]

    # Only the target column remains: fall back to the most common label.
    if len(data.columns) == 1:
        return data[target].mode()[0]

    split_on = best_feature(data, target)

    branches = {}
    for value in np.unique(data[split_on]):
        # The chosen feature is dropped so it is not reused deeper in this branch.
        remaining = data[data[split_on] == value].drop(columns=[split_on])
        branches[value] = id3(remaining, target)

    return {split_on: branches}


# **Tree Generation**

In [7]:
# Build the decision tree from the full dataset, using PlayTennis as the target.
# The result is a nested dict of the form {feature: {value: subtree-or-label}}.
tree = id3(df, "PlayTennis")
tree


{'Outlook': {'Overcast': 'Yes',
  'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}},
  'Sunny': 'No'}}