In [1]:
import pandas as pd
# Toy weather dataset stored column-wise: each list holds one attribute for the
# same 14 observations, and `play` is the yes/no label used as the target below.
data = dict(
    outlook=[
        "sunny", "sunny", "overcast", "rainy", "rainy", "rainy", "overcast",
        "sunny", "sunny", "rainy", "sunny", "overcast", "overcast", "rainy",
    ],
    temperature=[
        "hot", "hot", "hot", "mild", "cool", "cool", "cool",
        "mild", "cool", "mild", "mild", "mild", "hot", "mild",
    ],
    humidity=[
        "high", "high", "high", "high", "normal", "normal", "normal",
        "high", "normal", "normal", "normal", "high", "normal", "high",
    ],
    wind=[
        "weak", "strong", "weak", "weak", "weak", "strong", "strong",
        "weak", "weak", "weak", "strong", "strong", "weak", "strong",
    ],
    play=[
        "no", "no", "yes", "yes", "yes", "no", "yes",
        "no", "yes", "yes", "yes", "yes", "yes", "no",
    ],
)

# One DataFrame column per dictionary key, then render it as the cell output.
df = pd.DataFrame(data)
display(df)

Unnamed: 0,outlook,temperature,humidity,wind,play
0,sunny,hot,high,weak,no
1,sunny,hot,high,strong,no
2,overcast,hot,high,weak,yes
3,rainy,mild,high,weak,yes
4,rainy,cool,normal,weak,yes
5,rainy,cool,normal,strong,no
6,overcast,cool,normal,strong,yes
7,sunny,mild,high,weak,no
8,sunny,cool,normal,weak,yes
9,rainy,mild,normal,weak,yes


In [2]:
import math
# Entropy calculation
def entropy(target_col):
    """Return the Shannon entropy, in bits, of the values in ``target_col``.

    Parameters
    ----------
    target_col : pandas.Series
        Column whose value distribution is measured (anything exposing
        ``value_counts(normalize=True)`` works).

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the observed relative frequencies ``p``.
        A column with a single distinct value (or no values) yields ``0.0``.
    """
    probabilities = target_col.value_counts(normalize=True)
    # Zero-probability classes contribute nothing (lim p->0 of p*log2(p) is 0) and
    # must be skipped: value_counts reports them for unobserved categories of a
    # categorical column, and math.log2(0) raises ValueError.
    weighted_logs = sum(p * math.log2(p) for p in probabilities if p > 0)
    # Subtract from 0.0 rather than negating so a pure column gives +0.0, not -0.0.
    return 0.0 - weighted_logs

# Information Gain calculation
def information_gain(df, feature, target='play'):
    """Return how much splitting ``df`` on ``feature`` reduces the entropy of ``target``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing both the ``feature`` and ``target`` columns.
    feature : str
        Name of the column to split on.
    target : str, default 'play'
        Name of the label column whose entropy is measured.

    Returns
    -------
    float
        Entropy of ``df[target]`` minus the size-weighted average entropy of
        ``target`` within each distinct value of ``feature``.
    """
    row_count = len(df)

    def _branch_contribution(level):
        # Rows on this branch of the split, weighted by their share of the table.
        branch = df.loc[df[feature] == level]
        share = len(branch) / row_count
        return share * entropy(branch[target])

    parent_entropy = entropy(df[target])
    children_entropy = sum(
        _branch_contribution(level) for level in df[feature].unique()
    )
    return parent_entropy - children_entropy

# Entropy of the target over the whole table, then the gain of each candidate split
entropy_total = entropy(df['play'])

info_gains = {}
for feature in ('outlook', 'temperature', 'humidity', 'wind'):
    info_gains[feature] = information_gain(df, feature, target='play')

# Report everything rounded to four decimals
print(f"Total Entropy: {entropy_total:.4f}")
for feature, ig in info_gains.items():
    print(f"Information Gain for {feature}: {ig:.4f}")

Total Entropy: 0.9403
Information Gain for outlook: 0.2467
Information Gain for temperature: 0.0292
Information Gain for humidity: 0.1518
Information Gain for wind: 0.0481



===== Step-by-Step Iterations =====


----------------------------------------
At ROOT:
Best Feature: outlook
Gains: {'outlook': 0.24674981977443933, 'temperature': 0.02922256565895487, 'humidity': 0.15183550136234159, 'wind': 0.04812703040826949}


----------------------------------------
At ROOT → outlook = sunny:
Best Feature: humidity
Gains: {'temperature': 0.5709505944546686, 'humidity': 0.9709505944546686, 'wind': 0.01997309402197489}

ROOT → outlook = sunny → humidity = high is pure → no
ROOT → outlook = sunny → humidity = normal is pure → yes
ROOT → outlook = overcast is pure → yes

----------------------------------------
At ROOT → outlook = rainy:
Best Feature: wind
Gains: {'temperature': 0.01997309402197489, 'humidity': 0.01997309402197489, 'wind': 0.9709505944546686}

ROOT → outlook = rainy → wind = weak is pure → yes
ROOT → outlook = rainy → wind = strong is pure → no

===== Final Decision Tree =====

[outlook]
→ sunny:
  [humidity]
  → high:
    → no
  → normal:
    → ye

In [13]:

# Same observations as a list of rows; the column order is the one given when
# the DataFrame is built below: outlook, temp, humidity, wind, play.
data = [
    ["sunny", "hot", "high", "weak", "no"],
    ["sunny", "hot", "high", "strong", "no"],
    ["overcast", "hot", "high", "weak", "yes"],
    ["rainy", "mild", "high", "weak", "yes"],
    ["rainy", "cool", "normal", "weak", "yes"],
    ["rainy", "cool", "normal", "strong", "no"],
    ["overcast", "cool", "normal", "strong", "yes"],
    ["sunny", "mild", "high", "weak", "no"],
    ["sunny", "cool", "normal", "weak", "yes"],
    ["rainy", "mild", "normal", "weak", "yes"],
    ["sunny", "mild", "normal", "strong", "yes"],
    ["overcast", "mild", "high", "strong", "yes"],
    ["overcast", "hot", "normal", "weak", "yes"],
    ["rainy", "mild", "high", "strong", "no"],
]

In [16]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Wrap the raw rows in a DataFrame, naming the four attributes and the `play` label
my_data = pd.DataFrame(
    data=data,
    columns=["outlook", "temp", "humidity", "wind", "play"],
)
my_data


Unnamed: 0,outlook,temp,humidity,wind,play
0,sunny,hot,high,weak,no
1,sunny,hot,high,strong,no
2,overcast,hot,high,weak,yes
3,rainy,mild,high,weak,yes
4,rainy,cool,normal,weak,yes
5,rainy,cool,normal,strong,no
6,overcast,cool,normal,strong,yes
7,sunny,mild,high,weak,no
8,sunny,cool,normal,weak,yes
9,rainy,mild,normal,weak,yes


In [18]:
# Instantiate the encoder only while the name still refers to the class.
# Without the guard, running this cell a second time calls the *instance*
# created by the first run and raises "object is not callable".
# NOTE(review): the instance keeps the class's name because the next cell calls
# `LabelEncoder.fit_transform(...)`; a separate name (e.g. `label_encoder`)
# would be cleaner if that cell is changed at the same time.
if isinstance(LabelEncoder, type):
    LabelEncoder = LabelEncoder()

In [31]:

# Replace every column (attributes and label alike) by integer codes, in place.
# The one encoder object is re-fitted on each column in turn.
for column_name in my_data.columns:
    column_codes = LabelEncoder.fit_transform(my_data[column_name])
    my_data[column_name] = column_codes

my_data





Unnamed: 0,outlook,temp,humidity,wind,play
0,2,1,0,1,0
1,2,1,0,0,0
2,0,1,0,1,1
3,1,2,0,1,1
4,1,0,1,1,1
5,1,0,1,0,0
6,0,0,1,0,1
7,2,2,0,1,0
8,2,0,1,1,1
9,1,2,1,1,1


In [None]:
from sklearn.tree import DecisionTreeClassifier
# Separate the encoded table into features (`x`: every column except `play`)
# and the label (`y`: the `play` column).
x = my_data.drop(columns=["play"])
y = my_data["play"]


In [23]:
# Build the classifier with a fixed seed: scikit-learn permutes the features at
# each split, so an unseeded tree may break ties differently between runs.
# The guard keeps this cell re-runnable: once the name is rebound to an
# instance, calling it again would raise "object is not callable".
# NOTE(review): the instance keeps the class's name because the next cell calls
# `DecisionTreeClassifier.fit(x, y)`; a separate name (e.g. `tree_clf`) would be
# cleaner if that cell is changed at the same time.
if isinstance(DecisionTreeClassifier, type):
    DecisionTreeClassifier = DecisionTreeClassifier(random_state=0)


In [24]:
# Fit the tree on the encoded features and label; the value returned by `fit`
# is kept as `model` and used for prediction below.
model = DecisionTreeClassifier.fit(X=x, y=y)

In [None]:
# Query: outlook=sunny, temp=hot, humidity=normal, wind=weak, written with the
# integer codes from the encoded table above (sunny=2, hot=1, normal=1, weak=1).
# The row is wrapped in a DataFrame that reuses the training columns, because the
# model was fitted on a DataFrame and a bare list makes scikit-learn warn that
# "X does not have valid feature names".
pridiction2 = model.predict(pd.DataFrame([[2, 1, 1, 1]], columns=x.columns))
pridiction2





array([1], dtype=int64)

In [26]:
# Show the encoded labels as a plain NumPy array
y.to_numpy()

array([0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0], dtype=int64)

In [32]:
# Raw string rows again (same values as the earlier `data` list); the column
# order is outlook, temp, humidity, wind, play.
data = [
    ["sunny", "hot", "high", "weak", "no"],
    ["sunny", "hot", "high", "strong", "no"],
    ["overcast", "hot", "high", "weak", "yes"],
    ["rainy", "mild", "high", "weak", "yes"],
    ["rainy", "cool", "normal", "weak", "yes"],
    ["rainy", "cool", "normal", "strong", "no"],
    ["overcast", "cool", "normal", "strong", "yes"],
    ["sunny", "mild", "high", "weak", "no"],
    ["sunny", "cool", "normal", "weak", "yes"],
    ["rainy", "mild", "normal", "weak", "yes"],
    ["sunny", "mild", "normal", "strong", "yes"],
    ["overcast", "mild", "high", "strong", "yes"],
    ["overcast", "hot", "normal", "weak", "yes"],
    ["rainy", "mild", "high", "strong", "no"],
]


In [46]:
import pandas as pd
import numpy as np
# Rebuild the table from the raw string rows, so `my_data` holds the original
# labels again rather than integer codes.
my_data = pd.DataFrame(
    data=data,
    columns=["outlook", "temp", "humidity", "wind", "play"],
)
my_data

Unnamed: 0,outlook,temp,humidity,wind,play
0,sunny,hot,high,weak,no
1,sunny,hot,high,strong,no
2,overcast,hot,high,weak,yes
3,rainy,mild,high,weak,yes
4,rainy,cool,normal,weak,yes
5,rainy,cool,normal,strong,no
6,overcast,cool,normal,strong,yes
7,sunny,mild,high,weak,no
8,sunny,cool,normal,weak,yes
9,rainy,mild,normal,weak,yes


In [None]:
# Entropy (in bits) of the `play` column, computed directly with NumPy.
# NOTE(review): the name `entropy` is also the function defined in an earlier
# cell; after this cell runs it refers to a number instead.
count = my_data['play'].value_counts().to_numpy()   # occurrences per label
print(count)

probabilities = count / count.sum()                 # relative frequencies
entropy = -(probabilities * np.log2(probabilities)).sum()
print(f"Entropy: {entropy}")






[9 5]
Entropy: 0.9402859586706311


In [54]:
# Keep only the observations whose outlook is "sunny" and print them as text
sunny_rows = my_data[my_data["outlook"].eq("sunny")]
print(sunny_rows)


   outlook  temp humidity    wind play
0    sunny   hot     high    weak   no
1    sunny   hot     high  strong   no
7    sunny  mild     high    weak   no
8    sunny  cool   normal    weak  yes
10   sunny  mild   normal  strong  yes
