<a href="https://colab.research.google.com/github/tirth4077-byte/ML-Lab/blob/main/mlpr5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import pandas as pd
import numpy as np
from math import log2

# Toy weather dataset: 14 observations with four categorical feature columns
# (Outlook, Temp, Humidity, Wind) and the class label 'Play'.
# 'Play' is kept as the last key because later cells pick the feature columns
# with df.columns[:-1].
dataset = {
    'Outlook': ['Sunny','Sunny','Overcast','Rain','Rain','Rain','Overcast',
                'Sunny','Sunny','Rain','Sunny','Overcast','Overcast','Rain'],
    'Temp': ['Hot','Hot','Hot','Mild','Cool','Cool','Cool',
             'Mild','Cool','Mild','Mild','Mild','Hot','Mild'],
    'Humidity': ['High','High','High','High','Normal','Normal','Normal',
                 'High','Normal','Normal','Normal','High','Normal','High'],
    'Wind': ['Weak','Strong','Weak','Weak','Weak','Strong','Strong',
             'Weak','Weak','Weak','Strong','Strong','Weak','Strong'],
    'Play': ['No','No','Yes','Yes','Yes','No','Yes',
             'No','Yes','Yes','Yes','Yes','Yes','No']
}

# One DataFrame column per dictionary key, in the order the keys are written.
df = pd.DataFrame(dataset)
print(df)

     Outlook  Temp Humidity    Wind Play
0      Sunny   Hot     High    Weak   No
1      Sunny   Hot     High  Strong   No
2   Overcast   Hot     High    Weak  Yes
3       Rain  Mild     High    Weak  Yes
4       Rain  Cool   Normal    Weak  Yes
5       Rain  Cool   Normal  Strong   No
6   Overcast  Cool   Normal  Strong  Yes
7      Sunny  Mild     High    Weak   No
8      Sunny  Cool   Normal    Weak  Yes
9       Rain  Mild   Normal    Weak  Yes
10     Sunny  Mild   Normal  Strong  Yes
11  Overcast  Mild     High  Strong  Yes
12  Overcast   Hot   Normal    Weak  Yes
13      Rain  Mild     High  Strong   No


In [14]:
def entropy(target_col):
    """Return the Shannon entropy, in bits, of the labels in ``target_col``.

    Parameters
    ----------
    target_col : array-like
        Class labels (for example a DataFrame column). Anything accepted by
        ``np.unique`` works.

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the distinct labels, where ``p`` is the
        relative frequency of each label. An empty input has entropy ``0.0``.
    """
    _, counts = np.unique(target_col, return_counts=True)

    # The total is the same for every label, so compute it once instead of
    # re-summing the counts array on each loop iteration.
    total = counts.sum()
    if total == 0:
        return 0.0

    entropy_value = 0.0
    for count in counts:
        # Every count returned by np.unique is >= 1, so p > 0 and log2 is defined.
        p = count / total
        entropy_value -= p * log2(p)
    return entropy_value
# Entropy of the full target column (5 'No' and 9 'Yes' labels in the table above).
print("Entropy of dataset:", entropy(df['Play']))

Entropy of dataset: 0.9402859586706311


In [20]:
def information_gain(data, feature, target="Play"):
    """Return the information gain obtained by splitting ``data`` on ``feature``.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to evaluate; must contain the ``feature`` and ``target`` columns.
    feature : str
        Name of the column to split on.
    target : str, default "Play"
        Name of the class-label column.

    Returns
    -------
    float
        Entropy of ``data[target]`` minus the size-weighted entropy of the
        target within each distinct value of ``feature``.
    """
    levels, level_counts = np.unique(data[feature], return_counts=True)
    n_rows = sum(level_counts)

    # Expected entropy after the split: each partition's entropy weighted by
    # the share of rows that fall into that partition.
    remainder = sum(
        (size / n_rows) * entropy(data[data[feature] == level][target])
        for level, size in zip(levels, level_counts)
    )

    return entropy(data[target]) - remainder

# Report the information gain of every feature column; the last column is
# skipped because it is the 'Play' target itself.
for col in df.columns[:-1]:
    print(col, ":", information_gain(df, col))


Outlook : 0.24674981977443933
Temp : 0.02922256565895487
Humidity : 0.15183550136234159
Wind : 0.04812703040826949


In [23]:
def _majority_class(labels, default=None):
    """Return the most frequent value in ``labels``, or ``default`` if it is empty."""
    classes, counts = np.unique(labels, return_counts=True)
    if len(classes) == 0:
        return default
    # np.argmax returns the first maximum, so ties go to the label that sorts first.
    return classes[np.argmax(counts)]


def id3(data, original_data, features, target="Play", parent_node=None):
    """Build a decision tree with the ID3 algorithm.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows that reach the current node.
    original_data : pandas.DataFrame
        The full training set; its majority class is the fallback label when
        ``data`` is empty.
    features : list of str
        Column names still available for splitting.
    target : str, default "Play"
        Name of the class-label column.
    parent_node : label, optional
        Majority class of the parent node. It is returned when no features
        are left to split on.

    Returns
    -------
    dict or label
        A leaf label, or a nested dict of the form
        ``{feature: {feature_value: subtree_or_label, ...}}``.
    """
    # The empty check has to come first: asking for the first unique label of
    # an empty frame raises IndexError, which previously made this branch
    # unreachable.
    if len(data) == 0:
        return _majority_class(original_data[target], default=parent_node)

    # All rows share one class -> leaf with that class.
    classes = np.unique(data[target])
    if len(classes) == 1:
        return classes[0]

    # No feature left to split on -> fall back to the parent's majority class.
    # A top-level call has no parent, so use the majority class of the data
    # itself rather than returning None as the "tree".
    if len(features) == 0:
        if parent_node is not None:
            return parent_node
        return _majority_class(data[target])

    # Majority class at this node; handed to the children as their fallback.
    majority_here = _majority_class(data[target])

    # Split on the feature with the highest information gain.
    gains = [information_gain(data, feature, target) for feature in features]
    best_feature = features[np.argmax(gains)]
    remaining = [f for f in features if f != best_feature]

    tree = {best_feature: {}}
    for value in np.unique(data[best_feature]):
        sub_data = data[data[best_feature] == value]
        tree[best_feature][value] = id3(
            sub_data, original_data, remaining, target, majority_here
        )

    return tree


In [24]:
# Every column except the last one ('Play') is a candidate split feature.
features = list(df.columns[:-1])
tree = id3(df, df, features)

# Heading and tree on separate lines, exactly as two consecutive prints would give.
print("Decision Tree:", tree, sep="\n")

Decision Tree:
{'Outlook': {'Overcast': 'Yes', 'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}}, 'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}


In [25]:
def predict(query, tree, default=None):
    """Classify ``query`` by walking a decision tree built by ``id3``.

    Parameters
    ----------
    query : mapping
        Feature name -> feature value for the sample to classify. Keys that
        do not appear in the tree are ignored.
    tree : dict or label
        Nested dict of the form ``{feature: {value: subtree_or_label}}``, or
        a bare label when the whole tree is a single leaf.
    default : label, optional
        Returned when the walk cannot continue: no key of ``query`` matches
        the feature at the current node, or the query's value for that
        feature has no branch in the tree.

    Returns
    -------
    label
        The leaf reached by the walk, or ``default``.
    """
    node = tree
    # Anything that is not a dict is a leaf label. This also covers a tree
    # that is a single leaf, where a membership test would otherwise be a
    # substring check on the label string.
    while isinstance(node, dict):
        # First query key (in query order) that names the feature tested here.
        feature = next((key for key in query.keys() if key in node), None)
        if feature is None:
            return default

        branches = node[feature]
        value = query[feature]
        if value not in branches:
            # Feature value without a branch in the tree.
            return default
        node = branches[value]

    return node


In [27]:
# Sample to classify: one value per feature column, keyed by column name.
new_sample = {
    'Outlook': 'Rain',
    'Temp': 'Mild',
    'Humidity': 'High',
    'Wind': 'Weak'
}

# Walk the tree built in the previous cell; with the tree printed there, this
# sample follows Outlook='Rain' and then Wind='Weak'.
result = predict(new_sample, tree)
print("Prediction:", result)


Prediction: Yes
