In [39]:
import pandas as pd

file_path = "/content/Weather.csv"
df = pd.read_csv(file_path)

print("Dataset Loaded Successfully:\n")
print(df)


Dataset Loaded Successfully:

    Day   Outlook  Temp Humidity    Wind Decision
0     1     Sunny   Hot     High    Weak       No
1     2     Sunny   Hot     High  Strong       No
2     3  Overcast   Hot     High    Weak      Yes
3     4      Rain  Mild     High    Weak      Yes
4     5      Rain  Cool   Normal    Weak      Yes
5     6      Rain  Cool   Normal  Strong       No
6     7  Overcast  Cool   Normal  Strong      Yes
7     8     Sunny  Mild     High    Weak       No
8     9     Sunny  Cool   Normal    Weak      Yes
9    10      Rain  Mild   Normal    Weak      Yes
10   11     Sunny  Mild   Normal  Strong      Yes
11   12  Overcast  Mild     High  Strong      Yes
12   13  Overcast   Hot   Normal    Weak      Yes
13   14      Rain  Mild     High  Strong       No


In [40]:
import numpy as np
from collections import Counter

def entropy(data):
    labels = data.iloc[:, -1]
    label_counts = Counter(labels)
    total_instances = len(data)

    entropy_value = -sum((count / total_instances) * np.log2(count / total_instances) for count in label_counts.values())
    return entropy_value

dataset_entropy = entropy(df)
print(f"Entropy of the dataset: {dataset_entropy}")


Entropy of the dataset: 0.9402859586706311


In [41]:
ignore_columns = ["Day"]
attribute_entropies = {}

for attr in df.columns[:-1]:
    if attr in ignore_columns:
        continue

    attribute_entropies[attr] = entropy_of_attribute(df, attr)
    print(f"\nAttribute: {attr}")
    for val, ent in attribute_entropies[attr].items():
        print(f" - {val}: {ent}")




Attribute: Outlook
 - Sunny: 0.9709505944546686
 - Overcast: -0.0
 - Rain: 0.9709505944546686

Attribute: Temp
 - Hot: 1.0
 - Mild: 0.9182958340544896
 - Cool: 0.8112781244591328

Attribute: Humidity
 - High: 0.9852281360342515
 - Normal: 0.5916727785823275

Attribute: Wind
 - Weak: 0.8112781244591328
 - Strong: 1.0


In [42]:
def information_gain(data, attribute):
    total_entropy = entropy(data)

    attribute_values = data[attribute].unique()
    total_instances = len(data)

    weighted_entropy = 0
    for value in attribute_values:
        subset = data[data[attribute] == value]
        prob = len(subset) / total_instances
        weighted_entropy += prob * entropy(subset)

    gain = total_entropy - weighted_entropy
    return gain

attributes = df.columns[:-1]
gains = {attr: information_gain(df, attr) for attr in attributes}

print("\nInformation Gain for Each Attribute:")
for attr, gain in gains.items():
    print(f"{attr}: {gain}")



Information Gain for Each Attribute:
Day: 0.9402859586706311
Outlook: 0.24674981977443933
Temp: 0.02922256565895487
Humidity: 0.15183550136234159
Wind: 0.04812703040826949


In [43]:
def print_tree(tree, depth=0):
    """Recursively prints the decision tree row-wise with proper indentation."""
    if not isinstance(tree, dict):
        print("  " * depth + f"⮕ Decision: {tree}")
        return

    for attribute, branches in tree.items():
        print("  " * depth + f"[{attribute}]")
        for value, subtree in branches.items():
            print("  " * (depth + 1) + f"→ {value}:")
            print_tree(subtree, depth + 2)
print("\nDecision Tree (Row-wise):")
print_tree(decision_tree)




Decision Tree (Row-wise):
[Day]
  → 1:
    ⮕ Decision: No
  → 2:
    ⮕ Decision: No
  → 3:
    ⮕ Decision: Yes
  → 4:
    ⮕ Decision: Yes
  → 5:
    ⮕ Decision: Yes
  → 6:
    ⮕ Decision: No
  → 7:
    ⮕ Decision: Yes
  → 8:
    ⮕ Decision: No
  → 9:
    ⮕ Decision: Yes
  → 10:
    ⮕ Decision: Yes
  → 11:
    ⮕ Decision: Yes
  → 12:
    ⮕ Decision: Yes
  → 13:
    ⮕ Decision: Yes
  → 14:
    ⮕ Decision: No
