## 🚀 Program 3

### 📋 Objective

##### Write a program to demonstrate the working of the decision-tree-based ID3 algorithm. Use an appropriate dataset to build the decision tree, and apply the learned tree to classify a new sample.

In [None]:
# Import necessary libraries
import pandas as pd
import math
from collections import Counter

In [37]:
# Load dataset
# The relative path is resolved against the current working directory.
df = pd.read_csv("tennis.csv")
# Every column except the last is treated as a feature name; the functions
# in this notebook read the class label from the last element of each row.
features = list(df.columns[:-1])
# Plain list-of-lists view of the table: one inner list per row.
data = df.values.tolist()

In [38]:
# Calculate entropy of the dataset
def entropy(rows):
    """Return the base-2 Shannon entropy of the class labels in ``rows``.

    The class label of a row is its last element.

    Args:
        rows: Iterable of indexable rows whose final element is the label.

    Returns:
        The entropy in bits. With no rows the result is ``0``.
    """
    label_counts = Counter(row[-1] for row in rows)
    n_labels = sum(label_counts.values())

    # Accumulate sum(p * log2(p)) over the label distribution, then negate.
    acc = 0
    for count in label_counts.values():
        p = count / n_labels
        acc += p * math.log2(p)
    return -acc

In [39]:
# Split data by attribute value
def split(rows, index):
    """Group ``rows`` by the value each row holds at position ``index``.

    Args:
        rows: Iterable of indexable rows.
        index: Position of the attribute to group on.

    Returns:
        A dict mapping each distinct attribute value to the list of rows
        carrying that value. Keys appear in first-seen order and the rows
        inside each list keep their original relative order.
    """
    groups = {}
    for row in rows:
        value = row[index]
        if value not in groups:
            groups[value] = []
        groups[value].append(row)
    return groups

In [40]:
# Information Gain
def info_gain(rows, index):
    """Return the information gain from splitting ``rows`` on one attribute.

    The gain is the entropy of ``rows`` minus the size-weighted average
    entropy of the groups produced by ``split(rows, index)``.

    Args:
        rows: Rows whose last element is the class label.
        index: Position of the attribute to evaluate.

    Returns:
        The reduction in entropy achieved by the split.
    """
    entropy_before = entropy(rows)

    entropy_after = 0
    for part in split(rows, index).values():
        weight = len(part) / len(rows)
        entropy_after += weight * entropy(part)

    return entropy_before - entropy_after

In [41]:
# ID3 algorithm
def id3(rows, feature_names):
    """Recursively build a decision tree from ``rows`` using ID3.

    Each row holds one value per entry of ``feature_names`` (in the same
    order) followed by the class label as its last element.

    Args:
        rows: Non-empty list of training rows (lists or tuples).
        feature_names: Names of the attribute columns, aligned with the
            leading positions of every row.

    Returns:
        Either a class label (leaf) or a nested dict of the form
        ``{feature_name: {attribute_value: subtree, ...}}``.

    Raises:
        ValueError: If ``rows`` is empty, since no label can be derived.
    """
    labels = [row[-1] for row in rows]
    if not labels:
        # Previously this surfaced as an opaque IndexError on labels[0].
        raise ValueError("id3() requires at least one training row")

    if labels.count(labels[0]) == len(labels):  # Pure node
        return labels[0]
    if not feature_names:  # No features left: fall back to the majority label
        return Counter(labels).most_common(1)[0][0]

    # Best feature selection; on ties the earliest feature wins.
    gains = [info_gain(rows, i) for i in range(len(feature_names))]
    best_idx = gains.index(max(gains))
    best_feat = feature_names[best_idx]

    # The remaining feature list is the same for every branch, so build it once.
    new_features = feature_names[:best_idx] + feature_names[best_idx + 1:]

    tree = {best_feat: {}}
    for val, subset in split(rows, best_idx).items():
        # Drop the column that was just split on so that row positions keep
        # lining up with new_features in the recursive call.
        reduced_subset = [row[:best_idx] + row[best_idx + 1:] for row in subset]
        tree[best_feat][val] = id3(reduced_subset, new_features)

    return tree

In [42]:
# Print the decision tree
def print_tree(tree, indent=""):
    """Print a decision tree as indented text.

    Args:
        tree: Either a leaf value or a nested dict of the form
            ``{attribute: {value: subtree, ...}}``.
        indent: Prefix written before every line at this depth; each level
            of nesting appends three spaces.
    """
    # Leaf: anything that is not a dict is printed as a predicted class.
    if not isinstance(tree, dict):
        print(f"{indent}--> {tree}")
        return

    # Internal node: the first key of the dict is the attribute tested here.
    attribute = next(iter(tree))
    for value, child in tree[attribute].items():
        print(f"{indent}|-- {attribute} = {value}")
        print_tree(child, indent + "   ")

In [43]:
# Classify a new sample
def classify(tree, features, sample):
    """Predict the class of ``sample`` by walking down ``tree``.

    Args:
        tree: A leaf value or a nested dict of the form
            ``{attribute: {value: subtree, ...}}``.
        features: List of attribute names; ``sample[i]`` is the value for
            ``features[i]``.
        sample: Attribute values of the instance to classify.

    Returns:
        The leaf reached by following the sample's attribute values, or the
        string ``"Unknown"`` when a node has no branch for the sample's value.
    """
    node = tree
    while isinstance(node, dict):
        attribute = next(iter(node))
        value = sample[features.index(attribute)]
        node = node[attribute].get(value)
        if node is None:
            # No branch exists for this attribute value.
            return "Unknown"
    return node

In [44]:
# Build and print the decision tree
# Train on the full dataset. `tree` is a nested dict, or a bare class label
# if every row shares the same label.
tree = id3(data, features)
print("=== Decision Tree ===")
print_tree(tree)

=== Decision Tree ===
|-- Outlook = Sunny
   |-- Humidity = High
      --> No
   |-- Humidity = Normal
      --> Yes
|-- Outlook = Overcast
   --> Yes
|-- Outlook = Rain
   |-- Wind = Weak
      --> Yes
   |-- Wind = Strong
      --> No


In [45]:
# Classify a new sample
# Values are positional: sample[i] is the value for features[i], because
# classify() locates each attribute with features.index().
sample = ['Sunny', 'Cool', 'High', 'Strong']
prediction = classify(tree, features, sample)
print("\nNew Sample:", sample)
print("Predicted Class:", prediction)


New Sample: ['Sunny', 'Cool', 'High', 'Strong']
Predicted Class: No
