<a href="https://colab.research.google.com/github/mlan18/ML-AND-DS-ASSIGNMENT1/blob/main/ID3_Updated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

# ---- Step 1: Dataset ----
# The training set is written row by row (one customer per line) so each
# record can be read at a glance, then transposed into the column-oriented
# mapping that pandas consumes.
data = dict(zip(
    ('Age', 'Income', 'Student', 'Credit', 'Buy'),
    map(list, zip(
        ('<30', 'High', 'No', 'Fair', 'No'),
        ('<30', 'High', 'No', 'Excellent', 'No'),
        ('31-40', 'High', 'No', 'Fair', 'Yes'),
        ('>40', 'Medium', 'No', 'Fair', 'Yes'),
        ('>40', 'Low', 'Yes', 'Fair', 'Yes'),
        ('>40', 'Low', 'Yes', 'Excellent', 'No'),
        ('31-40', 'Low', 'Yes', 'Excellent', 'Yes'),
        ('<30', 'Medium', 'No', 'Fair', 'No'),
        ('<30', 'Low', 'Yes', 'Fair', 'Yes'),
        ('>40', 'Medium', 'Yes', 'Fair', 'Yes'),
        ('<30', 'Medium', 'Yes', 'Excellent', 'Yes'),
        ('31-40', 'Medium', 'No', 'Excellent', 'Yes'),
        ('31-40', 'High', 'No', 'Fair', 'Yes'),
        ('>40', 'Medium', 'No', 'Excellent', 'No'),
    )),
))

# 'Buy' is the last column and serves as the class label.
df = pd.DataFrame(data)

# ---- Step 2: Entropy Function ----
def entropy(target_col):
    """Return the Shannon entropy (in bits) of the labels in *target_col*.

    Args:
        target_col: Sized collection of class labels (list, array or Series).

    Returns:
        The entropy ``-sum(p * log2(p))`` taken over the relative frequency
        ``p`` of each distinct label.
    """
    # Only the frequencies matter; the distinct labels themselves are unused.
    _, label_counts = np.unique(target_col, return_counts=True)
    probabilities = label_counts / len(target_col)
    return -np.sum(probabilities * np.log2(probabilities))

# ---- Step 3: Information Gain ----
def info_gain(data, split_attribute_name, target_name="Buy"):
    """Return the information gain of splitting *data* on one attribute.

    Args:
        data: DataFrame containing both the split attribute and the target.
        split_attribute_name: Column whose distinct values define the split.
        target_name: Column holding the class labels.

    Returns:
        Entropy of the target before the split minus the size-weighted
        entropy of the target within each partition.
    """
    total_entropy = entropy(data[target_name])
    vals, counts = np.unique(data[split_attribute_name], return_counts=True)
    n_rows = np.sum(counts)

    # Select each partition with a boolean mask. The previous
    # `data.where(mask).dropna()` also discarded rows that had a missing
    # value in any *other* column, so a partition could end up smaller than
    # the weight (`count`) applied to it.
    weighted_entropy = np.sum([
        (count / n_rows)
        * entropy(data.loc[data[split_attribute_name] == val, target_name])
        for val, count in zip(vals, counts)
    ])
    return total_entropy - weighted_entropy

# ---- Step 4: ID3 Tree Builder ----
def ID3(data, original_data, features, target_attribute_name="Buy", parent_node_class=None):
    """Recursively build an ID3 decision tree as nested dictionaries.

    Args:
        data: DataFrame with the rows that reached the current node.
        original_data: DataFrame whose majority class is returned when
            ``data`` is empty (the recursion passes the parent node's rows).
        features: List of column names still available for splitting.
        target_attribute_name: Name of the class-label column.
        parent_node_class: Majority class of the parent node, returned when
            no features are left to split on.

    Returns:
        A class label for a leaf, otherwise a single-key dictionary
        ``{feature: {feature_value: subtree_or_label}}``.
    """
    # Empty data must be handled first: np.unique() of an empty column is
    # empty, so the purity test below would index out of range.
    if len(data) == 0:
        classes, counts = np.unique(original_data[target_attribute_name], return_counts=True)
        return classes[np.argmax(counts)]

    classes, counts = np.unique(data[target_attribute_name], return_counts=True)

    # Every row carries the same class -> leaf with that class.
    if len(classes) == 1:
        return classes[0]

    majority_class = classes[np.argmax(counts)]

    # No features left -> majority class of the parent node. When there is
    # no parent (top-level call) use this node's own majority instead of
    # returning None as a label.
    if len(features) == 0:
        return parent_node_class if parent_node_class is not None else majority_class

    # Split on the feature with the highest information gain.
    gains = [info_gain(data, feature, target_attribute_name) for feature in features]
    best_feature = features[np.argmax(gains)]
    remaining_features = [feature for feature in features if feature != best_feature]

    branches = {}
    for value in np.unique(data[best_feature]):
        # A boolean mask keeps rows intact; where().dropna() would also drop
        # rows that have missing values in unrelated columns.
        sub_data = data[data[best_feature] == value]
        branches[value] = ID3(
            sub_data, data, remaining_features, target_attribute_name, majority_class
        )

    return {best_feature: branches}

# ---- Step 5: Fit the model ----
# Every column except the last one (the target) is a candidate split feature.
features = list(df.columns[:-1])
tree = ID3(df, df, features)

# Heading, a blank line, then the nested-dict representation of the tree.
print("Decision Tree:\n", tree, sep="\n")

# ---- Step 6: Prediction Function ----
def predict(query, tree, default='Yes'):
    """Classify *query* by walking the decision tree from the root.

    Args:
        query: Mapping of attribute name to attribute value.
        tree: Nested dictionary ``{attribute: {value: subtree_or_label}}``,
            or a bare class label when the tree is a single leaf.
        default: Label returned when the query cannot be routed to a leaf
            (attribute missing from the query, or value absent from the
            tree).

    Returns:
        The class label at the leaf reached, or ``default``.
    """
    node = tree
    # Descend iteratively so that `default` applies at every depth; the
    # recursive form dropped it after the first level.
    while isinstance(node, dict):
        for attribute in query:
            if attribute in node:
                break
        else:
            # The query has no value for the attribute tested at this node.
            return default

        try:
            node = node[attribute][query[attribute]]
        except (KeyError, TypeError):
            # Value not present in the tree (or not usable as a key).
            return default

    return node

# ---- Step 7: Make predictions ----
# Two unseen customers to classify with the fitted tree.
queries = [
    dict(Age='31-40', Income='Medium', Student='Yes', Credit='Fair'),
    dict(Age='>40', Income='Low', Student='No', Credit='Excellent'),
]

for q in queries:
    prediction = predict(q, tree)
    print(f"Query: {q} => Prediction: {prediction}")

Decision Tree:

{'Age': {'31-40': 'Yes', '<30': {'Student': {'No': 'No', 'Yes': 'Yes'}}, '>40': {'Credit': {'Excellent': 'No', 'Fair': 'Yes'}}}}
Query: {'Age': '31-40', 'Income': 'Medium', 'Student': 'Yes', 'Credit': 'Fair'} => Prediction: Yes
Query: {'Age': '>40', 'Income': 'Low', 'Student': 'No', 'Credit': 'Excellent'} => Prediction: No
