In [None]:
import math
import pandas as pd

# Calculate entropy of a dataset
def entropy(data):
    """Return the Shannon entropy, in bits, of the values in ``data``.

    Args:
        data: A sized iterable of hashable values (for example a list or a
            pandas Series holding the target column).

    Returns:
        The entropy computed with base-2 logarithms. An empty ``data``
        yields ``0`` because there are no frequencies to sum over.
    """
    n_items = len(data)

    # Tally how many times each distinct value occurs
    tally = {}
    for item in data:
        tally[item] = tally.get(item, 0) + 1

    # Accumulate -p * log2(p) over the relative frequency of every value
    result = 0
    for occurrences in tally.values():
        p = occurrences / n_items
        result -= p * math.log2(p)

    return result

# Calculate information gain for a feature
def information_gain(data, feature, target):
    """Return the information gain obtained by splitting ``data`` on ``feature``.

    Args:
        data: DataFrame holding both the feature and the target columns.
        feature: Name of the column to split on.
        target: Name of the column holding the class labels.

    Returns:
        Entropy of the target column minus the size-weighted entropy of the
        target within each partition induced by the feature's distinct values.
    """
    parent_entropy = entropy(data[target])

    column = data[feature]
    n_rows = len(data)

    # Sum each partition's entropy, weighted by its share of the rows
    remainder = 0
    for level in column.unique():
        partition = data[column == level]
        partition_entropy = entropy(partition[target])
        remainder += (len(partition) / n_rows) * partition_entropy

    # Information Gain = Entropy(Parent) - Weighted Entropy
    return parent_entropy - remainder

# Select the feature with the highest information gain
def best_feature(data, features, target):
    """Return the feature whose split yields the largest information gain.

    Args:
        data: DataFrame holding the feature and target columns.
        features: Iterable of candidate column names.
        target: Name of the column holding the class labels.

    Returns:
        The winning feature name, or ``None`` when ``features`` is empty.
        The comparison is strict, so on a tie the earliest candidate wins.
    """
    winner = None
    top_gain = -1  # below any gain the candidates are expected to produce
    for candidate in features:
        candidate_gain = information_gain(data, candidate, target)
        if candidate_gain > top_gain:
            top_gain, winner = candidate_gain, candidate
    return winner

# Create the decision tree recursively
def id3(data, features, target):
    """Build a decision tree from ``data`` with the ID3 algorithm.

    Args:
        data: DataFrame holding the feature and target columns.
        features: List of column names still available for splitting.
        target: Name of the column holding the class labels.

    Returns:
        Either a leaf (a single target value) or a nested dict of the form
        ``{feature: {feature_value: subtree, ...}}``, where each subtree is
        again a leaf or such a dict.
    """
    labels = data[target]

    # If all data points have the same target, return that target value
    if len(set(labels)) == 1:
        return labels.iloc[0]

    # If no features are left to split on, return the most frequent target
    # value (the first entry reported by ``mode()``)
    if not features:
        return labels.mode()[0]

    # Select the best feature to split on
    best = best_feature(data, features, target)

    # The features left for the children do not depend on the branch value,
    # so build the list once instead of once per branch
    remaining = [f for f in features if f != best]

    # Create a node for the decision tree
    tree = {best: {}}

    # Recursively split the dataset for each value of the best feature
    for value in data[best].unique():
        subset = data[data[best] == value]
        tree[best][value] = id3(subset, remaining, target)

    return tree

# Sentinel that distinguishes "no default supplied" from an explicit None
_NO_DEFAULT = object()


# Function to classify a single instance based on the decision tree
def classify(tree, instance, default=_NO_DEFAULT):
    """Classify ``instance`` by walking ``tree`` from the root to a leaf.

    Args:
        tree: Either a leaf value or a nested dict of the form
            ``{feature: {feature_value: subtree, ...}}``.
        instance: Mapping from feature name to that instance's value.
        default: Optional value to return when the walk cannot continue,
            i.e. the instance lacks the feature tested at a node or holds a
            value for which the node has no branch.

    Returns:
        The leaf reached by following the instance's feature values, or
        ``default`` when it was supplied and no matching branch exists.

    Raises:
        KeyError: If no matching branch exists and ``default`` was not given.
    """
    node = tree
    # Every internal node is a dict; anything else is a leaf
    while isinstance(node, dict):
        # An internal node is keyed by the feature it tests
        feature = list(node)[0]
        branches = node[feature]
        try:
            node = branches[instance[feature]]
        except KeyError:
            if default is _NO_DEFAULT:
                raise
            return default
    return node

# Example usage with a dataset
if __name__ == '__main__':
    # Toy weather dataset: one list per column, ten rows in total
    data = {
        'Outlook': [
            'Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy',
            'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Rainy',
        ],
        'Temperature': [
            'Hot', 'Hot', 'Hot', 'Mild', 'Cool',
            'Cool', 'Cool', 'Mild', 'Cool', 'Mild',
        ],
        'Humidity': [
            'High', 'High', 'High', 'High', 'Normal',
            'Normal', 'Normal', 'High', 'Normal', 'Normal',
        ],
        'Wind': [
            'Weak', 'Strong', 'Weak', 'Weak', 'Weak',
            'Strong', 'Strong', 'Weak', 'Weak', 'Weak',
        ],
        'PlayTennis': [
            'No', 'No', 'Yes', 'Yes', 'Yes',
            'No', 'Yes', 'No', 'Yes', 'Yes',
        ],
    }

    # Load the columns into a pandas DataFrame
    df = pd.DataFrame(data)

    # Every column except the target is a candidate feature
    target = 'PlayTennis'
    features = [column for column in df.columns if column != target]

    # Grow the tree with ID3 and show it
    tree = id3(df, features, target)
    print("Decision Tree:", tree, sep="\n")

    # Classify one previously unseen instance
    new_instance = {
        'Outlook': 'Sunny',
        'Temperature': 'Hot',
        'Humidity': 'High',
        'Wind': 'Weak',
    }
    prediction = classify(tree, new_instance)
    print(f"Prediction for {new_instance}: {prediction}")


Decision Tree:
{'Outlook': {'Sunny': {'Temperature': {'Hot': 'No', 'Mild': 'No', 'Cool': 'Yes'}}, 'Overcast': 'Yes', 'Rainy': {'Wind': {'Weak': 'Yes', 'Strong': 'No'}}}}
Prediction for {'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Weak'}: No
