In [None]:
import math
import pandas as pd

# Calculate entropy of a dataset
def entropy(data):
    """Return the Shannon entropy (in bits) of the values in *data*.

    Args:
        data: A sized iterable of hashable values (for example a list or
            a pandas Series of class labels).

    Returns:
        The entropy computed with base-2 logarithms. An empty input or an
        input with a single distinct value has zero entropy.
    """
    n_items = len(data)

    # Tally how often each distinct value occurs.
    tally = {}
    for item in data:
        tally[item] = tally.get(item, 0) + 1

    # Accumulate -p * log2(p) over the observed values.
    result = 0
    for occurrences in tally.values():
        p = occurrences / n_items
        result -= p * math.log2(p)
    return result

# Calculate information gain for a feature
def information_gain(data, feature, target):
    """Return the information gain obtained by splitting *data* on *feature*.

    The gain is the entropy of the target column minus the size-weighted
    average entropy of the target within each distinct value of the feature.

    Args:
        data: pandas DataFrame holding both the feature and target columns.
        feature: Name of the column to split on.
        target: Name of the column holding the class labels.

    Returns:
        The entropy reduction achieved by the split.
    """
    n_rows = len(data)
    column = data[feature]

    conditional_entropy = 0
    for level in column.unique():
        rows = data[column == level]
        # Each partition contributes in proportion to its share of the rows.
        weight = len(rows) / n_rows
        conditional_entropy += weight * entropy(rows[target])

    return entropy(data[target]) - conditional_entropy

# Select the feature with the highest information gain
def best_feature(data, features, target):
    """Return the feature in *features* with the highest information gain.

    Args:
        data: pandas DataFrame containing the feature and target columns.
        features: Iterable of candidate column names.
        target: Name of the column holding the class labels.

    Returns:
        The winning feature name; on ties the earliest candidate wins
        because only a strictly larger gain replaces the current leader.
        ``None`` when *features* is empty.
    """
    # (gain, feature) of the current leader; -1 sits below the starting
    # threshold so any gain greater than -1 takes the lead.
    leader = (-1, None)
    for name in features:
        score = information_gain(data, name, target)
        if score > leader[0]:
            leader = (score, name)
    return leader[1]

# Create the decision tree recursively
def id3(data, features, target):
    """Recursively build an ID3 decision tree from *data*.

    Args:
        data: pandas DataFrame containing the feature and target columns.
        features: List of column names still available for splitting.
        target: Name of the column holding the class labels.

    Returns:
        Either a leaf (a single class label) or a nested dict of the form
        ``{feature: {feature_value: subtree_or_label, ...}}``.
    """
    labels = data[target]

    # Pure node: every row carries the same label.
    if len(set(labels)) == 1:
        return labels.iloc[0]

    # Nothing left to split on: fall back to the most frequent label
    # (the first entry returned by ``mode()``).
    if not features:
        return labels.mode()[0]

    split_on = best_feature(data, features, target)
    # The chosen feature is not reused further down this branch.
    remaining = [name for name in features if name != split_on]
    split_column = data[split_on]

    return {
        split_on: {
            level: id3(data[split_column == level], remaining, target)
            for level in split_column.unique()
        }
    }

# Function to classify a single instance
# Sentinel distinguishing "no default supplied" from an explicit default=None.
_NO_DEFAULT = object()


def classify(tree, instance, *, default=_NO_DEFAULT):
    """Classify a single instance by walking the decision tree.

    Args:
        tree: Either a leaf label or a nested dict of the form
            ``{feature: {feature_value: subtree_or_label, ...}}``.
        instance: Mapping from feature name to the instance's value for
            that feature.
        default: Optional fallback returned when the walk cannot continue,
            i.e. the instance lacks the feature being tested or its value
            has no branch in the tree (a value combination that was not
            present when the tree was built).

    Returns:
        The label stored at the leaf that was reached, or *default* when it
        was supplied and the walk hit a missing branch.

    Raises:
        KeyError: If a lookup fails and no *default* was supplied.
    """
    node = tree
    # Internal nodes are dicts; anything else is a leaf label.
    while isinstance(node, dict):
        feature = list(node)[0]
        try:
            node = node[feature][instance[feature]]
        except KeyError:
            if default is _NO_DEFAULT:
                raise
            return default
    return node

# Example dataset
if __name__ == '__main__':
    # Small categorical example table; 'BuyCar' is the column to predict.
    data = {
        'Income': [
            'High', 'High', 'Medium', 'Low', 'Low',
            'Low', 'Medium', 'High', 'Low', 'Medium',
        ],
        'CreditScore': [
            'Good', 'Bad', 'Good', 'Good', 'Bad',
            'Good', 'Bad', 'Good', 'Good', 'Bad',
        ],
        'Age': [
            'Young', 'Young', 'Middle-aged', 'Senior', 'Senior',
            'Senior', 'Middle-aged', 'Young', 'Senior', 'Middle-aged',
        ],
        'PreviousLoan': [
            'No', 'Yes', 'No', 'No', 'Yes',
            'No', 'Yes', 'No', 'No', 'Yes',
        ],
        'BuyCar': [
            'Yes', 'No', 'Yes', 'No', 'No',
            'Yes', 'No', 'Yes', 'No', 'Yes',
        ],
    }

    target = 'BuyCar'
    df = pd.DataFrame(data)
    # Every column except the target is a candidate split feature.
    features = [column for column in df if column != target]

    # Build the tree and show its nested-dict representation.
    tree = id3(df, features, target)
    print("Decision Tree:", tree, sep="\n")

    # Classify one previously unseen instance.
    new_instance = {'Income': 'Medium', 'CreditScore': 'Good', 'Age': 'Young', 'PreviousLoan': 'No'}
    prediction = classify(tree, new_instance)
    print(f"Prediction for {new_instance}: {prediction}")


Decision Tree:
{'Income': {'High': {'CreditScore': {'Good': 'Yes', 'Bad': 'No'}}, 'Medium': {'CreditScore': {'Good': 'Yes', 'Bad': {'Age': {'Middle-aged': {'PreviousLoan': {'Yes': 'No'}}}}}}, 'Low': {'CreditScore': {'Good': {'Age': {'Senior': {'PreviousLoan': {'No': 'No'}}}}, 'Bad': 'No'}}}}
Prediction for {'Income': 'Medium', 'CreditScore': 'Good', 'Age': 'Young', 'PreviousLoan': 'No'}: Yes
