<a href="https://colab.research.google.com/github/saifulislamsarfaraz/Artificial-Intelligence/blob/main/Decision_Tree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import math
import pandas as pd

# Function to calculate entropy
def calculate_entropy(data):
    """Return the Shannon entropy, in bits, of the target column of *data*.

    The target column is the last column of the frame.

    Args:
        data: pandas DataFrame whose last column holds the class labels.

    Returns:
        The entropy as a non-negative number; 0 for an empty frame and
        0.0 when every row carries the same label.
    """
    labels = data.iloc[:, -1]  # Target labels live in the last column
    label_counts = labels.value_counts()  # Frequency of each label
    # A categorical target reports unobserved categories with a count of 0,
    # and log2(0) raises ValueError. Such labels contribute nothing to the
    # entropy, so drop them before applying the formula.
    label_counts = label_counts[label_counts > 0]
    total = len(labels)  # Total number of samples
    # Summing the negated terms (instead of negating the sum) yields 0.0
    # rather than -0.0 for a pure subset; the magnitude is unchanged.
    return sum(
        -(count / total) * math.log2(count / total) for count in label_counts
    )

# Information gain obtained by splitting on a single attribute
def calculate_information_gain(data, attribute):
    """Return the entropy reduction achieved by splitting *data* on *attribute*.

    Args:
        data: pandas DataFrame whose last column holds the class labels.
        attribute: name of the column to split on.

    Returns:
        Entropy of the whole frame minus the size-weighted entropy of the
        partitions induced by each distinct value of *attribute*.
    """
    baseline = calculate_entropy(data)  # Entropy before the split
    column = data[attribute]
    row_count = len(data)

    # Accumulate each partition's entropy, weighted by its share of the rows.
    remainder = 0
    for level in column.unique():
        partition = data[column == level]
        share = len(partition) / row_count
        remainder += share * calculate_entropy(partition)

    return baseline - remainder

# ID3-style recursive construction of the decision tree
def build_decision_tree(data, features):
    """Build a nested-dict decision tree from *data*.

    Args:
        data: pandas DataFrame whose last column holds the class labels.
        features: names of the columns still available for splitting.

    Returns:
        Either a bare label (leaf) or a dict of the form
        ``{feature: {value: subtree_or_label, ...}}``.
    """
    target = data.iloc[:, -1]

    # Leaf: every row already agrees on the label.
    if len(target.unique()) == 1:
        return target.iloc[0]

    # Leaf: nothing left to split on, so fall back to the majority label.
    if len(features) == 0:
        return target.mode()[0]

    # Split on the feature with the highest information gain; on ties,
    # max() keeps the earliest feature in iteration order.
    split_feature = max(
        features, key=lambda name: calculate_information_gain(data, name)
    )
    remaining = [name for name in features if name != split_feature]

    branches = {}
    for level in data[split_feature].unique():
        partition = data[data[split_feature] == level]
        if partition.empty:
            # No rows matched this value; use the majority label of the node.
            branches[level] = target.mode()[0]
            continue
        branches[level] = build_decision_tree(partition, remaining)

    return {split_feature: branches}

# Classify one sample by walking the tree from the root to a leaf
def predict(tree, sample):
    """Return the label the decision *tree* assigns to *sample*.

    Args:
        tree: a nested dict ``{feature: {value: subtree_or_label}}`` or a
            bare label (leaf).
        sample: mapping from feature name to the sample's value; looked up
            with ``.get``.

    Returns:
        The predicted label, or ``None`` when the sample has a value for
        which the tree has no branch.
    """
    node = tree
    # Descend while the current node is still an internal (dict) node.
    while isinstance(node, dict):
        split_feature = next(iter(node))  # The single key is the split feature
        observed = sample.get(split_feature)
        node = node[split_feature].get(observed)
        if node is None:  # Value not present in the tree
            return None
    return node

# Reduced Play Tennis dataset (five days only)
data = pd.DataFrame(
    {
        "Day": ["D1", "D2", "D3", "D4", "D5"],
        "Outlook": ["Sunny", "Sunny", "Overcast", "Rain", "Rain"],
        "Temp": ["Hot", "Hot", "Hot", "Mild", "Cool"],
        "Humidity": ["High", "High", "High", "High", "Normal"],
        "Wind": ["Weak", "Strong", "Weak", "Weak", "Weak"],
        "Play Tennis": ["No", "No", "Yes", "Yes", "Yes"],
    }
)

# "Day" only identifies the row, so discard it before training.
data = data.drop(columns=["Day"])
# Every remaining column except the last one (the target) is a feature.
features = list(data.columns[:-1])

# Fit the tree and show its nested-dict form
decision_tree = build_decision_tree(data, features)
print("Decision Tree:", decision_tree)

# Classify a single sample
sample = {
    "Outlook": "Sunny",
    "Temp": "Cool",
    "Humidity": "High",
    "Wind": "Strong",
}
prediction = predict(decision_tree, sample)
print("Prediction:", prediction)

Decision Tree: {'Outlook': {'Sunny': 'No', 'Overcast': 'Yes', 'Rain': 'Yes'}}
Prediction: No


In [None]:
# Play Tennis dataset with 14 rows (D1-D14). This rebinds `data`; the new
# frame still contains the "Day" identifier column, with "Play Tennis"
# (the target) as the last column.
data = pd.DataFrame({
    "Day": ["D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "D10", "D11", "D12", "D13", "D14"],
    "Outlook": ["Sunny", "Sunny", "Overcast", "Rain", "Rain", "Rain", "Overcast", "Sunny", "Sunny", "Rain", "Sunny", "Overcast", "Overcast", "Rain"],
    "Temp": ["Hot", "Hot", "Hot", "Mild", "Cool", "Cool", "Cool", "Mild", "Cool", "Mild", "Mild", "Mild", "Hot", "Mild"],
    "Humidity": ["High", "High", "High", "High", "Normal", "Normal", "Normal", "High", "Normal", "Normal", "Normal", "High", "Normal", "High"],
    "Wind": ["Weak", "Strong", "Weak", "Weak", "Weak", "Strong", "Strong", "Weak", "Weak", "Weak", "Strong", "Strong", "Weak", "Strong"],
    "Play Tennis": ["No", "No", "Yes", "Yes", "Yes", "No", "Yes", "No", "Yes", "Yes", "Yes", "Yes", "Yes", "No"]
})