<a href="https://colab.research.google.com/github/student-monika/Marvel_tasks_Level_2/blob/main/Marvel_level_3_Task_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import math
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

# Function to calculate entropy
def calculate_entropy(data):
    """Return the Shannon entropy (in bits) of the class label column.

    The class label is taken to be the last column of ``data``.

    Args:
        data: DataFrame whose last column holds the class labels.

    Returns:
        The entropy of the label distribution, using base-2 logarithms.
    """
    target = data.iloc[:, -1]
    n_rows = len(target)
    # Relative frequency of each distinct label, in value_counts() order.
    proportions = [freq / n_rows for freq in target.value_counts()]
    return -sum(p * math.log2(p) for p in proportions)

# Function to calculate information gain
def calculate_information_gain(data, feature):
    """Return the information gain obtained by splitting ``data`` on ``feature``.

    The gain is the entropy of the whole dataset minus the size-weighted
    average entropy of the partitions induced by each distinct value of
    ``feature``.

    Args:
        data: DataFrame containing the ``feature`` column; entropy is
            computed by ``calculate_entropy`` on each partition.
        feature: Name of the column to split on.

    Returns:
        The reduction in entropy achieved by the split.
    """
    n_rows = len(data)
    column = data[feature]
    # One partition per distinct value, in order of first appearance.
    partitions = (data[column == level] for level in column.unique())
    conditional_entropy = sum(
        (len(part) / n_rows) * calculate_entropy(part) for part in partitions
    )
    return calculate_entropy(data) - conditional_entropy

# Recursive ID3 algorithm
def id3(data, features, tree=None):
    """Recursively build a decision tree with the ID3 algorithm.

    The class label is read from the last column of ``data``. At each node
    the feature with the highest information gain is chosen and the data is
    partitioned on its distinct values.

    Args:
        data: DataFrame whose last column holds the class label and which
            contains a column for every name in ``features``.
        features: Iterable of column names still available for splitting.
        tree: Ignored. Retained only so existing callers that pass it keep
            working; the tree is always built from scratch.

    Returns:
        A class label when the node is a leaf, otherwise a nested dict of
        the form ``{feature: {value: subtree}}``.
    """
    # Materialise once so any iterable (tuple, pandas Index, NumPy array) is
    # accepted and the emptiness check below is unambiguous.
    features = list(features)
    labels = data.iloc[:, -1]

    # If all instances have the same class, return that class
    if len(labels.unique()) == 1:
        return labels.iloc[0]

    # If no features are left, return the majority class
    if not features:
        return labels.mode()[0]

    # Select the feature with the highest information gain (first one wins
    # on ties, following the order of ``features``)
    gains = {feature: calculate_information_gain(data, feature) for feature in features}
    best_feature = max(gains, key=gains.get)

    remaining_features = [f for f in features if f != best_feature]
    branches = {}

    # Split the dataset on the best feature
    for value in data[best_feature].unique():
        subset = data[data[best_feature] == value]
        if len(subset) == 0:
            # unique() reports NaN, but NaN never compares equal to itself,
            # so the matching subset is empty. Recursing on it would either
            # fail in mode()[0] or yield a meaningless empty branch.
            continue
        branches[value] = id3(subset.drop(columns=[best_feature]), remaining_features)

    return {best_feature: branches}

# Example dataset
# Classic "play tennis" weather observations, one tuple per day:
# (Outlook, Temperature, Humidity, Windy, Play)
data = pd.DataFrame(
    [
        ("Sunny", "Hot", "High", False, "No"),
        ("Sunny", "Hot", "High", True, "No"),
        ("Overcast", "Hot", "High", False, "Yes"),
        ("Rain", "Mild", "High", False, "Yes"),
        ("Rain", "Cool", "Normal", False, "Yes"),
        ("Rain", "Cool", "Normal", True, "No"),
        ("Overcast", "Cool", "Normal", True, "Yes"),
        ("Sunny", "Mild", "High", False, "No"),
        ("Sunny", "Cool", "Normal", False, "Yes"),
        ("Rain", "Mild", "Normal", False, "Yes"),
        ("Sunny", "Mild", "Normal", True, "Yes"),
        ("Overcast", "Mild", "High", True, "Yes"),
        ("Overcast", "Hot", "Normal", False, "Yes"),
        ("Rain", "Mild", "High", True, "No"),
    ],
    columns=["Outlook", "Temperature", "Humidity", "Windy", "Play"],
)

# Every column except the trailing "Play" label is a candidate split feature.
features = [column for column in data.columns if column != "Play"]

# Build the decision tree and show it
decision_tree = id3(data, features)
print("Decision Tree:", decision_tree)


Decision Tree: {'Outlook': {'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}, 'Overcast': 'Yes', 'Rain': {'Windy': {False: 'Yes', True: 'No'}}}}
