In [3]:
# Write a program to demonstrate the ID3 decision tree algorithm

hello


In [1]:
import math
import pandas as pd
import numpy as np

# Calculate entropy
def entropy(data):
    """Return the Shannon entropy, in bits, of the values in ``data``.

    Args:
        data: A pandas object exposing ``value_counts()``, e.g. the target
            column of a DataFrame.

    Returns:
        ``-sum(p * log2(p))`` over the distinct counted values, or ``0`` when
        there is nothing to count (empty or all-missing input).
    """
    value_counts = data.value_counts()
    # Normalise by the number of *counted* values rather than len(data):
    # value_counts() leaves out missing entries by default, so dividing by
    # len(data) would produce probabilities that do not sum to 1.
    total = value_counts.sum()
    entropy_value = 0
    for count in value_counts:
        # Categorical data reports unobserved levels with a count of zero;
        # they contribute nothing and log2(0) would raise ValueError.
        if count == 0:
            continue
        probability = count / total
        entropy_value -= probability * math.log2(probability)
    return entropy_value

# Calculate Information Gain
def information_gain(data, feature, target):
    """Return how much splitting ``data`` on ``feature`` lowers target entropy.

    Args:
        data: DataFrame holding both the ``feature`` and ``target`` columns.
        feature: Name of the column to split on.
        target: Name of the column whose entropy is measured.

    Returns:
        Entropy of the whole target column minus the size-weighted sum of the
        target entropies of each per-value partition.
    """
    baseline = entropy(data[target])
    column = data[feature]
    row_count = len(data)

    # Visit the feature's values in value_counts() order and accumulate each
    # partition's entropy weighted by its share of the rows.
    remainder = 0
    for level in column.value_counts().index:
        partition = data[column == level]
        partition_entropy = entropy(partition[target])
        remainder += (len(partition) / row_count) * partition_entropy

    return baseline - remainder

# ID3 Algorithm to Build the Decision Tree
def id3(data, features, target):
    """Recursively build an ID3 decision tree as nested dictionaries.

    Args:
        data: DataFrame containing the feature columns and the target column.
        features: Sequence of column names still available for splitting.
        target: Name of the column holding the class labels.

    Returns:
        A leaf (a single target value) when the labels are pure or no
        features remain, otherwise ``{best_feature: {value: subtree, ...}}``
        with one entry per distinct non-missing value of the chosen feature.
    """
    labels = data[target]

    # Pure node: every record carries the same label.
    if len(labels.unique()) == 1:
        return labels.iloc[0]

    # Nothing left to split on: fall back to the most frequent label.
    if len(features) == 0:
        return labels.mode()[0]

    # Score every candidate; max() keeps the first feature on ties.
    gains = {feature: information_gain(data, feature, target) for feature in features}
    best_feature = max(gains, key=gains.get)

    # The features left for the children are the same for every branch,
    # so build the list once instead of once per branch.
    remaining_features = [feature for feature in features if feature != best_feature]

    tree = {best_feature: {}}
    # Skip missing values: `== NaN` matches no rows, so branching on one would
    # recurse on an empty subset (an empty node, or an error at mode()[0]).
    for value in data[best_feature].dropna().unique():
        subset = data[data[best_feature] == value]
        tree[best_feature][value] = id3(subset, remaining_features, target)

    return tree

# Classify a new sample using the decision tree
def classify(tree, sample):
    """Walk ``tree`` using the values in ``sample`` and return the predicted label.

    Args:
        tree: Either a leaf value or a nested dict of the form
            ``{feature: {value: subtree, ...}}``.
        sample: Mapping from feature name to the sample's value.

    Returns:
        The leaf reached by following the sample's values, or ``None`` when
        the sample has a value with no matching branch.
    """
    node = tree
    # Descend until something that is not a dict (a leaf) is reached.
    while isinstance(node, dict):
        feature = list(node.keys())[0]
        observed = sample[feature]
        branches = node[feature]
        if observed not in branches:
            # The tree has no branch for this feature value.
            return None
        node = branches[observed]
    return node

# Example dataset in the style of 'PlayTennis', written one record per row.
data = pd.DataFrame(
    [
        ('Sunny', 'Hot', 'High', 'Weak', 'No'),
        ('Sunny', 'Hot', 'High', 'Strong', 'No'),
        ('Overcast', 'Hot', 'High', 'Weak', 'Yes'),
        ('Rainy', 'Mild', 'High', 'Weak', 'Yes'),
        ('Rainy', 'Cool', 'High', 'Weak', 'Yes'),
        ('Rainy', 'Cool', 'Low', 'Weak', 'Yes'),
        ('Overcast', 'Cool', 'Low', 'Strong', 'Yes'),
        ('Sunny', 'Mild', 'High', 'Weak', 'No'),
        ('Sunny', 'Mild', 'Low', 'Weak', 'Yes'),
        ('Rainy', 'Mild', 'Low', 'Strong', 'Yes'),
        ('Sunny', 'Hot', 'Low', 'Weak', 'Yes'),
        ('Overcast', 'Mild', 'Low', 'Strong', 'No'),
        ('Overcast', 'Mild', 'High', 'Weak', 'Yes'),
        ('Rainy', 'Mild', 'High', 'Strong', 'No'),
    ],
    columns=['Outlook', 'Temperature', 'Humidity', 'Windy', 'PlayTennis'],
)

# The label column, and every other column as a candidate split feature.
target = 'PlayTennis'
features = [column for column in data.columns if column != target]

# Fit the tree on the example dataset and show its nested-dict form.
tree = id3(data, features, target)
print("Decision Tree:", tree, sep="\n")

# Run a single sample through the fitted tree and report the predicted label.
new_sample = dict(Outlook='Sunny', Temperature='Hot', Humidity='High', Windy='Weak')
result = classify(tree, new_sample)
print("\nClassification Result for New Sample:", result, sep="\n")
