# In [11]:  (Jupyter notebook cell prompt)
import pandas as pd
import math

# Step 1: Load the dataset into a DataFrame.
# NOTE: the path below is absolute (it starts at the filesystem root) and the
# file name carries a " (1)" suffix; change it to wherever the CSV actually
# lives on your machine.
# The functions below read a 'PlayTennis' column as the class label and treat
# every other column as a candidate attribute.
df = pd.read_csv('/simpledata (1).csv')

# Step 2: Shannon entropy of the class label
def entropy(data):
    """Return the base-2 entropy of the 'PlayTennis' column of *data*.

    Each distinct label contributes ``-p * log2(p)``, where ``p`` is the
    label's count divided by the number of rows in *data*.
    """
    label_shares = data['PlayTennis'].value_counts() / len(data)
    # A share of zero contributes nothing; the guard avoids calling log2(0).
    return -sum(
        share * (math.log2(share) if share > 0 else 0)
        for share in label_shares
    )

# Step 3: Information gain of splitting on one attribute
def information_gain(data, attribute):
    """Return the entropy reduction obtained by splitting *data* on *attribute*.

    The gain is the entropy of *data* minus the size-weighted average of the
    entropies of the subsets produced by each distinct value of *attribute*.
    """
    n_rows = len(data)
    column = data[attribute]

    remainder = 0
    for level, size in column.value_counts().items():
        branch = data[column == level]
        remainder += (size / n_rows) * entropy(branch)

    return entropy(data) - remainder

# Step 4: Function to select the best attribute based on information gain
def best_split(data):
    """Return the name of the attribute with the highest information gain.

    Every column of *data* except the 'PlayTennis' target is scored with
    ``information_gain``; ties go to the column that appears first.

    Raises:
        ValueError: if *data* has no column other than the target.
    """
    # Exclude the target by name rather than by position: slicing off the
    # last column silently treats 'PlayTennis' as a candidate attribute
    # whenever it is not the final column.
    attributes = [column for column in data.columns if column != 'PlayTennis']
    if not attributes:
        raise ValueError("no attributes left to split on")

    gains = {attribute: information_gain(data, attribute) for attribute in attributes}
    return max(gains, key=gains.get)

# Step 5: Recursive construction of the decision tree
def build_tree(data):
    """Build a decision tree for *data* and return it.

    A leaf is returned as a bare class label. An internal node is a
    single-key dict ``{attribute: {value: subtree, ...}}``.
    """
    labels = data['PlayTennis']

    # Pure node: every row carries the same label.
    if len(labels.unique()) == 1:
        return labels.iloc[0]

    # Only the target column remains: fall back to the most frequent label.
    if data.shape[1] == 1:
        return labels.mode()[0]

    split_on = best_split(data)
    column = data[split_on]
    # Recurse into each observed value, with the used attribute removed.
    branches = {
        level: build_tree(data[column == level].drop(columns=[split_on]))
        for level in column.unique()
    }
    return {split_on: branches}

# Step 6: Classify a sample by walking the decision tree
def classify(tree, sample):
    """Return the label that *tree* assigns to *sample*.

    *tree* is either a leaf label or a single-key dict
    ``{attribute: {value: subtree, ...}}``; *sample* maps attribute names to
    values. If the sample's value has no branch at some node, the result is
    ``None``.
    """
    node = tree
    # Descend until something that is not a dict (a leaf, or None) is reached.
    while isinstance(node, dict):
        attribute = list(node)[0]
        observed = sample[attribute]
        node = node[attribute].get(observed)
    return node

# Step 7: Learn the decision tree from the loaded dataset
tree = build_tree(df)

# Step 8: Show the learned tree (nested dicts) under a heading
print("Decision Tree:", tree, sep="\n")

# Step 9: Run one new observation through the tree and report its label
new_sample = {
    'Outlook': 'Sunny',
    'Temperature': 'Cool',
    'Humidity': 'Normal',
    'Wind': 'Weak',
}
classification = classify(tree, new_sample)
print(f"\nClassified as: {classification}")

# Output of the cell above:
# Decision Tree:
# {'Outlook': {'Sunny': 'No', 'Overcast': 'Yes', 'Rainy': {'Temperature': {'Mild': 'Yes', 'Cool': 'No'}}}}
#
# Classified as: No
