In [1]:
import pandas as pd
import numpy as np
from math import log2
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.preprocessing import LabelEncoder

# Play Golf dataset, written as one record per observation.
_COLUMNS = ['Outlook', 'Temp', 'Humidity', 'Windy', 'PlayGolf']
_ROWS = [
    ('Rainy', 'Hot', 'High', False, 'No'),
    ('Rainy', 'Hot', 'High', True, 'No'),
    ('Overcast', 'Hot', 'High', False, 'Yes'),
    ('Sunny', 'Mild', 'High', False, 'Yes'),
    ('Sunny', 'Cool', 'Normal', False, 'Yes'),
    ('Sunny', 'Cool', 'Normal', True, 'No'),
    ('Overcast', 'Cool', 'Normal', True, 'Yes'),
    ('Rainy', 'Mild', 'High', False, 'No'),
    ('Rainy', 'Cool', 'Normal', False, 'Yes'),
    ('Sunny', 'Mild', 'Normal', False, 'Yes'),
    ('Rainy', 'Mild', 'Normal', True, 'No'),
    ('Overcast', 'Mild', 'High', True, 'Yes'),
    ('Overcast', 'Hot', 'Normal', False, 'Yes'),
    ('Sunny', 'Mild', 'High', True, 'No'),
]

# Transpose the records into the column-oriented mapping {column: [values]}.
data = {
    column: list(values)
    for column, values in zip(_COLUMNS, zip(*_ROWS))
}

# Build the DataFrame and show it.
df = pd.DataFrame(data)
print("Dataset:\n", df)

# Compute entropy
def entropy(target_col):
    """Return the Shannon entropy (in bits) of the values in ``target_col``.

    The distinct values are counted with ``np.unique`` and each contributes
    ``-p * log2(p)``, where ``p`` is its relative frequency. An empty input
    yields ``0.0`` because there are no terms to sum.
    """
    _, frequencies = np.unique(target_col, return_counts=True)
    total = np.sum(frequencies)

    terms = []
    for frequency in frequencies:
        probability = frequency / total
        terms.append(-probability * log2(probability))
    return np.sum(terms)

# Compute information gain
def info_gain(data, split_attribute_name, target_name="PlayGolf"):
    """Return the information gain of splitting ``data`` on one attribute.

    The gain is the entropy of the whole target column minus the weighted
    average of the target entropies within each distinct value of the split
    attribute, where each weight is that value's share of the rows.

    Args:
        data: DataFrame holding both the split attribute and the target.
        split_attribute_name: Name of the column to split on.
        target_name: Name of the target column.

    Returns:
        ``entropy(target) - sum(weight_v * entropy(target | attribute == v))``.
    """
    total_entropy = entropy(data[target_name])

    values, counts = np.unique(data[split_attribute_name], return_counts=True)
    weights = counts / np.sum(counts)

    # Select only the target column of the matching rows. Masking the whole
    # frame and calling dropna() would also discard rows that merely have a
    # missing value in some unrelated column, skewing the subset entropy.
    weighted_entropy = np.sum([
        weight * entropy(data.loc[data[split_attribute_name] == value, target_name])
        for value, weight in zip(values, weights)
    ])
    return total_entropy - weighted_entropy

# Show the target entropy and the information gain of every feature
print("\nEntropy (PlayGolf):", round(entropy(df["PlayGolf"]), 4))
for col in df.columns[:-1]:  # every column except the last one (PlayGolf)
    print(f"Information Gain ({col}): {round(info_gain(df, col), 4)}")

# Encode the categorical values as integers. Each column gets its own encoder
# so that every column's label <-> code mapping stays available afterwards;
# a single shared encoder only remembers the last column it was fitted on.
encoders = {col: LabelEncoder() for col in df.columns}
df_encoded = df.apply(lambda column: encoders[column.name].fit_transform(column))
le = encoders["PlayGolf"]  # target encoder, kept under the original name

# Split the data into features and target
X = df_encoded.drop(columns=["PlayGolf"])
y = df_encoded["PlayGolf"]

# Train the decision tree. random_state is fixed so that ties between equally
# good splits are broken the same way on every run (reproducible tree).
clf = DecisionTreeClassifier(criterion="entropy", random_state=0)
clf.fit(X, y)

# Print the learned decision tree as text rules
tree_rules = export_text(clf, feature_names=list(X.columns))
print("\nModel Decision Tree:\n")
print(tree_rules)


Dataset:
      Outlook  Temp Humidity  Windy PlayGolf
0      Rainy   Hot     High  False       No
1      Rainy   Hot     High   True       No
2   Overcast   Hot     High  False      Yes
3      Sunny  Mild     High  False      Yes
4      Sunny  Cool   Normal  False      Yes
5      Sunny  Cool   Normal   True       No
6   Overcast  Cool   Normal   True      Yes
7      Rainy  Mild     High  False       No
8      Rainy  Cool   Normal  False      Yes
9      Sunny  Mild   Normal  False      Yes
10     Rainy  Mild   Normal   True       No
11  Overcast  Mild     High   True      Yes
12  Overcast   Hot   Normal  False      Yes
13     Sunny  Mild     High   True       No

Entropy (PlayGolf): 0.9852
Information Gain (Outlook): 0.3806
Information Gain (Temp): 0.0391
Information Gain (Humidity): 0.0611
Information Gain (Windy): 0.1281

Model Decision Tree:

|--- Outlook <= 0.50
|   |--- class: 1
|--- Outlook >  0.50
|   |--- Windy <= 0.50
|   |   |--- Outlook <= 1.50
|   |   |   |--- Temp <= 0.50
|