Pemuatan dan Pembagian Data.


In [1]:
# Dataset Iris dimuat dari URL yang diberikan ke dalam DataFrame Pandas.
# Fitur dan target variabel dipisahkan.
# Dataset dibagi menjadi set pelatihan dan pengujian menggunakan train_test_split.

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
df = pd.read_csv(url, header=None, names=names)

X = df.drop('class', axis=1)
y = df['class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

print("Training:", len(X_train))
print("Testing:", len(X_test))

Training: 105
Testing: 45


Fungsi Gini Impurity dan Entropy

In [2]:
# Dua fungsi, gini_impurity dan entropy, didefinisikan untuk menghitung kriteria pemilihan atribut pada set data

def gini_impurity(y):
    counts = np.unique(y, return_counts=True)[1]
    p = counts / len(y)
    return 1 - np.sum(p**2)

def entropy(y):
    counts = np.unique(y, return_counts=True)
    p = counts[1] / len(y)
    return -np.sum(p * np.log2(p))

Fungsi Pemilihan Atribut dan Konstruksi Decision Tree

In [3]:
# Fungsi choose_attribute memilih atribut terbaik berdasarkan nilai entropy.
# Fungsi-fungsi create_internal_node, create_leaf_node, dan build_decision_tree digunakan untuk membangun pohon keputusan.

def choose_attribute(X, y):
    entropy_values = []
    for i in range(X.shape[1]):
        entropy_values.append(entropy(y[X.iloc[:, i] == 1]) + entropy(y[X.iloc[:, i] == 0]))
    return np.argmax(entropy_values)

def create_internal_node(X, y, attribute):
    return {
        "attribute": attribute,
        "value": X.iloc[:, attribute].mean(),
        "left": None,
        "right": None,
    }

def create_leaf_node(y):
    return {
        "label": y.mode()[0],
    }

def build_decision_tree(X, y):
    if len(np.unique(y)) == 1:
        return create_leaf_node(y)

    if X.shape[0] == 0:
        return create_leaf_node(y)

    attribute = choose_attribute(X, y)

    if attribute is None:
        return create_leaf_node(y)

    node = create_internal_node(X, y, attribute)

    X_left = X[X.iloc[:, attribute] <= node["value"]]
    X_right = X[X.iloc[:, attribute] > node["value"]]
    y_left = y[X.iloc[:, attribute] <= node["value"]]
    y_right = y[X.iloc[:, attribute] > node["value"]]

    if len(X_left) == 0 or len(X_right) == 0:
        return create_leaf_node(y)

    node["left"] = build_decision_tree(X_left, y_left)
    node["right"] = build_decision_tree(X_right, y_right)

    return node




Fungsi Print Decision Tree

In [4]:
# Fungsi print_decision_tree menghasilkan representasi yang mudah dibaca dari pohon keputusan dalam bentuk nested dictionary.

def print_decision_tree(node):
    if "label" in node:
        return {"children": []}
    else:
        return {
            "attribute": node["attribute"],
            "value": node["value"],
            "left": print_decision_tree(node["left"]),
            "right": print_decision_tree(node["right"]),
        }

tree = build_decision_tree(X, y)

formatted_tree = print_decision_tree(tree)
print(formatted_tree)


{'attribute': 0, 'value': 5.843333333333334, 'left': {'attribute': 0, 'value': 5.1987499999999995, 'left': {'attribute': 0, 'value': 4.85609756097561, 'left': {'children': []}, 'right': {'attribute': 0, 'value': 5.0120000000000005, 'left': {'attribute': 0, 'value': 4.9625, 'left': {'children': []}, 'right': {'children': []}}, 'right': {'children': []}}}, 'right': {'attribute': 0, 'value': 5.558974358974359, 'left': {'attribute': 0, 'value': 5.388888888888889, 'left': {'attribute': 0, 'value': 5.220000000000001, 'left': {'children': []}, 'right': {'children': []}}, 'right': {'attribute': 0, 'value': 5.453846153846154, 'left': {'children': []}, 'right': {'children': []}}}, 'right': {'attribute': 0, 'value': 5.704761904761904, 'left': {'attribute': 0, 'value': 5.6571428571428575, 'left': {'children': []}, 'right': {'children': []}}, 'right': {'children': []}}}}, 'right': {'attribute': 0, 'value': 6.579999999999998, 'left': {'attribute': 0, 'value': 6.227500000000001, 'left': {'attribute':

Pengujian Model dan Confusion Matrix

In [5]:
# Model diuji pada subset data uji yang diambil secara acak.
# Hasil prediksi dan nilai sebenarnya dicetak, dan confusion matrix dihitung.

def predict_tree(node, x):
    if "label" in node:
        return node["label"]
    if x[node["attribute"]] <= node["value"]:
        return predict_tree(node["left"], x)
    else:
        return predict_tree(node["right"], x)

X_test = df.sample(frac=0.2, random_state=42)  
y_test = X_test['class']
X_test = X_test.drop('class', axis=1)
predictions = [predict_tree(tree, x) for index, x in X_test.iterrows()]

print("Prediction: ", predictions)
print("Real Values: ", y_test.values)


Prediction:  ['Iris-versicolor', 'Iris-versicolor', 'Iris-virginica', 'Iris-versicolor', 'Iris-virginica', 'Iris-setosa', 'Iris-versicolor', 'Iris-virginica', 'Iris-versicolor', 'Iris-versicolor', 'Iris-virginica', 'Iris-setosa', 'Iris-versicolor', 'Iris-setosa', 'Iris-setosa', 'Iris-virginica', 'Iris-virginica', 'Iris-versicolor', 'Iris-versicolor', 'Iris-virginica', 'Iris-setosa', 'Iris-versicolor', 'Iris-setosa', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-setosa', 'Iris-setosa']
Real Values:  ['Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-setosa' 'Iris-versicolor' 'Iris-virginica'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'
 'Iris-virginica' 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica'
 'Iris-setosa' 'Iris-virginica' 'Iris-setosa' 'Iris-virginica'
 'Iris-virginica' 'Iris-virginica' 'Iris-virgin

  if x[node["attribute"]] <= node["value"]:


Fungsi Confusion Matrix dan Pengukuran Kinerja

In [6]:
# Fungsi create_confusion_matrix membuat confusion matrix dari hasil prediksi dan nilai sebenarnya.
# Fungsi calculate_metrics menghitung metrik kinerja seperti akurasi, presisi, sensitivitas, dan spesifisitas

def create_confusion_matrix(true_labels, predicted_labels, classes):
    confusion_matrix = np.zeros((len(classes), len(classes)), dtype=int)
    for true, pred in zip(true_labels, predicted_labels):
        true_idx = np.where(classes == true)[0][0]
        pred_idx = np.where(classes == pred)[0][0]
        confusion_matrix[true_idx][pred_idx] += 1
    return confusion_matrix

classes = np.unique(y_test)
confusion_matrix = create_confusion_matrix(y_test.values, predictions, classes)

print("--------Confusion Matrix--------")
print(confusion_matrix)


# akurasi, presisi, sensitivitas, dan spesifitas
def calculate_metrics(confusion_matrix):
    true_positive = confusion_matrix.diagonal()
    false_positive = confusion_matrix.sum(axis=0) - true_positive
    false_negative = confusion_matrix.sum(axis=1) - true_positive
    true_negative = confusion_matrix.sum() - (true_positive + false_positive + false_negative)

    accuracy = (true_positive + true_negative) / confusion_matrix.sum()
    precision = true_positive / (true_positive + false_positive)
    recall = true_positive / (true_positive + false_negative)
    specificity = true_negative / (true_negative + false_positive)

    return accuracy, precision, recall, specificity

accuracy, precision, recall, specificity = calculate_metrics(confusion_matrix)

for i, class_name in enumerate(classes):
    print(f"--------{class_name}--------")
    print(f"Accuracy: {accuracy[i]:.2f}")
    print(f"Precision: {precision[i]:.2f}")
    print(f"Recall (Sensitivity): {recall[i]:.2f}")
    print(f"Specificity: {specificity[i]:.2f}")

# Overall metrics (macro-average)
print("--------Overall Metrics (Macro-average)--------")
print(f"Accuracy: {np.mean(accuracy):.2f}")
print(f"Precision: {np.mean(precision):.2f}")
print(f"Recall (Sensitivity): {np.mean(recall):.2f}")
print(f"Specificity: {np.mean(specificity):.2f}")


--------Confusion Matrix--------
[[ 8  2  0]
 [ 0  7  2]
 [ 0  1 10]]
--------Iris-setosa--------
Accuracy: 0.93
Precision: 1.00
Recall (Sensitivity): 0.80
Specificity: 1.00
--------Iris-versicolor--------
Accuracy: 0.83
Precision: 0.70
Recall (Sensitivity): 0.78
Specificity: 0.86
--------Iris-virginica--------
Accuracy: 0.90
Precision: 0.83
Recall (Sensitivity): 0.91
Specificity: 0.89
--------Overall Metrics (Macro-average)--------
Accuracy: 0.89
Precision: 0.84
Recall (Sensitivity): 0.83
Specificity: 0.92
