In [23]:
import numpy as np
from decision_tree import ID3 as Tree
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.model_selection import KFold

In [24]:
def accuracy(y_test, y_pred):
    """Return the fraction of predictions that match the true labels.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, one per entry of ``y_test``.

    Returns
    -------
    numpy.float64
        Number of matching positions divided by the number of samples.

    Raises
    ------
    ValueError
        If the inputs are empty or do not contain the same number of labels.
    """
    # Coerce to flat arrays before comparing: two plain lists would compare to
    # a single bool, and an (n, 1) column would broadcast against an (n,)
    # vector into an (n, n) matrix, both of which silently corrupt the score.
    y_true = np.asarray(y_test).ravel()
    y_hat = np.asarray(y_pred).ravel()

    if y_true.size == 0:
        raise ValueError("accuracy is undefined for an empty label array")
    if y_true.size != y_hat.size:
        raise ValueError(
            f"y_test and y_pred must have the same number of labels "
            f"(got {y_true.size} and {y_hat.size})"
        )

    return np.sum(y_true == y_hat) / y_true.size

In [25]:
def k_fold_cross_validation(X, y, k=5, max_depth=10, min_samples_split=5,
                            random_state=42, verbose=True):
    """Estimate the accuracy of the ID3 tree with shuffled k-fold cross-validation.

    For every fold a fresh ``Tree`` is fitted on the training split and scored
    on the held-out split with ``accuracy``.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Labels, one per row of ``X``.
    k : int, default 5
        Number of folds passed to ``KFold`` as ``n_splits``.
    max_depth : int, default 10
        Forwarded to the ``Tree`` constructor.
    min_samples_split : int, default 5
        Forwarded to the ``Tree`` constructor.
    random_state : int, default 42
        Seed for the ``KFold`` shuffle, so the fold assignment is repeatable.
    verbose : bool, default True
        When true, print the per-fold accuracy and the final summary.

    Returns
    -------
    tuple
        ``(mean_accuracy, std_accuracy)`` over the ``k`` folds, computed with
        ``np.mean`` and ``np.std`` (NumPy's default ``ddof=0``).

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not contain the same number of samples.
    """
    # Index arrays from KFold require NumPy-style fancy indexing, which plain
    # Python lists do not support.
    X = np.asarray(X)
    y = np.asarray(y)
    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same number of samples "
            f"(got {len(X)} and {len(y)})"
        )

    kf = KFold(n_splits=k, shuffle=True, random_state=random_state)
    accuracies = []

    for fold, (train_index, test_index) in enumerate(kf.split(X), 1):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # A new tree per fold keeps the folds independent of each other.
        tree = Tree(max_depth=max_depth, min_samples_split=min_samples_split)
        tree.fit(X_train, y_train)
        y_pred = tree.predict(X_test)

        acc = accuracy(y_test, y_pred)
        accuracies.append(acc)
        if verbose:
            print(f"Fold {fold} Accuracy: {acc:.4f}")

    mean_accuracy = np.mean(accuracies)
    std_accuracy = np.std(accuracies)
    if verbose:
        print(f"\nMean Accuracy: {mean_accuracy:.4f}")
        print(f"Standard Deviation: {std_accuracy:.4f}")

    return mean_accuracy, std_accuracy

In [26]:
# Iris benchmark: load the dataset bundle, pull out features and labels,
# then score the tree with 10-fold cross-validation.
data = datasets.load_iris()
X = data.data
y = data.target

mean_acc, std_acc = k_fold_cross_validation(X=X, y=y, k=10)

Fold 1 Accuracy: 1.0000
Fold 2 Accuracy: 1.0000
Fold 3 Accuracy: 1.0000
Fold 4 Accuracy: 0.9333
Fold 5 Accuracy: 0.9333
Fold 6 Accuracy: 0.8667
Fold 7 Accuracy: 0.8667
Fold 8 Accuracy: 0.9333
Fold 9 Accuracy: 0.8667
Fold 10 Accuracy: 0.9333

Mean Accuracy: 0.9333
Standard Deviation: 0.0516


In [27]:
# Breast-cancer benchmark: load the dataset bundle, pull out features and
# labels, then score the tree with 10-fold cross-validation.
data = datasets.load_breast_cancer()
X = data.data
y = data.target

mean_acc, std_acc = k_fold_cross_validation(X=X, y=y, k=10)

Fold 1 Accuracy: 0.9298
Fold 2 Accuracy: 0.9474
Fold 3 Accuracy: 0.9474
Fold 4 Accuracy: 0.9649
Fold 5 Accuracy: 0.9474
Fold 6 Accuracy: 0.9123
Fold 7 Accuracy: 0.9649
Fold 8 Accuracy: 0.9474
Fold 9 Accuracy: 0.9649
Fold 10 Accuracy: 0.9464

Mean Accuracy: 0.9473
Standard Deviation: 0.0157
