<a href="https://colab.research.google.com/github/mdzikrim/Hands-on_DL/blob/main/Chapter_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.datasets import make_moons
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, ShuffleSplit
from sklearn.metrics import accuracy_score
from scipy.stats import mode


In [2]:
# Build a noisy two-moons dataset (fixed seed for reproducibility).
X, y = make_moons(
    n_samples=10000,
    noise=0.4,
    random_state=42,
)

# Hold out a test set. test_size=0.25 is the value train_test_split falls
# back to when neither size is given, so this is the same split, spelled out.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

print("Ukuran data train:", X_train.shape)
print("Ukuran data test:", X_test.shape)


Ukuran data train: (7500, 2)
Ukuran data test: (2500, 2)


In [3]:
# Tune max_leaf_nodes (candidates 2..99) with 3-fold cross-validation,
# ranking candidates by accuracy on the training set only.
param_grid = {"max_leaf_nodes": list(range(2, 100))}
grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=param_grid,
    cv=3,
    scoring="accuracy",
)
grid_search.fit(X_train, y_train)

# best_params_ is a dict, so the whole {'max_leaf_nodes': ...} mapping is printed.
print("Best max_leaf_nodes:", grid_search.best_params_)


Best max_leaf_nodes: {'max_leaf_nodes': 21}


In [4]:
# Evaluate the tuned tree on the held-out test set.
# No fitting happens in this cell: the estimator is taken as-is from the
# finished grid search.
best_tree = grid_search.best_estimator_
y_pred_test = best_tree.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_test)

print("Test set accuracy (best tree):", accuracy)


Test set accuracy (best tree): 0.8648


In [5]:
# Draw n_trees random subsets of the training set, each holding
# n_instances rows.
n_trees = 1000
n_instances = 100
splitter = ShuffleSplit(
    n_splits=n_trees,
    train_size=n_instances,
    random_state=42,
)

# Only the "train" side of every split is kept; the complementary indices
# produced by the splitter are ignored.
mini_sets = [
    (X_train[subset_idx], y_train[subset_idx])
    for subset_idx, _ in splitter.split(X_train)
]

print("Jumlah subset:", len(mini_sets))


Jumlah subset: 1000


In [6]:
# Fit one decision tree per mini training set, reusing the max_leaf_nodes
# value selected by the grid search.
# The lookup is loop-invariant, so read it once instead of on every iteration.
best_max_leaf_nodes = grid_search.best_params_["max_leaf_nodes"]

trees = []
for X_mini, y_mini in mini_sets:
    tree = DecisionTreeClassifier(max_leaf_nodes=best_max_leaf_nodes, random_state=42)
    tree.fit(X_mini, y_mini)
    trees.append(tree)

# Report the number of trees actually trained rather than a hard-coded 1000,
# so the message stays correct if the number of subsets is changed.
print(f"{len(trees)} pohon selesai dilatih.")


1000 pohon selesai dilatih.


In [7]:
# Let every tree vote on the test set: one row of predictions per tree,
# one column per test instance.
all_predictions = np.array([tree.predict(X_test) for tree in trees])

# The most frequent label in each column is the ensemble's prediction.
# flatten() yields a 1-D array whether or not mode() keeps the reduced axis.
vote_result = mode(all_predictions, axis=0)
majority_votes = vote_result.mode
y_pred_forest = majority_votes.flatten()

# Accuracy of the majority-vote ensemble
ensemble_accuracy = accuracy_score(y_test, y_pred_forest)
print("Ensemble (manual random forest) accuracy:", ensemble_accuracy)


Ensemble (manual random forest) accuracy: 0.8656
