In [1]:
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.tree import DecisionTreeClassifier

In [2]:
# === 1. Load Dataset ===
# The CSV uses ';' as its field separator, so it is passed explicitly
# (the pandas default is ',').
df = pd.read_csv('datasetbaru.csv', sep=';')


In [3]:
# === 2. Feature Engineering ===
# Two difference features derived from the existing AC, CE, AE and RO columns.
df['AC-CE'] = df['AC'].sub(df['CE'])
df['AE-RO'] = df['AE'].sub(df['RO'])

# Seed NumPy's global RNG so the synthetic column is identical on every run,
# then draw one value per row from a normal distribution (mean 0, std 1).
# NOTE(review): 'Noise' is generated independently of every other column, so
# it carries no information about the label — confirm it is meant to be a
# model input rather than a leftover experiment.
np.random.seed(42)
df['Noise'] = np.random.normal(loc=0, scale=1, size=df.shape[0])

In [4]:
# === 3. Features & Label ===
# Model inputs: the two difference columns plus the random 'Noise' column.
X = df.loc[:, ['AC-CE', 'AE-RO', 'Noise']]
# Prediction target.
y = df.loc[:, 'Learning_Style']

In [5]:
# === 4. Grid Search for C4.5 ===
# NOTE(review): scikit-learn documents its decision trees as an optimized CART
# implementation; criterion='entropy' only makes the split measure
# information-based, it does not turn the model into a true C4.5 — confirm the
# "C4.5" label is acceptable for the write-up.
param_grid_dt = dict(
    criterion=['entropy'],
    max_depth=[2, 3],               # limit tree depth
    min_samples_split=[10, 20],     # large split threshold to avoid overfitting
    min_samples_leaf=[5, 10],
)

print("🔍 Grid Search: C4.5 (Decision Tree - Entropy)")

# Exhaustive search over the grid, scored by accuracy with 5-fold CV on all
# available cores. fit() returns the fitted search object itself.
gs_dt = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=param_grid_dt,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X, y)

print("✅ Best Params C4.5:", gs_dt.best_params_)

🔍 Grid Search: C4.5 (Decision Tree - Entropy)
✅ Best Params C4.5: {'criterion': 'entropy', 'max_depth': 3, 'min_samples_leaf': 5, 'min_samples_split': 10}


In [6]:
# === 5. Evaluate the Best Model with Stratified K-Fold ===
# Shuffled, seeded 5-fold stratified split so the evaluation folds are
# reproducible.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold scores; the report at the end prints the mean of each list.
acc, prec, rec, f1 = [], [], [], []

# NOTE(review): the hyperparameters were selected by a grid search fitted on
# this same X, y, so the scores below are likely optimistic. Consider nested
# cross-validation or a held-out test set for an unbiased estimate.
for train_idx, test_idx in skf.split(X, y):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Fit an unfitted copy that carries the best hyperparameters. Refitting
    # gs_dt.best_estimator_ directly would overwrite the model GridSearchCV
    # refit on the full data, leaving gs_dt.predict() backed by a tree that
    # only saw the last fold's training split.
    fold_model = clone(gs_dt.best_estimator_)
    fold_model.fit(X_train, y_train)
    y_pred = fold_model.predict(X_test)

    acc.append(accuracy_score(y_test, y_pred))
    # Weighted averaging accounts for class imbalance; zero_division=0 scores
    # a class with no predicted/true samples as 0 instead of warning.
    prec.append(precision_score(y_test, y_pred, average='weighted', zero_division=0))
    rec.append(recall_score(y_test, y_pred, average='weighted', zero_division=0))
    f1.append(f1_score(y_test, y_pred, average='weighted', zero_division=0))

print("\n📊 Evaluasi Model: C4.5 Decision Tree")
print(f"Accuracy : {np.mean(acc):.4f}")
print(f"Precision: {np.mean(prec):.4f}")
print(f"Recall   : {np.mean(rec):.4f}")
print(f"F1-Score : {np.mean(f1):.4f}")


📊 Evaluasi Model: C4.5 Decision Tree
Accuracy : 0.8757
Precision: 0.8939
Recall   : 0.8757
F1-Score : 0.8656
