In [32]:
import os

import joblib
import numpy as np
import pandas as pd

from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [33]:
# === 1. Load Dataset ===
# The CSV uses ';' as its field separator rather than the default ','.
df = pd.read_csv('datasetbaru.csv', sep=';')

In [34]:
# === 2. Feature Engineering ===
# Build the two difference-score columns 'AC-CE' and 'AE-RO' from their
# source columns (same creation order as before).
for minuend, subtrahend in (('AC', 'CE'), ('AE', 'RO')):
    df[f'{minuend}-{subtrahend}'] = df[minuend] - df[subtrahend]

# Seed NumPy's global RNG so the random column below is identical on every run.
np.random.seed(42)
# One standard-normal draw (mean 0, std 1) per row.
# NOTE(review): 'Noise' is purely random yet is passed to the models as a
# feature below — confirm this is intentional.
df['Noise'] = np.random.normal(loc=0, scale=1, size=df.shape[0])

# Model inputs (X) and target labels (y).
feature_cols = ['AC-CE', 'AE-RO', 'Noise']
X = df[feature_cols]
y = df['Learning_Style']

In [35]:
# === 3. Grid Search: Random Forest ===
# Candidate hyper-parameters: 2 x 3 x 2 = 12 combinations are searched.
param_grid_rf = dict(
    n_estimators=[50, 100],
    max_depth=[None, 5, 10],
    min_samples_split=[2, 5],
)

print("🔍 Grid Search: Random Forest")
# Exhaustive search with 5-fold cross-validation, ranked by accuracy;
# n_jobs=-1 lets the search run in parallel.
gs_rf = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid_rf,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
gs_rf.fit(X, y)
print("✅ Best Params RF:", gs_rf.best_params_)

🔍 Grid Search: Random Forest
✅ Best Params RF: {'max_depth': 5, 'min_samples_split': 5, 'n_estimators': 50}


In [36]:
# === 4. Grid Search: Gradient Boosting ===
# Candidate hyper-parameters: 2 x 2 x 2 = 8 combinations are searched.
param_grid_gb = dict(
    n_estimators=[50, 100],
    learning_rate=[0.05, 0.1],
    max_depth=[3, 5],
)

print("\n🔍 Grid Search: Gradient Boosting")
# Exhaustive search with 5-fold cross-validation, ranked by accuracy;
# n_jobs=-1 lets the search run in parallel.
gs_gb = GridSearchCV(
    estimator=GradientBoostingClassifier(random_state=42),
    param_grid=param_grid_gb,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
gs_gb.fit(X, y)
print("✅ Best Params GB:", gs_gb.best_params_)


🔍 Grid Search: Gradient Boosting
✅ Best Params GB: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 50}


In [37]:
# === 5. Model Evaluation ===
# Cross-validate each tuned model on a shuffled, stratified 5-fold split and
# print the fold-averaged accuracy plus support-weighted precision/recall/F1.
#
# NOTE(review): the hyper-parameters were selected by grid search on this same
# X, y, so these scores are likely optimistic — confirm whether a held-out
# set or nested CV is wanted.
models = {
    "Random Forest": gs_rf.best_estimator_,
    "Gradient Boosting": gs_gb.best_estimator_
}

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for name, model in models.items():
    acc, prec, rec, f1 = [], [], [], []

    for train_idx, test_idx in skf.split(X, y):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        # Fit an unfitted copy with the same hyper-parameters. Calling .fit()
        # on `model` itself would retrain the grid search's best_estimator_
        # in place, leaving it trained on only the last fold's training split.
        fold_model = clone(model)
        fold_model.fit(X_train, y_train)
        y_pred = fold_model.predict(X_test)

        acc.append(accuracy_score(y_test, y_pred))
        # zero_division=0 scores a class with no predicted/true samples as 0
        # instead of emitting a warning.
        prec.append(precision_score(y_test, y_pred, average='weighted', zero_division=0))
        rec.append(recall_score(y_test, y_pred, average='weighted', zero_division=0))
        f1.append(f1_score(y_test, y_pred, average='weighted', zero_division=0))

    print(f"\n📊 Evaluasi Model: {name}")
    print(f"Accuracy : {np.mean(acc):.4f}")
    print(f"Precision: {np.mean(prec):.4f}")
    print(f"Recall   : {np.mean(rec):.4f}")
    print(f"F1-Score : {np.mean(f1):.4f}")


📊 Evaluasi Model: Random Forest
Accuracy : 0.9475
Precision: 0.9527
Recall   : 0.9475
F1-Score : 0.9461

📊 Evaluasi Model: Gradient Boosting
Accuracy : 0.9477
Precision: 0.9580
Recall   : 0.9477
F1-Score : 0.9471


In [38]:
# === 6. Save the Random Forest model to .pkl ===
output_dir = 'models'
# Create the output folder if needed; an existing folder is not an error.
os.makedirs(output_dir, exist_ok=True)

# Serialize the grid search's best Random Forest estimator with joblib.
model_path = f'{output_dir}/model_rf.pkl'
joblib.dump(gs_rf.best_estimator_, model_path)
print(f"\n💾 Model Random Forest berhasil disimpan di {output_dir}/model_rf.pkl")


💾 Model Random Forest berhasil disimpan di models/model_rf.pkl
