<a href="https://colab.research.google.com/github/samuelhtampubolon/SDPM2025/blob/main/Klasifikasi_Iris_dengan_Pipeline_dan_Cross_Validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Tujuan: klasifikasi spesies iris (supervised, multi-class)
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Load the iris dataset that ships with scikit-learn and unpack it into
# the feature matrix X and the class-label vector y.
iris = load_iris()
X, y = iris.data, iris.target

In [2]:
# Stratified hold-out split: 25% of the samples go to the test set, and
# stratify=y keeps the class proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

In [3]:
# Modelling pipeline: standardize the features, then fit a logistic-regression
# classifier. Keeping the scaler inside the pipeline means it is re-fit on
# whatever data the pipeline itself is fit on.
pipe = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("clf", LogisticRegression(max_iter=1000)),
    ]
)

In [4]:
# 5-fold stratified cross-validation on the training split only,
# scored with macro-averaged F1 (one score per fold).
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(
    estimator=pipe,
    X=X_train,
    y=y_train,
    scoring="f1_macro",
    cv=cv,
)

In [5]:
# Fit the pipeline on the training split and predict the held-out test split.
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

# Compute every reported quantity first, then print them together.
cv_mean, cv_std = cv_scores.mean(), cv_scores.std()
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred, digits=3)

print(f"CV F1-macro: {cv_mean:.3f} ± {cv_std:.3f}")
print(f"Test Accuracy: {test_accuracy:.3f}")
print("Confusion Matrix:\n", test_confusion)
print(test_report)

CV F1-macro: 0.964 ± 0.053
Test Accuracy: 0.921
Confusion Matrix:
 [[12  0  0]
 [ 0 12  1]
 [ 0  2 11]]
              precision    recall  f1-score   support

           0      1.000     1.000     1.000        12
           1      0.857     0.923     0.889        13
           2      0.917     0.846     0.880        13

    accuracy                          0.921        38
   macro avg      0.925     0.923     0.923        38
weighted avg      0.923     0.921     0.921        38

