In [None]:
# Cell 1: Imports & Load
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from pathlib import Path

from src.data_utils import load_iris_csv, standardize_iris_columns, IRIS_FEATURES, TARGET_COL
from src.models import get_model
from src.features import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, ConfusionMatrixDisplay

# Read the Iris CSV from the project's data directory. The loader returns
# three objects, bound here as X, y, and df for use by the cells below.
iris_csv_path = Path("data", "Iris.csv")
X, y, df = load_iris_csv(iris_csv_path)

# Preview the first rows as the cell's rich output.
df.head()

In [None]:
# Cell 2: Basic info and class balance

# DataFrame.info() prints its summary directly and returns None, so it is
# called on its own; wrapping it in display() would render a stray "None".
df.info()

# DataFrame.describe() has no `numerical_only` keyword (passing it raises
# TypeError). Restricting the summary to numeric columns is done via `include`.
display(df.describe(include=["number"]))

# One bar per class label; countplot returns the Axes it drew on, which is
# used to set the title explicitly instead of relying on pyplot's current axes.
ax = sns.countplot(x=TARGET_COL, data=df)
ax.set_title("Class Balance")
plt.show()

In [None]:
# Cell 3: Pairplot (EDA)
# Scatter every pair of feature columns against each other, coloured by the
# target column. corner=True draws only the lower triangle of the grid and
# the diagonal shows per-feature histograms.
pairplot_options = {
    "vars": IRIS_FEATURES,
    "hue": TARGET_COL,
    "corner": True,
    "diag_kind": "hist",
}
sns.pairplot(df, **pairplot_options)

# y is given in figure coordinates; a value just above 1 places the title
# slightly above the top edge of the grid.
plt.suptitle("Pairwise Feature Distributions", y=1.02)
plt.show()

In [None]:
# Cell 4: Baseline model (SVM + scaling)
from sklearn.model_selection import StratifiedKFold, cross_val_score

# Single seed shared by the train/test split, the model, and the CV folds.
BASELINE_SEED = 42

# Hold out 20% of the rows for testing, stratified on the labels.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=BASELINE_SEED,
)

# Build the estimator through the project helpers; scale=True is forwarded to
# make_pipeline (presumably enabling a scaling step — confirm in src.features).
model = get_model("svm", random_state=BASELINE_SEED)
pipe = make_pipeline(model, scale=True)

# 5-fold stratified cross-validation on the training portion only, so the
# held-out test rows play no part in this estimate.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=BASELINE_SEED)
cv_scores = cross_val_score(
    pipe,
    X_train,
    y_train,
    cv=cv,
    scoring="accuracy",
)
print(f"CV Accuracy: {cv_scores.mean():.3f} ± {cv_scores.std():.3f}")

# Refit on the full training portion, then score once on the held-out rows.
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

print(classification_report(y_test, y_pred))

# Plot true vs. predicted labels for the held-out rows.
ConfusionMatrixDisplay.from_predictions(y_test, y_pred, cmap="Blues")
plt.title("Confusion Matrix — SVM Baseline")
plt.tight_layout()
plt.show()