In [None]:
# Cell 1: data + train/test split (EX03/EX01)

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Iris is a built-in dataset (nothing is downloaded from the internet).
# as_frame=True returns the features and the target as pandas objects,
# which is what makes the .head() preview below possible.
X, y = load_iris(return_X_y=True, as_frame=True)

# Hold out 20% of the rows for evaluation.
# stratify=y keeps the class proportions of y in both subsets, so the small
# test set is not skewed towards one class by chance; the fixed random_state
# makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Preview the first rows of the features and of the target.
X.head(), y.head()


In [None]:
# Cell 2: Pipeline + preprocessing + training (EX04 + EX05)

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# StandardScaler is the preprocessing step.
# SVC(probability=True) enables the /predict_proba endpoint.
# random_state seeds the data shuffling SVC performs for its probability
# estimates, so re-running this cell does not change predict_proba output.
model = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", SVC(kernel="linear", probability=True, random_state=42)),
])

# Fit the scaler and the classifier together on the training split only.
model.fit(X_train, y_train)

# Score the fitted pipeline on the held-out test split.
acc = model.score(X_test, y_test)
print("Accuracy:", acc)


In [None]:
# Cell 3: save the model to model/model.joblib (EX06)

from pathlib import Path

import joblib

# The artifact directory sits next to the directory the notebook runs from;
# create it (and any missing parents) before writing into it.
model_dir = Path("..") / "model"
model_dir.mkdir(parents=True, exist_ok=True)

# Serialize the whole fitted pipeline into a single file.
out_path = model_dir / "model.joblib"
joblib.dump(model, out_path)

# Print the absolute location so it is clear where the file was written.
print("Saved:", out_path.resolve())


In [None]:
# Cell 4: save metadata to model/metadata.json (bonus for 5.0)

import json
from datetime import datetime, timezone
from pathlib import Path  # imported here so the cell does not rely on an earlier cell

from sklearn.datasets import load_iris

# Reload the dataset bunch to read its feature and class names.
iris = load_iris(as_frame=True)

meta = {
    "problem": "iris_classification",
    # Timestamp (UTC, ISO 8601) taken at the moment this cell runs.
    "trained_at_utc": datetime.now(timezone.utc).isoformat(),
    # Names are converted to plain Python str: target_names holds NumPy
    # string scalars, which would otherwise show up as np.str_(...) in the
    # displayed dict. The JSON written to disk is the same either way.
    "feature_names": [str(name) for name in iris.feature_names],
    "class_names": [str(name) for name in iris.target_names],
    "n_features": len(iris.feature_names),
}

meta_path = Path("..") / "model" / "metadata.json"
# Make sure the target directory exists even if the model-saving cell was
# skipped; write_text would fail on a missing directory.
meta_path.parent.mkdir(parents=True, exist_ok=True)
meta_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")
print("Saved:", meta_path.resolve())

# Display the metadata that was just written.
meta
