In [None]:
# Cell 1 - imports and load dataset
import os

import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load the iris dataset and assemble a single frame for exploration.
iris = load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
y = pd.Series(data=iris.target, name="species")

# Feature columns and the integer class code side by side.
df = pd.concat([X, y], axis=1)

# Integer class code -> readable label, keyed by position in `iris.target_names`.
species_lookup = dict(enumerate(iris.target_names))
df["species_name"] = df["species"].map(species_lookup)

# Last expression of the cell: rendered as the cell output.
df.head()

In [None]:
# Cell 2 - EDA
# Quick look at the assembled frame: its dimensions and per-column summary statistics.
print("Shape:", df.shape)
display(df.describe())

# Pairplot (may be slow in some environments; you can comment out if needed)
# The integer `species` column is dropped before plotting: it carries the same
# information as the `species_name` hue, and because it is numeric seaborn would
# otherwise draw it as an extra row/column in the grid next to the real features.
sns.pairplot(df.drop(columns="species"), hue="species_name", diag_kind="hist")
plt.show()

In [None]:
# Cell 3 - train/test split
# Feature matrix and labels taken directly from the dataset object loaded earlier.
X_vals, y_vals = iris.data, iris.target

# Hold out 20% of the samples for testing. The split is stratified on the
# labels and uses a fixed seed so that re-running the cell gives the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X_vals,
    y_vals,
    test_size=0.2,
    random_state=42,
    stratify=y_vals,
)
print(
    "Train:", X_train.shape,
    "Test:", X_test.shape,
)

In [None]:
# Cell 4 - train & evaluate
# Candidate classifiers, keyed by the label used in the printed report.
models = {
    "Logistic Regression": LogisticRegression(max_iter=200),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    # set probability=True if you need predict_proba
    "SVM": SVC(probability=False),
}

# Fit every candidate on the training split, score it on the held-out split,
# and keep the fitted estimator with its predictions so later cells can
# compare the models and persist one of them.
results = {}
for name, model in models.items():
    # scikit-learn's fit() returns the estimator itself, so the call can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results[name] = dict(accuracy=acc, model=model, y_pred=y_pred)

    print(f"=== {name} ===")
    print("Accuracy:", acc)
    print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))
    # end="\n\n" supplies the blank separator line between two model reports.
    print(
        classification_report(y_test, y_pred, target_names=iris.target_names),
        end="\n\n",
    )

In [None]:
# Cell 5 - comparison & save best
# Bar plot of accuracies
names = list(results.keys())
accs = [results[n]["accuracy"] for n in names]
plt.figure(figsize=(6,4))
plt.bar(names, accs)
plt.ylabel("Accuracy")
plt.ylim(0,1)  # accuracy is a fraction, so pin the axis to the full [0, 1] range
plt.title("Model comparison")
plt.show()

# pick best
# max() returns the first maximal entry it encounters, so when several models
# tie on accuracy the one inserted into `results` first is selected.
# NOTE(review): the winner is chosen on the same held-out accuracy that is
# reported, so that figure is an optimistic estimate for the selected model.
best_name = max(results, key=lambda n: results[n]["accuracy"])
best_model = results[best_name]["model"]
print("Best model:", best_name, "with accuracy", results[best_name]["accuracy"])

# save model
# `os` comes from the notebook's top import cell. The output location is
# defined once so the file written and the message printed cannot drift apart.
model_dir = "models"
model_path = f"{model_dir}/iris_model.joblib"
os.makedirs(model_dir, exist_ok=True)
joblib.dump(best_model, model_path)
print("Saved best model to", model_path)