In [1]:
# iris_decision_tree.py
# Requirements: scikit-learn, pandas, numpy
# pip install scikit-learn pandas numpy

import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder

# 1. Load dataset
# Pull the feature matrix, the integer targets and the class names off the loaded iris object.
iris = datasets.load_iris()
target_names = iris.target_names
y = pd.Series(data=iris.target)  # integer class labels 0, 1, 2
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# 2. Handle missing values (illustration only -- no imputation is performed here)
# If the data contained missing values, we would impute them before training; X is left unchanged.
# Example: imputer = SimpleImputer(strategy='mean'); X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# 3. Encode labels: the targets are already numeric (0, 1, 2), so no encoding is needed.
# If the labels were strings: le = LabelEncoder(); y = le.fit_transform(y_strings)

# 4. Train-test split
# Hold out 20% of the rows for testing, stratified on y, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# 5. Train Decision Tree
# max_depth caps tree growth to avoid overfitting on this tiny dataset.
clf = DecisionTreeClassifier(max_depth=4, random_state=42).fit(X_train, y_train)

# 6. Predict & Evaluate
# All metrics below are computed on the held-out test split only.
y_pred = clf.predict(X_test)

acc = accuracy_score(y_test, y_pred)
# average='macro' is required here because the target has more than two classes.
prec = precision_score(y_test, y_pred, average='macro')
rec = recall_score(y_test, y_pred, average='macro')

print("Accuracy:", acc)
print("Precision (macro):", prec)
print("Recall (macro):", rec)

# sep="" matters for the multi-line outputs: print()'s default single-space separator
# would be emitted right after the heading's trailing newline, shifting the first line
# of the report/tree one column to the right of the lines below it.
print(
    "\nClassification report:\n",
    classification_report(y_test, y_pred, target_names=target_names),
    sep="",
)

# 7. Model inspection (text)
# Render the fitted tree as indented text rules, labelled with the DataFrame's column names.
r = export_text(clf, feature_names=list(X.columns))
print("\nDecision tree rules:\n", r, sep="")


Accuracy: 0.9333333333333333
Precision (macro): 0.9333333333333332
Recall (macro): 0.9333333333333332

Classification report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.90      0.90      0.90        10
   virginica       0.90      0.90      0.90        10

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.93        30
weighted avg       0.93      0.93      0.93        30


Decision tree rules:
 |--- petal length (cm) <= 2.45
|   |--- class: 0
|--- petal length (cm) >  2.45
|   |--- petal width (cm) <= 1.65
|   |   |--- petal length (cm) <= 4.95
|   |   |   |--- class: 1
|   |   |--- petal length (cm) >  4.95
|   |   |   |--- sepal length (cm) <= 6.15
|   |   |   |   |--- class: 1
|   |   |   |--- sepal length (cm) >  6.15
|   |   |   |   |--- class: 2
|   |--- petal width (cm) >  1.65
|   |   |--- petal length (cm) <= 4.85
|   |   |   |--- sepal width (cm