In [11]:
# --- 1. Imports ---
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [12]:
# --- 2. Load dataset ---
iris = load_iris()
# Feature matrix (sepal length/width, petal length/width) and species labels.
X, y = iris.data, iris.target

In [13]:
# --- 3. Train-test split ---
# Hold out 20% of the samples for evaluation. The split is stratified on y and
# uses a fixed seed so that re-running the cell yields the same partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [14]:
# --- 4. Create pipeline (single step: decision tree classifier) ---
model = Pipeline(
    # One named step; the name "dectree" is how the estimator is addressed
    # inside the pipeline.
    steps=[("dectree", DecisionTreeClassifier(random_state=42))],
)

In [15]:
# --- 5. Train model ---
# Fit on the training partition only; the call is left as the cell's last
# expression so the fitted estimator's repr is displayed.
model.fit(X=X_train, y=y_train)

Pipeline(steps=[('dectree', DecisionTreeClassifier(random_state=42))])

In [16]:
# --- 6. Evaluate ---
# Score the fitted pipeline on the held-out test partition.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(
    y_test,
    y_pred,
    target_names=iris.target_names,  # label rows by species name, not index
)
print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", report_text)

Accuracy: 0.9333333333333333

Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.90      0.90      0.90        10
   virginica       0.90      0.90      0.90        10

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.93        30
weighted avg       0.93      0.93      0.93        30



In [17]:
# --- 7. Save model for inference (useful for Docker deployment) ---
# Keep the output path in a single variable so the file that is written and
# the path reported in the message cannot drift apart.
model_path = "iris_decision_tree_pipeline.joblib"
joblib.dump(model, model_path)
print("\nModel saved to", model_path)


Model saved to iris_decision_tree_pipeline.joblib


In [18]:
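# Plotting the decision tree (optional; currently disabled)
# NOTE: as written, uncommenting this cell rebinds `model` to a bare
# DecisionTreeClassifier, replacing the Pipeline assigned to `model` above.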

#from sklearn.tree import plot_tree
#import matplotlib.pyplot as plt
#model = DecisionTreeClassifier(random_state=42)
#model.fit(X_train, y_train)
#plt.figure(figsize=(14, 10))   # adjust size as needed

#plot_tree(
#    model,
#    feature_names=iris.feature_names,   # names for input features
#    class_names=iris.target_names,       # names for output classes
#    filled=True                          # colors the nodes
#)

#plt.show()

In [22]:
# Testing the saved model
# Reload the persisted pipeline from disk and classify one known sample, so
# the output recorded for this cell is reproducible on a fresh kernel run.
# The reloaded object gets its own name so the in-memory `model` is untouched.
loaded_model = joblib.load("iris_decision_tree_pipeline.joblib")
print(loaded_model.predict([[5.1, 3.5, 1.4, 0.2]]))
# expected answer: [0] -> it means "setosa"

[0]
