In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import mlflow
from mlflow.models import infer_signature

In [2]:
# Location of the dataset CSV; a relative path, so it is resolved against the
# kernel's current working directory.
DATASET_URI = "data.csv"

# Load the full CSV into a DataFrame.
# NOTE(review): the recorded preview shows an "Unnamed: 0" column, which looks
# like a row index that was written into the file — confirm whether it should
# be consumed with index_col=0. Downstream cells select features by name, so
# the extra column is not used for training.
data = pd.read_csv(filepath_or_buffer=DATASET_URI)

# Preview the first five rows as the cell's rich output.
data.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
# Single source of truth for the model inputs and the label, so the train and
# test frames can never be built from different column lists.
FEATURE_COLUMNS = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
TARGET_COLUMN = 'species'

# Hold out 40% of the rows for evaluation. The split is stratified on the label
# and uses a fixed seed so the same partition is produced on every run.
train, test = train_test_split(
    data,
    test_size=0.4,
    stratify=data[TARGET_COLUMN],
    random_state=42,
)

# Feature matrix / label vector for each partition.
X_train, y_train = train[FEATURE_COLUMNS], train[TARGET_COLUMN]
X_test, y_test = test[FEATURE_COLUMNS], test[TARGET_COLUMN]

In [6]:
# Address of the MLflow tracking server that runs, metrics and models are sent to.
# This points at a server on the local machine (port 8100); it must be reachable
# before the training cells below are executed.
MLFLOW_TRACKING_URI = "http://127.0.0.1:8100"

# Route all subsequent mlflow.* calls in this kernel to that server.
mlflow.set_tracking_uri(uri=MLFLOW_TRACKING_URI)

In [7]:
# Name of the experiment that groups every run logged by this notebook.
MLFLOW_EXPERIMENT_NAME = "Iris Species Classification"

# Make it the active experiment. The recorded output shows MLflow creating the
# experiment when it does not exist yet. Left as the last expression so the
# resulting Experiment object is displayed by the cell.
mlflow.set_experiment(experiment_name=MLFLOW_EXPERIMENT_NAME)

2025/10/31 07:31:20 INFO mlflow.tracking.fluent: Experiment with name 'Iris Species Classification' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/122906547719919630', creation_time=1761895880710, experiment_id='122906547719919630', last_update_time=1761895880710, lifecycle_stage='active', name='Iris Species Classification', tags={}>

In [8]:
def execute_training_pipeline(
    hyperparams,
    X_train,
    y_train,
    X_test,
    y_test,
    random_state=42,
    registered_model_name="IrisDecisionTreeModel",
):
    """
    Train a DecisionTreeClassifier, evaluate it, and log the result to MLflow.

    The model is fitted on the training split, scored on the test split
    (accuracy plus macro-averaged precision, recall and F1), and then a single
    MLflow run is opened in which the effective parameters, the metrics and
    the fitted model are logged. The run goes to whichever tracking server and
    experiment are currently active in the mlflow module.

    Args:
        hyperparams (dict): Keyword arguments for DecisionTreeClassifier.
            The dict is not modified.
        X_train (pd.DataFrame): Training features.
        y_train (pd.Series): Training labels.
        X_test (pd.DataFrame): Testing features.
        y_test (pd.Series): Testing labels.
        random_state (int | None): Seed used for the tree when `hyperparams`
            does not already contain a "random_state" entry. Defaults to 42 so
            repeated executions produce the same model and metrics.
        registered_model_name (str | None): Name passed to MLflow as the
            registered model name for the logged model. Defaults to
            "IrisDecisionTreeModel".
    Returns:
        None
    """

    # scikit-learn only guarantees a deterministic tree when random_state is
    # fixed, so seed the estimator by default. An explicit "random_state" in
    # `hyperparams` takes precedence because it is unpacked last. Building a
    # new dict also leaves the caller's dict untouched.
    model_params = {"random_state": random_state, **hyperparams}

    # Train model
    model = DecisionTreeClassifier(**model_params)
    model.fit(X_train, y_train)

    # Evaluate model on the held-out split; macro averaging weights every
    # class equally regardless of its support.
    y_pred = model.predict(X_test)
    metrics_dict = {
        "accuracy": accuracy_score(y_test, y_pred),
        "precision": precision_score(y_test, y_pred, average='macro'),
        "recall": recall_score(y_test, y_pred, average='macro'),
        "f1_score": f1_score(y_test, y_pred, average='macro'),
    }

    # Log parameters, metrics, and model to MLflow
    with mlflow.start_run():
        # Log the parameters actually used (including the seed) so the run can
        # be reproduced from the tracking UI alone.
        mlflow.log_params(model_params)
        mlflow.log_metrics(metrics_dict)
        mlflow.sklearn.log_model(
            sk_model=model,
            name="decision_tree_model",
            # Signature is inferred from the training inputs and the model's
            # predictions on them.
            signature=infer_signature(X_train, model.predict(X_train)),
            registered_model_name=registered_model_name,
        )

    print("Training pipeline executed and logged to MLflow.")


In [9]:
# Configuration v1: Gini criterion with the tree depth capped at 3.
hyperparams_v1 = dict(
    criterion="gini",
    max_depth=3,
    min_samples_split=3,
    min_samples_leaf=1,
)

# Train, evaluate and log one MLflow run for this configuration.
execute_training_pipeline(
    hyperparams=hyperparams_v1,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

Successfully registered model 'IrisDecisionTreeModel'.
2025/10/31 07:33:35 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: IrisDecisionTreeModel, version 1


🏃 View run carefree-goose-912 at: http://127.0.0.1:8100/#/experiments/122906547719919630/runs/d2e90eccede44ae8b10a5eccbdd9fa7d
🧪 View experiment at: http://127.0.0.1:8100/#/experiments/122906547719919630
Training pipeline executed and logged to MLflow.


Created version '1' of model 'IrisDecisionTreeModel'.


In [10]:
# Configuration v2: same settings as v1 except the depth cap is raised to 5.
hyperparams_v2 = dict(
    criterion="gini",
    max_depth=5,
    min_samples_split=3,
    min_samples_leaf=1,
)

# Train, evaluate and log one MLflow run for this configuration.
execute_training_pipeline(
    hyperparams=hyperparams_v2,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

Registered model 'IrisDecisionTreeModel' already exists. Creating a new version of this model...
2025/10/31 07:34:51 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: IrisDecisionTreeModel, version 2


🏃 View run adorable-bee-424 at: http://127.0.0.1:8100/#/experiments/122906547719919630/runs/142ae077ec5d418ca6e205cfc541a6a7
🧪 View experiment at: http://127.0.0.1:8100/#/experiments/122906547719919630
Training pipeline executed and logged to MLflow.


Created version '2' of model 'IrisDecisionTreeModel'.


In [11]:
# Configuration v3: same settings as v1 except the depth cap is raised to 8.
hyperparams_v3 = dict(
    criterion="gini",
    max_depth=8,
    min_samples_split=3,
    min_samples_leaf=1,
)

# Train, evaluate and log one MLflow run for this configuration.
execute_training_pipeline(
    hyperparams=hyperparams_v3,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

Registered model 'IrisDecisionTreeModel' already exists. Creating a new version of this model...
2025/10/31 07:34:54 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: IrisDecisionTreeModel, version 3


🏃 View run upset-wren-845 at: http://127.0.0.1:8100/#/experiments/122906547719919630/runs/b2e58482d290488c965f808aa9b56405
🧪 View experiment at: http://127.0.0.1:8100/#/experiments/122906547719919630
Training pipeline executed and logged to MLflow.


Created version '3' of model 'IrisDecisionTreeModel'.
