 Prepare the Iris Dataset

In [3]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import mlflow
import mlflow.sklearn

# Fetch the bundled Iris data; its `data` attribute becomes the feature table
# (one column per entry in `feature_names`) and `target` becomes the labels.
iris = load_iris()
y = iris.target
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across kernel restarts.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


Train and Register a Model with MLflow

In [5]:
# Point MLflow at the tracking server first so everything below is recorded there.
mlflow.set_tracking_uri('http://localhost:5000')

# Enable MLflow autologging for scikit-learn estimators.
# log_models=False: autologging would otherwise store the fitted model on every
# fit(), and the explicit log_model() call below would then upload the same
# model a second time under the same "model" artifact path. With it disabled,
# a model is stored (and registered) only when it clears the threshold.
mlflow.sklearn.autolog(log_models=False)

with mlflow.start_run():
    # Train model
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Predict and evaluate on the test split
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy}")

    # Log the test accuracy under an explicit metric name
    mlflow.log_metric("accuracy", accuracy)

    # Fixed acceptance threshold; it is NOT read from the model registry.
    # TODO: replace with the best accuracy of the currently registered model.
    best_accuracy = 0.9

    if accuracy > best_accuracy:
        # Store the model as a run artifact and register it under "IrisClassifier"
        mlflow.sklearn.log_model(model, "model", registered_model_name="IrisClassifier")

        print("New best model registered!")
    else:
        print("Current model did not outperform the best model.")

Accuracy: 1.0


Successfully registered model 'IrisClassifier'.
2024/12/01 16:52:14 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: IrisClassifier, version 1


New best model registered!
🏃 View run nervous-tern-180 at: http://localhost:5000/#/experiments/0/runs/453a6cdb21fa4c50b8386be0676422f4
🧪 View experiment at: http://localhost:5000/#/experiments/0


Created version '1' of model 'IrisClassifier'.
