In [1]:
import warnings
warnings.filterwarnings("ignore", category = DeprecationWarning)

import mlflow
from mlflow.models import infer_signature

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [2]:

# Send all runs, params, metrics and artifacts logged below to the local MLflow tracking server.
mlflow.set_tracking_uri("http://127.0.0.1:8080")

Load the Iris dataset.

In [3]:
# Load the Iris feature matrix (X) and target vector (y).
X, y = datasets.load_iris(return_X_y = True)

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

Define model parameters.

In [4]:
# Hyperparameters passed to LogisticRegression and logged to MLflow as-is.
# NOTE: "multi_class" is intentionally omitted. It is deprecated since
# scikit-learn 1.5 (a FutureWarning is raised on every fit), and "auto" was
# already the default, so leaving it out yields the same fitted model.
params = {
    "solver": "lbfgs",
    "max_iter" : 1000,
    "random_state": 4040
}

Train the model

In [5]:
# Build the classifier from the hyperparameter dict, then fit it on the training split.
model = LogisticRegression(**params)
model.fit(X = X_train, y = y_train)

Predict on the test set

In [6]:
# Predicted class labels for the held-out test samples.
y_pred = model.predict(X = X_test)

Calculate metrics

In [7]:
# Fraction of test samples whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)

In [8]:
# Precision, recall and F1 all use the same "weighted" averaging across classes,
# so compute them with one uniform call per scorer, in that order.
precision, recall, f1 = (
    score_fn(y_true = y_test, y_pred = y_pred, average = "weighted")
    for score_fn in (precision_score, recall_score, f1_score)
)

Log the model and its metadata to MLflow.

* Initiate an MLflow **run** context to start a new run that we will log the model and metadata to.
* **Log** model **parameters** and performance **metrics**
* **Tag** the run for easy retrieval.
* **Register** the model in the MLflow Model Registry while **logging** (saving) the model.

In [10]:
# Select the MLflow experiment that groups these runs (created if it does not exist yet).
mlflow.set_experiment("MLFlow Mini")

# Start an MLflow run; everything logged inside the context is attached to this run.
with mlflow.start_run(log_system_metrics = True):

    # Log hyperparameters
    mlflow.log_params(params = params)

    # Log the classification metrics (not a loss) in one batched call.
    mlflow.log_metrics(
        metrics = {
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "F1_score": f1,
        }
    )

    # Set a tag that we can use to remind ourselves what this run was for.
    mlflow.set_tag("Training Info", "Logistic-Regression for Iris Dataset")

    # Infer the model signature (input/output schema) from the training data
    # and the model's predictions on it.
    signature = infer_signature(X_train, model.predict(X_train))

    # Log the model and register it in the Model Registry.
    # Only a handful of rows are stored as the input example: it is meant to
    # illustrate a valid model input, not to persist the whole training set.
    model_info = mlflow.sklearn.log_model(
        sk_model = model,
        artifact_path = "iris_model",
        signature = signature,
        input_example = X_train[:5],
        registered_model_name = "tracking mlflow-mini"
    )

2024/09/05 09:25:22 INFO mlflow.tracking.fluent: Experiment with name 'MLFlow Mini' does not exist. Creating a new experiment.
2024/09/05 09:25:22 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
Successfully registered model 'tracking mlflow-mini'.
2024/09/05 09:25:30 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking mlflow-mini, version 1
Created version '1' of model 'tracking mlflow-mini'.
2024/09/05 09:25:30 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2024/09/05 09:25:30 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


Load the model as a Python Function **(pyfunc)** & use it for inference

After logging the model, we can perform inference by:

- **Loading** the model using MLflow's pyfunc flavor.
- Running **Predict** on new data using the loaded model.

In [None]:
# Reload the logged model through MLflow's generic Python Function (pyfunc) interface.
loaded_model = mlflow.pyfunc.load_model(model_uri = model_info.model_uri)

# Score the held-out test samples with the reloaded model.
predictions = loaded_model.predict(X_test)

# Column labels for the feature columns of the result table.
iris_feature_names = datasets.load_iris().feature_names

# Put features, true labels and predicted labels side by side for inspection.
result = pd.DataFrame(X_test, columns = iris_feature_names).assign(
    actual_class = y_test,
    predicted_class = predictions,
)

# Display only the first ten rows.
result.head(10)