# MLflow Quickstart Demo

In [1]:
# install the latest release candidate
!pip install --pre mlflow

Collecting importlib-metadata!=4.7.0,<9,>=3.7.0 (from mlflow-skinny==2.17.0->mlflow)
  Obtaining dependency information for importlib-metadata!=4.7.0,<9,>=3.7.0 from https://files.pythonhosted.org/packages/c0/14/362d31bf1076b21e1bcdcb0dc61944822ff263937b804a79231df2774d28/importlib_metadata-8.4.0-py3-none-any.whl.metadata
  Using cached importlib_metadata-8.4.0-py3-none-any.whl.metadata (4.7 kB)
Using cached importlib_metadata-8.4.0-py3-none-any.whl (26 kB)
Installing collected packages: importlib-metadata
  Attempting uninstall: importlib-metadata
    Found existing installation: importlib_metadata 8.5.0
    Uninstalling importlib_metadata-8.5.0:
      Successfully uninstalled importlib_metadata-8.5.0
Successfully installed importlib-metadata-8.4.0


In [2]:
# Start the MLflow tracking server in a separate terminal on this machine and
# leave it running while executing the cells below. The command stays in the
# foreground until interrupted, so it is kept commented out here:
# !mlflow server --host 127.0.0.1 --port 8080

[2024-10-24 10:26:33 +0000] [2972182] [INFO] Starting gunicorn 23.0.0
[2024-10-24 10:26:33 +0000] [2972182] [INFO] Listening at: http://127.0.0.1:8080 (2972182)
[2024-10-24 10:26:33 +0000] [2972182] [INFO] Using worker: sync
[2024-10-24 10:26:33 +0000] [2972183] [INFO] Booting worker with pid: 2972183
[2024-10-24 10:26:33 +0000] [2972184] [INFO] Booting worker with pid: 2972184
[2024-10-24 10:26:33 +0000] [2972185] [INFO] Booting worker with pid: 2972185
[2024-10-24 10:26:33 +0000] [2972186] [INFO] Booting worker with pid: 2972186
^C
[2024-10-24 10:26:54 +0000] [2972182] [INFO] Handling signal: int
[2024-10-24 10:26:54 +0000] [2972184] [INFO] Worker exiting (pid: 2972184)
[2024-10-24 10:26:54 +0000] [2972183] [INFO] Worker exiting (pid: 2972183)
[2024-10-24 10:26:54 +0000] [2972185] [INFO] Worker exiting (pid: 2972185)
[2024-10-24 10:26:54 +0000] [2972186] [INFO] Worker exiting (pid: 2972186)


### Set the Tracking Server URI

In [3]:
import mlflow

# Point the MLflow client at the tracking server. The address matches the host
# the server is bound to (127.0.0.1) and the URI used later when logging the
# run; "localhost" may resolve to a different address on some systems.
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")


### Train a model and prepare metadata for logging

In [4]:
import mlflow
from mlflow.models import infer_signature

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Fetch the Iris features and class labels as plain arrays
X, y = datasets.load_iris(return_X_y=True)

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Classifier hyperparameters, kept in a dict so the same values can be logged to MLflow.
# NOTE(review): `multi_class` appears to be deprecated in recent scikit-learn
# releases — confirm against the installed version.
params = dict(
    solver="lbfgs",
    max_iter=1000,
    multi_class="auto",
    random_state=8888,
)

# Fit a logistic regression on the training split
lr = LogisticRegression(**params).fit(X_train, y_train)

# Score the held-out samples and measure accuracy against the true labels
y_pred = lr.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)




The next cell logs the trained model and its metadata to MLflow. It will:

- Initiate an MLflow run context to start a new run that we will log the model and metadata to.
- Log model parameters and performance metrics.
- Tag the run for easy retrieval.
- Register the model in the MLflow Model Registry while logging (saving) the model.

In [5]:
# Set our tracking server uri for logging
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

# Select the experiment to log to (it is created on first use if it does not exist)
mlflow.set_experiment("MLflow Quickstart")

# Everything logged inside this context is attached to a single new run
with mlflow.start_run():
    # Log the hyperparameters
    mlflow.log_params(params)

    # Log the evaluation metric (accuracy on the held-out test split)
    mlflow.log_metric("accuracy", accuracy)

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic LR model for iris data")

    # Infer the model signature from the training inputs and the model's predictions
    signature = infer_signature(X_train, lr.predict(X_train))

    # Log the model and register it in the Model Registry in one call.
    # Only a few rows are stored as the input example; the signature above
    # already describes the schema, so the full training split is not needed.
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train[:5],
        registered_model_name="tracking-quickstart",
    )


2024/10/24 10:28:00 INFO mlflow.tracking.fluent: Experiment with name 'MLflow Quickstart' does not exist. Creating a new experiment.
Successfully registered model 'tracking-quickstart'.
2024/10/24 10:28:02 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 1
Created version '1' of model 'tracking-quickstart'.
  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 998.00it/s]  
2024/10/24 10:28:02 INFO mlflow.tracking._tracking_service.client: 🏃 View run nimble-snake-785 at: http://127.0.0.1:8080/#/experiments/540310481050719949/runs/b6b883b337cf4d91a8ca5515e81870a0.
2024/10/24 10:28:02 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/540310481050719949.


### Load the model as a Python Function (pyfunc) and use it for inference

In [6]:
# Reload the logged model through the generic pyfunc interface and score the test split
loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)
predictions = loaded_model.predict(X_test)

# Label the feature columns with the Iris feature names, then put the true and
# predicted classes side by side for comparison
iris_feature_names = datasets.load_iris().feature_names
result = pd.DataFrame(X_test, columns=iris_feature_names).assign(
    actual_class=y_test,
    predicted_class=predictions,
)

# Show the first four rows
result.head(4)


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 237.62it/s]  


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),actual_class,predicted_class
0,6.1,2.8,4.7,1.2,1,1
1,5.7,3.8,1.7,0.3,0,0
2,7.7,2.6,6.9,2.3,2,2
3,6.0,2.9,4.5,1.5,1,1


### View the Run in the MLflow UI
With the tracking server still running, open http://localhost:8080 in a browser to inspect the run.