In [1]:
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import mlflow
from mlflow.models import infer_signature
import warnings 
warnings.filterwarnings('ignore')

# Model Training

In [2]:
# load the dataset
X, y = datasets.load_iris(return_X_y = True)
print(f"shape(X) = {X.shape} || shape(y) = {y.shape}")

# split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .2)
print(f"shape(X_train) = {X_train.shape} || shape(y_train) = {y_train.shape}")
print(f"shape(X_test) = {X_test.shape} || shape(y_test) = {y_test.shape}")

# define the model hyperparameters
params = {"penalty": "l2", "solver": "lbfgs", "max_iter": 1000, "multi_class": "auto", "random_state": 8888}

# train the model
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)

# prediction on the test set
y_pred = lr.predict(X_test)
print(f"shape(y_pred) = {y_pred.shape}")

accuracy = accuracy_score(y_test, y_pred)
print(f"{'-'*50}\nAccuracy = {accuracy*100:.2f}%")

shape(X) = (150, 4) || shape(y) = (150,)
shape(X_train) = (120, 4) || shape(y_train) = (120,)
shape(X_test) = (30, 4) || shape(y_test) = (30,)
shape(y_pred) = (30,)
--------------------------------------------------
Accuracy = 100.00%


# MLflow Tracking

- Before setting tracking uri activate the mlflow ui
    - Activate the venv: `conda activate MLOps`
    - command: `mlflow ui`

In [3]:
# set the MLflow tracking uri
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000") # localhost / cloud

# create a new MLflow experiment
mlflow.set_experiment("MLflow Sklearn")

# start an MLflow run
with mlflow.start_run():
    ## log the hyperparameters
    mlflow.log_params(params)

    ## log the accuracy metrics
    mlflow.log_metric("accuracy", accuracy)

    ## set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "A Logistic Regression model for Iris data")

    ## infer the model signature
    signature = infer_signature(X_train, lr.predict(X_train))

    ## log the model
    model_info=mlflow.sklearn.log_model(
        sk_model = lr,
        artifact_path = "iris_model",
        signature = signature,
        input_example = X_train,
        registered_model_name = "tracking-iris-model", # good practice is to manually register the best model after comparing all the versions
    )

2025/03/06 14:41:12 INFO mlflow.tracking.fluent: Experiment with name 'MLflow Sklearn' does not exist. Creating a new experiment.
Successfully registered model 'tracking-iris-model'.
2025/03/06 14:41:19 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-iris-model, version 1


🏃 View run gregarious-snipe-652 at: http://127.0.0.1:5000/#/experiments/510523818335139074/runs/285c91f9ee2a48b1886d043162dc7bc1
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/510523818335139074


Created version '1' of model 'tracking-iris-model'.


# Model Training
Training another model with different hyperparametes

In [4]:
# model hyperparameters
params = {"solver": "newton-cg", "max_iter": 1000, "multi_class": "auto", "random_state": 1000}

# train the model
lr = LogisticRegression(**params)
lr.fit(X_train,y_train)

# make prediction using the model
y_pred = lr.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"{'-'*50}\nAccuracy = {accuracy*100:.2f}%")

--------------------------------------------------
Accuracy = 100.00%


# MLflow Tracking for the new model

In [5]:
# start an MLflow run
with mlflow.start_run():
    ## log the hyperparameters
    mlflow.log_params(params)

    ## log the accuracy metrics
    mlflow.log_metric("accuracy", accuracy)

    ## set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "A Logistic Regression model for Iris data")

    ## infer the model signature
    signature = infer_signature(X_train, lr.predict(X_train))

    ## log the model
    model_info=mlflow.sklearn.log_model(
        sk_model = lr,
        artifact_path = "iris_model",
        signature = signature,
        input_example = X_train,
        registered_model_name = "tracking-iris-model", # good practice is to manually register the best model after comparing all the versions
    )

Registered model 'tracking-iris-model' already exists. Creating a new version of this model...
2025/03/06 14:47:46 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-iris-model, version 2


🏃 View run useful-smelt-5 at: http://127.0.0.1:5000/#/experiments/510523818335139074/runs/3fa36db4abfe4ff9bbb6b2a74b8a46e4
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/510523818335139074


Created version '2' of model 'tracking-iris-model'.


In [6]:
model_info.model_uri

'runs:/3fa36db4abfe4ff9bbb6b2a74b8a46e4/iris_model'

# Inferencing and Validating Model

In [None]:
from mlflow.models import validate_serving_input

model_uri = model_info.model_uri

# The model is logged with an input example. MLflow converts
# it into the serving payload format for the deployed model endpoint,
# and saves it to 'serving_input_payload.json'

serving_payload = """{
  "inputs": [
    [
      5.7,
      3.8,
      1.7,
      0.3
    ],
    [
      4.8,
      3.4,
      1.6,
      0.2
    ],
    [
      5.6,
      2.9,
      3.6,
      1.3
    ]
  ]
}"""

# Validate the serving payload works on the model
validate_serving_input(model_uri, serving_payload)

array([0, 0, 1])

# Load the Model for Prediction as a Generic Python Function

In [11]:
loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)

predictions = loaded_model.predict(X_test)

iris_features_name = datasets.load_iris().feature_names
print(f"iris_features_name: {iris_features_name}")

result = pd.DataFrame(X_test, columns = iris_features_name)
result["actual_class"] = y_test
result["predcited_class"] = predictions

result

iris_features_name: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),actual_class,predcited_class
0,4.8,3.1,1.6,0.2,0,0
1,4.6,3.2,1.4,0.2,0,0
2,6.8,3.2,5.9,2.3,2,2
3,5.7,3.8,1.7,0.3,0,0
4,6.3,2.5,5.0,1.9,2,2
5,6.4,3.1,5.5,1.8,2,2
6,5.8,2.8,5.1,2.4,2,2
7,5.6,2.9,3.6,1.3,1,1
8,5.1,3.5,1.4,0.3,0,0
9,6.3,3.3,6.0,2.5,2,2


# Inference Using a Specific Version of the Model from Model Registry

In [14]:
import mlflow.sklearn

model_name="tracking-iris-model"
model_version="latest"

model_uri = f"models:/{model_name}/{model_version}"

model = mlflow.sklearn.load_model(model_uri)
model

In [18]:
y_pred_new = model.predict(X_test)

pd.DataFrame({
    "actual": y_test,
    "predicted": y_pred_new
}).head(5)

Unnamed: 0,actual,predicted
0,0,0
1,0,0
2,2,2
3,0,0
4,2,2
