In [41]:
import mlflow
from mlflow.models import infer_signature

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [42]:
X,y = datasets.load_iris(return_X_y=True)
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("y_train : ",len(y_train))
print("X_train : ",X_train.shape)
print("y_test : ", len(y_test))
print("X_test : ",X_test.shape)

y_train :  120
X_train :  (120, 4)
y_test :  30
X_test :  (30, 4)


In [43]:
#logistic parameters
# Define the model hyperparameters
params = {
    "solver": "lbfgs",
    "max_iter": 1000,
    "multi_class": "auto",
    "random_state": 8888,
}
# train the model
lr = LogisticRegression(**params)
lr.fit(X_train,y_train)

In [44]:
# Predict on the test set
y_pred = lr.predict(X_test)
# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
print("accuracy : ",accuracy)

accuracy :  1.0


In [45]:
# Set our tracking server uri for logging
mlflow_uri ="http://127.0.0.1:8080"
mlflow.set_experiment(mlflow_uri)

# # Create a new MLflow Experiment
mlflow.set_experiment("v1_getting_start_mlflow")

2024/09/19 22:18:24 INFO mlflow.tracking.fluent: Experiment with name 'http://127.0.0.1:8080' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///Users/tharhtet/Documents/github/ML-in-Prod-batch-1/8_Experiment_Tracking/mlruns/188622278058537407', creation_time=1726750877159, experiment_id='188622278058537407', last_update_time=1726750877159, lifecycle_stage='active', name='v1_getting_start_mlflow', tags={}>

In [46]:
# Start an MLflow run
with mlflow.start_run():
   # Log the hyperparameters
    mlflow.log_params(params)

    # Log the loss metric
    mlflow.log_metric("accuracy", accuracy)

       # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic LR model for iris data")

    # Infer the model signature
    signature = infer_signature(X_train, lr.predict(X_train))

    # Log the model
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train,
        registered_model_name="v1_getting_start_mlflow",
    )


Registered model 'tracking-quickstart' already exists. Creating a new version of this model...
Created version '3' of model 'tracking-quickstart'.


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

In [47]:
print("Model URI : ",model_info.model_uri)

Model URI :  runs:/c5d0eae23e87442dbc09182020b91c6e/iris_model


### Load and predict 

In [48]:
X,y = datasets.load_iris(return_X_y=True)
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("y_train : ",len(y_train))
print("X_train : ",X_train.shape)
print("y_test : ", len(y_test))
print("X_test : ",X_test.shape)

y_train :  120
X_train :  (120, 4)
y_test :  30
X_test :  (30, 4)


In [49]:

model_uri = "runs:/ce1986c938e44953ab542b0488cb7738/iris_model"

# Load the model back for predictions as a generic Python Function model
loaded_model = mlflow.pyfunc.load_model(model_uri)

predictions = loaded_model.predict(X_test)



In [50]:
import numpy as np
print(np.sum(predictions==y_test)/len(y_test) * 100)

100.0
