In [1]:
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import mlflow
from mlflow.models import infer_signature

In [2]:
##set the tracking uri
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

In [3]:
##load the dataset
X,y=datasets.load_iris(return_X_y=True)
#split ihe datainto tarining and testing
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20)

#Define model hyperparameters
params={"penalty":"l2","solver":"lbfgs","max_iter":1000,"multi_class":"auto","random_state":8888}

#train the model
lr=LogisticRegression(**params)#means ** is key argument
lr.fit(X_train,y_train)



In [4]:
X_test

array([[7.3, 2.9, 6.3, 1.8],
       [6.5, 3.2, 5.1, 2. ],
       [5.9, 3. , 5.1, 1.8],
       [5.6, 3. , 4.1, 1.3],
       [6.1, 2.6, 5.6, 1.4],
       [6.5, 3. , 5.8, 2.2],
       [6.4, 2.9, 4.3, 1.3],
       [4.9, 2.5, 4.5, 1.7],
       [6.6, 3. , 4.4, 1.4],
       [6.2, 2.8, 4.8, 1.8],
       [7.7, 3.8, 6.7, 2.2],
       [7.6, 3. , 6.6, 2.1],
       [5.8, 2.7, 5.1, 1.9],
       [5. , 3.5, 1.3, 0.3],
       [6.8, 2.8, 4.8, 1.4],
       [4.7, 3.2, 1.6, 0.2],
       [5. , 2. , 3.5, 1. ],
       [4.4, 3.2, 1.3, 0.2],
       [6.3, 3.4, 5.6, 2.4],
       [4.6, 3.2, 1.4, 0.2],
       [5.6, 2.5, 3.9, 1.1],
       [6. , 3.4, 4.5, 1.6],
       [5.1, 3.8, 1.9, 0.4],
       [6.5, 3. , 5.2, 2. ],
       [6.3, 3.3, 4.7, 1.6],
       [4.6, 3.4, 1.4, 0.3],
       [4.8, 3. , 1.4, 0.3],
       [5.9, 3. , 4.2, 1.5],
       [6.1, 3. , 4.6, 1.4],
       [5.7, 2.8, 4.5, 1.3]])

In [5]:
#prediction on test data
y_pred=lr.predict(X_test)
y_pred

array([2, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 0, 1, 0, 1, 0, 2, 0, 1, 1,
       0, 2, 1, 0, 0, 1, 1, 1])

In [6]:
accuracy=accuracy_score(y_test,y_pred)
accuracy

0.9666666666666667

In [8]:
##we need to save these model in form of artifacts so that we can keep track of them
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

##create a new mlflow experiment
mlflow.set_experiment("Mlflow quickstart")

##start an mlflow run
with mlflow.start_run():
    ## log hyperparameters
    mlflow.log_params(params)

    ##log the accuracy matrix
    mlflow.log_metric("accuracy",accuracy)

    #set a tag that we can use to remind oourselves what this run was for
    mlflow.set_tag("training info","Basic Lr model for iris data")

    ##infer the model signature: schema of ip and op is fixed over here
    signature=infer_signature(X_train,lr.predict(X_train))

    ##log the model
    model_info=mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train,
        registered_model_name="tracking-quickstart",
      )

2025/04/02 18:20:37 INFO mlflow.tracking.fluent: Experiment with name 'Mlflow quickstart' does not exist. Creating a new experiment.
Successfully registered model 'tracking-quickstart'.
2025/04/02 18:20:49 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 1
Created version '1' of model 'tracking-quickstart'.


🏃 View run puzzled-panda-817 at: http://127.0.0.1:5000/#/experiments/1/runs/7f7d6bee2e094b06bf360f99932ef7bc
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1


In [9]:
#Define model hyperparameters
params={"solver":"lbfgs","max_iter":1000,"multi_class":"auto","random_state":1088}

#train the model
lr=LogisticRegression(**params)#means ** is key argument
lr.fit(X_train,y_train)



In [10]:
#prediction on test data
y_pred=lr.predict(X_test)
y_pred

array([2, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 0, 1, 0, 1, 0, 2, 0, 1, 1,
       0, 2, 1, 0, 0, 1, 1, 1])

In [11]:
accuracy=accuracy_score(y_test,y_pred)
accuracy

0.9666666666666667

In [12]:
##we need to save these model in form of artifacts so that we can keep track of them
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

##create a new mlflow experiment
mlflow.set_experiment("Mlflow quickstart")

##start an mlflow run
with mlflow.start_run():
    ## log hyperparameters
    mlflow.log_params(params)

    ##log the accuracy matrix
    mlflow.log_metric("accuracy",accuracy)

    #set a tag that we can use to remind oourselves what this run was for
    mlflow.set_tag("training info","Basic Lr model for iris data")

    ##infer the model signature: schema of ip and op is fixed over here
    signature=infer_signature(X_train,lr.predict(X_train))

    ##log the model
    model_info=mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train,
        registered_model_name="tracking-quickstart",
      )

Registered model 'tracking-quickstart' already exists. Creating a new version of this model...
2025/04/02 18:21:08 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 2


🏃 View run thundering-dog-711 at: http://127.0.0.1:5000/#/experiments/1/runs/33e6d609055541d98285d08fbc1d16eb
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1


Created version '2' of model 'tracking-quickstart'.


In [13]:
model_info.model_uri

'runs:/33e6d609055541d98285d08fbc1d16eb/iris_model'

In [14]:
##Inferencing and validating my model


In [15]:
###load the model back for prediction as a generic python unction model


In [16]:

# Load the trained model
loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)

# Make predictions
predictions = loaded_model.predict(X_test)

# Load feature names
iris_feature_names = datasets.load_iris().feature_names

# Ensure X_test is in the correct format
result = pd.DataFrame(X_test, columns=iris_feature_names)

# Add actual and predicted class columns
result["actual_class"] = y_test
result["predicted_class"] = predictions

# Display the first few rows
print(result)

    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                 7.3               2.9                6.3               1.8   
1                 6.5               3.2                5.1               2.0   
2                 5.9               3.0                5.1               1.8   
3                 5.6               3.0                4.1               1.3   
4                 6.1               2.6                5.6               1.4   
5                 6.5               3.0                5.8               2.2   
6                 6.4               2.9                4.3               1.3   
7                 4.9               2.5                4.5               1.7   
8                 6.6               3.0                4.4               1.4   
9                 6.2               2.8                4.8               1.8   
10                7.7               3.8                6.7               2.2   
11                7.6               3.0 

In [17]:
result[:5]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),actual_class,predicted_class
0,7.3,2.9,6.3,1.8,2,2
1,6.5,3.2,5.1,2.0,2,2
2,5.9,3.0,5.1,1.8,2,2
3,5.6,3.0,4.1,1.3,1,1
4,6.1,2.6,5.6,1.4,2,2


In [18]:
###Model registry:
###

In [19]:
##log the model
model_information=mlflow.sklearn.log_model(
    sk_model=lr,
    artifact_path="iris_model",
    signature=signature,
    input_example=X_train,
    registered_model_name="tracking-quickstart"
)

Registered model 'tracking-quickstart' already exists. Creating a new version of this model...
2025/04/02 18:21:18 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 3
Created version '3' of model 'tracking-quickstart'.


In [20]:
##create a new MLFLOW experiment
mlflow.set_experiment("MLFLOW Quickstart")

if mlflow.active_run():
    mlflow.end_run()

## start an mlflow run
with mlflow.start_run():
    ##log the hyperparamters
    mlflow.log_params(params)

    ##log the accuracy metrucs
    mlflow.log_metric("accuracy",1.0)

    ##set a tag we can use to remind ourseleves what this run was for
    mlflow.set_tag("Training Info2","Basic LR model for iris data")

    ##Infer the model signature
    signature=infer_signature(X_train,lr.predict(X_train))

    ##log the model
        ##log the model
    model_information=mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train,
    )

2025/04/02 18:21:18 INFO mlflow.tracking.fluent: Experiment with name 'MLFLOW Quickstart' does not exist. Creating a new experiment.


🏃 View run polite-conch-651 at: http://127.0.0.1:5000/#/experiments/1/runs/2a3b319e0d534f45b68066ac5f95f1c2
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1
🏃 View run intrigued-sloth-517 at: http://127.0.0.1:5000/#/experiments/2/runs/7d9868d55b2f4418abc2ec88f7a63a19
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/2


In [21]:
##inferencinf from model registry
import mlflow.sklearn
model_name="tracking-quickstart"
model_version="latest"

model_uri=f"models:/{model_name}/{model_version}"

model=mlflow.sklearn.load_model(model_uri)

In [22]:
model_uri

'models:/tracking-quickstart/latest'

In [25]:
y_pred_new=model.predict(X_test)
print(y_pred_new)
print(y_test)

[2 2 2 1 2 2 1 1 1 2 2 2 2 0 1 0 1 0 2 0 1 1 0 2 1 0 0 1 1 1]
[2 2 2 1 2 2 1 2 1 2 2 2 2 0 1 0 1 0 2 0 1 1 0 2 1 0 0 1 1 1]
