In [1]:
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import mlflow
from mlflow.models import infer_signature

In [2]:
# Set the tracking uri

mlflow.set_tracking_uri(uri="Enter your uri")

In [3]:
# Load the dataset

X,y = datasets.load_iris(return_X_y=True)
X

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [4]:
# Split the data into training and test sets

X_train,X_test,y_train,y_test =train_test_split(X,y,test_size=0.20)  # Here X,y are independent and dependent feature


In [5]:
# Define model hyperparameters
params = {"penalty":"l2",
          "solver":"lbfgs", 
          "max_iter": 1000, 
          "multi_class": "auto", 
          "random_state": 8888
}
# The params dictionary stores hyperparameters for Logistic Regression.
# "l2" regularization prevents overfitting.
# "lbfgs" is a solver suited for large-scale problems.
# max_iter=1000 ensures sufficient optimization steps.
# "auto" selects the best multiclass strategy.
# random_state=8888 ensures consistent results.

# Train the model
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)



In [6]:
# Prediction on the test set
y_pred = lr.predict(X_test)
y_pred

array([1, 0, 0, 2, 0, 1, 2, 2, 0, 1, 2, 1, 1, 2, 0, 0, 0, 1, 0, 1, 2, 1,
       2, 2, 2, 0, 0, 0, 2, 2])

In [7]:
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

0.9666666666666667


In [8]:
# MLFLOW tracking

mlflow.set_tracking_uri(uri="Enter your uri")

# Create a new MLFLOW experiment

mlflow.set_experiment("MLFLOW Quickstart")

# Start an MLFLOW run

with mlflow.start_run():

    # Log the hyperparameters
    mlflow.log_params(params)

    # Log the accuracy metrics
    mlflow.log_metric("accuracy",accuracy)

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic LR model for iris data")

    # Infer the model signature
    signature = infer_signature(X_train,lr.predict(X_train))
     
    # Log the model
    model_info = mlflow.sklearn.log_model(
        sk_model = lr,
        artifact_path = "iris_model",
        signature = signature,
        input_example = X_train,
        registered_model_name = "tracking-quickstart",

    )



2025/03/10 02:09:17 INFO mlflow.tracking.fluent: Experiment with name 'MLFLOW Quickstart' does not exist. Creating a new experiment.
Successfully registered model 'tracking-quickstart'.
2025/03/10 02:09:26 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 1


🏃 View run placid-snake-382 at: http://127.0.0.1:5000/#/experiments/382151501872381422/runs/d0793d84e2cb412f8cb557ce4b01d6e8
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/382151501872381422


Created version '1' of model 'tracking-quickstart'.


## Another Model with different parameters

In [17]:
# Define the parameters
params = {"solver":"newton-cg", 
          "max_iter": 1000, 
          "multi_class": "auto", 
          "random_state": 1000
}

In [18]:
# Train the model
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)



In [19]:
# Prediction on the test set
y_pred = lr.predict(X_test)
y_pred

array([1, 0, 0, 2, 0, 1, 2, 2, 0, 1, 2, 1, 1, 2, 0, 0, 0, 1, 0, 1, 2, 1,
       2, 2, 2, 0, 0, 0, 2, 2])

In [20]:
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

0.9666666666666667


In [21]:
# Create a new MLFLOW experiment

mlflow.set_experiment("MLFLOW Quickstart")

# Start an MLFLOW run

with mlflow.start_run():

    # Log the hyperparameters
    mlflow.log_params(params)

    # Log the accuracy metrics
    mlflow.log_metric("accuracy",accuracy)

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic LR model for iris data")

    # Infer the model signature
    signature = infer_signature(X_train,lr.predict(X_train))
     
    # Log the model
    model_info = mlflow.sklearn.log_model(
        sk_model = lr,
        artifact_path = "iris_model",
        signature = signature,
        input_example = X_train,
        registered_model_name = "tracking-quickstart",

    )


Registered model 'tracking-quickstart' already exists. Creating a new version of this model...
2025/03/10 02:18:40 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 2


🏃 View run receptive-zebra-644 at: http://127.0.0.1:5000/#/experiments/382151501872381422/runs/12b848947b4d4a2b8afca07bf6d7d070
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/382151501872381422


Created version '2' of model 'tracking-quickstart'.
