In [1]:
!pip install mlflow

Collecting mlflow
  Downloading mlflow-2.15.1-py3-none-any.whl (26.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.3/26.3 MB[0m [31m40.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting alembic!=1.10.0,<2
  Downloading alembic-1.13.2-py3-none-any.whl (232 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.0/233.0 kB[0m [31m23.9 MB/s[0m eta [36m0:00:00[0m
Collecting sqlalchemy<3,>=1.4.0
  Downloading SQLAlchemy-2.0.32-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m85.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m
Collecting querystring-parser<2
  Downloading querystring_parser-1.2.4-py2.py3-none-any.whl (7.9 kB)
Collecting mlflow-skinny==2.15.1
  Downloading mlflow_skinny-2.15.1-py3-none-any.whl (5.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m64.8 MB/s[0m eta [36m0:00:00[0mta [36m0:

In [2]:
!pip install azureml-mlflow



In [28]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score,roc_curve
import mlflow
from sklearn.linear_model import LogisticRegression
from matplotlib import pyplot as plt
from sklearn.tree import DecisionTreeClassifier

In [4]:
# Prefer the default credential chain; if it cannot be built or cannot obtain
# a management-plane token, fall back to an interactive browser login.
try:
    credential = DefaultAzureCredential()
    # Requesting a token up front surfaces a broken credential here rather than later.
    credential.get_token("https://management.azure.com/.default")
except Exception:
    credential = InteractiveBrowserCredential()

In [5]:
# Build the workspace client from the local config file, authenticating with
# the credential resolved earlier in the notebook.
mlClient = MLClient.from_config(credential=credential)

Found the config file in: /config.json


In [6]:
!pip show mlflow

Name: mlflow
Version: 2.15.1
Summary: MLflow is an open source platform for the complete machine learning lifecycle
Home-page: 
Author: 
Author-email: 
License: Copyright 2018 Databricks, Inc.  All rights reserved.
        
                                        Apache License
                                   Version 2.0, January 2004
                                http://www.apache.org/licenses/
        
           TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
        
           1. Definitions.
        
              "License" shall mean the terms and conditions for use, reproduction,
              and distribution as defined by Sections 1 through 9 of this document.
        
              "Licensor" shall mean the copyright owner or entity authorized by
              the copyright owner that is granting the License.
        
              "Legal Entity" shall mean the union of the acting entity and all
              other entities that contr

In [9]:
!mkdir data

In [10]:
# Copy the lab's diabetes dataset into the local data/ directory
# (expects the azure-ml-labs checkout to be present in the working directory).
!cp azure-ml-labs/Labs/07/data/diabetes.csv data/diabetes.csv

In [12]:
# Load the diabetes dataset from the local data/ directory and preview it.
df = pd.read_csv("./data/diabetes.csv")
df.head()

Unnamed: 0,PatientID,Pregnancies,PlasmaGlucose,DiastolicBloodPressure,TricepsThickness,SerumInsulin,BMI,DiabetesPedigree,Age,Diabetic
0,1354778,0,171,80,34,23,43.509726,1.213191,21,0
1,1147438,8,92,93,47,36,21.240576,0.158365,23,0
2,1640031,7,115,47,52,35,41.511523,0.079019,23,0
3,1883350,9,103,78,25,304,29.582192,1.28287,43,1
4,1424119,1,85,59,27,35,42.604536,0.549542,22,0


In [13]:
# Columns used as model inputs.
FEATS = [
    "Pregnancies",
    "PlasmaGlucose",
    "DiastolicBloodPressure",
    "TricepsThickness",
    "SerumInsulin",
    "BMI",
    "DiabetesPedigree",
    "Age",
]
# Column the classifiers are trained to predict.
TARGET = "Diabetic"

# Plain arrays for scikit-learn: feature matrix and label vector.
X = df[FEATS].values
y = df[TARGET].values

In [15]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=0,
)

<h3>Creating an MLflow Experiment</h3>

In [17]:
# Choose the experiment that the runs below are recorded under.
experimentName = "mlflow-experiment-ritish"
mlflow.set_experiment(experiment_name=experimentName)

2024/08/25 11:59:08 INFO mlflow.tracking.fluent: Experiment with name 'mlflow-experiment-ritish' does not exist. Creating a new experiment.


<Experiment: artifact_location='', creation_time=1724587150073, experiment_id='a4421bcd-406e-42f4-82b1-3a9c982b6440', last_update_time=None, lifecycle_stage='active', name='mlflow-experiment-ritish', tags={}>

In [19]:
# Autologging demo: with scikit-learn autologging switched on, fitting the model
# inside the run is enough for MLflow to record the run's details -- no explicit
# log_param / log_metric calls are made here.
with mlflow.start_run():
    mlflow.sklearn.autolog()  # enable autologging before the fit
    # C is the inverse of the regularization rate (rate 0.1).
    model = LogisticRegression(C=1 / 0.1, solver="liblinear").fit(X=X_train, y=y_train)



In [21]:
# Turn scikit-learn autologging off so the runs that follow record only what is logged explicitly.

mlflow.sklearn.autolog(disable=True)

In [23]:
# Manually logged run: logistic regression with regularization rate 0.1.
with mlflow.start_run():
    regRate = 0.1
    # scikit-learn takes the inverse of the regularization rate as C.
    model = LogisticRegression(C=1 / regRate, solver="liblinear").fit(X=X_train, y=y_train)

    # Accuracy = fraction of held-out rows whose predicted label matches the truth.
    yHat = model.predict(X=X_test)
    acc = np.mean(yHat == y_test)

    mlflow.log_param("Regularization Rate", regRate)
    mlflow.log_metric("Accuracy", acc)

In [24]:
# Manually logged run: logistic regression with regularization rate 0.01.
with mlflow.start_run():
    regRate = 0.01
    # scikit-learn takes the inverse of the regularization rate as C.
    model = LogisticRegression(C=1 / regRate, solver="liblinear").fit(X=X_train, y=y_train)

    # Accuracy = fraction of held-out rows whose predicted label matches the truth.
    yHat = model.predict(X=X_test)
    acc = np.mean(yHat == y_test)

    mlflow.log_param("Regularization Rate", regRate)
    mlflow.log_metric("Accuracy", acc)

<h3> Log an Artifact </h3>

In [32]:
# Train a decision tree, then log its accuracy and a ROC-curve image to the run.
with mlflow.start_run():
    # Fixed seed: without it the fitted tree (and therefore the logged accuracy
    # and ROC curve) can differ from one execution to the next.
    model = DecisionTreeClassifier(random_state=0)
    model.fit(X=X_train, y=y_train)
    yHat = model.predict(X=X_test)
    acc = np.average(a=yHat == y_test)
    yScores = model.predict_proba(X=X_test)

    # The second predict_proba column is used as the positive-class score
    # (assumes the labels are 0/1 so that column 1 corresponds to class 1).
    fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=yScores[:, 1])

    fig, ax = plt.subplots(figsize=(6, 4))
    # Diagonal reference line: the ROC of a random classifier.
    ax.plot([0, 1], [0, 1], "k--")
    ax.plot(fpr, tpr)
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title("ROC Curve")
    # Write the figure to disk first; log_artifact uploads the file by path.
    fig.savefig("ROC-Curve.png")

    mlflow.log_param("Estimator", "DecisionTreeClassifier")
    mlflow.log_metric("Accuracy", acc)
    mlflow.log_artifact("ROC-Curve.png")