In [1]:
pip show azure-ai-ml

Name: azure-ai-ml
Version: 1.8.0
Summary: Microsoft Azure Machine Learning Client Library for Python
Home-page: https://github.com/Azure/azure-sdk-for-python
Author: Microsoft Corporation
Author-email: azuresdkengsysadmins@microsoft.com
License: MIT License
Location: /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages
Requires: azure-common, azure-core, azure-mgmt-core, azure-storage-blob, azure-storage-file-datalake, azure-storage-file-share, colorama, isodate, jsonschema, marshmallow, msrest, opencensus-ext-azure, pydash, pyjwt, pyyaml, strictyaml, tqdm, typing-extensions
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [2]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient

# Try DefaultAzureCredential first and probe it immediately by requesting a
# token for the Azure management scope, so a broken credential is detected
# here rather than on the first workspace call.
try:
    credential = DefaultAzureCredential()
    credential.get_token("https://management.azure.com/.default")
except Exception:
    # Deliberate best-effort: on any failure, switch to an interactive
    # browser sign-in instead of aborting the notebook.
    credential = InteractiveBrowserCredential()

In [3]:
# Build the workspace client from the workspace config file, authenticating
# with the credential selected earlier in the notebook.
ml_client = MLClient.from_config(credential=credential)

Found the config file in: /config.json


In [7]:
import os

# Folder that receives the training scripts written by the %%writefile cells.
script_folder = "src"
os.makedirs(script_folder, exist_ok=True)  # no error when it already exists
print(f"{script_folder} folder created")

src folder created


In [13]:
%%writefile $script_folder/train-model-mlflow.py
import mlflow
import argparse
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt


def main(args):
    """Run the training pipeline: load, split, train, then evaluate.

    Args:
        args: Parsed command-line arguments; ``training_data`` and
            ``reg_rate`` are read from it.
    """
    dataset = get_data(args.training_data)
    features_train, features_test, labels_train, labels_test = split_data(dataset)
    classifier = train_model(args.reg_rate, features_train, labels_train)
    evaluate_model(classifier, features_test, labels_test)


def parse_args(argv=None):
    """Parse the command-line options of the training script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads the process arguments, as before.

    Returns:
        argparse.Namespace with ``training_data`` (str) and ``reg_rate``
        (float, default 0.01).

    Raises:
        SystemExit: Raised by argparse when ``--training_data`` is missing or
            an option value cannot be converted.
    """
    parser = argparse.ArgumentParser(
        description="Train a logistic regression model and track it with MLflow."
    )
    # Required: without a path the script would only fail later, inside
    # pd.read_csv, with a far less helpful error message.
    parser.add_argument(
        "--training_data",
        dest="training_data",
        type=str,
        required=True,
        help="Path to the training CSV file.",
    )
    parser.add_argument(
        "--reg_rate",
        dest="reg_rate",
        type=float,
        default=0.01,
        help="Regularization rate; the model is trained with C = 1 / reg_rate.",
    )
    return parser.parse_args(argv)

def get_data(path):
    """Load the CSV at ``path`` and return it as a pandas DataFrame."""
    return pd.read_csv(path)

def split_data(df):
    """Split the frame into train and test feature/label arrays.

    Args:
        df: DataFrame containing the eight feature columns listed below and
            the ``Diabetic`` label column.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``; 30% of the rows go to
        the test set, with a fixed ``random_state`` of 0.
    """
    feature_columns = [
        'Pregnancies',
        'PlasmaGlucose',
        'DiastolicBloodPressure',
        'TricepsThickness',
        'SerumInsulin',
        'BMI',
        'DiabetesPedigree',
        'Age',
    ]
    features = df[feature_columns].values
    labels = df['Diabetic'].values
    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels, test_size=0.30, random_state=0
    )
    return features_train, features_test, labels_train, labels_test

def train_model(reg_rate, X_train, y_train):
    """Log the regularization rate to MLflow and fit a logistic regression.

    Args:
        reg_rate: Regularization rate; the estimator receives ``C = 1 / reg_rate``.
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``LogisticRegression`` estimator.
    """
    mlflow.log_param(key="Regularization Rate", value=reg_rate)
    # C is passed as the reciprocal of the requested regularization rate.
    classifier = LogisticRegression(C=1 / reg_rate, solver="liblinear")
    classifier.fit(X_train, y_train)
    return classifier

def evaluate_model(model, X_test, y_test):
    """Score the model on the test split and log the results to MLflow.

    Prints and logs the "Accuracy" and "AUC" metrics, then writes the ROC
    curve to "ROC-Curve.png" in the working directory and logs that file as
    a run artifact.

    Args:
        model: Fitted classifier exposing ``predict`` and ``predict_proba``.
        X_test: Test features.
        y_test: Test labels.
    """
    y_hat = model.predict(X_test)
    # Accuracy is the fraction of predictions that equal the true label.
    acc = np.average(y_test == y_hat)
    print(f"Accuracy:{acc}")
    mlflow.log_metric(key="Accuracy", value=acc)

    # Column 1 of predict_proba is used as the score; presumably this is the
    # probability of the positive (Diabetic == 1) class -- confirm against
    # model.classes_.
    y_scores = model.predict_proba(X_test)
    auc = roc_auc_score(y_test, y_scores[:, 1])
    print(f"AUC:{auc}")
    mlflow.log_metric(key="AUC", value=auc)

    # The thresholds returned by roc_curve are not needed for the plot.
    fpr, tpr, _ = roc_curve(y_test, y_scores[:, 1])
    fig = plt.figure(figsize=(6, 4))
    # Chance-level reference line: the diagonal from (0, 0) to (1, 1).
    # (The previous anti-diagonal from (0, 1) to (1, 0) was incorrect.)
    plt.plot([0, 1], [0, 1], "k--")
    plt.plot(fpr, tpr)
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title('ROC Curve')
    plt.savefig("ROC-Curve.png")
    mlflow.log_artifact(local_path="ROC-Curve.png")

if __name__ == "__main__":
    # Script entry point: parse the CLI options and run the pipeline, then
    # print a separator banner followed by blank lines.
    main(parse_args())
    print("*" * 60)
    print("\n\n")

Overwriting src/train-model-mlflow.py


In [14]:
from azure.ai.ml import command


# Describe the command job: upload ./src and run the manually-instrumented
# MLflow training script on the "aml-cluster" compute target.
job = command(
    code="./src",
    command="python train-model-mlflow.py --training_data diabetes.csv",
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    compute="aml-cluster",
    display_name="diabetes-train-mlflow",
    experiment_name="diabetes_training",
    tags={"model_type": "LogisticRegression"},
)

# Submit the job and print the studio link where it can be monitored.
returned_job = ml_client.create_or_update(entity=job)
aml_url = returned_job.studio_url
print(f"Monitor your Job at: {aml_url}")

[32mUploading src (0.53 MBs):   0%|          | 0/529814 [00:00<?, ?it/s][32mUploading src (0.53 MBs):   2%|▏         | 10098/529814 [00:00<00:07, 74119.07it/s][32mUploading src (0.53 MBs): 100%|██████████| 529814/529814 [00:00<00:00, 3831599.00it/s]
[39m



Monitor your Job at: https://ml.azure.com/runs/magenta_basket_56g5msb8md?wsid=/subscriptions/18a1f27f-edf5-495e-9acb-753c93335294/resourcegroups/rg-dp100-lc88cedc90b76441796/workspaces/mlw-dp100-lc88cedc90b76441796&tid=6a1d2f96-8cdf-4d1a-943d-7b73f4dfbb6d


In [11]:
%%writefile $script_folder/train-model-autolog.py
import mlflow
import argparse
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt


def main(args):
    """Enable MLflow autologging, then load, split, train and evaluate.

    Args:
        args: Parsed command-line arguments; ``training_data`` and
            ``reg_rate`` are read from it.
    """
    # Autologging is switched on before any data loading or model fitting.
    mlflow.autolog()
    dataset = get_data(args.training_data)
    features_train, features_test, labels_train, labels_test = split_data(dataset)
    classifier = train_model(args.reg_rate, features_train, labels_train)
    evaluate_model(classifier, features_test, labels_test)

def parse_args(argv=None):
    """Parse the command-line options of the autolog training script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads the process arguments, as before.

    Returns:
        argparse.Namespace with ``training_data`` (str) and ``reg_rate``
        (float, default 0.01).

    Raises:
        SystemExit: Raised by argparse when ``--training_data`` is missing or
            an option value cannot be converted.
    """
    parser = argparse.ArgumentParser(
        description="Train a logistic regression model with MLflow autologging."
    )
    # Required: without a path the script would only fail later, inside
    # pd.read_csv, with a far less helpful error message.
    parser.add_argument(
        "--training_data",
        dest="training_data",
        type=str,
        required=True,
        help="Path to the training CSV file.",
    )
    parser.add_argument(
        "--reg_rate",
        dest="reg_rate",
        type=float,
        default=0.01,
        help="Regularization rate; the model is trained with C = 1 / reg_rate.",
    )
    return parser.parse_args(argv)

def get_data(path):
    """Read the CSV file at ``path`` into a pandas DataFrame and return it."""
    return pd.read_csv(path)

def split_data(df):
    """Separate features from the label and make a 70/30 train/test split.

    Args:
        df: DataFrame containing the eight feature columns listed below and
            the ``Diabetic`` label column.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``, produced with
        ``test_size=0.30`` and ``random_state=0``.
    """
    feature_columns = [
        'Pregnancies',
        'PlasmaGlucose',
        'DiastolicBloodPressure',
        'TricepsThickness',
        'SerumInsulin',
        'BMI',
        'DiabetesPedigree',
        'Age',
    ]
    features = df[feature_columns].values
    labels = df['Diabetic'].values
    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels, test_size=0.30, random_state=0
    )
    return features_train, features_test, labels_train, labels_test

def train_model(reg_rate, X_train, y_train):
    """Fit a logistic regression after logging the regularization rate.

    The rate is logged explicitly with ``mlflow.log_param`` in addition to
    whatever autologging records.

    Args:
        reg_rate: Regularization rate; the estimator receives ``C = 1 / reg_rate``.
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``LogisticRegression`` estimator.
    """
    mlflow.log_param(key="Regularization Rate", value=reg_rate)
    # C is passed as the reciprocal of the requested regularization rate.
    classifier = LogisticRegression(C=1 / reg_rate, solver="liblinear")
    classifier.fit(X_train, y_train)
    return classifier

def evaluate_model(model, X_test, y_test):
    """Score the model on the test split and save the ROC curve.

    Accuracy and AUC are printed to stdout only; this function makes no
    explicit ``mlflow.log_metric`` calls. The ROC curve is written to
    "ROC-Curve.png" in the working directory and logged as a run artifact,
    so the plot is not discarded when the script exits.

    Args:
        model: Fitted classifier exposing ``predict`` and ``predict_proba``.
        X_test: Test features.
        y_test: Test labels.
    """
    y_hat = model.predict(X_test)
    # Accuracy is the fraction of predictions that equal the true label.
    acc = np.average(y_test == y_hat)
    print(f"Accuracy:{acc}")

    # Column 1 of predict_proba is used as the score; presumably this is the
    # probability of the positive (Diabetic == 1) class -- confirm against
    # model.classes_.
    y_scores = model.predict_proba(X_test)
    auc = roc_auc_score(y_test, y_scores[:, 1])
    print(f"AUC:{auc}")

    # The thresholds returned by roc_curve are not needed for the plot.
    fpr, tpr, _ = roc_curve(y_test, y_scores[:, 1])
    fig = plt.figure(figsize=(6, 4))
    # Chance-level reference line: the diagonal from (0, 0) to (1, 1).
    # (The previous anti-diagonal from (0, 1) to (1, 0) was incorrect.)
    plt.plot([0, 1], [0, 1], "k--")
    plt.plot(fpr, tpr)
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title('ROC Curve')
    # Previously the figure was built and then dropped; persist and log it,
    # as the manually-instrumented sibling script does.
    plt.savefig("ROC-Curve.png")
    mlflow.log_artifact(local_path="ROC-Curve.png")

if __name__ == "__main__":
    # Script entry point: parse the CLI options and run the pipeline, then
    # print a separator banner followed by blank lines.
    main(parse_args())
    print("*" * 60)
    print("\n\n")

Writing src/train-model-autolog.py


In [12]:
from azure.ai.ml import command

# Describe the command job: upload ./src and run the autologging variant of
# the training script on the "aml-cluster" compute target.
job = command(
    code="./src",
    command="python train-model-autolog.py --training_data=diabetes.csv",
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    compute="aml-cluster",
    display_name="diabetes-train-autolog",
    experiment_name="diabetes_training",
)

# Submit the job and print the studio link where it can be monitored.
returned_job = ml_client.create_or_update(entity=job)
aml_url = returned_job.studio_url
print(f"Monitor Your Job at: {aml_url}")

[32mUploading src (0.53 MBs): 100%|██████████| 529814/529814 [00:00<00:00, 3717317.05it/s]
[39m



Monitor Your Job at: https://ml.azure.com/runs/happy_yak_958khbzz4g?wsid=/subscriptions/18a1f27f-edf5-495e-9acb-753c93335294/resourcegroups/rg-dp100-lc88cedc90b76441796/workspaces/mlw-dp100-lc88cedc90b76441796&tid=6a1d2f96-8cdf-4d1a-943d-7b73f4dfbb6d


<h2> List the Experiments in the Workspace </h2>

In [15]:
import mlflow

# Fetch the experiments visible through MLflow and print each one's name.
experiments = mlflow.search_experiments()

for exp in experiments:
    print(exp.name)

diabetes_training


<h2> Retrieve a Specific Experiment by Name</h2>

In [16]:
# Look up one experiment by its name; later cells use its experiment_id.
experiment_name = "diabetes_training"
exp = mlflow.get_experiment_by_name(experiment_name)
print(exp)

<Experiment: artifact_location='', creation_time=1690533650515, experiment_id='e3592d90-8f22-464a-9fd1-7e5aac494842', last_update_time=None, lifecycle_stage='active', name='diabetes_training', tags={}>


<h2> Retrieve All Runs (Jobs) of a Specific Experiment </h2>

In [17]:
# All runs of the experiment, returned as a DataFrame (one row per run).
mlflow.search_runs(experiment_ids=[exp.experiment_id])

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.AUC,metrics.Accuracy,metrics.training_precision_score,metrics.training_accuracy_score,...,params.n_jobs,params.solver,params.intercept_scaling,params.fit_intercept,tags.mlflow.user,tags.model_type,tags.mlflow.rootRunId,tags.mlflow.runName,tags.estimator_name,tags.estimator_class
0,sincere_planet_11yjdlybsd,e3592d90-8f22-464a-9fd1-7e5aac494842,FAILED,,2023-07-28 08:43:30.273000+00:00,2023-07-28 08:44:37.925000+00:00,0.848321,0.774,,,...,,,,,Ritish Adhikari,LogisticRegression,sincere_planet_11yjdlybsd,diabetes-train-mlflow,,
1,happy_yak_958khbzz4g,e3592d90-8f22-464a-9fd1-7e5aac494842,FINISHED,,2023-07-28 08:51:07.119000+00:00,2023-07-28 08:52:32.727000+00:00,,,0.78576,0.790857,...,,liblinear,1.0,True,Ritish Adhikari,,happy_yak_958khbzz4g,diabetes-train-autolog,LogisticRegression,sklearn.linear_model._logistic.LogisticRegression
2,magenta_basket_56g5msb8md,e3592d90-8f22-464a-9fd1-7e5aac494842,FINISHED,,2023-07-28 08:52:37.560000+00:00,2023-07-28 08:52:50.731000+00:00,0.84832,0.774,,,...,,,,,Ritish Adhikari,LogisticRegression,magenta_basket_56g5msb8md,diabetes-train-mlflow,,


<h2> Sort Runs by Start Time and Return Only the 2 Most Recent </h2>

In [22]:
# Newest runs first, limited to two rows.
mlflow.search_runs(
    experiment_ids=[exp.experiment_id],
    order_by=["start_time DESC"],
    max_results=2,
)

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.AUC,metrics.Accuracy,metrics.training_precision_score,metrics.training_accuracy_score,...,params.n_jobs,params.solver,params.intercept_scaling,params.fit_intercept,tags.mlflow.user,tags.model_type,tags.mlflow.rootRunId,tags.mlflow.runName,tags.estimator_name,tags.estimator_class
0,magenta_basket_56g5msb8md,e3592d90-8f22-464a-9fd1-7e5aac494842,FINISHED,,2023-07-28 08:52:37.560000+00:00,2023-07-28 08:52:50.731000+00:00,0.84832,0.774,,,...,,,,,Ritish Adhikari,LogisticRegression,magenta_basket_56g5msb8md,diabetes-train-mlflow,,
1,happy_yak_958khbzz4g,e3592d90-8f22-464a-9fd1-7e5aac494842,FINISHED,,2023-07-28 08:51:07.119000+00:00,2023-07-28 08:52:32.727000+00:00,,,0.78576,0.790857,...,,liblinear,1.0,True,Ritish Adhikari,,happy_yak_958khbzz4g,diabetes-train-autolog,LogisticRegression,sklearn.linear_model._logistic.LogisticRegression


In [37]:
# Filter runs on a metric, a tag and the run status in a single query string.
query="metrics.AUC > 0.8 and tags.model_type='LogisticRegression' and attributes.status = 'Failed' "

mlflow.search_runs(
    experiment_ids=[exp.experiment_id],
    filter_string=query,
)

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.AUC,metrics.Accuracy,params.Regularization Rate,tags.mlflow.user,tags.model_type,tags.mlflow.rootRunId,tags.mlflow.runName
0,sincere_planet_11yjdlybsd,e3592d90-8f22-464a-9fd1-7e5aac494842,FAILED,,2023-07-28 08:43:30.273000+00:00,2023-07-28 08:44:37.925000+00:00,0.848321,0.774,0.01,Ritish Adhikari,LogisticRegression,sincere_planet_11yjdlybsd,diabetes-train-mlflow


<a href="https://learn.microsoft.com/en-us/azure/machine-learning/how-to-track-experiments-mlflow?view=azureml-api-2">Azure Machine Learning documentation: query and compare experiments and runs with MLflow</a>