In [1]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient

try:
    credential=DefaultAzureCredential()
    # Request a token up front so a broken default credential fails here
    # rather than on the first workspace call.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Report why the default credential was rejected instead of silently
    # discarding the error, then fall back to an interactive browser sign-in.
    print(f"DefaultAzureCredential unavailable ({type(ex).__name__}: {ex}); "
          "falling back to InteractiveBrowserCredential")
    credential=InteractiveBrowserCredential()

# Build the workspace client from the config file that from_config locates.
mlClient=MLClient.from_config(credential=credential)

Found the config file in: /config.json


In [29]:
from azure.ai.ml import command
import mlflow
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes

In [2]:
# Folder the %%writefile cells write the training scripts into.
scriptFolder="src"

In [4]:
%%writefile $scriptFolder/train-model-autolog.py
import mlflow
import argparse
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score,roc_curve
from matplotlib import pyplot as plt

def main(args):
    """Run the training pipeline: load, split, train, evaluate.

    Args:
        args: Parsed arguments providing ``trainingData`` and ``regRate``.
    """
    # Switch MLflow autologging on before any training happens.
    mlflow.autolog()
    frame = getData(path=args.trainingData)
    trainX, testX, trainY, testY = splitData(df=frame)
    classifier = trainModel(
        regRate=args.regRate,
        XTrain=trainX,
        XTest=testX,
        yTrain=trainY,
        yTest=testY,
    )
    evalModel(model=classifier, XTest=testX, yTest=testY)


def getData(path):
    """Load the training CSV into a pandas DataFrame.

    Args:
        path: Location of the CSV; handed to ``pd.read_csv`` as
            ``filepath_or_buffer``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    print("Reading Data")
    return pd.read_csv(filepath_or_buffer=path)

def splitData(df:pd.DataFrame):
    """Split the frame into train/test feature and label arrays.

    Args:
        df: DataFrame holding the eight feature columns selected below and a
            ``Diabetic`` label column.

    Returns:
        Tuple ``(XTrain, XTest, yTrain, yTest)`` from ``train_test_split``
        with ``test_size=0.30`` and ``random_state=0``.
    """
    print("Splitting Data")
    featureColumns = [
        'Pregnancies', 'PlasmaGlucose', 'DiastolicBloodPressure',
        'TricepsThickness', 'SerumInsulin', 'BMI', 'DiabetesPedigree', 'Age',
    ]
    features = df[featureColumns].values
    labels = df['Diabetic'].values
    # The fixed random_state makes the split repeatable across runs.
    trainX, testX, trainY, testY = train_test_split(
        features, labels, test_size=0.30, random_state=0
    )
    return trainX, testX, trainY, testY

def trainModel(regRate,XTrain, XTest, yTrain, yTest):
    """Fit a logistic-regression classifier on the training split.

    Args:
        regRate: Regularization rate; must be > 0. The model's ``C`` is
            ``1/regRate``.
        XTrain: Training feature matrix.
        XTest: Unused; kept so existing callers keep working.
        yTrain: Training labels.
        yTest: Unused; kept so existing callers keep working.

    Returns:
        The fitted ``LogisticRegression`` model.

    Raises:
        ValueError: If ``regRate`` is not strictly positive.
    """
    # Validate before dividing: regRate == 0 would otherwise surface as a bare
    # ZeroDivisionError, and a negative rate would produce a negative C.
    if regRate <= 0:
        raise ValueError(f"regRate must be > 0, got {regRate}")
    print("Training Model")
    model=LogisticRegression(C=1/regRate,solver="liblinear")
    model.fit(X=XTrain,y=yTrain)
    return model

def evalModel(model,XTest,yTest):
    """Print the model's accuracy on the held-out split.

    Args:
        model: Fitted estimator exposing ``predict(X=...)``.
        XTest: Test feature matrix.
        yTest: True labels for ``XTest``.
    """
    predictions = model.predict(X=XTest)
    # The mean of the element-wise match mask is the share of correct predictions.
    accuracy = np.mean(predictions == yTest)
    print(f"Accuracy:{accuracy}")

def parseArgs():
    """Parse the command-line arguments of the training script.

    Returns:
        argparse.Namespace with ``trainingData`` (str) and ``regRate``
        (float, default 0.01).

    Raises:
        SystemExit: Raised by argparse when ``--trainingData`` is missing or
            an argument cannot be parsed.
    """
    parser=argparse.ArgumentParser()
    # Required: without a path the script would only fail later, inside
    # pd.read_csv, with a much less helpful error.
    parser.add_argument("--trainingData",dest="trainingData",type=str,required=True,
                        help="path to the training CSV file")
    parser.add_argument("--regRate",dest="regRate",type=float,default=0.01,
                        help="regularization rate (default: 0.01)")

    return parser.parse_args()


if __name__=="__main__":
    # Banner lines bracket the script's output in the job log.
    banner = "*" * 60
    print(banner)
    main(args=parseArgs())
    print(banner)

Writing src/train-model-autolog.py


In [11]:
# Submit a command job that runs the autolog training script on the cluster.
job=command(
    # Derived from scriptFolder so the uploaded code folder is always the one
    # the %%writefile cell wrote the script into.
    code=f"./{scriptFolder}",
    command="python train-model-autolog.py --trainingData data/diabetes.csv",
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    compute="aml-cluster",
    display_name="diabetes-train-autolog",
    experiment_name="diabetes-training"
)

returnedJobAuto=mlClient.jobs.create_or_update(job=job)
# Studio URL for following the run in the browser.
amlURLAuto=returnedJobAuto.studio_url
print(amlURLAuto)

[32mUploading src (0.52 MBs): 100%|██████████| 523899/523899 [00:01<00:00, 436461.86it/s]
[39m



https://ml.azure.com/runs/joyful_fish_m6jxy8m9c4?wsid=/subscriptions/18a1f27f-edf5-495e-9acb-753c93335294/resourcegroups/rg-dp100-labs/workspaces/mlw-dp100-labs&tid=6a1d2f96-8cdf-4d1a-943d-7b73f4dfbb6d


<h3> Enabling Autologging with scikit-learn </h3>

In [7]:
%%writefile $scriptFolder/train-model-sklearn-autolog.py
import mlflow
import argparse
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score,roc_curve
from matplotlib import pyplot as plt

def main(args):
    """Run the training pipeline with scikit-learn autologging enabled.

    Args:
        args: Parsed arguments providing ``trainingData`` and ``regRate``.
    """
    # Import the flavor explicitly instead of relying on ``import mlflow``
    # exposing the ``mlflow.sklearn`` submodule as an attribute.
    import mlflow.sklearn

    mlflow.sklearn.autolog()
    df=getData(path=args.trainingData)
    XTrain, XTest, yTrain, yTest=splitData(df=df)
    model=trainModel(regRate=args.regRate,XTrain=XTrain,XTest=XTest,yTrain=yTrain,yTest=yTest)
    evalModel(model=model,XTest=XTest,yTest=yTest)


def getData(path):
    """Load the training CSV into a pandas DataFrame.

    Args:
        path: Location of the CSV; handed to ``pd.read_csv`` as
            ``filepath_or_buffer``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    print("Reading Data")
    return pd.read_csv(filepath_or_buffer=path)

def splitData(df:pd.DataFrame):
    """Split the frame into train/test feature and label arrays.

    Args:
        df: DataFrame holding the eight feature columns selected below and a
            ``Diabetic`` label column.

    Returns:
        Tuple ``(XTrain, XTest, yTrain, yTest)`` from ``train_test_split``
        with ``test_size=0.30`` and ``random_state=0``.
    """
    print("Splitting Data")
    featureColumns = [
        'Pregnancies', 'PlasmaGlucose', 'DiastolicBloodPressure',
        'TricepsThickness', 'SerumInsulin', 'BMI', 'DiabetesPedigree', 'Age',
    ]
    features = df[featureColumns].values
    labels = df['Diabetic'].values
    # The fixed random_state makes the split repeatable across runs.
    trainX, testX, trainY, testY = train_test_split(
        features, labels, test_size=0.30, random_state=0
    )
    return trainX, testX, trainY, testY

def trainModel(regRate,XTrain, XTest, yTrain, yTest):
    """Fit a logistic-regression classifier on the training split.

    Args:
        regRate: Regularization rate; must be > 0. The model's ``C`` is
            ``1/regRate``.
        XTrain: Training feature matrix.
        XTest: Unused; kept so existing callers keep working.
        yTrain: Training labels.
        yTest: Unused; kept so existing callers keep working.

    Returns:
        The fitted ``LogisticRegression`` model.

    Raises:
        ValueError: If ``regRate`` is not strictly positive.
    """
    # Validate before dividing: regRate == 0 would otherwise surface as a bare
    # ZeroDivisionError, and a negative rate would produce a negative C.
    if regRate <= 0:
        raise ValueError(f"regRate must be > 0, got {regRate}")
    print("Training Model")
    model=LogisticRegression(C=1/regRate,solver="liblinear")
    model.fit(X=XTrain,y=yTrain)
    return model

def evalModel(model,XTest,yTest):
    """Print the model's accuracy on the held-out split.

    Args:
        model: Fitted estimator exposing ``predict(X=...)``.
        XTest: Test feature matrix.
        yTest: True labels for ``XTest``.
    """
    predictions = model.predict(X=XTest)
    # The mean of the element-wise match mask is the share of correct predictions.
    accuracy = np.mean(predictions == yTest)
    print(f"Accuracy:{accuracy}")

def parseArgs():
    """Parse the command-line arguments of the training script.

    Returns:
        argparse.Namespace with ``trainingData`` (str) and ``regRate``
        (float, default 0.01).

    Raises:
        SystemExit: Raised by argparse when ``--trainingData`` is missing or
            an argument cannot be parsed.
    """
    parser=argparse.ArgumentParser()
    # Required: without a path the script would only fail later, inside
    # pd.read_csv, with a much less helpful error.
    parser.add_argument("--trainingData",dest="trainingData",type=str,required=True,
                        help="path to the training CSV file")
    parser.add_argument("--regRate",dest="regRate",type=float,default=0.01,
                        help="regularization rate (default: 0.01)")

    return parser.parse_args()


if __name__=="__main__":
    # Banner lines bracket the script's output in the job log.
    banner = "*" * 60
    print(banner)
    main(args=parseArgs())
    print(banner)

Writing src/train-model-sklearn-autolog.py


In [12]:
# Submit a command job that runs the scikit-learn autolog training script.
job=command(
    # Derived from scriptFolder so the uploaded code folder is always the one
    # the %%writefile cell wrote the script into.
    code=f"./{scriptFolder}",
    command="python train-model-sklearn-autolog.py --trainingData data/diabetes.csv",
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    compute="aml-cluster",
    display_name="diabetes-train-autolog-sklearn",
    experiment_name="diabetes-training"
)

returnedJobSklearn=mlClient.jobs.create_or_update(job=job)
# Studio URL for following the run in the browser.
amlURLSklearn=returnedJobSklearn.studio_url
print(amlURLSklearn)

https://ml.azure.com/runs/khaki_vase_brfgpzbh34?wsid=/subscriptions/18a1f27f-edf5-495e-9acb-753c93335294/resourcegroups/rg-dp100-labs/workspaces/mlw-dp100-labs&tid=6a1d2f96-8cdf-4d1a-943d-7b73f4dfbb6d


<h3> Customizing Models with Inferred Signatures </h3>

In [24]:
%%writefile $scriptFolder/train-model-inferred.py
import mlflow
import argparse
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score,roc_curve
from matplotlib import pyplot as plt
import mlflow.sklearn
from mlflow.models.signature import infer_signature

def main(args):
    """Train the model and log it to MLflow with an inferred signature.

    Args:
        args: Parsed arguments providing ``trainingData`` and ``regRate``.
    """
    # Autologging stays on, but model logging is disabled because the model is
    # logged explicitly below with its own signature.
    mlflow.autolog(log_models=False)
    df=getData(path=args.trainingData)
    XTrain, XTest, yTrain, yTest=splitData(df=df)
    model=trainModel(regRate=args.regRate,XTrain=XTrain,XTest=XTest,yTrain=yTrain,yTest=yTest)
    yHat=evalModel(model=model,XTest=XTest,yTest=yTest)
    # Infer the signature from a matching pair: yHat holds the predictions
    # for XTest, so XTest (not XTrain) is the corresponding model input.
    signature=infer_signature(XTest,yHat)
    mlflow.sklearn.log_model(sk_model=model,artifact_path="modelRitishLearn",signature=signature)


def getData(path):
    """Load the training CSV into a pandas DataFrame.

    Args:
        path: Location of the CSV; handed to ``pd.read_csv`` as
            ``filepath_or_buffer``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    print("Reading Data")
    return pd.read_csv(filepath_or_buffer=path)

def splitData(df:pd.DataFrame):
    """Split the frame into train/test feature and label arrays.

    Args:
        df: DataFrame holding the eight feature columns selected below and a
            ``Diabetic`` label column.

    Returns:
        Tuple ``(XTrain, XTest, yTrain, yTest)`` from ``train_test_split``
        with ``test_size=0.30`` and ``random_state=0``.
    """
    print("Splitting Data")
    featureColumns = [
        'Pregnancies', 'PlasmaGlucose', 'DiastolicBloodPressure',
        'TricepsThickness', 'SerumInsulin', 'BMI', 'DiabetesPedigree', 'Age',
    ]
    features = df[featureColumns].values
    labels = df['Diabetic'].values
    # The fixed random_state makes the split repeatable across runs.
    trainX, testX, trainY, testY = train_test_split(
        features, labels, test_size=0.30, random_state=0
    )
    return trainX, testX, trainY, testY

def trainModel(regRate,XTrain, XTest, yTrain, yTest):
    """Fit a logistic-regression classifier on the training split.

    Args:
        regRate: Regularization rate; must be > 0. The model's ``C`` is
            ``1/regRate``.
        XTrain: Training feature matrix.
        XTest: Unused; kept so existing callers keep working.
        yTrain: Training labels.
        yTest: Unused; kept so existing callers keep working.

    Returns:
        The fitted ``LogisticRegression`` model.

    Raises:
        ValueError: If ``regRate`` is not strictly positive.
    """
    # Validate before dividing: regRate == 0 would otherwise surface as a bare
    # ZeroDivisionError, and a negative rate would produce a negative C.
    if regRate <= 0:
        raise ValueError(f"regRate must be > 0, got {regRate}")
    print("Training Model")
    model=LogisticRegression(C=1/regRate,solver="liblinear")
    model.fit(X=XTrain,y=yTrain)
    return model

def evalModel(model,XTest,yTest):
    """Print the accuracy on the held-out split and return the predictions.

    Args:
        model: Fitted estimator exposing ``predict(X=...)``.
        XTest: Test feature matrix.
        yTest: True labels for ``XTest``.

    Returns:
        The predictions ``model.predict`` produced for ``XTest``.
    """
    predictions = model.predict(X=XTest)
    # The mean of the element-wise match mask is the share of correct predictions.
    accuracy = np.mean(predictions == yTest)
    print(f"Accuracy:{accuracy}")
    return predictions

def parseArgs():
    """Parse the command-line arguments of the training script.

    Returns:
        argparse.Namespace with ``trainingData`` (str) and ``regRate``
        (float, default 0.01).

    Raises:
        SystemExit: Raised by argparse when ``--trainingData`` is missing or
            an argument cannot be parsed.
    """
    parser=argparse.ArgumentParser()
    # Required: without a path the script would only fail later, inside
    # pd.read_csv, with a much less helpful error.
    parser.add_argument("--trainingData",dest="trainingData",type=str,required=True,
                        help="path to the training CSV file")
    parser.add_argument("--regRate",dest="regRate",type=float,default=0.01,
                        help="regularization rate (default: 0.01)")

    return parser.parse_args()


if __name__=="__main__":
    # Banner lines bracket the script's output in the job log.
    banner = "*" * 60
    print(banner)
    main(args=parseArgs())
    print(banner)

Overwriting src/train-model-inferred.py


In [25]:
# Submit a command job that runs the inferred-signature training script.
job=command(
    # Derived from scriptFolder so the uploaded code folder is always the one
    # the %%writefile cell wrote the script into.
    code=f"./{scriptFolder}",
    command="python train-model-inferred.py --trainingData data/diabetes.csv",
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    compute="aml-cluster",
    display_name="diabetes-train-inferred",
    experiment_name="diabetes-training"
)

returnedJobInferred=mlClient.jobs.create_or_update(job=job)
# Studio URL for following the run in the browser.
amlURLInferred=returnedJobInferred.studio_url
print(amlURLInferred)

[32mUploading src (0.53 MBs):   0%|          | 0/525898 [00:00<?, ?it/s][32mUploading src (0.53 MBs): 100%|██████████| 525898/525898 [00:00<00:00, 7508104.83it/s]
[39m



https://ml.azure.com/runs/cyan_pear_ws0nncz2mx?wsid=/subscriptions/18a1f27f-edf5-495e-9acb-753c93335294/resourcegroups/rg-dp100-labs/workspaces/mlw-dp100-labs&tid=6a1d2f96-8cdf-4d1a-943d-7b73f4dfbb6d


<h3> Manually Create Signature </h3>

In [27]:
%%writefile $scriptFolder/train-model-manualSignature.py
import mlflow
import argparse
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score,roc_curve
from matplotlib import pyplot as plt
import mlflow.sklearn
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import Schema, ColSpec

def main(args):
    """Train the model and log it to MLflow with a hand-written signature.

    Args:
        args: Parsed arguments providing ``trainingData`` and ``regRate``.
    """
    # Autologging stays on, but model logging is disabled because the model is
    # logged explicitly below with its own signature.
    mlflow.autolog(log_models=False)
    df=getData(path=args.trainingData)
    XTrain, XTest, yTrain, yTest=splitData(df=df)
    model=trainModel(regRate=args.regRate,XTrain=XTrain,XTest=XTest,yTrain=yTrain,yTest=yTest)
    # Called for its accuracy printout; the returned predictions are not needed here.
    evalModel(model=model,XTest=XTest,yTest=yTest)
    # Exactly one ColSpec per feature, in the order splitData selects the
    # columns, so the signature describes the eight inputs the model was fit on.
    inputSchema=Schema([
        ColSpec("integer", "Pregnancies"),
        ColSpec("integer", "PlasmaGlucose"),
        ColSpec("integer", "DiastolicBloodPressure"),
        ColSpec("integer", "TricepsThickness"),
        ColSpec("integer", "SerumInsulin"),
        ColSpec("double", "BMI"),
        ColSpec("double", "DiabetesPedigree"),
        ColSpec("integer", "Age"),
    ])
    outputSchema=Schema([ColSpec("boolean")])
    signature=ModelSignature(inputSchema, outputSchema)
    mlflow.sklearn.log_model(sk_model=model,artifact_path="modelRitishLearnModel",signature=signature)


def getData(path):
    """Load the training CSV into a pandas DataFrame.

    Args:
        path: Location of the CSV; handed to ``pd.read_csv`` as
            ``filepath_or_buffer``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    print("Reading Data")
    return pd.read_csv(filepath_or_buffer=path)

def splitData(df:pd.DataFrame):
    """Split the frame into train/test feature and label arrays.

    Args:
        df: DataFrame holding the eight feature columns selected below and a
            ``Diabetic`` label column.

    Returns:
        Tuple ``(XTrain, XTest, yTrain, yTest)`` from ``train_test_split``
        with ``test_size=0.30`` and ``random_state=0``.
    """
    print("Splitting Data")
    featureColumns = [
        'Pregnancies', 'PlasmaGlucose', 'DiastolicBloodPressure',
        'TricepsThickness', 'SerumInsulin', 'BMI', 'DiabetesPedigree', 'Age',
    ]
    features = df[featureColumns].values
    labels = df['Diabetic'].values
    # The fixed random_state makes the split repeatable across runs.
    trainX, testX, trainY, testY = train_test_split(
        features, labels, test_size=0.30, random_state=0
    )
    return trainX, testX, trainY, testY

def trainModel(regRate,XTrain, XTest, yTrain, yTest):
    """Fit a logistic-regression classifier on the training split.

    Args:
        regRate: Regularization rate; must be > 0. The model's ``C`` is
            ``1/regRate``.
        XTrain: Training feature matrix.
        XTest: Unused; kept so existing callers keep working.
        yTrain: Training labels.
        yTest: Unused; kept so existing callers keep working.

    Returns:
        The fitted ``LogisticRegression`` model.

    Raises:
        ValueError: If ``regRate`` is not strictly positive.
    """
    # Validate before dividing: regRate == 0 would otherwise surface as a bare
    # ZeroDivisionError, and a negative rate would produce a negative C.
    if regRate <= 0:
        raise ValueError(f"regRate must be > 0, got {regRate}")
    print("Training Model")
    model=LogisticRegression(C=1/regRate,solver="liblinear")
    model.fit(X=XTrain,y=yTrain)
    return model

def evalModel(model,XTest,yTest):
    """Print the accuracy on the held-out split and return the predictions.

    Args:
        model: Fitted estimator exposing ``predict(X=...)``.
        XTest: Test feature matrix.
        yTest: True labels for ``XTest``.

    Returns:
        The predictions ``model.predict`` produced for ``XTest``.
    """
    predictions = model.predict(X=XTest)
    # The mean of the element-wise match mask is the share of correct predictions.
    accuracy = np.mean(predictions == yTest)
    print(f"Accuracy:{accuracy}")
    return predictions

def parseArgs():
    """Parse the command-line arguments of the training script.

    Returns:
        argparse.Namespace with ``trainingData`` (str) and ``regRate``
        (float, default 0.01).

    Raises:
        SystemExit: Raised by argparse when ``--trainingData`` is missing or
            an argument cannot be parsed.
    """
    parser=argparse.ArgumentParser()
    # Required: without a path the script would only fail later, inside
    # pd.read_csv, with a much less helpful error.
    parser.add_argument("--trainingData",dest="trainingData",type=str,required=True,
                        help="path to the training CSV file")
    parser.add_argument("--regRate",dest="regRate",type=float,default=0.01,
                        help="regularization rate (default: 0.01)")

    return parser.parse_args()


if __name__=="__main__":
    # Banner lines bracket the script's output in the job log.
    banner = "*" * 60
    print(banner)
    main(args=parseArgs())
    print(banner)

Writing src/train-model-manualSignature.py


In [28]:
# Submit a command job that runs the manual-signature training script.
job=command(
    # Derived from scriptFolder so the uploaded code folder is always the one
    # the %%writefile cell wrote the script into.
    code=f"./{scriptFolder}",
    command="python train-model-manualSignature.py --trainingData data/diabetes.csv",
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    compute="aml-cluster",
    display_name="diabetes-train-ModelSignature",
    experiment_name="diabetes-training"
)

returnedJobManual=mlClient.jobs.create_or_update(job=job)
# Studio URL for following the run in the browser.
amlURLManual=returnedJobManual.studio_url
print(amlURLManual)

[32mUploading src (0.53 MBs):   0%|          | 0/530996 [00:00<?, ?it/s][32mUploading src (0.53 MBs):  99%|█████████▉| 524693/530996 [00:00<00:00, 5178754.19it/s][32mUploading src (0.53 MBs): 100%|██████████| 530996/530996 [00:00<00:00, 4166971.29it/s]
[39m



https://ml.azure.com/runs/amusing_planet_cs33z0dcn6?wsid=/subscriptions/18a1f27f-edf5-495e-9acb-753c93335294/resourcegroups/rg-dp100-labs/workspaces/mlw-dp100-labs&tid=6a1d2f96-8cdf-4d1a-943d-7b73f4dfbb6d


<h3> Register the Model </h3>

In [41]:
# Name of the scikit-learn autolog job; displayed as the cell's output.
jobName=returnedJobSklearn.name
jobName

'khaki_vase_brfgpzbh34'

In [42]:
# Describe the MLflow model stored under the job's "model" artifact path,
# then register it in the workspace.
runModel = Model(
    type=AssetTypes.MLFLOW_MODEL,
    name="mlflow-diabetes",
    description="Model created from run.",
    path=f"azureml://jobs/{jobName}/outputs/artifacts/paths/model/",
)
mlClient.models.create_or_update(runModel)

Model({'job_name': 'khaki_vase_brfgpzbh34', 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'mlflow-diabetes', 'description': 'Model created from run.', 'tags': {}, 'properties': {}, 'print_as_yaml': False, 'id': '/subscriptions/18a1f27f-edf5-495e-9acb-753c93335294/resourceGroups/rg-dp100-labs/providers/Microsoft.MachineLearningServices/workspaces/mlw-dp100-labs/models/mlflow-diabetes/versions/1', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/ritishadhikaricompute/code/Users/ritishadhikari', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7fdbb98991e0>, 'serialize': <msrest.serialization.Serializer object at 0x7fdbb98996f0>, 'version': '1', 'latest_version': None, 'path': 'azureml://subscriptions/18a1f27f-edf5-495e-9acb-753c93335294/resourceGroups/rg-dp100-labs/workspaces/mlw-dp100-labs/datastores/workspaceartifactstore/paths/ExperimentR