In [1]:
pip show azure-ai-ml

Name: azure-ai-ml
Version: 1.8.0
Summary: Microsoft Azure Machine Learning Client Library for Python
Home-page: https://github.com/Azure/azure-sdk-for-python
Author: Microsoft Corporation
Author-email: azuresdkengsysadmins@microsoft.com
License: MIT License
Location: /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/site-packages
Requires: azure-common, azure-core, azure-mgmt-core, azure-storage-blob, azure-storage-file-datalake, azure-storage-file-share, colorama, isodate, jsonschema, marshmallow, msrest, opencensus-ext-azure, pydash, pyjwt, pyyaml, strictyaml, tqdm, typing-extensions
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [3]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient


# Try the default credential first; requesting a management token inside the
# try block makes an unusable credential fail here rather than at first use.
try:
    credential = DefaultAzureCredential()
    credential.get_token("https://management.azure.com/.default")
except Exception:
    # Any failure above falls back to an interactive browser sign-in.
    credential = InteractiveBrowserCredential()

In [4]:
# Create the workspace client from the discovered config file, authenticating
# with the credential chosen earlier in the notebook.
ml_client = MLClient.from_config(credential=credential)

Found the config file in: /config.json


In [6]:
import os

# Folder that will hold the training script; exist_ok makes the cell re-runnable.
script_folder = "src"
os.makedirs(script_folder, exist_ok=True)
print(f"{script_folder} folder created")

src folder created


In [7]:
%%writefile $script_folder/train.py
import mlflow
import argparse
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt


def main(args):
    """Run the training pipeline: load the data, split it, train, then evaluate.

    Args:
        args: Parsed command-line namespace; ``training_data`` and
            ``reg_rate`` are read from it.
    """
    data = get_data(args.training_data)
    features_train, features_test, labels_train, labels_test = split_data(data)
    classifier = train_model(args.reg_rate, features_train, labels_train)
    evaluate_model(classifier, features_test, labels_test)

def parse_args():
    """Parse the training script's command-line arguments.

    Returns:
        argparse.Namespace with ``training_data`` (str, required) and
        ``reg_rate`` (float, defaults to 0.01).

    Raises:
        SystemExit: Raised by argparse when ``--training_data`` is missing or
            an argument cannot be converted to its declared type.
    """
    parser = argparse.ArgumentParser()
    # Required so a missing path is reported by argparse with a usage message
    # instead of None being passed on as the CSV path.
    parser.add_argument("--training_data", dest="training_data", type=str, required=True)
    parser.add_argument("--reg_rate", dest="reg_rate", type=float, default=0.01)
    return parser.parse_args()

def get_data(path):
    """Read the CSV file at ``path`` and return it as a pandas DataFrame."""
    return pd.read_csv(path)

def split_data(df):
    """Split the frame into train/test feature and label arrays.

    Args:
        df: DataFrame containing the eight feature columns listed below and
            a ``Diabetic`` label column.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` from a 70/30 split with
        a fixed ``random_state`` of 0.
    """
    feature_columns = [
        'Pregnancies',
        'PlasmaGlucose',
        'DiastolicBloodPressure',
        'TricepsThickness',
        'SerumInsulin',
        'BMI',
        'DiabetesPedigree',
        'Age',
    ]
    features = df[feature_columns].values
    labels = df['Diabetic'].values
    # tuple(...) keeps the return type identical to returning the four names.
    return tuple(train_test_split(features, labels, test_size=0.30, random_state=0))

def train_model(reg_rate, X_train, y_train):
    """Fit a logistic regression classifier and log the regularization rate.

    Args:
        reg_rate: Regularization rate; logged to MLflow and inverted to
            obtain the estimator's ``C`` argument.
        X_train: Training feature rows.
        y_train: Training labels.

    Returns:
        The fitted ``LogisticRegression`` estimator.
    """
    mlflow.log_param(key="Regularization Rate", value=reg_rate)
    # C is set to the reciprocal of the supplied rate.
    inverse_rate = 1 / reg_rate
    classifier = LogisticRegression(C=inverse_rate, solver="liblinear")
    classifier.fit(X_train, y_train)
    return classifier

def evaluate_model(model, X_test, y_test):
    """Score the model on the held-out split and log the results to MLflow.

    Logs the "Accuracy" and "AUC" metrics, then saves the ROC curve to
    ``ROC-Curve.jpeg`` in the working directory and logs it as an artifact.

    Args:
        model: Fitted classifier exposing ``predict`` and ``predict_proba``.
        X_test: Feature rows to score.
        y_test: True labels for ``X_test``.
    """
    y_hat = model.predict(X_test)
    acc = np.average(y_test == y_hat)
    print(f"Accuracy:{acc}")
    mlflow.log_metric(key="Accuracy", value=acc)

    # Column index 1 of the probability matrix is used as the ranking score.
    y_scores = model.predict_proba(X_test)
    positive_scores = y_scores[:, 1]
    auc = roc_auc_score(y_test, positive_scores)
    print(f"AUC:{auc}")
    mlflow.log_metric(key="AUC", value=auc)

    fpr, tpr, _ = roc_curve(y_test, positive_scores)
    fig = plt.figure(figsize=(6, 4))
    # Chance reference line: a random classifier has TPR == FPR, so the
    # diagonal runs from (0, 0) to (1, 1), not from (0, 1) to (1, 0).
    plt.plot([0, 1], [0, 1], "k--")
    plt.plot(fpr, tpr)
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title('ROC Curve')
    plt.savefig("ROC-Curve.jpeg")
    # Release the figure once it is on disk so it does not stay open.
    plt.close(fig)
    mlflow.log_artifact("ROC-Curve.jpeg")

if __name__ == "__main__":
    # Script entry point: parse the CLI arguments, then run the pipeline.
    args = parse_args()
    main(args)
    # Trailing separator so the end of the run is easy to spot in job logs.
    print(60 * "*")
    print("\n\n")

Writing src/train.py


In [11]:
from azure.ai.ml import command, Input
from azure.ai.ml.constants import AssetTypes

# Command job that runs src/train.py on the cluster, passing the registered
# data asset and a regularization rate as script arguments.
job = command(
    code="./src",
    inputs={
        "diabetes_data": Input(
            type=AssetTypes.URI_FILE,
            path="azureml:diabetes-data:1",
        ),
        "reg_rate": 0.01,
    },
    command="python train.py --training_data=${{inputs.diabetes_data}} --reg_rate=${{inputs.reg_rate}}",
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    compute="aml-cluster",
    display_name="diabetes-train-mlflow",
    experiment_name="diabetes-training",
    # Tag value spelling corrected (was "LogosticRegression").
    tags={"model_type": "LogisticRegression"},
)

# Submit the job and print the Studio link for monitoring it.
returned_job = ml_client.create_or_update(entity=job)
aml_url = returned_job.studio_url
print(f"Monitor Your Job at: {aml_url}")

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
[32mUploading src (0.0 MBs): 100%|███

Monitor Your Job at: {aml_url}


<h2> Define the Search Space </h2>

In [13]:
from azure.ai.ml.sweep import Choice

# Re-invoke the command job with reg_rate replaced by a discrete search space.
command_job_for_sweep = job(
    reg_rate=Choice(values=[0.01, 0.1, 1]),
)

In [14]:
# Grid-sample the search space, maximizing the "Accuracy" metric that
# train.py logs through MLflow.
sweep_job = command_job_for_sweep.sweep(
    compute="aml-cluster",
    sampling_algorithm="grid",
    primary_metric="Accuracy",
    goal="Maximize",
)

sweep_job.experiment_name = "Sweep-Diabetes"

# Bound the sweep: total trials, parallel trials, and an overall timeout
# (presumably in seconds - confirm against the SDK docs).
sweep_job.set_limits(max_total_trials=6, max_concurrent_trials=2, timeout=7200)

# Submit the Sweep Job

In [15]:
# Submit the sweep job to the workspace and print its Studio monitoring link.
returned_sweep_job = ml_client.create_or_update(entity=sweep_job)
aml_url = returned_sweep_job.studio_url
print(f"Monitor URL {aml_url}")

Monitor URL https://ml.azure.com/runs/honest_box_lgpn76wrcs?wsid=/subscriptions/18a1f27f-edf5-495e-9acb-753c93335294/resourcegroups/rg-dp100-l6898101080734451a3/workspaces/mlw-dp100-l6898101080734451a3&tid=6a1d2f96-8cdf-4d1a-943d-7b73f4dfbb6d
