In [1]:
pip show azure-ai-ml

Name: azure-ai-ml
Version: 1.26.0
Summary: Microsoft Azure Machine Learning Client Library for Python
Home-page: https://github.com/Azure/azure-sdk-for-python
Author: Microsoft Corporation
Author-email: azuresdkengsysadmins@microsoft.com
License: MIT License
Location: /anaconda/envs/azureml_py38/lib/python3.10/site-packages
Requires: azure-common, azure-core, azure-mgmt-core, azure-monitor-opentelemetry, azure-storage-blob, azure-storage-file-datalake, azure-storage-file-share, colorama, isodate, jsonschema, marshmallow, msrest, pydash, pyjwt, pyyaml, strictyaml, tqdm, typing-extensions
Required-by: 
Note: you may need to restart the kernel to use updated packages.


Connect to your workspace
With the required SDK packages installed, now you're ready to connect to your workspace.

To connect to a workspace, we need identifier parameters - a subscription ID, resource group name, and workspace name. Since you're working with a compute instance, managed by Azure Machine Learning, you can use the default values to connect to the workspace.

In [2]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient

try:
    credential = DefaultAzureCredential()
    # Check if given credential can get token successfully.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Fall back to InteractiveBrowserCredential when DefaultAzureCredential does not work.
    # The failure reason is reported instead of being silently discarded, so a
    # misconfigured identity is visible in the cell output.
    print(f"DefaultAzureCredential failed ({ex}); falling back to InteractiveBrowserCredential.")
    credential = InteractiveBrowserCredential()

In [3]:

# Get a handle to workspace
# No subscription ID, resource group or workspace name is passed here:
# MLClient.from_config picks them up from a config file (the recorded output
# of this cell reports it was found at /config.json).
ml_client = MLClient.from_config(credential=credential)

Found the config file in: /config.json


Use the Python SDK to train a model
To train a model, you'll first create the training scripts in the src folder. Each script reads the ons_nhs_expenditure.csv file from the workspace datastore as its training data.

## Creating the linear regression training script

In [4]:
%%writefile src/expenses-training-linear-regression.py

# import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


# load the NHS healthcare expenditure dataset from the workspace datastore
# NOTE(review): the URI hard-codes the subscription, resource group and workspace;
# consider passing the data location in as a job input instead.
print("Loading Data...")
expenses = pd.read_csv('azureml://subscriptions/56fbbfce-6dab-4d62-af16-ccd107f4d9d3/resourcegroups/DEV-EA-EASTUS-DF-RG/workspaces/mlw-uel-cn-7000/datastores/ds_healthdata_2/paths/data/ons_nhs_expenditure.csv')

print(expenses)

# separate the single feature (Year) from the continuous regression target
X, y = expenses[['Year']].values, expenses['Total_current_healthcare_expenditure'].values

# split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)

# train an ordinary least-squares linear regression model
# (LinearRegression takes no regularization hyperparameter, so none is set or
# reported here; the previous message advertised a rate that was never used)
print('Training a linear regression model')
model = LinearRegression().fit(X_train, y_train)

# evaluate on the held-out split: model.score returns R^2 for regressors
y_hat = model.predict(X_test)
r2 = model.score(X_test, y_test)
print('R^2 Score:', r2)

# RMSE is reported as the error metric (AUC does not apply to regression)
rmse = np.sqrt(mean_squared_error(y_test, y_hat))
print('RMSE:', rmse)



Overwriting src/expenses-training-linear-regression.py


## Creating a logistic regression model python file

In [5]:
%%writefile src/expenses-training-LogisticRegression-classification.py

# import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


# load the NHS healthcare expenditure dataset from the workspace datastore
print("Loading Data...")
data_uri = 'azureml://subscriptions/56fbbfce-6dab-4d62-af16-ccd107f4d9d3/resourcegroups/DEV-EA-EASTUS-DF-RG/workspaces/mlw-uel-cn-7000/datastores/ds_healthdata_2/paths/data/ons_nhs_expenditure.csv'
expenses = pd.read_csv(data_uri)

print(expenses)

# feature matrix: Year only
X = expenses[['Year']].values
# labels: the continuous expenditure column bucketed into three bins labelled 0, 1, 2
expenditure = expenses['Total_current_healthcare_expenditure']
y = pd.cut(expenditure, bins=3, labels=[0, 1, 2])

# set regularization hyperparameter
reg = 0.01

# hold out 30% for testing, stratified on the class labels
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=0, stratify=y
)

# fit the classifier; C is the inverse of the regularization rate
print('Training a logistic regression model with regularization rate of', reg)
classifier = LogisticRegression(C=1/reg, solver="liblinear")
model = classifier.fit(X_train, y_train)

# accuracy: fraction of test rows whose predicted class matches the label
y_hat = model.predict(X_test)
matches = y_hat == y_test
acc = np.average(matches)
print('Accuracy:', acc)

# one-vs-rest AUC computed from the per-class probabilities
y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test, y_scores, multi_class='ovr', labels=[0, 1, 2])
print('AUC: ' + str(auc))


Overwriting src/expenses-training-LogisticRegression-classification.py


Run the cell below to submit the job that trains a classification model to predict the healthcare expenditure band.

## Classification Modeling Run using Logistic regression

In [6]:
from azure.ai.ml import MLClient, command, Input
from azureml.core import Workspace, Environment
from azure.identity import DefaultAzureCredential
import fsspec
from azureml.core import Workspace

from azureml.core.conda_dependencies import CondaDependencies

# Load the workspace from the saved config file
ws = Workspace.from_config()

# Retrieve the existing curated environment
curated_env = Environment.get(workspace=ws, name="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu")

# Create a copy of the curated environment so packages can be added to it
env = curated_env.clone("my-custom-sklearn-env")

# Create a CondaDependencies object if it doesn't exist
if env.python.conda_dependencies is None:
    env.python.conda_dependencies = CondaDependencies()

# The training scripts call pd.read_csv on an azureml:// datastore URI.
# fsspec alone does not know that protocol; azureml-fsspec is the package that
# provides it, so both are added to the environment.
for pip_package in ("fsspec", "azureml-fsspec"):
    env.python.conda_dependencies.add_pip_package(pip_package)

# Set the name and version on the environment object; the jobs reference it
# as "my-custom-sklearn-env:1"
env.name = "my-custom-sklearn-env"
env.version = "1"

# Register the updated environment with a version
# NOTE(review): if version 1 is already registered with a different package list,
# confirm that re-registering picks up the change (or bump the version here and
# in the job definitions together).
env.register(workspace=ws)

# Settings for the classification training run: the ./src folder is uploaded as the
# code snapshot and the logistic-regression script is executed on the named compute.
log_reg_job_settings = dict(
    code="./src",
    command="python expenses-training-LogisticRegression-classification.py",
    environment="my-custom-sklearn-env:1",
    compute="amlclcn7000",
    display_name="expense-pythonv2-train-log_reg",
    experiment_name="expense-training-log-reg",
)
job = command(**log_reg_job_settings)

# Submit the job through the workspace client and show the studio link for monitoring
returned_job = ml_client.create_or_update(job)
aml_url = returned_job.studio_url
print("Monitor your job at", aml_url)

Environment version is set. Attempting to register desired version. To auto-version, reset version to None.
Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time

Monitor your job at https://ml.azure.com/runs/happy_star_31d7kdr556?wsid=/subscriptions/56fbbfce-6dab-4d62-af16-ccd107f4d9d3/resourcegroups/dev-ea-eastus-df-rg/workspaces/mlw-uel-cn-7000&tid=9258a771-d6bc-4486-adea-e939c450d791


## Linear regression

In [7]:
# Settings for the regression training run: same code folder, environment and compute
# as the classification job, but running the linear-regression script.
lin_reg_job_settings = dict(
    code="./src",
    command="python expenses-training-linear-regression.py",
    environment="my-custom-sklearn-env:1",
    compute="amlclcn7000",
    display_name="expense-pythonv2-train-lin-reg",
    experiment_name="expense-training-lin-reg",
)
job = command(**lin_reg_job_settings)

# Submit the job through the workspace client and show the studio link for monitoring
returned_job = ml_client.create_or_update(job)
aml_url = returned_job.studio_url
print("Monitor your job at", aml_url)

Monitor your job at https://ml.azure.com/runs/jovial_fork_d08mtwz2dy?wsid=/subscriptions/56fbbfce-6dab-4d62-af16-ccd107f4d9d3/resourcegroups/dev-ea-eastus-df-rg/workspaces/mlw-uel-cn-7000&tid=9258a771-d6bc-4486-adea-e939c450d791
