In [1]:
%load_ext watermark
%watermark -v -d -p sklearn,azureml.core

Python implementation: CPython
Python version       : 3.9.7
IPython version      : 7.28.0

sklearn     : 1.0.1
azureml.core: 1.35.0



In [2]:
import azureml.core

## Initialize Workspace

Create an ML workspace in **Azure ML Studio**, then download its `config.json` and place it next to this notebook.

In [3]:
from azureml.core.workspace import Workspace
import json

# Read the workspace coordinates exported from Azure ML Studio.
with open('config.json', 'r') as config_file:
    config = json.load(config_file)

# Connect to the existing workspace described by config.json.
ws = Workspace(
    subscription_id=config['subscription_id'],
    resource_group=config['resource_group'],
    workspace_name=config['workspace_name'],
)
ws.write_config()

# Show which workspace we are connected to, one field per line.
for workspace_field in (ws.name, ws.resource_group, ws.location):
    print(workspace_field)

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


thomdml
thomd
eastus


## Experiment

In [4]:
from azureml.core import Experiment

# All runs submitted below are recorded under this experiment in the workspace.
exp = Experiment(name='train-on-local', workspace=ws)

Write the training script `train.py` to file

In [5]:
%%writefile train.py
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from azureml.core.run import Run
import os
import numpy as np
import joblib

# Models are saved in ./outputs so that they are automatically uploaded with the run.
OUTPUT_DIR = './outputs'
os.makedirs(OUTPUT_DIR, exist_ok=True)

X, y = load_diabetes(return_X_y=True)

# Handle used to log metrics for the current run.
run = Run.get_context()

# Fixed random_state keeps the train/test split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
data = {"train": {"X": X_train, "y": y_train},
        "test": {"X": X_test, "y": y_test}}

# Sweep the regularization strength, logging the metric and saving one model per alpha.
for alpha in np.arange(0.0, 1.0, 0.05):
    reg = Ridge(alpha=alpha)
    reg.fit(data["train"]["X"], data["train"]["y"])
    preds = reg.predict(data["test"]["X"])
    # scikit-learn metrics take (y_true, y_pred) in that order.
    mse = mean_squared_error(data["test"]["y"], preds)
    run.log('alpha', alpha)
    run.log('mse', mse)

    # joblib opens the destination itself. Opening model_file_name separately only
    # left an empty stray .pkl file in the working directory, so write directly
    # into the outputs folder.
    model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
    joblib.dump(value=reg, filename=os.path.join(OUTPUT_DIR, model_file_name))

    print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))


Overwriting train.py


## Run Experiment in Azure ML

### User-Managed Environment

When using a **user-managed environment**, you are responsible for ensuring that all the necessary packages are available in the Python environment you choose to run the script in.

In [6]:
from azureml.core import Environment
from azureml.core import ScriptRunConfig

# With user-managed dependencies, the chosen Python environment must already
# contain every package the training script needs.
user_managed_env = Environment('user-managed-env')
user_managed_env.python.user_managed_dependencies = True

# To run with a specific interpreter, point to its path, e.g.:
#user_managed_env.python.interpreter_path = '/path/to/virtualenv/bin/python'

# Describe what to run (train.py from this directory) and where (the environment above).
src = ScriptRunConfig(
    environment=user_managed_env,
    script='train.py',
    source_directory='./',
)
run = exp.submit(src)

Get run history details

In [8]:
# Optional: uncomment to wait for the submitted run to complete and show its output.
# run.wait_for_completion(show_output=True)

In [9]:
# Display the run summary (experiment, id, type, status and links).
run

Experiment,Id,Type,Status,Details Page,Docs Page
train-on-local,train-on-local_1635453356_7d0c869d,azureml.scriptrun,Finalizing,Link to Azure Machine Learning studio,Link to Documentation


### System-Managed Environment

Instead of managing the setup of the environment yourself, you can **ask the system to build a new local conda environment**. The environment is built once, and will be reused in subsequent executions as long as the conda dependencies remain unchanged.

In [18]:
from azureml.core.conda_dependencies import CondaDependencies

# Conda/pip packages the system-built environment must contain for train.py.
cd = CondaDependencies.create(
    pip_packages=['azureml-core'],
    conda_packages=['scikit-learn', 'pip'],
)

# Let the system build the conda environment instead of using a local one as-is.
system_managed_env = Environment('system-managed-env')
system_managed_env.python.user_managed_dependencies = False
system_managed_env.python.conda_dependencies = cd
# NOTE(review): presumably lets pip install into the conda environment even when
# the host requires a virtualenv - confirm.
system_managed_env.environment_variables = {'PIP_REQUIRE_VIRTUALENV': 'false'}

# Submitting builds a new conda environment; the first build might take up to 5 minutes.
src = ScriptRunConfig(
    environment=system_managed_env,
    script='train.py',
    source_directory='./',
)

run = exp.submit(src)

In [19]:
# Optional: uncomment to wait for the submitted run to complete and show its output.
#run.wait_for_completion(show_output=True)
# Display the run summary (experiment, id, type, status and links).
run

Experiment,Id,Type,Status,Details Page,Docs Page
train-on-local,train-on-local_1635456024_61f86e7b,azureml.scriptrun,Starting,Link to Azure Machine Learning studio,Link to Documentation


### Docker-based Execution

Train the models in a **Docker container on your local machine**. This requires the Docker engine to be installed and running locally.

If your kernel is already running in a Docker container, such as Azure Notebooks, this mode will **NOT** work.

In [28]:
# Conda packages to install inside the container for train.py.
cd = CondaDependencies.create(conda_packages=['scikit-learn'])

docker_env = Environment('docker-env')

# Let the system build the Python environment from the conda specification.
docker_env.python.conda_dependencies = cd
docker_env.python.user_managed_dependencies = False

# Request execution inside a Docker container.
# NOTE: this attribute is deprecated in favour of
# azureml.core.runconfig.DockerConfiguration(use_docker=True); it is kept here
# because the cell that submits the run relies on it.
docker_env.docker.enabled = True

# No base image is set, so this shows the default Docker image from Azure ML.
print(docker_env.docker.base_image)

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210922.v1


In [31]:
import subprocess

from azureml.core.runconfig import DockerConfiguration

# Build a dedicated run configuration for the Docker run instead of mutating the
# `src` object left over from whichever earlier cell was executed last.
# DockerConfiguration(use_docker=True) is the non-deprecated way to request
# Docker-based execution.
src = ScriptRunConfig(
    source_directory='./',
    script='train.py',
    environment=docker_env,
    docker_runtime_config=DockerConfiguration(use_docker=True),
)

# Check if Docker is installed and Linux containers are enabled
if subprocess.run("docker -v", shell=True).returncode != 0:
    print("Docker engine is not installed.")
else:
    # `docker system info` can exit non-zero (e.g. when the daemon is not running),
    # so inspect the return code instead of letting check_output() raise.
    info = subprocess.run("docker system info", shell=True, stdout=subprocess.PIPE)
    # The report may contain non-ASCII characters; decode leniently so the
    # substring check below can never fail with a UnicodeDecodeError.
    out = info.stdout.decode('utf-8', errors='replace')
    if info.returncode != 0:
        print("Docker engine is installed but not running.")
    elif "OSType: linux" not in out:
        print("Switch Docker engine to use Linux containers.")
    else:
        run = exp.submit(src)

Docker version 20.10.8, build 3967b7d
running ...


In [30]:
# Display the run summary (experiment, id, type, status and links).
run

Experiment,Id,Type,Status,Details Page,Docs Page
train-on-local,train-on-local_1635343117_aa73c104,azureml.scriptrun,Starting,Link to Azure Machine Learning studio,Link to Documentation


In [2]:
# Optional: uncomment to wait for the submitted run to complete and show its output.
#run.wait_for_completion(show_output=True)