In [None]:
!pip install --upgrade azureml-sdk

In [1]:
import azureml.core
from azureml.core import Workspace

# Print the installed core SDK version so the recorded outputs can be tied to it.
print("Azure ML SDK Version: ", azureml.core.VERSION)


Azure ML SDK Version:  1.5.0


## Initialize workspace

In [2]:
# Load the workspace configuration from the config.json file in the current folder.
ws = Workspace.from_config()

# Show the workspace handle as the cell output.
ws


Workspace.create(name='eacbmlservicews', subscription_id='f80606e5-788f-4dc3-a9ea-2eb9a7836082', resource_group='adlsgen2')

## Create an experiment

In [3]:
from azureml.core import Experiment

# Runs submitted from this notebook are attached to the experiment with this name.
experiment_name = 'diabetes_dxd'
exp = Experiment(name=experiment_name, workspace=ws)

# Show the experiment handle as the cell output.
exp


Name,Workspace,Report Page,Docs Page
diabetes_dxd,eacbmlservicews,Link to Azure Machine Learning studio,Link to Documentation


## Load the registered dataset

In [4]:
from azureml.core import Dataset

# Look up the dataset registered in the workspace under the name 'diabetes'.
diabetes_dataset = Dataset.get_by_name(ws, name='diabetes')


In [5]:
# Convert the dataset to a pandas DataFrame and drop its "Path" column.
diabetes = diabetes_dataset.to_pandas_dataframe().drop("Path", axis=1)

In [6]:
# Preview the first rows of the frame.
diabetes.head()

Unnamed: 0,AGE,SEX,BMI,BP,S1,S2,S3,S4,S5,S6,Y
0,59,2,32.1,101.0,157,93.2,38.0,4.0,4.8598,87,151
1,48,1,21.6,87.0,183,103.2,70.0,3.0,3.8918,69,75
2,72,2,30.5,93.0,156,93.6,41.0,4.0,4.6728,85,141
3,24,1,25.3,84.0,198,131.4,40.0,5.0,4.8903,89,206
4,50,1,23.0,101.0,192,125.4,52.0,4.0,4.2905,80,135


In [7]:
# Column dtypes and non-null counts.
diabetes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 442 entries, 0 to 441
Data columns (total 11 columns):
AGE    442 non-null int64
SEX    442 non-null int64
BMI    442 non-null float64
BP     442 non-null float64
S1     442 non-null int64
S2     442 non-null float64
S3     442 non-null float64
S4     442 non-null float64
S5     442 non-null float64
S6     442 non-null int64
Y      442 non-null int64
dtypes: float64(6), int64(5)
memory usage: 38.1 KB


In [8]:
# Summary statistics for every numeric column.
diabetes.describe()

Unnamed: 0,AGE,SEX,BMI,BP,S1,S2,S3,S4,S5,S6,Y
count,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0
mean,48.5181,1.468326,26.375792,94.647014,189.140271,115.43914,49.788462,4.070249,4.641411,91.260181,152.133484
std,13.109028,0.499561,4.418122,13.831283,34.608052,30.413081,12.934202,1.29045,0.522391,11.496335,77.093005
min,19.0,1.0,18.0,62.0,97.0,41.6,22.0,2.0,3.2581,58.0,25.0
25%,38.25,1.0,23.2,84.0,164.25,96.05,40.25,3.0,4.2767,83.25,87.0
50%,50.0,1.0,25.7,93.0,186.0,113.0,48.0,4.0,4.62005,91.0,140.5
75%,59.0,2.0,29.275,105.0,209.75,134.5,57.75,5.0,4.9972,98.0,211.5
max,79.0,2.0,42.2,133.0,301.0,242.4,99.0,9.09,6.107,124.0,346.0


In [9]:
# Feature column names, spelled exactly as in the `diabetes` DataFrame (upper-case),
# so they can be used directly to select columns; `target` is the label column.
features_names = ['AGE', 'SEX', 'BMI', 'BP', 'S1', 'S2', 'S3', 'S4', 'S5', 'S6']
target = "Y"

## Create a (remote) compute target

In [10]:
# Compute target creation: reuse the named cluster if the lookup succeeds, otherwise provision it.

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cpu_cluster_name = "myComputeCluster"

try:
    # The lookup raising ComputeTargetException is treated as "cluster does not exist yet".
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    # min_nodes=0 / max_nodes=4 are the node-count bounds requested for the new cluster.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        min_nodes=0,
        max_nodes=4,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print(" Cluster already exists")

# Block (up to 30 minutes) until the cluster reports completion.
cpu_cluster.wait_for_completion(show_output=True, min_node_count=0, timeout_in_minutes=30)

 Cluster already exists
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [17]:
# Retrieve an existing compute target by name and report its provisioning state.

from azureml.core.compute import ComputeTarget


compute_target_name = "ds3-compute-cls"
# Keep the handle under its own name. Binding it to `cpu_cluster_name` would replace the
# cluster-name string with a ComputeTarget object for a different cluster and silently
# change where later cells submit their runs.
existing_compute_target = ComputeTarget(workspace=ws, name=compute_target_name)

print(existing_compute_target.provisioning_state)

Succeeded


## Train the model

### Train the model on a remote compute target

In [11]:
import os

# Folder (under the current working directory) holding the files for the remote run;
# it is created if it does not exist yet.
working_dir = os.getcwd()
script_folder = os.path.join(working_dir, "remote_train")
os.makedirs(script_folder, exist_ok=True)

In [12]:
%%writefile $script_folder/train.py

import argparse
import os
import numpy as np
import glob

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
import joblib
import pickle

from azureml.core import Run
from azureml.core import Dataset
from utils import load_data

# get hold of the current run
run = Run.get_context()
exp = run.experiment
ws = run.experiment.workspace

parser = argparse.ArgumentParser()
parser.add_argument('--regularization', type=float, dest='reg', default=0.5, help='regularization strength')
args = parser.parse_args()

# load train and test set into numpy arrays
diabetes_dataset = Dataset.get_by_name(ws, name='diabetes')
diabetes = diabetes_dataset.to_pandas_dataframe().drop("Path", axis=1)
target = "Y"
X = diabetes.drop(target, axis=1)
y = diabetes["Y"].values.reshape(-1,1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print('Train a Ridge regression model with regularization strength of', args.reg)
model = Ridge(alpha=args.reg, solver="auto", random_state=42)
model.fit(X_train, y_train)

print('Predict the test set')
y_hat = model.predict(X_test)

# calculate score on the prediction
score = model.score(X_test, y_test)
print('Score is ', score)

run.log('regularization strength', np.float(args.reg))
run.log('score', np.float(score))

os.makedirs('outputs', exist_ok=True)
# note file saved in the outputs folder is automatically uploaded into experiment record
joblib.dump(value=model, filename='outputs/diabetes_HD_remote_model.pkl')

Writing /mnt/batch/tasks/shared/LS_root/mounts/clusters/computeinstance/code/Users/diabetes/remote_train/train.py


In [16]:
import shutil
# train.py imports `load_data` from utils, so utils.py is copied next to it in the script folder.
shutil.copy('utils.py', script_folder)

'/mnt/batch/tasks/shared/LS_root/mounts/clusters/computeinstance/code/Users/diabetes/remote_train/utils.py'

In [14]:
# Set up the environment used on the compute target

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies


env = Environment("diabetes_remote_env")
env.docker.enabled = True

# Conda packages needed by the training script, plus one pip requirement added afterwards.
training_conda_packages = ['scikit-learn', 'pandas', 'numpy', 'joblib']
env.python.conda_dependencies = CondaDependencies.create(conda_packages=training_conda_packages)
env.python.conda_dependencies.add_pip_package("inference-schema[numpy-support]")

# Save the dependency specification to diabetes_env.yml in the current folder;
# the call's return value is shown as the cell output.
env.python.conda_dependencies.save_to_file(".", "diabetes_env.yml")


'diabetes_env.yml'

In [15]:
from azureml.train.estimator import Estimator

# No script_params are passed here: the HyperDrive configuration supplies
# '--regularization' for every child run, and train.py falls back to 0.5 otherwise.
# The provisioned cluster handle (`cpu_cluster`) is passed explicitly so that runs
# always go to the cluster that was created/verified earlier in this notebook.
estimator = Estimator(source_directory=script_folder,
                      compute_target=cpu_cluster,
                      environment_definition=env,
                      entry_script='train.py')


## Tune hyperparameters with HyperDrive

In [17]:
# Import only the names that are used instead of `import *`, so it is clear where each one comes from.
from azureml.train.hyperdrive import (
    HyperDriveConfig,
    PrimaryMetricGoal,
    RandomParameterSampling,
    choice,
)

# define hyperparameter sampling space: each child run receives one of these
# values as its '--regularization' argument
ps = RandomParameterSampling(
    {
        '--regularization': choice(0.01, 0.1, 0.5, 1.0),
    }
)

# define early termination policy: none is used. To enable one, import BanditPolicy from
# azureml.train.hyperdrive and use e.g. BanditPolicy(slack_factor=0.15, evaluation_interval=10).
early_termination_policy = None

# configure the run: maximize the 'score' metric logged by train.py,
# with at most 20 child runs in total and 2 running concurrently
hyperdrive_run_config = HyperDriveConfig(estimator=estimator,
                                         hyperparameter_sampling=ps,
                                         policy=early_termination_policy,
                                         primary_metric_name="score",
                                         primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                         max_total_runs=20,
                                         max_concurrent_runs=2)


In [18]:
# Start the run: submit the HyperDrive configuration to the experiment.
hd_run = exp.submit(hyperdrive_run_config)

In [21]:
# Launch the notebook widget to follow the progress and results of the HyperDrive run.
from azureml.widgets import RunDetails

RunDetails(hd_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [19]:
# Block until the HyperDrive run finishes; show_output=True streams a verbose log.
hd_run.wait_for_completion(show_output=True) 

RunId: HD_c19b511d-c899-4ef2-9f08-e3c6f5f6e7dd
Web View: https://ml.azure.com/experiments/diabetes_dxd/runs/HD_c19b511d-c899-4ef2-9f08-e3c6f5f6e7dd?wsid=/subscriptions/f80606e5-788f-4dc3-a9ea-2eb9a7836082/resourcegroups/adlsgen2/workspaces/eacbmlservicews

Streaming azureml-logs/hyperdrive.txt

"<START>[2020-06-01T06:48:37.143894][API][INFO]Experiment created<END>\n"<START>[2020-06-01T06:48:38.4940411Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>"<START>[2020-06-01T06:48:38.511309][GENERATOR][INFO]Trying to sample '2' jobs from the hyperparameter space<END>\n""<START>[2020-06-01T06:48:41.101161][GENERATOR][INFO]Successfully sampled '2' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_c19b511d-c899-4ef2-9f08-e3c6f5f6e7dd
Web View: https://ml.azure.com/experiments/diabetes_dxd/runs/HD_c19b511d-c899-4ef2-9f08-e3c6f5f6e7dd?wsid=/subscriptions/f80606e5-788f-4dc3-a9ea-2eb9a783

{'runId': 'HD_c19b511d-c899-4ef2-9f08-e3c6f5f6e7dd',
 'target': 'myComputeCluster',
 'status': 'Completed',
 'startTimeUtc': '2020-06-01T06:48:36.765215Z',
 'endTimeUtc': '2020-06-01T07:01:16.674336Z',
 'properties': {'primary_metric_config': '{"name": "score", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '5db72d9f-93a1-4ed1-8bc0-c7aa7ed90307',
  'score': '0.45259921776197887',
  'best_child_run_id': 'HD_c19b511d-c899-4ef2-9f08-e3c6f5f6e7dd_3',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://eacbmlservicew2479569759.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_c19b511d-c899-4ef2-9f08-e3c6f5f6e7dd/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=9U4mUyrWuWu2uhs2LWUMtqyXCNtmtVtmkK8LkbAGV3U%3D&st=2020-06-01T06%3A51%3A17Z&se=2020-06-01T15%3A01%3A17Z&sp=r'}}

In [22]:
# Pick the child run with the best primary metric and fetch its logged metrics.
best_run = hd_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()

# Keep the full run details under their own name; `parameter_values` is reserved
# for the argument list extracted in the next cell.
best_run_details = best_run.get_details()
best_run_details['runDefinition']

{'script': 'train.py',
 'useAbsolutePath': False,
 'arguments': ['--regularization', '0.01'],
 'sourceDirectoryDataStore': None,
 'framework': 'Python',
 'communicator': 'None',
 'target': 'myComputeCluster',
 'dataReferences': {},
 'data': {},
 'jobName': None,
 'maxRunDurationSeconds': None,
 'nodeCount': 1,
 'environment': {'name': 'diabetes_remote_env',
  'version': 'Autosave_2020-06-01T06:48:37Z_2b67a5f8',
  'python': {'interpreterPath': 'python',
   'userManagedDependencies': False,
   'condaDependencies': {'channels': ['anaconda', 'conda-forge'],
    'dependencies': ['python=3.6.2',
     {'pip': ['azureml-defaults', 'inference-schema[numpy-support]']},
     'scikit-learn',
     'pandas',
     'numpy',
     'joblib'],
    'name': 'azureml_789e4a31052d218b96e17e2da348e37b'},
   'baseCondaEnvironment': None},
  'environmentVariables': {'EXAMPLE_ENV_VAR': 'EXAMPLE_VALUE'},
  'docker': {'baseImage': 'mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04',
   'baseDockerfile': Non

In [23]:
# Command-line arguments of the best run, as a flat list of flag/value strings.
parameter_values = best_run.get_details()['runDefinition']['arguments']
parameter_values

['--regularization', '0.01']

In [24]:
# Summarize the best run. parameter_values[1] is the value that follows the
# '--regularization' flag in the argument list.
print('Best Run Id: ', best_run.id)
print('\n Score:', best_run_metrics['score'])
print('\n regularization: ',parameter_values[1])

Best Run Id:  HD_c19b511d-c899-4ef2-9f08-e3c6f5f6e7dd_3

 Score: 0.45259921776197887

 regularization:  0.01


## Register the BEST model

In [25]:
# List the files recorded for the best run; the model file is expected under outputs/.
print(best_run.get_file_names())

['azureml-logs/55_azureml-execution-tvmps_d88853636b2da6c0ad1df16902a52468911b9ff91c9009cea7515e0a7dcdcba5_d.txt', 'azureml-logs/65_job_prep-tvmps_d88853636b2da6c0ad1df16902a52468911b9ff91c9009cea7515e0a7dcdcba5_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_d88853636b2da6c0ad1df16902a52468911b9ff91c9009cea7515e0a7dcdcba5_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/112_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/diabetes_HD_remote_model.pkl']


In [26]:
# Register the model file produced by the best run (outputs/diabetes_HD_remote_model.pkl)
# under the name 'diabetes_HD_best_model'.
model = best_run.register_model(model_name='diabetes_HD_best_model', model_path='outputs/diabetes_HD_remote_model.pkl')

# Name, id and version of the registered model, tab-separated.
print(model.name, model.id, model.version, sep='\t')

diabetes_HD_best_model	diabetes_HD_best_model:1	1


## Create a scoring script

In [43]:
%%writefile score.py

import joblib
import numpy as np
import os

from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType


# The init() method is called once, when the web service starts up.
#
# Typically you would deserialize the model file, as shown here using joblib,
# and store it in a global variable so your run() method can access it later.
def init():
    global model

    # The AZUREML_MODEL_DIR environment variable indicates
    # a directory containing the model file you registered.
    model_filename = 'diabetes_HD_remote_model.pkl'
    model_path = os.path.join(os.environ['AZUREML_MODEL_DIR'], model_filename)
#FileNotFoundError: [Errno 2] No such file or directory: 'azureml-models/diabetes_HD_best_model/1/diabetes_hyperdrive_model.pkl'

    model = joblib.load(model_path)


# The run() method is called each time a request is made to the scoring API.
#
# Shown here are the optional input_schema and output_schema decorators
# from the inference-schema pip package. Using these decorators on your
# run() method parses and validates the incoming payload against
# the example input you provide here. This will also generate a Swagger
# API document for your web service.
@input_schema('data', NumpyParameterType(np.array([[59, 2, 32.1, 101.0, 157, 93.2, 38.0, 4.0, 4.8598, 87]])))
@output_schema(NumpyParameterType(np.array([151.000])))
def run(data):
    # Use the model object loaded by init().
    result = model.predict(data)

    # You can return any JSON-serializable object.
    return result.tolist()

Overwriting score.py


## Define the (inference) environment

In [44]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies


# pip requirements of the scoring environment
inference_pip_packages = [
    'azureml-defaults',
    'inference-schema[numpy-support]',
    'joblib',
    'numpy',
    'scikit-learn',
]

environment = Environment('my-sklearn-environment')
environment.python.conda_dependencies = CondaDependencies.create(pip_packages=inference_pip_packages)


## Define an inference configuration

In [45]:
from azureml.core.model import InferenceConfig


# Pair the scoring script written above (score.py) with the inference environment.
inference_config = InferenceConfig(entry_script='score.py', environment=environment)

## Deploy in a custom environment

In [46]:
from azureml.core import Webservice
from azureml.core.webservice import AciWebservice
from azureml.exceptions import WebserviceException
from azureml.core.model import Model, InferenceConfig  # TODO confirm whether this import needs to be added here


service_name = 'diabetes-custom-service2hd'

# Best-effort cleanup: remove any existing service under the same name and
# ignore the WebserviceException raised when that fails.
try:
    stale_service = Webservice(ws, service_name)
    stale_service.delete()
except WebserviceException:
    pass

# Deployment configuration: 1 CPU core and 1 GB of memory.
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

# Deploy the registered model with the scoring script/environment defined above.
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_config,
)

# Block until the deployment operation finishes, streaming its progress.
service.wait_for_deployment(show_output=True)


Running.....................
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [54]:
# Current state of the deployed service.
print(service.state)

Transitioning


In [55]:
# Container logs of the service, useful when diagnosing a deployment.
print(service.get_logs())

2020-05-27T06:45:44,128953160+00:00 - iot-server/run 
2020-05-27T06:45:44,134813858+00:00 - gunicorn/run 
2020-05-27T06:45:44,139708157+00:00 - rsyslog/run 
2020-05-27T06:45:44,144495856+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_fb6ef452c82dd8280844ad10cb54e4a8/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_fb6ef452c82dd8280844ad10cb54e4a8/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_fb6ef452c82dd8280844ad10cb54e4a8/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_fb6ef452c82dd8280844ad10cb54e4a8/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_fb6ef452c82dd8280844ad10cb54e4a8/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
EdgeHubC

In [58]:
# HTTP endpoint to which scoring requests are sent.
print(service.scoring_uri)

http://1ab3d401-9a51-4305-8298-c01d4ce44faf.westeurope.azurecontainer.io/score


In [57]:
# Re-attach to the deployed web service by name (this retrieves the service handle, not the model).

from azureml.core import Webservice


service_name = 'diabetes-custom-service2hd'
service = Webservice(ws, service_name)

# Service name and scoring endpoint, tab-separated.
print(service.name, service.scoring_uri, sep='\t')


diabetes-custom-service2hd	http://1ab3d401-9a51-4305-8298-c01d4ce44faf.westeurope.azurecontainer.io/score


## Test the service

In [59]:
import json

# Two sample rows with the ten feature values in the same order as the training columns
# (AGE, SEX, BMI, BP, S1..S6). Only the 'data' field is sent: score.py's run(data)
# consumes nothing else, so the former 'method' entry (and its predict_proba hint)
# had no effect on this regression service.
input_payload = json.dumps({
    'data': [
        [59, 2, 32.1, 101.0, 157, 93.2, 38.0, 4.0, 4.8598, 87],
        [69, 2, 32.1, 101.0, 157, 93.2, 38.0, 4.0, 4.8598, 87]
    ]
})

# Send the payload to the deployed service and show its predictions.
output = service.run(input_payload)

print(output)


[[210.73488558853785], [212.11233401556842]]


In [None]:
# Uncomment to delete the deployed web service once it is no longer needed.
#service.delete()