<img src='https://github.com/jtobelem-simplon/prepa-dp100/blob/master/top.png?raw=true'>

# Configuration (à lancer avant tous les notebooks)

In [36]:
# Python version of the running kernel
import platform

python_version = platform.python_version()
python_version

'3.8.5'

In [37]:
# List the packages installed in the active conda environment
!conda list

# packages in environment at /home/lab/anaconda3/envs/azure:
#
# Name                    Version                   Build  Channel
_libgcc_mutex             0.1                        main  
adal                      1.2.3                    pypi_0    pypi
applicationinsights       0.11.9                   pypi_0    pypi
argon2-cffi               20.1.0           py38h7b6447c_1  
attrs                     20.1.0                     py_0  
azure-common              1.1.25                   pypi_0    pypi
azure-core                1.5.0                    pypi_0    pypi
azure-graphrbac           0.61.1                   pypi_0    pypi
azure-identity            1.2.0                    pypi_0    pypi
azure-mgmt-authorization  0.60.0                   pypi_0    pypi
azure-mgmt-containerregistry 2.8.0                    pypi_0    pypi
azure-mgmt-keyvault       2.2.0                    pypi_0    pypi
azure-mgmt-network        10.2.0                   pypi_0    pypi
azure-mgmt-

In [38]:
# Report the version of the installed Azure ML SDK
import azureml.core

sdk_version = azureml.core.VERSION
print("Ready to use Azure ML", sdk_version)

Ready to use Azure ML 1.8.0


Si le notebook est exécuté en dehors d'Azure, il faut télécharger le fichier config.json depuis le portail https://portal.azure.com/ et le placer dans le répertoire de travail qui contient le notebook.

Si le notebook est exécuté directement depuis le workspace Azure, le fichier de config devrait déjà être là.

In [39]:
# Connect to the Azure ML workspace using its configuration file
from azureml.core import Workspace

ws = Workspace.from_config()

workspace_name = ws.name
print(workspace_name, "loaded")

jt-dp100 loaded


# Création du script

In [40]:
import os
import shutil

# Folder that holds every file submitted with the experiment
training_folder = 'titanic-training'
os.makedirs(training_folder, exist_ok=True)

# Copy the training data into that folder, under the file name the training script reads
source_csv = 'data/titanic/train.csv'
target_csv = os.path.join(training_folder, "titanic.csv")
shutil.copy(source_csv, target_csv)

'titanic-training/titanic.csv'

In [41]:
%%writefile $training_folder/titanic_training.py
from azureml.core import Run
import joblib

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score

# Get the experiment run context
run = Run.get_context()

train_data = pd.read_csv("titanic.csv")

features = ["Age","Pclass","SibSp", "Parch", "Fare","Sex", "Embarked"]

X = pd.get_dummies(train_data[features])
y = train_data["Survived"]

# missing values
imputer = SimpleImputer(strategy='most_frequent')
imputed_X = pd.DataFrame(imputer.fit_transform(X))
imputed_X.columns = X.columns
imputed_X[["Age","Pclass","SibSp", "Parch", "Fare"]] = imputed_X[["Age","Pclass","SibSp", "Parch", "Fare"]].astype('int')


# Break off validation set from training data
X_train, X_valid, y_train, y_valid = train_test_split(imputed_X, y, train_size=0.8, test_size=0.2,
                                                                random_state=0)

# model
model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=1)
model.fit(X_train, y_train)


# score
predictions = model.predict(X_valid)

mae = mean_absolute_error(predictions.astype('int'), y_valid)
acc = accuracy_score(y_valid, predictions.astype('int'))
print("mae : {}, accuracy : {}".format(mae, acc))
run.log('mae', mae)
run.log('acc',acc)

# Save the trained model in the outputs folder
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=model, filename='outputs/titanic_model.pkl')

run.complete()

Overwriting titanic-training/titanic_training.py


# Entrainement d'un modèle sur la machine locale

[tutoriel microsoft : partie 1](https://docs.microsoft.com/en-us/azure/machine-learning/tutorial-train-models-with-aml)

[tutoriel microsoft : partie 2](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-train-ml-models)

- [experiment dans la SDK](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.experiment.experiment?view=azure-ml-py)

- [estimator dans la SDK](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.estimator.estimator?view=azure-ml-py)

NB : en cas d'erreur avec docker, https://askubuntu.com/questions/477551/how-can-i-use-docker-without-sudo

In [42]:
from azureml.train.estimator import Estimator
from azureml.core import Experiment

# Experiment in the workspace under which the run is recorded
experiment = Experiment(workspace=ws, name="titanic-training-experiment")

# Estimator that runs the training script on the local machine,
# with scikit-learn listed as a conda dependency
estimator_settings = dict(
    source_directory=training_folder,
    entry_script='titanic_training.py',
    compute_target='local',
    conda_packages=['scikit-learn'],
)
estimator = Estimator(**estimator_settings)

# Submit the run, then block until it finishes while showing its output
run = experiment.submit(config=estimator)
run.wait_for_completion(show_output=True)

RunId: titanic-training-experiment_1600076042_0f42535c
Web View: https://ml.azure.com/experiments/titanic-training-experiment/runs/titanic-training-experiment_1600076042_0f42535c?wsid=/subscriptions/9114a63e-9210-4e32-97ca-b7d9e8ac403d/resourcegroups/jt-dp100-ressources/workspaces/jt-dp100

Streaming azureml-logs/70_driver_log.txt

[2020-09-14T09:34:06.674081] Entering context manager injector.
[context_manager_injector.py] Command line Options: Namespace(inject=['ProjectPythonPath:context_managers.ProjectPythonPath', 'RunHistory:context_managers.RunHistory', 'TrackUserError:context_managers.TrackUserError'], invocation=['titanic_training.py'])
Starting the daemon thread to refresh tokens in background for process with pid = 10
Entering Run History Context Manager.
Current directory:  /azureml-run
Preparing to call script [ titanic_training.py ] with arguments: []
After variable expansion, calling script [ titanic_training.py ] with arguments: []

Script type = None
mae : 0.17877094972

{'runId': 'titanic-training-experiment_1600076042_0f42535c',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2020-09-14T09:34:05.657792Z',
 'endTimeUtc': '2020-09-14T09:34:26.26466Z',
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': '4c2daaad-58b5-4f17-87a9-932769949d65',
  'azureml.git.repository_uri': 'https://github.com/jtobelem-simplon/prepa-dp100.git',
  'mlflow.source.git.repoURL': 'https://github.com/jtobelem-simplon/prepa-dp100.git',
  'azureml.git.branch': 'master',
  'mlflow.source.git.branch': 'master',
  'azureml.git.commit': '7a6e2d77ee3e6efefe618da25582a7aae76beed8',
  'mlflow.source.git.commit': '7a6e2d77ee3e6efefe618da25582a7aae76beed8',
  'azureml.git.dirty': 'True'},
 'inputDatasets': [],
 'runDefinition': {'script': 'titanic_training.py',
  'scriptType': None,
  'useAbsolutePath': False,
  'arguments': [],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'local',
  'dataRefer

In [43]:
from azureml.widgets import RunDetails

# Show the details widget for the run held in `run`
run_widget = RunDetails(run)
run_widget.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

# Entrainement d'un modèle sur un cluster distant


Provisionnement ou récupération d'un cluster existant appelé "aml-cluster". Voir [tutoriel microsoft : partie 1](https://docs.microsoft.com/en-us/azure/machine-learning/tutorial-train-models-with-aml#train-on-a-remote-cluster)

In [53]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# Cluster settings; each one can be overridden through an environment variable.
# choose a name for your cluster
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "aml-cluster")
# Environment variables are always strings, so the node counts are converted to int
# (the integer defaults pass through int() unchanged).
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")


if compute_name in ws.compute_targets:
    # Reuse the compute target that already exists under this name
    compute_target = ws.compute_targets[compute_name]
    if compute_target and type(compute_target) is AmlCompute:
        print('found compute target. just use it. ' + compute_name)
    else:
        # The name is taken by something that is not an AmlCompute cluster:
        # say so instead of continuing silently with an unexpected target type.
        print('warning: compute target ' + compute_name + ' exists but is not an AmlCompute cluster')
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=compute_min_nodes,
                                                                max_nodes=compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(
        ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

found compute target. just use it. aml-cluster


Même estimator que dans la partie précédente mais avec le cluster dans la compute_target. Il va falloir provisionner le cluster (normalement 0 noeuds actifs initialement), cela peut prendre un peu de temps...

In [None]:
from azureml.train.estimator import Estimator
from azureml.core import Experiment

# Experiment in the workspace under which the run is recorded
experiment = Experiment(workspace=ws, name="titanic-training-experiment")

# Estimator that runs the training script on the compute target held in
# `compute_target`, with scikit-learn listed as a conda dependency
estimator_settings = dict(
    source_directory=training_folder,
    entry_script='titanic_training.py',
    compute_target=compute_target,
    conda_packages=['scikit-learn'],
)
estimator = Estimator(**estimator_settings)

# Submit the run, then block until it finishes while showing its output
run = experiment.submit(config=estimator)
run.wait_for_completion(show_output=True)

RunId: titanic-training-experiment_1600087389_6791a945
Web View: https://ml.azure.com/experiments/titanic-training-experiment/runs/titanic-training-experiment_1600087389_6791a945?wsid=/subscriptions/9114a63e-9210-4e32-97ca-b7d9e8ac403d/resourcegroups/jt-dp100-ressources/workspaces/jt-dp100

Streaming azureml-logs/20_image_build_log.txt

2020/09/14 12:43:16 Downloading source code...
2020/09/14 12:43:17 Finished downloading source code
2020/09/14 12:43:18 Creating Docker network: acb_default_network, driver: 'bridge'
2020/09/14 12:43:18 Successfully set up Docker network: acb_default_network
2020/09/14 12:43:18 Setting up Docker configuration...
2020/09/14 12:43:19 Successfully set up Docker configuration
2020/09/14 12:43:19 Logging in to registry: jtdp1008320276c.azurecr.io
2020/09/14 12:43:20 Successfully logged into jtdp1008320276c.azurecr.io
2020/09/14 12:43:20 Executing step ID: acb_step_0. Timeout(sec): 5400, Working directory: '', Network: 'acb_default_network'
2020/09/14 12:43:2


mkl-2019.4           | 204.1 MB  | ########## | 100% 

threadpoolctl-2.1.0  | 16 KB     |            |   0% 
threadpoolctl-2.1.0  | 16 KB     | #########7 |  97% 
threadpoolctl-2.1.0  | 16 KB     | ########## | 100% 
Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... done

Ran pip subprocess with arguments:
['/azureml-envs/azureml_4b824bcb98517d791c41923f24d65461/bin/python', '-m', 'pip', 'install', '-U', '-r', '/azureml-environment-setup/condaenv.iapkxr7z.requirements.txt']
Pip subprocess output:
Collecting azureml-defaults
  Downloading azureml_defaults-1.13.0-py3-none-any.whl (3.0 kB)
Collecting json-logging-py==0.2
  Downloading json-logging-py-0.2.tar.gz (3.6 kB)
Collecting flask==1.0.3
  Downloading Flask-1.0.3-py2.py3-none-any.whl (92 kB)
Collecting azureml-model-management-sdk==1.0.1b6.post1
  Downloading azureml_model_management_sdk-1.0.1b6.post1-py2.py3-none-any.whl (130 kB)
Collecting azureml-core~=1.13.0

07eae5ad8c0b: Pushed

facf43cddd83: Pushed
63a67842a4c7: Pushed
4ae3adcb66cb: Pushed
aa6685385151: Pushed
0040d8f00d7e: Pushed
7e2b9752143f: Pushed
69a9bdc813b0: Pushed
9e6f810a2aab: Pushed
ae3a847dbd6b: Pushed
c02616795ede: Pushed
latest: digest: sha256:953448631083c160b338741f825c6c76f693e99ef289268b69d0d562f6a740be size: 3883
2020/09/14 12:49:04 Successfully pushed image: jtdp1008320276c.azurecr.io/azureml/azureml_18a2c352852de1e0e7ad8b589dd0927b:latest
2020/09/14 12:49:04 Step ID: acb_step_0 marked as successful (elapsed time in seconds: 213.376113)
2020/09/14 12:49:04 Populating digests for step ID: acb_step_0...
2020/09/14 12:49:05 Successfully populated digests for step ID: acb_step_0
2020/09/14 12:49:05 Step ID: acb_step_1 marked as successful (elapsed time in seconds: 130.055661)
2020/09/14 12:49:05 The following dependencies were found:
2020/09/14 12:49:05 
- image:
    registry: jtdp1008320276c.azurecr.io
    repository: azureml/azureml_18a2c352852de1e0e7ad8b589dd0927b
    t

In [None]:
from azureml.widgets import RunDetails

# Show the details widget for the run held in `run`
run_widget = RunDetails(run)
run_widget.show()

# Enregistrer le modèle

In [46]:
from azureml.core import Model

# Fetch the logged metrics once, so both properties come from the same
# snapshot and the metrics are not requested twice.
run_metrics = run.get_metrics()

# Register the model file produced by the run, tagged with its training context
# and annotated with the metrics the training script logged ('mae' and 'acc')
run.register_model(model_path='outputs/titanic_model.pkl', model_name='titanic_model',
                   tags={'Training context':'Estimator'},
                   properties={'Mean Absolute Error': run_metrics['mae'],
                               'Accuracy': run_metrics['acc']})

Model(workspace=Workspace.create(name='jt-dp100', subscription_id='9114a63e-9210-4e32-97ca-b7d9e8ac403d', resource_group='jt-dp100-ressources'), name=titanic_model, id=titanic_model:1, version=1, tags={'Training context': 'Estimator'}, properties={'Mean Absolute Error': '0.1787709497206704', 'Accuracy': '0.8212290502793296'})

In [47]:
# Print every model registered in the workspace, followed by its tags and properties
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    # Tags first, then properties, one indented "name : value" line each
    for tag_name, tag in model.tags.items():
        print ('\t',tag_name, ':', tag)
    for prop_name, prop in model.properties.items():
        print ('\t',prop_name, ':', prop)
    print('\n')

titanic_model version: 1
	 Training context : Estimator
	 Mean Absolute Error : 0.1787709497206704
	 Accuracy : 0.8212290502793296


diabetes_model version: 6
	 Training context : Inline Training
	 AUC : 0.8726180406985422
	 Accuracy : 0.8856666666666667


diabetes_model version: 5
	 Training context : Inline Training
	 AUC : 0.8751306035126125
	 Accuracy : 0.889


diabetes_model_automl version: 1
	 Training context : Auto ML
	 AUC : 0.9900438004392353
	 Accuracy : 0.9534308211473566


diabetes_model version: 4
	 Training context : Inline Training
	 AUC : 0.8753594706204287
	 Accuracy : 0.8883333333333333


diabetes_model version: 3
	 Training context : Inline Training
	 AUC : 0.8761057764067863
	 Accuracy : 0.889


diabetes_model version: 2
	 Training context : Pipeline


diabetes_model version: 1
	 Training context : Estimator
	 AUC : 0.8483377282451863
	 Accuracy : 0.774




# Ne pas oublier à la fin de l'expérience!!
(si votre travail a utilisé une instance de calcul)

<img src='https://github.com/jtobelem-simplon/prepa-dp100/blob/master/down.png?raw=true'>



In [44]:
# Stop every compute instance of the workspace that is currently running
from azureml.core.compute import ComputeTarget, AmlCompute, ComputeInstance
from azureml.core.compute_target import ComputeTargetException

for compute in ComputeTarget.list(ws):
    # Only compute instances are stopped here; other kinds of compute target are skipped
    if type(compute) is ComputeInstance and compute.get_status().state == 'Running':
        print('try to stop compute', compute.name)
        try:
            compute.stop(show_output=True)
        except ComputeTargetException as stop_error:
            # Report the failure and keep going, so one instance that cannot be
            # stopped does not leave the remaining ones running.
            print('failed to stop compute', compute.name, ':', stop_error)

In [45]:
# Print the status of every compute instance, to check that they are stopped
for compute in ComputeTarget.list(ws):
    # Skip anything that is not a compute instance
    if type(compute) is not ComputeInstance:
        continue
    print(compute.name, compute.get_status())

vm-ds3-v2 {
  "errors": [],
  "creationTime": "2020-05-27T10:12:38.674242+00:00",
  "createdBy": {
    "userId": "c88a830e-65d5-4e6d-a890-6d4497d2e6bd",
    "userOrgId": "0840dabf-0881-4071-9392-f25b2728592f"
  },
  "modifiedTime": "2020-09-10T13:17:50.819127+00:00",
  "state": "Stopped",
  "vmSize": "STANDARD_DS3_V2"
}


# Ressources

[api azure](https://docs.microsoft.com/en-us/python/api/azureml-core)

[parcours d'apprentissage microsoft](https://docs.microsoft.com/fr-fr/learn/paths/build-ai-solutions-with-azure-ml-service/)

[le repository microsoft](https://github.com/MicrosoftDocs/mslearn-aml-labs.git)