# Connect to Your Workspace

In [14]:
import azureml.core
from azureml.core import Workspace

# Load the workspace described by the local Azure ML config file.
ws = Workspace.from_config()

# Report the SDK version in use and the name of the connected workspace.
sdk_version = azureml.core.VERSION
ready_message = 'Ready to use Azure ML -{} to work with -{}'.format(sdk_version, ws.name)
print(ready_message)

Ready to use Azure ML -1.6.0 to work with -dp101-workspace


# Create a Training Script
You're going to use a Python script to train a machine learning model based on the diabetes data, so let's start by creating a folder for the script and data files.

In [15]:
import os, shutil

# Folder that holds everything the experiment needs (script + data).
training_folder = 'diabetes-training'
os.makedirs(training_folder, exist_ok=True)

# Place a copy of the data file next to the training script.
# The copy call is the cell's last expression, so the destination path is displayed.
source_csv = '../mslearn-aml-labs/data/diabetes.csv'
target_csv = os.path.join(training_folder, "diabetes.csv")
shutil.copy(source_csv, target_csv)


'diabetes-training/diabetes.csv'

#### Now you're ready to create the training script and save it in the folder.

In [16]:
%%writefile $training_folder/diabetes_training.py
#import libraries
from azureml.core import Run
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

#get the experiment run context
run = Run.get_context()

#load diabetes dataset
print('loading data')
diabetes = pd.read_csv('diabetes.csv')

#print(diabetes.head(1))
#seperates features and labels
X, y = diabetes[['Pregnancies', 'PlasmaGlucose', 'DiastolicBloodPressure', 'TricepsThickness', 
                 'SerumInsulin', 'BMI','DiabetesPedigree','Age']].values, diabetes['Diabetic'].values

#Split data into training set and test set
X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=0.30, random_state=0) 

#Set regularization hyperparameter
reg=0.01

#Train a logistic regression model
print('Training a logistic regression model with regularization rate of', reg)
run.log("Regularization Rate:", np.float(reg))
model = LogisticRegression(C=1/reg, solver="liblinear").fit(X_train, y_train)

# calculate accuracy
y_hat = model.predict(X_test)
acc = np.average(y_hat==y_test)
print('Accuracy:', acc)
run.log('Accuracy', np.float(acc))

#caculate AUC
y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test, y_scores[:,1])
print('AUC: ' + str(auc))
run.log('AUC', np.float(auc))

# Save the trained model in the outputs folder
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=model, filename='outputs/diabetes_model.pkl')

run.complete()

Overwriting diabetes-training/diabetes_training.py


# Use an Estimator to Run the Script as an Experiment
In this case, we'll use a **generic Estimator** object to run the training experiment. Note that the default environment for this estimator does not include the scikit-learn package, so you need to explicitly add that to the configuration. The conda environment is built on-demand the first time the estimator is used, and cached for future runs that use the same configuration; so the first run will take a little longer. On subsequent runs, the cached environment can be re-used so they'll complete more quickly.

In [20]:
from azureml.train.estimator import Estimator
from azureml.core import Experiment

# Estimator: runs diabetes_training.py from the training folder on the
# 'local' compute target, with scikit-learn added as a conda package.
estimator = Estimator(
    source_directory=training_folder,
    entry_script='diabetes_training.py',
    compute_target='local',
    conda_packages=['scikit-learn'],
)

# Experiment in the workspace that the run is submitted to.
experiment_name = 'diabetes_training'
experiment = Experiment(workspace=ws, name=experiment_name)

# Submit the estimator and block until the run finishes, streaming its logs.
# The wait call stays last so its returned run details are displayed.
run = experiment.submit(config=estimator)
run.wait_for_completion(show_output=True)


RunId: diabetes_training_1591110535_e0f37bdb
Web View: https://ml.azure.com/experiments/diabetes_training/runs/diabetes_training_1591110535_e0f37bdb?wsid=/subscriptions/661de708-75b1-41ed-806d-85f9bef3c27d/resourcegroups/dp101-resources/workspaces/dp101-workspace

Streaming azureml-logs/60_control_log.txt

Streaming log file azureml-logs/60_control_log.txt
Starting the daemon thread to refresh tokens in background for process with pid = 47807
Running: ['/bin/bash', '/tmp/azureml_runs/diabetes_training_1591110535_e0f37bdb/azureml-environment-setup/docker_env_checker.sh']

Materialized image not found on target: azureml/azureml_c86bf9d3c3b717eea982d145df7cbfc0


Logging experiment preparation status in history service.
Running: ['/bin/bash', '/tmp/azureml_runs/diabetes_training_1591110535_e0f37bdb/azureml-environment-setup/docker_env_builder.sh']
Running: ['docker', 'build', '-f', 'azureml-environment-setup/Dockerfile', '-t', 'azureml/azureml_c86bf9d3c3b717eea982d145df7cbfc0', '.']
Sendi

Collecting cloudpickle>=1.1.0
  Downloading cloudpickle-1.4.1-py3-none-any.whl (26 kB)
Collecting azure-identity<1.3.0,>=1.2.0
  Downloading azure_identity-1.2.0-py2.py3-none-any.whl (58 kB)
Collecting fusepy>=3.0.1; extra == "fuse"
  Downloading fusepy-3.0.1.tar.gz (11 kB)
Collecting liac-arff>=2.1.1
  Downloading liac-arff-2.4.0.tar.gz (15 kB)
Collecting pandas>=0.20.2
  Downloading pandas-1.0.4-cp36-cp36m-manylinux1_x86_64.whl (10.1 MB)
Collecting dill>=0.2.7.1
  Downloading dill-0.3.1.1.tar.gz (151 kB)
Collecting backports.weakref
  Downloading backports.weakref-1.0.post1-py2.py3-none-any.whl (5.2 kB)
Collecting importlib-metadata
  Downloading importlib_metadata-1.6.0-py2.py3-none-any.whl (30 kB)
Collecting pyasn1>=0.1.1
  Downloading pyasn1-0.4.8-py2.py3-none-any.whl (77 kB)
Collecting websocket-client>=0.32.0
  Downloading websocket_client-0.57.0-py2.py3-none-any.whl (200 kB)
Collecting isodate>=0.6.0
  Downloading isodate-0.6.0-py2.py3-none-any.whl (45 kB)
Collecting requests-o

Removing intermediate container 8ef537740b86
 ---> 70d06fab3f3e
Successfully built 70d06fab3f3e
Successfully tagged azureml/azureml_c86bf9d3c3b717eea982d145df7cbfc0:latest




Logging experiment running status in history service.
Running: ['docker', 'run', '--name', 'diabetes_training_1591110535_e0f37bdb', '--rm', '-v', '/tmp/azureml_runs/diabetes_training_1591110535_e0f37bdb:/azureml-run', '--shm-size', '2g', '-e', 'EXAMPLE_ENV_VAR=EXAMPLE_VALUE', '-e', 'AZUREML_CONTEXT_MANAGER_TRACKUSERERROR=eyJTa2lwSGlzdG9yeUltcG9ydENoZWNrIjoiRmFsc2UifQ==', '-e', 'AZUREML_CONTEXT_MANAGER_RUNHISTORY=eyJPdXRwdXRDb2xsZWN0aW9uIjp0cnVlLCJEaXJlY3Rvcmllc1RvV2F0Y2giOlsibG9ncyJdLCJzbmFwc2hvdFByb2plY3QiOnRydWV9', '-e', 'AZUREML_CONTEXT_MANAGER_PROJECTPYTHONPATH=bnVsbA==', '-e', 'AZUREML_RUN_TOKEN_EXPIRY=1592924943', '-e', 'AZUREML_RUN_TOKEN=eyJhbGciOiJSUzI1NiIsImtpZCI6IkZDMUYyMjE5MzQ4MTA3MDcyQkE5N0M2MTUzNjlENTc0QkFDQjAzMEYiLCJ0eXAiOiJKV1QifQ.eyJyb2xlIjoiQ29udHJpYnV0b3IiLCJzY29wZSI6Ii9zdWJzY3JpcHRpb25zLzY2MWRl

{'runId': 'diabetes_training_1591110535_e0f37bdb',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2020-06-02T15:15:07.501147Z',
 'endTimeUtc': '2020-06-02T15:15:22.941968Z',
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': '99953fa7-5653-425b-9195-cdc46643dabd'},
 'inputDatasets': [],
 'runDefinition': {'script': 'diabetes_training.py',
  'useAbsolutePath': False,
  'arguments': [],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'local',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'jobName': None,
  'maxRunDurationSeconds': None,
  'nodeCount': 1,
  'environment': {'name': 'Experiment diabetes_training Environment',
   'version': 'Autosave_2020-06-02T15:09:01Z_50a135ef',
   'python': {'interpreterPath': 'python',
    'userManagedDependencies': False,
    'condaDependencies': {'channels': ['anaconda', 'conda-forge'],
     'dependencies': ['python=3.6.2',
      {'pip': ['azurem

In [21]:
from azureml.widgets import RunDetails

# Display the run widget for the submitted run.
RunDetails(run).show()

# The metrics and outputs can also be read from the Run object.
# Print every logged metric, each followed by a blank-line separator.
metrics = run.get_metrics()
for metric_name, metric_value in metrics.items():
    print(metric_name, metric_value)
    print('\n')

# Print the name of every file associated with the run.
for file_name in run.get_file_names():
    print(file_name)

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

Regularization Rate: 0.01


Accuracy 0.774


AUC 0.8483377282451863


azureml-logs/60_control_log.txt
azureml-logs/70_driver_log.txt
logs/azureml/8_azureml.log
outputs/diabetes_model.pkl


# Register the Trained Model
Note that the outputs of the experiment include the trained model file (**diabetes_model.pkl**). You can register this model in your Azure Machine Learning workspace, making it possible to track model versions and retrieve them later.

In [22]:
from azureml.core import Model

# Fetch the logged metrics once and reuse them, instead of calling
# run.get_metrics() separately for each property.
run_metrics = run.get_metrics()

# Register the model file produced by the run, tagging it with the training
# context and recording the run's AUC and accuracy as properties.
run.register_model(model_path='outputs/diabetes_model.pkl',
                   model_name='diabetes_model',
                   tags={'Training context': 'Estimator'},
                   properties={'AUC': run_metrics['AUC'], 'Accuracy': run_metrics['Accuracy']})

# List registered models with their version, tags and properties
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name, tag in model.tags.items():
        print('\t', tag_name, ':', tag)
    for prop_name, prop in model.properties.items():
        print('\t', prop_name, ':', prop)
    print('\n')

diabetes_model version: 1
	 Training context : Estimator
	 AUC : 0.8483377282451863
	 Accuracy : 0.774


