In [None]:
#Connect to the Azure ML workspace
import azureml.core
from azureml.core import Workspace

#Load the workspace from the saved config file.
#Note: the class is `Workspace` (capital W); the lowercase name `workspace` is a
#submodule of azureml.core and has no from_config() of its own.
ws=Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

In [None]:
#Stage the files needed by the training experiment
import os
import shutil

#The experiment folder holds the training script and its data
training_folder='diabetes-training'
os.makedirs(training_folder,exist_ok=True)

#Place a copy of the data file inside the experiment folder
data_destination=os.path.join(training_folder,'diabetes.csv')
shutil.copy('data/diabetes.csv',data_destination)

In [None]:
%%writefile $training_folder/diabetes_training.py
from azureml.core import Run
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

#Get experiment run context
run=Run.get.context()

#Load diabetes data
print('Loading data...')
diabetes=pd.read_csv('diabetes.csv')

#Seperate features and labels
X, y = diabetes[['Pregnancies','PlasmaGlucose','DiastolicBloodPressure','TricepsThickness','SerumInsulin','BMI','DiabetesPedigree','Age']].values, diabetes['Diabetic'].values

#Split train and test data
X_train, X_test, y_train,y_test=train_test_split(X,y, test_size=0.30, random_state=0)

#Set regularization hyperparamter
reg=0.01

#Train a logistic regression model
print('Traning a logistic regression model with a regularization parameter', reg)
run.log('Regularization Rate', np.float(reg))
model=LogisticRegression(C=1/reg,solver=liblinear).fit(X_train,y_train)

#Calculate accuracy
y_hat=model.predict(X_test)
acc=np.average(y_hat==y_test)
print('Accuracy:', acc)
run.log('Accuracy',np.float(acc))

#Calculate AUC
y_scores=model.predict_proba(X_test)
auc = roc_auc_score(y_test,y_scores[:,1])
print('AUC: ' + str(auc))
run.log('AUC', np.float(auc))

#Save the trained model in an output folder
os.makedirs('Outputs',exist_ok=True)
joblib.dump(value=model, filename='outputs/diabetes_model.pkl')

run.complete()

In [None]:
#Use an estimator instance to run the script as an experiment
from azureml.train.estimator import Estimator
from azureml.core import Experiment
#RunDetails is the notebook widget used below to monitor the run
from azureml.widgets import RunDetails

#Create an estimator that runs the training script locally
estimator=Estimator(source_directory=training_folder,
                    entry_script='diabetes_training.py',
                    compute_target='local',
                    conda_packages=['scikit-learn'])

#Create an experiment
experiment_name='diabetes_training'
experiment=Experiment(workspace=ws,name=experiment_name)

#Run the experiment
run=experiment.submit(config=estimator)

#Show the run details while running, then block until the run has finished
RunDetails(run).show()
run.wait_for_completion()

#Get the logged metrics from run 
metrics=run.get_metrics()

In [None]:
#Register the model as a new version in the workspace
from azureml.core import Model

#Fetch the logged metrics once and reuse them, instead of querying the run
#separately for every property value.
run_metrics=run.get_metrics()

#Register the model file produced by the run, recording its metrics as properties
run.register_model(model_path='outputs/diabetes_model.pkl', model_name='diabetes_model',
                   tags={'Training context':'Parameterized SKLearn Estimator'},
                   properties={'AUC': run_metrics['AUC'], 'Accuracy': run_metrics['Accuracy']})

#List registered models with their tags and properties
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name, tag in model.tags.items():
        print ('\t',tag_name, ':', tag)
    for prop_name, prop in model.properties.items():
        print ('\t',prop_name, ':', prop)
    print('\n')

#Clean up
#Shut down the compute instance