# Challenge 2

## Set up the experiment folder

In [1]:
import os, shutil

# Create a folder for the experiment files
training_folder = 'driver_training'
os.makedirs(training_folder, exist_ok=True)

# Copy the data file into the experiment folder
shutil.copy('data/porto_seguro_safe_driver_prediction_input.csv', os.path.join(training_folder, "porto_seguro_safe_driver_prediction_input.csv"))

'driver-training/porto_seguro_safe_driver_prediction_input.csv'

In [46]:
import lightgbm as lgb
"lightgbm=={}".format(lgb.__version__)

'lightgbm==2.3.0'

## train.py
This file defines the key functions required to train the model.  
The file can be invoked with `python train.py` for development purposes.

In [2]:
%%writefile $training_folder/train.py
import os
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
import lightgbm


def split_data(data_df):
    """Split a dataframe into training and validation datasets"""
    
    features = data_df.drop(['target', 'id'], axis = 1)
    labels = np.array(data_df['target'])
    features_train, features_valid, labels_train, labels_valid = train_test_split(features, labels, test_size=0.2, random_state=0)

    train_data = lightgbm.Dataset(features_train, label=labels_train)
    valid_data = lightgbm.Dataset(features_valid, label=labels_valid, free_raw_data=False)
    
    return (train_data, valid_data)


def train_model(data, parameters):
    """Train a model with the given datasets and parameters"""
    # The object returned by split_data is a tuple.
    # Access train_data with data[0] and valid_data with data[1]
    
    model = lightgbm.train(parameters,
                           data[0],
                           valid_sets=data[1],
                           num_boost_round=500,
                           early_stopping_rounds=20)
    
    return model


def get_model_metrics(model, data):
    """Construct a dictionary of metrics for the model"""
    
    valid_data = data[1]
    predictions = model.predict(valid_data.data)
    fpr, tpr, thresholds = metrics.roc_curve(valid_data.label, predictions)
    model_metrics = {"auc": (metrics.auc(fpr, tpr))}
    
    return model_metrics


def main():
    """This method invokes the training functions for development purposes"""
    
    # Read data from a file
    data_df = pd.read_csv('porto_seguro_safe_driver_prediction_input.csv')

    # Hard code the parameters for training the model
    parameters = {
        'learning_rate': 0.02,
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'metric': 'auc',
        'sub_feature': 0.7,
        'num_leaves': 60,
        'min_data': 100,
        'min_hessian': 1,
        'verbose': 2
    }

    # Call the functions defined in this file
    data = split_data(data_df)
    model = train_model(data, parameters)
    model_metrics = get_model_metrics(model, data)
    
    # Print the resulting metrics for the model
    print(model_metrics)
    
if __name__ == '__main__':
    main()


Overwriting driver-training/train.py


## parameters.json
This file will specify the parameters used to train the model.

In [3]:
%%writefile $training_folder/parameters.json
{
    "training":
    {
        "learning_rate": 0.02,
        "boosting_type": "gbdt",
        "objective": "binary",
        "metric": "auc",
        "sub_feature": 0.7,
        "num_leaves": 60,
        "min_data": 100,
        "min_hessian": 1,
        "verbose": 0
    }
}


Overwriting driver-training/parameters.json


## driver_training.py
This file will be the entry script when running an Azure ML context.  
It calls the functions defined in train.py for data preparation and training, but reads parameters from a file, and logs output to the Azure ML context.  
The file can be invoked with `python driver_training.py` for development purposes.

In [4]:
%%writefile $training_folder/driver_training.py
# Import libraries
import argparse
from azureml.core import Run
import joblib
import json
import os
import pandas as pd
import shutil

# Import functions from train.py
from train import split_data, train_model, get_model_metrics

# Get the output folder for the model from the '--output_folder' parameter
parser = argparse.ArgumentParser()
parser.add_argument('--output_folder', type=str, dest='output_folder', default="outputs")
args = parser.parse_args()
output_folder = args.output_folder

# Get the experiment run context
run = Run.get_context()

# load the safe driver prediction dataset
train_df = pd.read_csv('porto_seguro_safe_driver_prediction_input.csv')

# Load the parameters for training the model from the file
with open("parameters.json") as f:
    pars = json.load(f)
    parameters = pars["training"]

# Log each of the parameters to the run
for param_name, param_value in parameters.items():
    run.log(param_name, param_value)
    
# Use the functions imported from train.py to prepare data, train the model, and calculate the metrics
data = split_data(train_df)
model = train_model(data, parameters)
model_metrics = get_model_metrics(model, data)
print(model_metrics)
run.log("model AUC", model_metrics)

# Save the trained model to the output folder
os.makedirs(output_folder, exist_ok=True)
output_path = output_folder + "/porto_seguro_safe_driver_model.pkl"
joblib.dump(value=model, filename=output_path)

run.complete()

Overwriting driver-training/driver_training.py


In [35]:
output_folder = "outputs"
os.makedirs(output_folder, exist_ok=True)
output_path = output_folder + "/porto_seguro_safe_driver_model.pkl"
print(output_path)

outputs/porto_seguro_safe_driver_model.pkl


In [5]:
import azureml.core
from azureml.core import Workspace, Experiment

# Load the workspace
ws = Workspace.from_config()
exp = Experiment(ws, 'driver-training2')

## Use an Estimator to Run the Script as an Experiment

See [this tutorial](https://github.com/MicrosoftDocs/mslearn-aml-labs/blob/master/02-Training_Models.ipynb) for a starting point

Use the pip, scikit-learn and lightgbm conda packages

In [7]:
## Create compute cluster

from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# choose a name for your cluster
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpu-cluster")
compute_min_nodes = os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0)
compute_max_nodes = os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4)

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")


if compute_name in ws.compute_targets:
    compute_target = ws.compute_targets[compute_name]
    if compute_target and type(compute_target) is AmlCompute:
        print('Found existing compute target. Using this one. ' + compute_name)
else:
    print('Creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=compute_min_nodes,
                                                                max_nodes=compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(
        ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

found compute target. just use it. cpu-cluster


In [8]:
##Create an environment

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

conda = CondaDependencies()

# add conda packages
conda.add_conda_package('python=3.7')

# add pip packages
conda.add_pip_package('scikit-learn')
conda.add_pip_package('lightgbm')
conda.add_pip_package('pandas')
# create environment
env = Environment('myenv')
env.python.conda_dependencies = conda
env.register(ws)

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210806.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "myenv",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "anaconda",
                "conda-forge"
        

In [17]:
from azureml.core import ScriptRunConfig, Experiment

# create or load an experiment
workspace = ws
experiment = Experiment(workspace, 'drivertraining2')
# create or retrieve a compute target
cluster = workspace.compute_targets['cpu-cluster']
# create or retrieve an environment
env = Environment.get(ws, name='myenv')
# define arguments
arg="outputs"
# configure and submit your training run
config = ScriptRunConfig(source_directory='driver-training',
                        script='driver_training.py',
                        arguments=['--output_folder', arg],
                        compute_target=cluster,
                        environment=env)

run = experiment.submit(config)

Submitting /mnt/batch/tasks/shared/LS_root/mounts/clusters/tg-dev-box/code/Users/tgokal/devops-for-data-science-artifacts/Challenge02/driver-training directory for run. The size of the directory >= 25 MB, so it can take a few minutes.


In [51]:
# Return model metrics
metrics = run.get_metrics()
for metric_name, metric_value in metrics.items():
    print(metric_name, '::', metric_value)

learning_rate :: 0.02
boosting_type :: gbdt
objective :: binary
metric :: auc
sub_feature :: 0.7
num_leaves :: 60
min_data :: 100
min_hessian :: 1
verbose :: 0
model AUC :: {'auc': 0.6388635476931616}


In [47]:
# Complete experiment run
run.complete()

# Challenge 3

## Deploy to ACI and test model endpoint

In [79]:
model = ws.models['driver-training']
print(model.name, 'version', model.version)

driver-training version 1


In [80]:
import os

folder_name = 'driver-service'

# Create a folder for the web service files
experiment_folder = './' + folder_name
os.makedirs(experiment_folder, exist_ok=True)

print(folder_name, 'folder created.')

# Set path for scoring script
script_file = os.path.join(experiment_folder,"score_driver.py")

driver-service folder created.


## Write a scoring script

In [128]:
%%writefile $script_file
import json
import joblib
import numpy as np
from azureml.core.model import Model

# Called when the service is loaded
def init():
    global model
    # Get the path to the deployed model file and load it
    model_path = Model.get_model_path('driver-training')
    model = joblib.load(model_path)

# Called when a request is received
def run(raw_data):
    # Get the input data as a numpy array
    data = np.array(json.loads(raw_data)['data'])
    # Get a prediction from the model
    predictions = model.predict(data)
    # Get the corresponding classname for each prediction (0 or 1)
    classnames = ['no claim', 'claim']
    predicted_classes = []
    probabilities = [prediction for prediction in predictions]
    for probability in probabilities:
        if probability < 0.5:
            predicted_classes.append(classnames[0])
        else:
            predicted_classes.append(classnames[1])

    # Return the predictions
    return (probabilities, predicted_classes)

Overwriting ./driver-service/score_driver.py


## Define container environment

In [129]:
from azureml.core.conda_dependencies import CondaDependencies 

# Add the dependencies for our model (AzureML defaults is already included)
myenv = CondaDependencies()
myenv.add_conda_package('scikit-learn')
myenv.add_conda_package('lightgbm')
myenv.add_conda_package('pandas')

# Save the environment config as a .yml file
env_file = os.path.join(experiment_folder,"driver_env.yml")
with open(env_file,"w") as f:
    f.write(myenv.serialize_to_string())
print("Saved dependency info in", env_file)

# Print the .yml file
with open(env_file,"r") as f:
    print(f.read())

Saved dependency info in ./driver-service/driver_env.yml
# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

- pip:
    # Required packages for AzureML execution, history, and data preparation.
  - azureml-defaults

- scikit-learn
- lightgbm
- pandas
channels:
- anaconda
- conda-forge



## Configure Inference Config and deploy webservice to ACI

In [130]:
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig

# Configure the scoring environment
inference_config = InferenceConfig(runtime= "python",
                                   entry_script=script_file,
                                   conda_file=env_file)

deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)

service_name = "driver-service7"

service = Model.deploy(ws, service_name, [model], inference_config, deployment_config)

service.wait_for_deployment(True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-10-23 12:15:37+00:00 Creating Container Registry if not exists.
2021-10-23 12:15:38+00:00 Registering the environment.
2021-10-23 12:15:40+00:00 Use the existing image.
2021-10-23 12:15:40+00:00 Generating deployment configuration.
2021-10-23 12:15:41+00:00 Submitting deployment to compute.
2021-10-23 12:15:49+00:00 Checking the status of deployment driver-service7..
2021-10-23 12:17:32+00:00 Checking the status of inference endpoint driver-service7.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [131]:
print(service.state)
print(service.get_logs())

Healthy
2021-10-23T12:17:24,219281900+00:00 - gunicorn/run 
Dynamic Python package installation is disabled.
Starting HTTP server
2021-10-23T12:17:24,222092900+00:00 - iot-server/run 
2021-10-23T12:17:24,250303100+00:00 - rsyslog/run 
2021-10-23T12:17:24,258165100+00:00 - nginx/run 
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2021-10-23T12:17:24,590908400+00:00 - iot-server/finish 1 0
2021-10-23T12:17:24,593299700+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 20.1.0
Listening at: http://127.0.0.1:31311 (74)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 100
SPARK_HOME not set. Skipping PySpark Initialization.
Initializing logger
2021-10-23 12:17:26,951 | root | INFO | Starting up app insights client
logging socket was found. logging is available.
logging socket was found. logging is available.
2021-10-23 12:17:26,956 | root | INFO | Starting up request id generator
2021-10-23 12:17:26,956 | root | IN

In [132]:
for webservice_name in ws.webservices:
    print(webservice_name)

driver-service7
driver-service6
driver-service5
driver-service4
driver-service3
driver-service2
driver-service1
driver-service


## Use Webservice for 1 driver

In [133]:
import json

x_new = [[0,1,8,1,0,0,1,0,0,0,0,0,0,0,12,1,0,0,0.5,0.3,0.610327781,7,1,-1,0,-1,1,1,1,2,1,65,1,0.316227766,0.669556409,0.352136337,3.464101615,0.1,0.8,0.6,1,1,6,3,6,2,9,1,1,1,12,0,1,1,0,0,1]]
print ('Patient: {}'.format(x_new[0]))

# Convert the array to a serializable list in a JSON document
input_json = json.dumps({"data": x_new})

# Call the web service, passing the input data (the web service will also accept the data in binary format)
predictions = service.run(input_data = input_json)

print("Probability of claim is:", predictions[0][0])
print("Predicted class is: ", predictions[1])

Patient: [0, 1, 8, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 12, 1, 0, 0, 0.5, 0.3, 0.610327781, 7, 1, -1, 0, -1, 1, 1, 1, 2, 1, 65, 1, 0.316227766, 0.669556409, 0.352136337, 3.464101615, 0.1, 0.8, 0.6, 1, 1, 6, 3, 6, 2, 9, 1, 1, 1, 12, 0, 1, 1, 0, 0, 1]
Probability of claim is: 0.029411417013210858
Predicted class is:  ['no claim']


## Use Webservice for multiple drivers

In [150]:
import json

# This time our input is an array of two feature arrays
x_new = [[0,1,8,1,0,0,1,0,0,0,0,0,0,0,12,1,0,0,0.5,0.3,0.610327781,7,1,-1,0,-1,1,1,1,2,1,65,1,0.316227766,0.669556409,0.352136337,3.464101615,0.1,0.8,0.6,1,1,6,3,6,2,9,1,1,1,12,0,1,1,0,0,1],
         [4,2,5,1,0,0,0,0,1,0,0,0,0,0,5,1,0,0,0.9,0.5,0.771362431,4,1,-1,0,0,11,1,1,0,1,103,1,0.316227766,0.60632002,0.358329457,2.828427125,0.4,0.5,0.4,3,3,8,4,10,2,7,2,0,3,10,0,0,1,1,0,1]]
# Convert the array or arrays to a serializable list in a JSON document
input_json = json.dumps({"data": x_new})

# Call the web service, passing the input data
predictions = service.run(input_data = input_json)

# Get the predicted classes.
for i in range(len(predictions)):
    print("Driver {} submits {} with a probability of {}".format(i+1, predictions[1][i], round(predictions[0][i], 4)))

Driver 1 submits no claim with a probability of 0.0294
Driver 2 submits no claim with a probability of 0.0259


## Return scoring endpoint

In [151]:
endpoint = service.scoring_uri
print(endpoint)

http://054f4f39-3e63-4213-80d6-97f63469bf5f.australiaeast.azurecontainer.io/score


## Submit POST Request to webservice

In [156]:
import requests
import json

x_new = [[0,1,8,1,0,0,1,0,0,0,0,0,0,0,12,1,0,0,0.5,0.3,0.610327781,7,1,-1,0,-1,1,1,1,2,1,65,1,0.316227766,0.669556409,0.352136337,3.464101615,0.1,0.8,0.6,1,1,6,3,6,2,9,1,1,1,12,0,1,1,0,0,1],[4,2,5,1,0,0,0,0,1,0,0,0,0,0,5,1,0,0,0.9,0.5,0.771362431,4,1,-1,0,0,11,1,1,0,1,103,1,0.316227766,0.60632002,0.358329457,2.828427125,0.4,0.5,0.4,3,3,8,4,10,2,7,2,0,3,10,0,0,1,1,0,1]]

# Convert the array to a serializable list in a JSON document
input_json = json.dumps({"data": x_new})

# Set the content type
headers = { 'Content-Type':'application/json' }

predictions = (requests.post(endpoint, input_json, headers = headers)).json()

# Get the predicted classes.
for i in range(len(predictions)):
    print("Driver {} submits {} with a probability of {}".format(i+1, predictions[1][i], round(predictions[0][i], 4)))

Driver 1 submits no claim with a probability of 0.0294
Driver 2 submits no claim with a probability of 0.0259
