## Task 1: Understand the training code 

In [1]:
import pandas as pd

train = pd.read_parquet('data/training_data.parquet')
test = pd.read_parquet('data/testing_data.parquet')

In [2]:
# Split train and test data into features X and targets Y.
target_column_name = 'readmit_status'
Y_train = train[target_column_name]
X_train = train.drop([target_column_name], axis = 1)  
Y_test = test[target_column_name]
X_test = test.drop([target_column_name], axis = 1)  

In [3]:
from sklearn.compose import make_column_selector as selector
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
import numpy as np

# Transform string data to numeric one-hot vectors
categorical_selector = selector(dtype_exclude=np.number)
categorical_columns = categorical_selector(X_train)
categorial_encoder = OneHotEncoder(handle_unknown="ignore")

# Standardize numeric data by removing the mean and scaling to unit variance
numerical_selector = selector(dtype_include=np.number)
numerical_columns = numerical_selector(X_train)
numerical_encoder = StandardScaler()

# Create a preprocessor that will preprocess both numeric and categorical data
preprocessor = ColumnTransformer([
('categorical-encoder', categorial_encoder, categorical_columns),
('standard_scaler', numerical_encoder, numerical_columns)])

clf = make_pipeline(preprocessor, LogisticRegression(max_iter=1000))

print("Training model...") 
model = clf.fit(X_train, Y_train)
print("Accuracy score: ", clf.score(X_test,Y_test))

Training model...
Accuracy score:  0.8390804597701149


## Task 2: Create a cloud client

In [4]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)

Found the config file in: /home/bstollnitz/git/config.json


## Task 3: Register the training and test data

In [5]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

train_data_name = 'hospital_train_parquet'
test_data_name = 'hospital_test_parquet'

training_data = Data(
    name=train_data_name,
    path='data/training_data.parquet',
    type=AssetTypes.URI_FILE,
    description='RAI hospital train data'
)
tr_data = ml_client.data.create_or_update(training_data)

test_data = Data(
    name=test_data_name,
    path='data/testing_data.parquet',
    type=AssetTypes.URI_FILE,
    description='RAI hospital test data'
)
ts_data = ml_client.data.create_or_update(test_data)

## Task 4: Create a compute cluster

In [6]:
from azure.ai.ml.entities import AmlCompute

compute_name = 'trainingcompute'

my_compute = AmlCompute(
    name=compute_name,
    size='Standard_DS2_v2',
    min_instances=0,
    max_instances=4,
    idle_time_before_scale_down=3600
)
ml_client.compute.begin_create_or_update(my_compute)

<azure.core.polling._poller.LROPoller at 0x7feef8fe6550>

## Task 5: Create the job

In [7]:
from azure.ai.ml import command, Input, Output

target_column_name = 'readmit_status'

# Create the job.
job = command(
    description='Trains hospital readmission model',
    experiment_name='hospital_readmission',
    compute=compute_name,
    inputs=dict(training_data=Input(type='uri_file', path=f'{train_data_name}@latest'), 
                target_column_name=target_column_name),
    outputs=dict(model_output=Output(type=AssetTypes.MLFLOW_MODEL)),
    code='src/',
    environment='azureml://registries/azureml/environments/AzureML-responsibleai-0.20-ubuntu20.04-py38-cpu/versions/4',
    command='python train.py ' + 
            '--training_data ${{inputs.training_data}} ' +
            '--target_column_name ${{inputs.target_column_name}} ' +
            '--model_output ${{outputs.model_output}}'
)
job = ml_client.jobs.create_or_update(job)
ml_client.jobs.stream(job.name)

RunId: loyal_cord_2ff9qwl0v5
Web View: https://ml.azure.com/runs/loyal_cord_2ff9qwl0v5?wsid=/subscriptions/92a04f75-fa76-4b98-bc2e-ac45670d4044/resourcegroups/rg-bstollnitz/workspaces/mlws-bstollnitz

Execution Summary
RunId: loyal_cord_2ff9qwl0v5
Web View: https://ml.azure.com/runs/loyal_cord_2ff9qwl0v5?wsid=/subscriptions/92a04f75-fa76-4b98-bc2e-ac45670d4044/resourcegroups/rg-bstollnitz/workspaces/mlws-bstollnitz



## Task 6: Register the model

In [8]:
from azure.ai.ml.entities import Model

model_name = 'hospital_readmission_model'

# Register the model.
model_path = f"azureml://jobs/{job.name}/outputs/model_output"
model = Model(name=model_name,
                path=model_path,
                type=AssetTypes.MLFLOW_MODEL)
registered_model = ml_client.models.create_or_update(model)

## Task 7: Deploy the model

In [9]:
from azure.ai.ml.entities import ManagedOnlineDeployment, ManagedOnlineEndpoint 

endpoint_name = 'hospital-readmission-endpoint'
deployment_name = 'blue'

# Create the managed online endpoint.
endpoint = ManagedOnlineEndpoint(
    name=endpoint_name,
    auth_mode='key',
)
registered_endpoint = ml_client.online_endpoints.begin_create_or_update(
    endpoint)

# Create the managed online deployment.
deployment = ManagedOnlineDeployment(name=deployment_name,
                                        endpoint_name=endpoint_name,
                                        model=registered_model,
                                        instance_type='Standard_DS4_v2',
                                        instance_count=1)
ml_client.online_deployments.begin_create_or_update(deployment)

# Set deployment traffic to 100%.
registered_endpoint.traffic = {deployment_name: 100}
ml_client.online_endpoints.begin_create_or_update(
    registered_endpoint)

Check: endpoint hospital-readmission-endpoint exists
data_collector is not a known attribute of class <class 'azure.ai.ml._restclient.v2022_02_01_preview.models._models_py3.ManagedOnlineDeployment'> and will be ignored
Creating/updating online deployment blue 

....................................................................

Done (5m 45s)


ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://hospital-readmission-endpoint.westus2.inference.ml.azure.com/score', 'swagger_uri': 'https://hospital-readmission-endpoint.westus2.inference.ml.azure.com/swagger.json', 'name': 'hospital-readmission-endpoint', 'description': None, 'tags': {}, 'properties': {'azureml.onlineendpointid': '/subscriptions/92a04f75-fa76-4b98-bc2e-ac45670d4044/resourcegroups/rg-bstollnitz/providers/microsoft.machinelearningservices/workspaces/mlws-bstollnitz/onlineendpoints/hospital-readmission-endpoint', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/92a04f75-fa76-4b98-bc2e-ac45670d4044/providers/Microsoft.MachineLearningServices/locations/westus2/mfeOperationsStatus/oe:4b0cadf3-7f66-44b6-ad51-ad8d6f93d8d5:b1d15fdd-ff05-4c13-8a0b-e8c13857a8ad?api-version=2022-02-01-preview'}, 'id': '/subscriptions/92a04f75-fa76-4b98-bc2e-ac45670d4044/resourceGroups/rg-bstollnitz/provide

## Task 8: Invoke the endpoint

In [10]:
test_data_path="test_data.json"

# Invoke the endpoint.
result = ml_client.online_endpoints.invoke(endpoint_name=endpoint_name, request_file=test_data_path)
print(result)

["not readmitted"]
