## Import the libraries, create an AzureML lazy client

In [1]:
import time
import pandas as pd
from azure.identity import ClientSecretCredential
from azure.ai.ml import MLClient
from azure.ai.ml import command
from azure.ai.ml.entities import Data
from azure.ai.ml import Input
from azure.ai.ml.constants import AssetTypes

import os

# Service-principal credentials are read from the environment so that no
# secret is stored in the notebook or its outputs. A missing variable raises
# KeyError right here instead of failing later with an opaque auth error.
# Required variables: AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
credential = ClientSecretCredential(
    tenant_id=os.environ["AZURE_TENANT_ID"],
    client_id=os.environ["AZURE_CLIENT_ID"],
    client_secret=os.environ["AZURE_CLIENT_SECRET"],
)

# Get a handle to the workspace
ml_client = MLClient(
    credential=credential,
    subscription_id="ed30c0d8-b03a-454d-96a8-6fe54400a879",
    resource_group_name="dev-iot-group",
    workspace_name="roymlws",
)
print(ml_client)

MLClient(credential=<azure.identity._credentials.client_secret.ClientSecretCredential object at 0x7fcf2e642bc0>,
         subscription_id=ed30c0d8-b03a-454d-96a8-6fe54400a879,
         resource_group_name=dev-iot-group,
         workspace_name=roymlws)


In [2]:
import os

# Local folder that holds the training code; it is created on first use and
# left untouched when it already exists.
train_src_dir = "./src"
os.makedirs(name=train_src_dir, exist_ok=True)

## Provide the scalable compute cluster

In [3]:
from azure.ai.ml.entities import AmlCompute

# Name assigned to the compute cluster
cpu_compute_target = "roycpucluster"

try:
    # Reuse the cluster when it already exists in the workspace.
    cpu_cluster = ml_client.compute.get(cpu_compute_target)
    print(
        f"You already have a cluster named {cpu_compute_target}, we'll reuse it as is."
    )

except Exception as e:
    # The lookup failed, so provision the cluster that later cells rely on.
    # The original error is printed to help diagnose non-"not found" failures
    # (for example authentication problems), which will surface again below.
    print("Creating a new cpu compute target...")
    print(e)

    # NOTE(review): VM size and scaling limits follow the Azure ML tutorial
    # defaults; adjust them to the quota available in the subscription.
    cpu_cluster = AmlCompute(
        name=cpu_compute_target,
        type="amlcompute",
        size="STANDARD_DS3_V2",
        min_instances=0,  # scale to zero when idle
        max_instances=4,
        idle_time_before_scale_down=180,  # seconds
        tier="Dedicated",
    )
    # Block until provisioning has finished so the cluster is usable afterwards.
    cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster).result()
    print(
        f"AMLCompute with name {cpu_cluster.name} is created, the compute size is {cpu_cluster.size}"
    )


You already have a cluster named roycpucluster, we'll reuse it as is.


## Create a training script python file. This will be uploaded to compute for execution.

In [4]:
%%writefile {train_src_dir}/main.py
import os
import argparse
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

def main():
    """Train a HistGradientBoostingRegressor on the stock data and register it.

    Command-line arguments:
        --data: path to the input parquet data (required).
        --test_train_ratio: fraction of the rows held out for testing
            (default 0.25).
        --learning_rate: learning rate passed to the regressor (default 0.1).
        --random_state: optional integer seed used for both the train/test
            split and the regressor; the default (None) keeps both unseeded.
        --registered_model_name: name under which the model is registered and
            the folder into which it is saved (required).

    The whole run is tracked with MLflow: sample/feature counts, the
    autologged training information and the test-set MAE/MSE.
    """

    # input and output arguments
    parser = argparse.ArgumentParser()
    # Both --data and --registered_model_name are used unconditionally below,
    # so a missing value is rejected up front with a clear argparse error.
    parser.add_argument("--data", type=str, required=True, help="path to input data")
    parser.add_argument("--test_train_ratio", type=float, required=False, default=0.25)
    parser.add_argument("--learning_rate", required=False, default=0.1, type=float)
    parser.add_argument(
        "--random_state",
        required=False,
        default=None,
        type=int,
        help="optional seed for a reproducible split and model",
    )
    parser.add_argument(
        "--registered_model_name", type=str, required=True, help="model name"
    )
    args = parser.parse_args()

    # Start Logging. The context manager ends the run even when training
    # raises, so a failed job does not leave the run open.
    with mlflow.start_run():

        # enable autologging
        mlflow.sklearn.autolog()

        ###################
        #<prepare the data>
        ###################
        print(" ".join(f"{k}={v}" for k, v in vars(args).items()))

        print("input data:", args.data)

        stocks_df = pd.read_parquet(args.data)

        # Select features and target
        features = ['rolling_average', 'rolling_median']
        target = 'Volume'

        # Keep only the columns the model uses; raises KeyError if one is missing.
        stocks_df = stocks_df[features + [target]]

        mlflow.log_metric("num_samples", stocks_df.shape[0])
        mlflow.log_metric("num_features", stocks_df.shape[1] - 1)

        train_df, test_df = train_test_split(
            stocks_df,
            test_size=args.test_train_ratio,
            random_state=args.random_state,
        )
        ####################
        #</prepare the data>
        ####################

        ##################
        #<train the model>
        ##################

        # Extracting the label column (pop also removes it from the features)
        y_train = train_df.pop(target)

        # convert the dataframe values to array
        X_train = train_df.values

        # Extracting the label column
        y_test = test_df.pop(target)

        # convert the dataframe values to array
        X_test = test_df.values

        print(f"Training with data of shape {X_train.shape}")

        model = HistGradientBoostingRegressor(
            learning_rate=args.learning_rate,
            verbose=1,
            random_state=args.random_state,
        )
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)

        # Calculate the Mean Absolute Error and Mean Squared Error on the
        # held-out rows and record them with the run, not only on stdout.
        mae = mean_absolute_error(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        print("mae=",mae,"mse=",mse)
        mlflow.log_metric("test_mae", mae)
        mlflow.log_metric("test_mse", mse)
        ###################
        #</train the model>
        ###################

        ##########################
        #<save and register model>
        ##########################
        # Registering the model to the workspace
        print("Registering the model via MLFlow")
        mlflow.sklearn.log_model(
            sk_model=model,
            registered_model_name=args.registered_model_name,
            artifact_path=args.registered_model_name,
        )

        # Saving the model to a file
        mlflow.sklearn.save_model(
            sk_model=model,
            path=os.path.join(args.registered_model_name, "trained_model"),
        )
        ###########################
        #</save and register model>
        ###########################

    # Leaving the `with` block stops logging (ends the MLflow run).


if __name__ == "__main__":
    main()

Overwriting ./src/main.py


In [None]:
# %pip install -U azureml-fsspec

In [29]:
# from azure.ai.ml import command
# from azure.ai.ml.entities import Data
# from azure.ai.ml import Input
# from azure.ai.ml.constants import AssetTypes, InputOutputModes

# registered_model_name = "stock_prices_model"
# https://royadlsgen2.dfs.core.windows.net/royadlsfs/staging/prepared_stock_data.parquet
# abfss://royadlsfs@royadlsgen2.dfs.core.windows.net/staging/prepared_stock_data.parquet
# azureml://subscriptions/ed30c0d8-b03a-454d-96a8-6fe54400a879/resourcegroups/dev-iot-group/workspaces/roymlws/datastores/workspaceworkingdirectory/paths/Users/kaushik.roy1984/stocksml/prepared_stock_data.parquet/
# filedataset_asset = ml_client.data.get(name="storcksdata1", version="3")
# print(f"Data asset URI: {filedataset_asset.path}")
# job = command(
#     inputs=dict(
#         data=Input(
#             type=AssetTypes.MLTABLE,
#             path=filedataset_asset,
#             mode=InputOutputModes.DIRECT
#         ),
#         test_train_ratio=0.2,
#         learning_rate=0.25,
#         registered_model_name=registered_model_name,
#     ),
#     code="./src/",  # location of source code
#     command="python main.py --data ${{inputs.data}} --test_train_ratio ${{inputs.test_train_ratio}} --learning_rate ${{inputs.learning_rate}} --registered_model_name ${{inputs.registered_model_name}}",
#     environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest",
#     compute="roycpucluster",
#     display_name="stock_prices_prediction",
# )

Data asset URI: azureml://subscriptions/ed30c0d8-b03a-454d-96a8-6fe54400a879/resourcegroups/dev-iot-group/workspaces/roymlws/datastores/workspaceworkingdirectory/paths/Users/kaushik.roy1984/stocksml/prepared_stock_data.parquet/


## Create a job profile to submit, and mention all the parameters

In [6]:
from azure.ai.ml import command
from azure.ai.ml import Input

registered_model_name = "stock_prices_model"

# Look up the registered data asset and show where it points.
filedataset_asset = ml_client.data.get(name="storcksdata1", version="3")
print(f"Data asset URI: {filedataset_asset.path}")

# Values handed to the training script; the command line below refers to
# them through the ${{inputs.<name>}} placeholders.
job_inputs = {
    "data": Input(type="uri_file", path=filedataset_asset.path),
    "test_train_ratio": 0.2,
    "learning_rate": 0.25,
    "registered_model_name": registered_model_name,
}

# Command line for main.py, assembled from one fragment per argument.
job_command = (
    "python main.py"
    " --data ${{inputs.data}}"
    " --test_train_ratio ${{inputs.test_train_ratio}}"
    " --learning_rate ${{inputs.learning_rate}}"
    " --registered_model_name ${{inputs.registered_model_name}}"
)

job = command(
    inputs=job_inputs,
    code="./src/",  # location of source code
    command=job_command,
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest",
    compute="roycpucluster",
    display_name="stock_prices_prediction",
)

Data asset URI: azureml://subscriptions/ed30c0d8-b03a-454d-96a8-6fe54400a879/resourcegroups/dev-iot-group/workspaces/roymlws/datastores/workspaceworkingdirectory/paths/Users/kaushik.roy1984/stocksml/prepared_stock_data.parquet/


## Run the job. Every run produces a new job. 

In [None]:
# ml_client.create_or_update(job)

## Get ready to deploy

In [8]:
import uuid

# Build the endpoint name from a fixed prefix plus the first eight hex
# digits of a random UUID, so every fresh run gets its own name.
endpoint_suffix = uuid.uuid4().hex[:8]
online_endpoint_name = f"stocks-endpoint-{endpoint_suffix}"
online_endpoint_name

'stocks-endpoint-0422eadb'

## Start a managed endpoint. Takes a few minutes

In [9]:
# Expect the endpoint creation to take a few minutes
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
)

# Descriptive tags attached to the endpoint.
endpoint_tags = {
    "training_dataset": "stocks_prices",
    "model_type": "sklearn.ensemble.HistGradientBoostingRegressor",
}

# Local description of the key-authenticated online endpoint.
endpoint_spec = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="this is an online endpoint",
    auth_mode="key",
    tags=endpoint_tags,
)

# Submit the request and wait for the long-running operation to finish.
endpoint_poller = ml_client.online_endpoints.begin_create_or_update(endpoint_spec)
endpoint = endpoint_poller.result()

print(f"Endpoint {endpoint.name} provisioning state: {endpoint.provisioning_state}")

Endpoint stocks-endpoint-0422eadb provisioning state: Succeeded


## Check if endpoint is working

In [10]:
# Fetch the endpoint again by name and report the state the service returns.
endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)

status_line = 'Endpoint "{}" with provisioning state "{}" is retrieved'.format(
    endpoint.name, endpoint.provisioning_state
)
print(status_line)

Endpoint "stocks-endpoint-0422eadb" with provisioning state "Succeeded" is retrieved


## Get the registered model

In [11]:
# Let's pick the latest version of the model.
# Versions are compared as integers so that, e.g., "10" ranks above "9".
registered_versions = [
    int(m.version) for m in ml_client.models.list(name=registered_model_name)
]

# Without a registered model there is nothing to deploy; fail with a clear
# message instead of the opaque "max() arg is an empty sequence".
if not registered_versions:
    raise ValueError(
        f'No registered version of model "{registered_model_name}" was found; '
        "run the training job first."
    )

latest_model_version = max(registered_versions)
print(f'Latest model is version "{latest_model_version}" ')

Latest model is version "1" 


## Submit deployment. Takes a few minutes

In [None]:
# Fetch the model to deploy: the latest version of our registered model.
model = ml_client.models.get(name=registered_model_name, version=latest_model_version)

# Local description of the "blue" deployment: one Standard_DS2_v2 instance
# serving the model behind the endpoint created earlier.
blue_deployment_spec = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=online_endpoint_name,
    model=model,
    instance_type="Standard_DS2_v2",
    instance_count=1,
)

# Expect this deployment to take approximately 6 to 8 minutes;
# .result() blocks until the operation has completed.
deployment_poller = ml_client.begin_create_or_update(blue_deployment_spec)
blue_deployment = deployment_poller.result()

In [13]:
# Local folder for deployment artifacts such as the sample request; it is
# created on first use and left untouched when it already exists.
deploy_dir = "./deploy"
os.makedirs(name=deploy_dir, exist_ok=True)

## Create a sample request json

In [14]:
%%writefile {deploy_dir}/sample-request.json
{
  "input_data": {
    "columns": [0,1],
    "index": [0],
    "data": [
            [124800,124880]
        ]
  }
}

Overwriting ./deploy/sample-request.json


## Test the deployed service

In [15]:
# Send the sample request to the "blue" deployment and show the raw reply.
sample_response = ml_client.online_endpoints.invoke(
    deployment_name="blue",
    endpoint_name=online_endpoint_name,
    request_file="./deploy/sample-request.json",
)
sample_response

'[125667.50795053323]'

## Optionally delete endpoint to save costs

In [24]:
# ml_client.online_endpoints.begin_delete(name=online_endpoint_name)

<azure.core.polling._poller.LROPoller at 0x7fa0014d0fd0>

..............................................................................................

## Use this code in a score.py to get real time predictions. Use your own key

In [18]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    """Optionally switch the default HTTPS context to an unverified one.

    Nothing happens unless ``allowed`` is truthy, the ``PYTHONHTTPSVERIFY``
    environment variable is unset or empty, and the ``ssl`` module provides
    ``_create_unverified_context``. When all three hold, server certificate
    verification is bypassed on the client side.
    """
    if not allowed:
        return
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return
    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if unverified_factory:
        ssl._create_default_https_context = unverified_factory

allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.
# NOTE(review): the call above replaces the default HTTPS context with an
# unverified one; drop it when the scoring service presents a CA-signed
# certificate.

# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data =  {
  "input_data": {
    "columns": [0,1],
    "index": [0],
    "data": [
            [124800,124880]
        ]
    }
}

body = str.encode(json.dumps(data))

# The scoring URL may be overridden through AZUREML_ENDPOINT_URL; the default
# is the endpoint this notebook was originally run against.
url = os.environ.get(
    'AZUREML_ENDPOINT_URL',
    'https://stocks-endpoint-0422eadb.canadacentral.inference.ml.azure.com/score',
)
# The primary/secondary key or AMLToken for the endpoint is read from the
# environment so that it never ends up in the notebook.
api_key = os.environ.get('AZUREML_ENDPOINT_KEY', '')
if not api_key:
    raise Exception(
        "A key should be provided to invoke the endpoint "
        "(set the AZUREML_ENDPOINT_KEY environment variable)"
    )

# The azureml-model-deployment header will force the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules
headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key), 'azureml-model-deployment': 'blue' }

req = urllib.request.Request(url, body, headers)

try:
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))

b'[125667.50795053323]'
