In [3]:
# imports
import os
import mlflow
import argparse

import pandas as pd
import lightgbm as lgbm
import matplotlib.pyplot as plt

from sklearn.metrics import log_loss, accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from azureml.core import Workspace

# connect to your workspace
# `ws` is the workspace handle reused by the later cells: default datastore lookup,
# dataset retrieval, the MLflow tracking URI, environment lookup and model deployment.
# NOTE(review): from_config() presumably needs a workspace config file to be
# discoverable from the working directory — confirm for your setup.
ws = Workspace.from_config()

In [1]:
# download the iris CSV from the examples blob store and save it as ./iris.csv
# (`-O iris.csv` fixes the local file name that the next cell reads)
!wget -O iris.csv https://azuremlexamples.blob.core.windows.net/datasets/iris.csv

--2021-11-13 16:45:24--  https://azuremlexamples.blob.core.windows.net/datasets/iris.csv
Resolving azuremlexamples.blob.core.windows.net (azuremlexamples.blob.core.windows.net)... 20.60.128.132
Connecting to azuremlexamples.blob.core.windows.net (azuremlexamples.blob.core.windows.net)|20.60.128.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4466 (4.4K) [text/csv]
Saving to: ‘iris.csv’


2021-11-13 16:45:25 (414 MB/s) - ‘iris.csv’ saved [4466/4466]



In [10]:
from azureml.core import Dataset

# load the downloaded CSV into a pandas dataframe
df = pd.read_csv("iris.csv")

# the workspace's default datastore is the upload target
datastore = ws.get_default_datastore()

# upload the dataframe and register it as a tabular dataset named "iris_dataset_fromsdk"
dataset = Dataset.Tabular.register_pandas_dataframe(
    dataframe=df,
    target=datastore,
    name="iris_dataset_fromsdk",
    show_progress=True,
)

Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/d00482ed-fd6e-4c40-b801-b4650fe96219/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


In [45]:
# pull the registered dataset back from the workspace and materialise it as a dataframe
dataset = Dataset.get_by_name(ws, name="iris_dataset_fromsdk")
df = dataset.to_pandas_dataframe()

# features are every column except the "species" label column
X = df.drop(columns=["species"])

# turn the string labels into integer class ids
enc = LabelEncoder()
y = enc.fit_transform(df["species"])

# hold out 20% of the rows for evaluation (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# wrap both splits as LightGBM datasets
train_data = lgbm.Dataset(X_train, label=y_train)
test_data = lgbm.Dataset(X_test, label=y_test)

In [8]:
# train the model
# send MLflow tracking to the workspace and let autologging record the run
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())
mlflow.set_experiment("iris_local_mlflow_sdk")
mlflow.autolog()

# setup parameters
# The number of boosting rounds is configured in exactly one place.
# Previously `num_boost_round` was 10 while `params` also carried
# "num_iterations": 16; LightGBM gives the params alias precedence, so the
# run actually trained 16 rounds and the visible `num_boost_round` value was
# silently ignored. Keep the effective value (16) and drop the conflicting key.
num_boost_round = 16
params = {
    "objective": "multiclass",
    "num_class": 3,  # three iris species
    "boosting": "gbdt",
    "num_leaves": 31,
    "num_threads": 0,
    "learning_rate": 0.1,
    "metric": "multi_logloss",
    "seed": 42,
    "verbose": 0,
}

# train model
# the held-out split is passed as the validation set named "test", so its
# multi_logloss is reported for each boosting round
with mlflow.start_run() as run:
    model = lgbm.train(
        params,
        train_data,
        num_boost_round=num_boost_round,
        valid_sets=[test_data],
        valid_names=["test"],
    )


2021/11/13 16:56:42 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2021/11/13 16:56:42 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[1]	test's multi_logloss: 0.930558
[2]	test's multi_logloss: 0.795536
[3]	test's multi_logloss: 0.68756
[4]	test's multi_logloss: 0.593833
[5]	test's multi_logloss: 0.51883
[6]	test's multi_logloss: 0.454422
[7]	test's multi_logloss: 0.401051
[8]	test's multi_logloss: 0.353053
[9]	test's multi_logloss: 0.313256
[10]	test's multi_logloss: 0.276926
[11]	test's multi_logloss: 0.247315
[12]	test's multi_logloss: 0.221442
[13]	test's multi_logloss: 0.199252
[14]	test's multi_logloss: 0.177485
[15]	test's multi_logloss: 0.160641
[16]	test's multi_logloss: 0.144921




In [11]:
# register the model logged under the training run's "model" artifact path
model_uri = f"runs:/{run.info.run_id}/model"
model = mlflow.register_model(model_uri=model_uri, name="iris_local_mlflow_sdk")

Registered model 'iris_local_mlflow_sdk' already exists. Creating a new version of this model...
2021/11/13 16:57:43 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: iris_local_mlflow_sdk, version 2
Created version '2' of model 'iris_local_mlflow_sdk'.


In [16]:
# create environment for the deploy
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.webservice import AciWebservice

# get a curated environment (pinned to version 14 of the LightGBM curated image)
env = Environment.get(
    workspace=ws,
    name="AzureML-lightgbm-3.2-ubuntu18.04-py37-cpu",
    version=14,
)
# NOTE(review): presumably selects the newest inference server stack — confirm
env.inferencing_stack_version = "latest"

# create deployment config i.e. compute resources
# The description previously said "with sklearn", but the deployed model is the
# LightGBM booster trained above, so the label is corrected to match.
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    description="Predict iris with LightGBM - SDK",
)

In [39]:
%%time
import uuid
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core.model import Model

# look up the registered model by name (no version is specified here)
model = Model(ws, "iris_local_mlflow_sdk")

# the inference config pairs the scoring script with the environment it runs in
inference_config = InferenceConfig(entry_script="score.py", environment=env)

# give the service a short random suffix so repeated deployments get distinct names
suffix = str(uuid.uuid4())[:4]
service_name = f"iris-sdk-svc{suffix}"

# deploy the service and block until the deployment finishes, streaming progress
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=aciconfig,
)
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-11-13 18:24:08-08:00 Registering the environment.
2021-11-13 18:24:09-08:00 Use the existing image.
2021-11-13 18:24:10-08:00 Generating deployment configuration.
2021-11-13 18:24:11-08:00 Submitting deployment to compute.
2021-11-13 18:24:15-08:00 Checking the status of deployment iris-sdk-svc87a3..
2021-11-13 18:26:31-08:00 Checking the status of inference endpoint iris-sdk-svc87a3.
Succeeded
ACI service creation operation finished, operation "Succeeded"
CPU times: user 1.45 s, sys: 253 ms, total: 1.71 s
Wall time: 2min 30s


In [43]:
# send raw HTTP request to test the web service.
import requests

# request body: two sample rows in a columns / data / index layout
input_data = """{"input_data": {
    "columns": [
      "sepal_length",
      "sepal_width",
      "petal_length",
      "petal_width"
    ],
    "data": [
      [ 1.0,2.0,3.0,4.0],
      [ 10.0,2.0,9.0,8.0]
    ],
    "index": [0,1]
  }
}
"""
headers = {"Content-Type": "application/json"}

# Always pass a timeout: without one, requests waits indefinitely and the cell
# hangs if the endpoint is unreachable or stops responding.
resp = requests.post(
    service.scoring_uri,
    data=input_data,
    headers=headers,
    timeout=60,
)
print("POST to url", service.scoring_uri)
print("Response: ", resp.text)




POST to url http://22e4ef93-f8aa-46e2-bc8a-053878780257.westus.azurecontainer.io/score
Response:  [[0.6537724440218525, 0.17880280139283433, 0.16742475458531322], [0.047420494345148216, 0.06112452452109818, 0.8914549811337537]]
