In [0]:
import azureml.core
from azureml.core import Workspace
from azureml.core import Datastore
from azureml.core import Dataset
from azureml.data.datapath import DataPath


# check core SDK version number
print(f'Azure ML SDK Version: {azureml.core.VERSION}')

In [0]:
%sh pip freeze | grep azureml

In [0]:
import os

# Azure ML workspace coordinates.
# Each value can be overridden through an environment variable so the notebook
# can target a different workspace without editing code; when the variable is
# not set, the original hard-coded value is used.
workspace_name = os.environ.get('AML_WORKSPACE_NAME', 'mlsalondata')
subscription_id = os.environ.get('AML_SUBSCRIPTION_ID', 'f80606e5-788f-4dc3-a9ea-2eb9a7836082')
resource_group_name = os.environ.get('AML_RESOURCE_GROUP', 'rg-salon-data')
tenant_id = os.environ.get('AML_TENANT_ID', '8e2e7c2d-4702-496d-af6c-96e4bfc9f667')

In [0]:
from azureml.core.authentication import InteractiveLoginAuthentication

# Build an interactive-login credential scoped to the tenant in `tenant_id`.
# The resulting object is passed as `auth=` when the Workspace is opened.
interactive_auth = InteractiveLoginAuthentication(tenant_id=tenant_id)

In [0]:
# Open a handle on the Azure ML workspace using the interactive credential.
workspace_settings = dict(
    workspace_name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group_name,
    auth=interactive_auth,
)
ws = Workspace(**workspace_settings)

In [0]:
# Display the workspace details; also serves as a quick check that `ws` is usable.
ws.get_details()

In [0]:
import mlflow
import mlflow.azureml
import azureml.mlflow
import azureml.core

import pandas as pd
import numpy as np

In [0]:
# Point MLflow at the tracking endpoint exposed by the Azure ML workspace, so
# runs started in this notebook are recorded there.
uri = ws.get_mlflow_tracking_uri()
mlflow.set_tracking_uri(uri)

In [0]:
print(f'MLFlow tracking URI is {uri}')

In [0]:
# Select the MLflow experiment that the runs below are logged under.
# Alternative kept for reference (a workspace-path style experiment name):
#experimentName = "/Users/paul.peton@live.fr/mlflows/wine_experiment"
experimentName = "wine_aml_experiment" 
mlflow.set_experiment(experimentName) 

https://databricks.com/notebooks/gallery/MLEndToEndExampleAWS.html

In [0]:
# Read the white and red wine-quality CSVs (semicolon-delimited), in that order.
white_wine, red_wine = (
    pd.read_csv(csv_path, sep=';')
    for csv_path in (
        '/dbfs/databricks-datasets/wine-quality/winequality-white.csv',
        '/dbfs/databricks-datasets/wine-quality/winequality-red.csv',
    )
)

In [0]:
display(white_wine)

fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
6.2,0.32,0.16,7.0,0.045,30.0,136.0,0.9949,3.18,0.47,9.6,6
7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
8.1,0.22,0.43,1.5,0.044,28.0,129.0,0.9938,3.22,0.45,11.0,6


In [0]:
# Tag every row with its colour before the two frames are stacked.
white_wine['is_red'] = 0
red_wine['is_red'] = 1

# Stack red on top of white, then replace spaces in column names with underscores.
data = pd.concat([red_wine, white_wine], axis=0).rename(
    columns=lambda column_name: column_name.replace(' ', '_')
)

In [0]:
# Per-column flag: True where the column contains at least one missing value.
data.isna().any()

In [0]:
# Binarise the label: 1 = high quality (raw score >= threshold), 0 = otherwise.
#
# The `quality` column is overwritten in place, so this cell must be safe to
# re-run: thresholding an already-binarised 0/1 column at 7 would silently turn
# every label into 0. The guard only converts while the column still holds raw
# scores (any value above 1).
HIGH_QUALITY_THRESHOLD = 7
if data['quality'].max() > 1:
    data['quality'] = (data['quality'] >= HIGH_QUALITY_THRESHOLD).astype(int)
high_quality = data['quality']

In [0]:
from sklearn.model_selection import train_test_split

# Hold out a test partition (library-default split size) with a fixed seed.
train, test = train_test_split(data, random_state=123)

# Separate the `quality` label from the feature columns in each partition.
X_train, y_train = train.drop(columns=["quality"]), train["quality"]
X_test, y_test = test.drop(columns=["quality"]), test["quality"]

In [0]:
import mlflow.pyfunc
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from mlflow.models.signature import infer_signature
 
# The predict method of sklearn's RandomForestClassifier returns a binary classification (0 or 1). 
# The following code creates a wrapper function, SklearnModelWrapper, that uses 
# the predict_proba method to return the probability that the observation belongs to each class. 
 
class SklearnModelWrapper(mlflow.pyfunc.PythonModel):
  """pyfunc wrapper whose predict() yields the positive-class probability.

  Holds a fitted estimator exposing ``predict_proba`` and returns column 1 of
  its output instead of a hard class label.
  """

  def __init__(self, model):
    # Keep a reference to the fitted estimator.
    self.model = model

  def predict(self, context, model_input):
    """Return column 1 of ``predict_proba(model_input)``; ``context`` is unused."""
    class_probabilities = self.model.predict_proba(model_input)
    return class_probabilities[:, 1]
 
# mlflow.start_run creates a new MLflow run to track the performance of this model. 
# Within the context, you call mlflow.log_param to keep track of the parameters used, and
# mlflow.log_metric to record metrics like accuracy.
with mlflow.start_run(run_name='wine_rf_run') as run:

  # run_id is the supported identifier; run_uuid is only a deprecated alias for it.
  runID = run.info.run_id
  experimentID = run.info.experiment_id
  print(f'This is the run {runID} in the experiment {experimentID}')

  # Train a small random forest with a fixed seed.
  n_estimators = 10
  model = RandomForestClassifier(n_estimators=n_estimators, random_state=np.random.RandomState(42))
  model.fit(X_train, y_train)

  #mlflow.sklearn.autolog()

  # predict_proba returns [prob_negative, prob_positive], so slice the output with [:, 1]
  predictions_test = model.predict_proba(X_test)[:,1]
  auc_score = roc_auc_score(y_test, predictions_test)
  mlflow.log_param('n_estimators', n_estimators)
  # Use the area under the ROC curve as a metric.
  mlflow.log_metric('auc', auc_score)
  wrappedModel = SklearnModelWrapper(model)
  # Log the model with a signature that defines the schema of the model's inputs and outputs.
  # When the model is deployed, this signature will be used to validate inputs.
  signature = infer_signature(X_train, wrappedModel.predict(None, X_train))

  # The model is stored under the artifact path "wine_rf_model",
  # i.e. it is addressable as runs:/<runID>/wine_rf_model.
  mlflow.pyfunc.log_model("wine_rf_model", python_model=wrappedModel, signature=signature)

  # Note: the ActiveRun yielded by mlflow.start_run has no `register_model`
  # attribute, so run.register_model(model_name='wine_rf_model', model_path='model')
  # cannot be used here.

In [0]:
# One row per feature column, holding the fitted forest's importance score.
feature_importances = pd.DataFrame(
    {'importance': model.feature_importances_},
    index=X_train.columns.tolist(),
)
# Show the most important features first.
feature_importances.sort_values(by='importance', ascending=False)

Unnamed: 0,importance
alcohol,0.143085
density,0.120277
chlorides,0.107933
volatile_acidity,0.095391
sulphates,0.082023
residual_sugar,0.079612
total_sulfur_dioxide,0.07944
pH,0.076515
citric_acid,0.075548
free_sulfur_dioxide,0.071595


In [0]:
# Fetch at most 10 runs; the result is a pandas DataFrame.
all_runs = mlflow.search_runs(max_results=10)  # Note: this is a pandas DataFrame
display(all_runs)

In [0]:
# Keep only the runs that actually logged an `auc` metric.
has_auc_metric = all_runs["metrics.auc"].notna()
child_runs = all_runs[has_auc_metric]

# Show status, forest size and AUC for those runs.
summary_columns = ["status", "params.n_estimators", "metrics.auc"]
display(child_runs[summary_columns])

In [0]:
%fs ls /mnt/gold/models/

https://tsmatz.github.io/azure-databricks-exercise/exercise10-mlflow.html

https://docs.microsoft.com/en-us/azure/machine-learning/how-to-deploy-mlflow-models

In [0]:
#https://www.mlflow.org/docs/latest/python_api/mlflow.azureml.html

import mlflow.azureml
from azureml.core.webservice import AciWebservice, Webservice

# NOTE: mlflow.azureml.build_image is deprecated since MLflow 1.19.0 in favour of
# the azureml deployment plugin (https://aka.ms/aml-mlflow-deploy).
# NOTE(review): model_uri is a bare /mnt/... path — confirm it resolves to the saved
# model from this cluster (a DBFS mount may need a dbfs:/ or /dbfs/ prefix).
registered_image, registered_model = mlflow.azureml.build_image(
  model_uri="/mnt/gold/models/wine_rf_model",
  image_name="wine_predict_image",
  model_name="wine_rf_model",
  workspace=ws,
  synchronous=True)

# If your image build failed, you can access build logs at the following URI:
print("Access the following URI for build logs: {}".format(registered_image.image_build_log_uri))

# Deploy the image to Azure Container Instances (ACI) for real-time serving.
# Use the image built above and the workspace handle `ws`, and pass the ACI
# deployment configuration to the deployment call.
webservice_deployment_config = AciWebservice.deploy_configuration()
webservice = Webservice.deploy_from_image(
  workspace=ws,
  name="wine-predict-aci",
  image=registered_image,
  deployment_config=webservice_deployment_config)
webservice.wait_for_deployment()

DeprecationWarning: ``mlflow.azureml.build_image`` is deprecated since 1.19.0. This method will be removed in a near future release. Use ``the azureml deployment plugin, https://aka.ms/aml-mlflow-deploy`` instead.

In [0]:
from mlflow.deployments import get_deploy_client

# Create a deployment client that targets the current MLflow tracking URI.
client = get_deploy_client(mlflow.get_tracking_uri())

# Artifact path of the model inside the run. It must match the artifact path the
# model was logged under with mlflow.pyfunc.log_model ("wine_rf_model"); any other
# value makes the runs:/ URI point at an artifact that does not exist.
model_path = "wine_rf_model"

# `name` is the service name.
# The model gets registered automatically and a name is autogenerated using the "name" parameter below.
# NOTE(review): "wine-predict-aci" is also the name used by the build_image-based
# deployment cell — confirm both are not deployed to the same workspace.
client.create_deployment(model_uri=f'runs:/{runID}/{model_path}',
                         name="wine-predict-aci")