## Track Azure Databricks run using MLflow in Azure Machine Learning

In order to execute the notebook:
 * You have an Azure Databricks workspace and a cluster provisioned
 * Install the `azureml-mlflow` package on the Databricks cluster (this should also install `azureml-core`), as described in the [how-to-use-mlflow-azure-databricks documentation page](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-use-mlflow-azure-databricks)
 * Import the notebook to your Azure Databricks workspace
 <br />
  <br />
   <br />


### Connect to Azure ML workspace

In [2]:
import mlflow
import azureml.mlflow
import azureml.core

from azureml.core import Workspace

# Azure subscription ID (fill in before running)
subscription_id = ''

# Resource group of the Azure Machine Learning workspace
resource_group = ''

# Name of the Azure Machine Learning workspace
workspace_name = ''

# Fetch a handle to the workspace; `ws` is used by later cells for the
# tracking URI, the compute target, the experiment lookup and the deployment.
ws = Workspace.get(
    name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
)


## Set MLflow experiment

In [4]:
import mlflow
import azureml.mlflow

# Create the MLflow experiment.
# NOTE(review): {adb_user_name} looks like a placeholder to be replaced with
# your Azure Databricks user name -- this is a plain string, not an f-string,
# so the braces are passed to MLflow literally as written.
experiment_name = "/Users/{adb_user_name}/mlflowexp"

from mlflow.exceptions import RestException

try:
  mlflow.create_experiment(experiment_name)
except RestException:
  # A RestException is not specific to a name clash, so only report
  # "already exists" when the experiment can actually be looked up;
  # otherwise re-raise so the real failure is not hidden.
  if mlflow.get_experiment_by_name(experiment_name) is None:
    raise
  print(f'{experiment_name} already exists')
  

## Configure MLflow tracking URI

In [6]:
# Ask the Azure ML workspace for its MLflow tracking URI, display it,
# and make it the active tracking URI for the following MLflow calls.
uri = ws.get_mlflow_tracking_uri()
print(uri)
mlflow.set_tracking_uri(uri)

In [7]:
# Import the dataset from scikit-learn and create the training and test datasets. 
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes

# Load the scikit-learn diabetes dataset and separate features from the target.
db = load_diabetes()
X = db.data
y = db.target

# Fix the shuffle seed so that Restart & Run All reproduces the same
# train/test partition (and therefore comparable metrics between runs).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [8]:
import os
import joblib
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Artifact path under which the model used for deployment is logged.
model_save_path = "model"
# NOTE: this reassigns experiment_name; the run-lookup and clean-up cells
# further down read this value.
experiment_name = 'experiment-with-mlflow-projects'
mlflow.set_experiment(experiment_name)

with mlflow.start_run():
  n_estimators = 100
  max_depth = 6
  max_features = 3
  # Create and train model. random_state is fixed so that re-running the
  # cell on the same training data yields the same forest and metric.
  rf = RandomForestRegressor(n_estimators=n_estimators,
                             max_depth=max_depth,
                             max_features=max_features,
                             random_state=42)
  rf.fit(X_train, y_train)
  # Make predictions
  predictions = rf.predict(X_test)

  # Log parameters
  mlflow.log_param("num_trees", n_estimators)
  mlflow.log_param("maxdepth", max_depth)
  mlflow.log_param("max_feat", max_features)

  # Log model
  mlflow.sklearn.log_model(rf, "random-forest-model")

  # Create metrics
  mse = mean_squared_error(y_test, predictions)

  # Log metrics
  mlflow.log_metric("mse", mse)

  # Log the same model a second time under model_save_path; the deployment
  # cell builds its model URI from this artifact path.
  mlflow.sklearn.log_model(rf, model_save_path)
 

## Create AKS cluster

See the documentation below for the other deployment options, such as ACI or attaching an existing cluster.

https://docs.microsoft.com/en-us/azure/databricks/_static/notebooks/mlflow/mlflow-quick-start-deployment-azure.html

In [10]:
from azureml.core.compute import AksCompute, ComputeTarget
from azureml.exceptions import ComputeTargetException

# Default AKS provisioning settings (arguments can be passed to customise them)
prov_config = AksCompute.provisioning_configuration()

aks_cluster_name = "aks-cluster"

try:
    # Try to look up a compute target with this name in the workspace first
    aks_target = ComputeTarget(name=aks_cluster_name, workspace=ws)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # Lookup failed: create the cluster, then wait for the create process
    aks_target = ComputeTarget.create(
        workspace=ws,
        name=aks_cluster_name,
        provisioning_configuration=prov_config,
    )
    aks_target.wait_for_completion(show_output=True)

# Show the resulting provisioning state and any provisioning errors
print(aks_target.provisioning_state)
print(aks_target.provisioning_errors)

## Retrieve Azure ML run ID

In [12]:
# Look up the Azure ML experiment by the name currently held in experiment_name
exp = ws.experiments[experiment_name]
runs = list(exp.get_runs())
print(runs)

# Fail with an actionable message instead of an opaque IndexError when the
# experiment contains no runs.
if not runs:
  raise RuntimeError(
      f"No runs found in experiment '{experiment_name}'; "
      "execute the training cell before retrieving the run ID."
  )

# Take the first run returned.
# NOTE(review): this relies on get_runs() returning the most recent run
# first -- confirm against the azureml-core documentation.
runid = runs[0].id

## Deploy the model to AKS cluster

In [14]:
from azureml.core.webservice import Webservice, AksWebservice
import mlflow.azureml
# Set configuration and service name
prod_webservice_name = "model-prod"
# Reuse aks_cluster_name from the AKS cell instead of repeating the
# "aks-cluster" literal, so the deployment always targets the cluster that
# was looked up or created there.
prod_webservice_deployment_config = AksWebservice.deploy_configuration(compute_target_name=aks_cluster_name)

# Deploy the model artifact logged under model_save_path in run `runid`.
web_service, azure_model = mlflow.azureml.deploy(model_uri="runs:/{}/{}".format(runid, model_save_path),
                                                   service_name=prod_webservice_name,
                                                   deployment_config=prod_webservice_deployment_config,
                                                   workspace=ws,
                                                   synchronous=True)


## Invoke Webservice

In [16]:
import json
import pandas as pd

# Three sample inputs, ten feature values per row.
test_rows = [
    [
        0.01991321, 0.05068012, 0.10480869, 0.07007254, -0.03596778,
        -0.0266789, -0.02499266, -0.00259226, 0.00371174, 0.04034337,
    ],
    [
        -0.01277963, -0.04464164, 0.06061839, 0.05285819, 0.04796534,
        0.02937467, -0.01762938, 0.03430886, 0.0702113, 0.00720652,
    ],
    [
        0.03807591, 0.05068012, 0.00888341, 0.04252958, -0.04284755,
        -0.02104223, -0.03971921, -0.00259226, -0.01811827, 0.00720652,
    ],
]

# Serialise the rows as a "split"-oriented DataFrame JSON document
test_rows_as_json = pd.DataFrame(test_rows).to_json(orient="split")

# Send the payload to the deployed web service and show the response
predictions = web_service.run(test_rows_as_json)
print(predictions)

In [17]:
# Delete the MLflow experiment.
# get_experiment_by_name returns None when no experiment has that name; guard
# against it so an AttributeError here cannot prevent the web service below
# from being deleted.
mlflow_exp = mlflow.get_experiment_by_name(experiment_name)
if mlflow_exp is not None:
  mlflow.delete_experiment(mlflow_exp.experiment_id)
else:
  print(f'{experiment_name} not found, nothing to delete')

# Delete webservice
web_service.delete()