## Databricks MLflow - manage models

In [0]:
import mlflow

# Select the experiment that the runs started later in this notebook are logged to.
mlflow.set_experiment(experiment_name='/MLFlow_wines_experiment/')

In [0]:
import mlflow.pyfunc
import mlflow.sklearn
import numpy as np
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from mlflow.models.signature import infer_signature
from mlflow.utils.environment import _mlflow_conda_env
import cloudpickle
import time

# Train, evaluate and log six random-forest classifiers (10 to 15 trees),
# each inside its own MLflow run.
# NOTE(review): X_train, y_train, X_test, y_test and SklearnModelWrapper are
# not defined in this cell; presumably an earlier notebook cell creates them.

# Experiment-level tags do not depend on the loop variable, so set them once
# instead of once per run.
mlflow.set_experiment_tag("version", 1.1)
mlflow.set_experiment_tags({"quantity_of_datasets": 2, "type": "wine"})

# The environment spec is identical for every logged model, so build it once.
conda_env = _mlflow_conda_env(
    additional_conda_deps=None,
    additional_pip_deps=[
        "cloudpickle=={}".format(cloudpickle.__version__),
        "scikit-learn=={}".format(sklearn.__version__),
    ],
    additional_conda_channels=None,
)

for i in range(6):
    # Single source of truth for the run name, so the logged 'run_name' param
    # always matches the name the run was actually started with.
    current_run_name = 'random_forest_' + str(i)
    with mlflow.start_run(run_name=current_run_name, description='MLFlow_wines_experiment_description'):
        n_estimators = i + 10
        model = RandomForestClassifier(n_estimators=n_estimators, random_state=np.random.RandomState(123))
        model.fit(X_train, y_train)

        # Score with the probability of the positive class (column 1).
        predictions_test = model.predict_proba(X_test)[:, 1]
        auc_score = roc_auc_score(y_test, predictions_test)

        # Wrap the estimator as a pyfunc model and infer its input/output schema.
        wrappedModel = SklearnModelWrapper(model)
        signature = infer_signature(X_train, wrappedModel.predict(None, X_train))

        # Run-level parameters, metrics, artifacts and tags.
        mlflow.log_param('n_estimators', n_estimators)
        mlflow.log_param('run_name', current_run_name)
        mlflow.log_metric('auc', auc_score)
        mlflow.log_metrics({"metric01": 2500.00, "metric02": 50.00})
        mlflow.log_text("Text content", "text_file.txt")

        mlflow.set_tag("version", 1.1)
        mlflow.set_tags({"quantity_of_datasets": 2, "type": "wine"})

        mlflow.pyfunc.log_model("random_forest_model" + str(i), python_model=wrappedModel, conda_env=conda_env, signature=signature)

In [0]:
# Register a model artifact logged by an earlier run under a registry name.
model_name = "wine_quality"
run_id = '2f4d59c1a16944f8aed0650aac75e650'
# NOTE(review): despite its name, this value is used as the artifact path
# inside the run (the training loop logs models as "random_forest_model" + str(i)).
run_name = 'random_forest_model4'

model_uri = "runs:/" + run_id + "/" + run_name
model_version = mlflow.register_model(model_uri=model_uri, name=model_name)

In [0]:
from mlflow.tracking import MlflowClient

# Move registered versions of the model between registry stages.
# NOTE(review): model_version comes from the register_model cell; run it first.
model_name = "wine_quality"
client = MlflowClient()

# Promote the version that was just registered.
promote_kwargs = {
    "name": model_name,
    "version": model_version.version,
    "stage": "Production",
}
client.transition_model_version_stage(**promote_kwargs)

# Archive version 3 (hard-coded example version number).
archive_kwargs = {
    "name": model_name,
    "version": 3,
    "stage": "Archived",
}
client.transition_model_version_stage(**archive_kwargs)

In [0]:
import mlflow.pyfunc

# Resolve the registry URI once and load the same model twice:
# as a plain pyfunc model and as a Spark UDF.
production_uri = f"models:/{model_name}/production"
model = mlflow.pyfunc.load_model(production_uri)
model_udf = mlflow.pyfunc.spark_udf(spark, production_uri)

## Databricks MLflow - Python API: a few examples

In [0]:
# Delete a single run, then a whole experiment, by their ids.
mlflow.delete_run(run_id='1c6bdbbeab384a1c8430f94e37094de6')
mlflow.delete_experiment(experiment_id='2277472460643381')

In [0]:
# List the experiments known to the tracking server.
# mlflow.list_experiments() was removed in MLflow 2.0; search_experiments() is
# its replacement. Fall back to the legacy call on older MLflow versions.
if hasattr(mlflow, "search_experiments"):
    experiments_list = mlflow.search_experiments()
else:
    experiments_list = mlflow.list_experiments()
display(experiments_list)

In [0]:
# Collect the RunInfo of every run in one experiment.
# mlflow.list_run_infos() was removed in MLflow 2.0; on newer versions the same
# data is obtained from search_runs(), which returns Run objects when
# output_format="list".
experiment_id = '2277472460643381'
if hasattr(mlflow, "list_run_infos"):
    run_infos = mlflow.list_run_infos(experiment_id)
else:
    run_infos = [
        run.info
        for run in mlflow.search_runs(experiment_ids=[experiment_id], output_format="list")
    ]
display(run_infos)

In [0]:
# Fetch one experiment by its id; the notebook shows the returned object.
mlflow.get_experiment(experiment_id='2277472460643381')

In [0]:
# Fetch one run by its id; the notebook shows the returned object.
mlflow.get_run(run_id='8159fc7514bc4da59b405873cda174a6')

In [0]:
import mlflow.pyfunc
import mlflow.sklearn
import numpy as np
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from mlflow.models.signature import infer_signature
from mlflow.utils.environment import _mlflow_conda_env
import cloudpickle
import time

# Train, evaluate and log six random-forest classifiers (10 to 15 trees),
# each inside its own MLflow run, printing tracking details for every run.
# NOTE(review): X_train, y_train, X_test, y_test and SklearnModelWrapper are
# not defined in this cell; presumably an earlier notebook cell creates them.

# Experiment-level tags do not depend on the loop variable, so set them once
# instead of once per run.
mlflow.set_experiment_tag("version", 1.1)
mlflow.set_experiment_tags({"quantity_of_datasets": 2, "type": "wine"})

# The environment spec is identical for every logged model, so build it once.
conda_env = _mlflow_conda_env(
    additional_conda_deps=None,
    additional_pip_deps=[
        "cloudpickle=={}".format(cloudpickle.__version__),
        "scikit-learn=={}".format(sklearn.__version__),
    ],
    additional_conda_channels=None,
)

for i in range(6):
    # Single source of truth for the run name, so the logged 'run_name' param
    # always matches the name the run was actually started with.
    current_run_name = 'random_forest_' + str(i)
    with mlflow.start_run(run_name=current_run_name, description='MLFlow_wines_experiment_description'):
        # Show where this run stores artifacts and which server tracks it.
        print("artifact_uri: ", mlflow.get_artifact_uri())
        print("tracking_uri: ", mlflow.get_tracking_uri())
        print("last_active_run: ", mlflow.last_active_run().info)

        n_estimators = i + 10
        model = RandomForestClassifier(n_estimators=n_estimators, random_state=np.random.RandomState(123))
        model.fit(X_train, y_train)

        # Score with the probability of the positive class (column 1).
        predictions_test = model.predict_proba(X_test)[:, 1]
        auc_score = roc_auc_score(y_test, predictions_test)

        # Wrap the estimator as a pyfunc model and infer its input/output schema.
        wrappedModel = SklearnModelWrapper(model)
        signature = infer_signature(X_train, wrappedModel.predict(None, X_train))

        # Run-level parameters, metrics, artifacts and tags.
        mlflow.log_param('n_estimators', n_estimators)
        mlflow.log_param('run_name', current_run_name)
        mlflow.log_metric('auc', auc_score)
        mlflow.log_metrics({"metric01": 2500.00, "metric02": 50.00})
        mlflow.log_text("Text content", "text_file.txt")

        mlflow.set_tag("version", 1.1)
        mlflow.set_tags({"quantity_of_datasets": 2, "type": "wine"})

        mlflow.pyfunc.log_model("random_forest_model" + str(i), python_model=wrappedModel, conda_env=conda_env, signature=signature)