In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

# MLflow Classification Recipe Notebook

This notebook runs the MLflow Classification Recipe on Databricks and inspects its results. For more information about the MLflow Classification Recipe, including usage examples, see the [Classification Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#classification-recipe) and the [Classification Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.classification.v1.recipe).

In [2]:
from mlflow.recipes import Recipe

# Create the recipe using the "local" profile; `r` is reused by every cell below.
r = Recipe(profile="local")
# Presumably clears previously cached step outputs so the steps below run from
# scratch -- see the MLflow `Recipe.clean` documentation to confirm.
r.clean()

2023/04/12 16:23:36 INFO mlflow.recipes.recipe: Creating MLflow Recipe 'mlflow-recipes-titanic' with profile: 'local'


In [3]:
# Display the recipe DAG (Directed Acyclic Graph); uncomment the line below to render it
# r.inspect()

In [4]:
# Execute the `ingest` step; the recorded output shows the dataset schema and a
# preview of the first rows.
r.run("ingest")

2023/04/12 16:23:38 INFO mlflow.recipes.step: Running step ingest...


name,type
Survived,integer
Pclass,integer
Sex,string
SibSp,integer
Parch,integer
Fare,number
Age,number
Embarked,string

Survived,Pclass,Sex,SibSp,Parch,Fare,Age,Embarked
0,3,male,1,0,7.25,22.0,S
1,1,female,1,0,71.2833,38.0,C
1,3,female,0,0,7.925,26.0,S
1,1,female,1,0,53.1,35.0,S
0,3,male,0,0,8.05,35.0,S


In [5]:
# Execute the `split` step. Per the recorded log, the unchanged upstream step
# (`ingest`) is skipped rather than re-executed.
r.run("split")

2023/04/12 16:23:39 INFO mlflow.recipes.utils.execution: ingest: No changes. Skipping.


Run MLFlow Recipe step: split
2023/04/12 16:23:40 INFO mlflow.recipes.step: Running step split...


In [6]:
# Fetch the "training_data" artifact from the recipe and display its summary
# statistics.
# Note: in the recorded output, Age has fewer non-null values (570) than the
# other columns (712), i.e. some ages are missing.
training_data = r.get_artifact("training_data")
training_data.describe()

Unnamed: 0,Survived,Pclass,SibSp,Parch,Fare,Age
count,712.0,712.0,712.0,712.0,712.0,570.0
mean,0.386236,2.31882,0.544944,0.376404,31.362476,29.475877
std,0.487228,0.830694,1.142801,0.782297,47.619466,14.689461
min,0.0,1.0,0.0,0.0,0.0,0.67
25%,0.0,2.0,0.0,0.0,7.8958,20.0
50%,0.0,3.0,0.0,0.0,14.4542,28.0
75%,1.0,3.0,1.0,0.0,30.6958,38.0
max,1.0,3.0,8.0,6.0,512.3292,80.0


In [7]:
# Execute the `transform` step; the recorded output lists the input columns and
# the resulting one-hot, ordinal and numerical feature columns.
r.run("transform")

2023/04/12 16:23:41 INFO mlflow.recipes.utils.execution: ingest, split: No changes. Skipping.


Run MLFlow Recipe step: transform
2023/04/12 16:23:42 INFO mlflow.recipes.step: Running step transform...


Name,Type
Survived,int64
Pclass,int64
Sex,object
SibSp,int64
Parch,int64
Fare,float64
Age,float64
Embarked,object

Name,Type
onehot__Pclass_1,float64
onehot__Pclass_2,float64
onehot__Pclass_3,float64
onehot__Sex_female,float64
onehot__Sex_male,float64
onehot__Embarked_C,float64
onehot__Embarked_Q,float64
onehot__Embarked_S,float64
onehot__Embarked_None,float64
ordinal__Pclass,float64

onehot__Pclass_1,onehot__Pclass_2,onehot__Pclass_3,onehot__Sex_female,onehot__Sex_male,onehot__Embarked_C,onehot__Embarked_Q,onehot__Embarked_S,onehot__Embarked_None,ordinal__Pclass,ordinal__Sex,ordinal__Embarked,numerical__SibSp,numerical__Parch,numerical__Fare,numerical__Age,Survived
0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,1.0,2.0,0.398474,-0.481491,-0.506714,-0.546333,0
0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,1.0,2.0,-0.477184,-0.481491,-0.489902,0.442639,0
0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,2.0,1.0,1.0,-0.477184,-0.481491,-0.481322,-0.089885,0
1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,-0.477184,-0.481491,0.430799,1.888059,0
0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,1.0,2.0,2.14979,0.797695,-0.216187,-2.067828,0


In [8]:
# Execute the `train` step; the recorded output reports training vs. validation
# metrics, the worst predictions, and a comparison of the latest model against
# the best-ranked ones.
r.run("train")

2023/04/12 16:23:43 INFO mlflow.recipes.utils.execution: ingest, split, transform: No changes. Skipping.


Run MLFlow Recipe step: train
2023/04/12 16:23:44 INFO mlflow.recipes.step: Running step train...
2023/04/12 16:23:45 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/04/12 16:23:45 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
2023/04/12 16:23:45 INFO mlflow.recipes.steps.train: Training data has less than 5000 rows, skipping rebalancing.

  0%|          | 0/5 [00:00<?, ?trial/s, best loss=?]2023/04/12 16:23:51 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.

2023/04/12 16:23:51 INFO mlflow.models.evaluation.default_evaluator: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.


 20%|██        | 1/5 [00:06<00:27,  6.93s

Metric,training,validation
accuracy_score,0.810393,0.721519
example_count,712.0,79.0
f1_score,0.743833,0.576923
false_negatives,79.0,11.0
false_positives,56.0,11.0
log_loss,0.431264,0.50216
precision_recall_auc,0.838185,0.736431
precision_score,0.777778,0.576923
recall_score,0.712727,0.576923
roc_auc,0.86166,0.814224

Name,Type
Pclass,long
Sex,string
SibSp,long
Parch,long
Fare,double
Age,double
Embarked,string

Name,Type
predicted_score_0,double
predicted_score_1,double
predicted_score,double
predicted_label,long

absolute_error,prediction,Survived,Pclass,Sex,SibSp,Parch,Fare,Age,Embarked
0.972411,1,0,1,female,1,2,151.55,2.0,S
0.951579,0,1,3,male,0,0,8.05,45.0,S
0.949585,0,1,3,male,0,0,7.925,44.0,S
0.93827,0,1,3,male,0,0,7.925,39.0,S
0.929289,1,0,1,female,1,2,151.55,25.0,S
0.928207,0,1,3,male,4,2,31.3875,3.0,S
0.922836,0,1,1,male,0,0,30.0,80.0,S
0.920123,0,1,3,male,1,0,7.775,25.0,S
0.918416,0,1,3,male,0,0,7.925,32.0,S
0.918382,0,1,3,male,0,0,8.05,32.0,S

Unnamed: 0,Latest,Best,2nd Best
Model Rank,> 15,1,1
accuracy_score,0.721519,0.759494,0.759494
f1_score,0.576923,0.612245,0.612245
false_negatives,11,11,11
false_positives,11,8,8
log_loss,0.50216,0.539445,0.539605
precision_score,0.576923,0.652174,0.652174
recall_score,0.576923,0.576923,0.576923
roc_auc,0.814224,0.808418,0.808418
true_negatives,42,45,45


In [9]:
# Fetch the "model" artifact from the recipe and print its summary
# (artifact path, flavor and run id in the recorded output).
trained_model = r.get_artifact("model")
print(trained_model)

2023/04/12 16:24:21 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/04/12 16:24:21 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.


mlflow.pyfunc.loaded_model:
  artifact_path: train/model
  flavor: mlflow.pyfunc.model
  run_id: 4977a1dfd83e42bf986ca1d7a27cfc4a



In [10]:
# Execute the `evaluate` step; the recorded output compares validation and test
# metrics and checks accuracy_score / f1_score against their thresholds.
r.run("evaluate")

2023/04/12 16:24:21 INFO mlflow.recipes.utils.execution: ingest, split, transform, train: No changes. Skipping.


Run MLFlow Recipe step: evaluate
2023/04/12 16:24:22 INFO mlflow.recipes.step: Running step evaluate...
2023/04/12 16:24:22 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/04/12 16:24:22 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
2023/04/12 16:24:23 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.
2023/04/12 16:24:23 INFO mlflow.models.evaluation.default_evaluator: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2023/04/12 16:24:26 INFO mlflow.models.evaluation.default_evaluator: Shap explainer _PatchedKernelExplainer is used.

  0%|          | 0/10 [00:00<?, ?it/s]
 40%|████      | 4/10 [00:00<00:00, 32.33it/s]
 8

Metric,validation,test
accuracy_score,0.721519,0.79
example_count,79.0,100.0
f1_score,0.576923,0.746988
false_negatives,11.0,10.0
false_positives,11.0,11.0
log_loss,0.50216,0.469276
precision_recall_auc,0.736431,0.827481
precision_score,0.576923,0.738095
recall_score,0.576923,0.756098
roc_auc,0.814224,0.846631

metric,greater_is_better,value,threshold,validated
accuracy_score,True,0.79,0.7,✅
f1_score,True,0.746988,0.5,✅


In [11]:
# Execute the `register` step; in the recorded run this created a new version of
# the already-existing registered model 'titanic-local'.
r.run("register")

2023/04/12 16:24:32 INFO mlflow.recipes.utils.execution: ingest, split, transform, train, evaluate: No changes. Skipping.


Run MLFlow Recipe step: register
2023/04/12 16:24:33 INFO mlflow.recipes.step: Running step register...
2023/04/12 16:24:33 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/04/12 16:24:33 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
2023/04/12 16:24:33 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/04/12 16:24:33 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
Registered model 'titanic-local' already exists. Creating a new version of this model...
2023/04/12 16:24:33 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for 

In [12]:
# The scoring steps below only work with Spark; uncomment them when running in a Spark-enabled environment.
# r.run("ingest_scoring")
# r.run("predict")