In [22]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# MLflow Classification Recipe Notebook

This notebook runs the MLflow Classification Recipe on Databricks and inspects its results. For more information about the MLflow Classification Recipe, including usage examples, see the [Classification Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#classification-recipe) the [Classification Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.classification.v1.recipe).

In [23]:
from mlflow.recipes import Recipe

r = Recipe(profile="local")
r.clean()

2023/04/13 15:13:04 INFO mlflow.recipes.recipe: Creating MLflow Recipe 'mlflow-recipes-titanic' with profile: 'local'


In [24]:
# Display the DAGs (Directed Acyclic Graph)
# r.inspect()

In [25]:
r.run("ingest")

2023/04/13 15:13:05 INFO mlflow.recipes.step: Running step ingest...


name,type
Survived,integer
Pclass,integer
Sex,string
SibSp,integer
Parch,integer
Fare,number
Age,number
Embarked,string

Survived,Pclass,Sex,SibSp,Parch,Fare,Age,Embarked
0,3,male,1,0,7.25,22.0,S
1,1,female,1,0,71.2833,38.0,C
1,3,female,0,0,7.925,26.0,S
1,1,female,1,0,53.1,35.0,S
0,3,male,0,0,8.05,35.0,S


In [26]:
r.run("split")

2023/04/13 15:13:06 INFO mlflow.recipes.utils.execution: ingest: No changes. Skipping.


Run MLFlow Recipe step: split
2023/04/13 15:13:07 INFO mlflow.recipes.step: Running step split...


In [27]:
training_data = r.get_artifact("training_data")
training_data.sample(5)

Unnamed: 0_level_0,Survived,Pclass,Sex,SibSp,Parch,Fare,Age,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
31,0,1,male,0,0,27.7208,40.0,C
783,0,1,male,0,0,30.0,29.0,S
263,0,1,male,1,1,79.65,52.0,S
776,0,3,male,0,0,7.75,18.0,S
60,0,3,male,5,2,46.9,11.0,S


In [28]:
r.run("transform")

2023/04/13 15:13:08 INFO mlflow.recipes.utils.execution: ingest, split: No changes. Skipping.


Run MLFlow Recipe step: transform
2023/04/13 15:13:09 INFO mlflow.recipes.step: Running step transform...


Name,Type
Survived,int64
Pclass,int64
Sex,object
SibSp,int64
Parch,int64
Fare,float64
Age,float64
Embarked,object

Name,Type
onehot__Pclass_1,float64
onehot__Pclass_2,float64
onehot__Pclass_3,float64
onehot__Sex_female,float64
onehot__Sex_male,float64
onehot__Embarked_C,float64
onehot__Embarked_Q,float64
onehot__Embarked_S,float64
onehot__Embarked_None,float64
ordinal__Pclass,float64

onehot__Pclass_1,onehot__Pclass_2,onehot__Pclass_3,onehot__Sex_female,onehot__Sex_male,onehot__Embarked_C,onehot__Embarked_Q,onehot__Embarked_S,onehot__Embarked_None,ordinal__Pclass,ordinal__Sex,ordinal__Embarked,numerical__SibSp,numerical__Parch,numerical__Fare,numerical__Age,Survived
0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,1.0,2.0,0.398474,-0.481491,-0.506714,-0.546333,0
0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,1.0,2.0,-0.477184,-0.481491,-0.489902,0.442639,0
0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,2.0,1.0,1.0,-0.477184,-0.481491,-0.481322,-0.089885,0
1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,-0.477184,-0.481491,0.430799,1.888059,0
0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,1.0,2.0,2.14979,0.797695,-0.216187,-2.067828,0


In [29]:
r.run("train")

2023/04/13 15:13:11 INFO mlflow.recipes.utils.execution: ingest, split, transform: No changes. Skipping.


Run MLFlow Recipe step: train
2023/04/13 15:13:12 INFO mlflow.recipes.step: Running step train...
2023/04/13 15:13:12 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/04/13 15:13:12 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
2023/04/13 15:13:12 INFO mlflow.recipes.steps.train: Training data has less than 5000 rows, skipping rebalancing.
INFO: GPU available: False, used: False
INFO  [lightning.pytorch.utilities.rank_zero] GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO  [lightning.pytorch.utilities.rank_zero] TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO  [lightning.pytorch.utilities.rank_zero] IPU available: False, usin

Metric,training,validation
accuracy_score,0.832865,0.746835
example_count,712.0,79.0
f1_score,0.774194,0.615385
false_negatives,71.0,10.0
false_positives,48.0,10.0
precision_score,0.809524,0.615385
recall_score,0.741818,0.615385
score,0.832865,0.746835
true_negatives,389.0,43.0
true_positives,204.0,16.0

Name,Type
Pclass,long
Sex,string
SibSp,long
Parch,long
Fare,double
Age,double
Embarked,string

Name,Type
-,"Tensor('int64', (-1,))"

absolute_error,prediction,Survived,Pclass,Sex,SibSp,Parch,Fare,Age,Embarked
True,0,1,3,female,1,1,20.25,35.0,S
True,0,1,3,male,0,0,8.05,32.0,S
True,1,0,1,male,1,0,108.9,18.0,C
True,1,0,3,female,0,0,9.5875,37.0,S
True,0,1,3,female,0,0,9.5875,63.0,S
True,1,0,1,female,1,2,151.55,25.0,S
True,0,1,3,male,0,0,7.8958,29.0,C
True,0,1,1,male,0,0,26.55,34.0,S
True,0,1,3,male,0,0,8.1125,,S
True,1,0,3,female,0,1,10.4625,2.0,S

Unnamed: 0,Latest,Best,2nd Best
Model Rank,11,1,1
accuracy_score,0.746835,0.759494,0.759494
f1_score,0.615385,0.612245,0.612245
false_negatives,10,11,11
false_positives,10,8,8
log_loss,,0.539445,0.539605
precision_score,0.615385,0.652174,0.652174
recall_score,0.615385,0.576923,0.576923
roc_auc,,0.808418,0.808418
true_negatives,43,45,45


In [30]:
trained_model = r.get_artifact("model")
print(trained_model)

mlflow.pyfunc.loaded_model:
  artifact_path: train/model
  flavor: mlflow.pyfunc.model
  run_id: 67808e89856a4545ae4eece1d1793afb



In [31]:
from sklearn.metrics import classification_report

test_data = r.get_artifact("training_data")
y_test = test_data["Survived"]
pred = trained_model.predict(test_data)

print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.85      0.89      0.87       437
           1       0.81      0.74      0.77       275

    accuracy                           0.83       712
   macro avg       0.83      0.82      0.82       712
weighted avg       0.83      0.83      0.83       712



In [32]:
r.run("evaluate")

2023/04/13 15:14:03 INFO mlflow.recipes.utils.execution: ingest, split, transform, train: No changes. Skipping.


Run MLFlow Recipe step: evaluate
2023/04/13 15:14:04 INFO mlflow.recipes.step: Running step evaluate...
2023/04/13 15:14:04 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/04/13 15:14:04 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
2023/04/13 15:14:07 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.
2023/04/13 15:14:07 INFO mlflow.models.evaluation.default_evaluator: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2023/04/13 15:14:07 INFO mlflow.models.evaluation.default_evaluator: Shap explainer _PatchedKernelExplainer is used.

  0%|          | 0/10 [00:00<?, ?it/s]
 40%|████      | 4/10 [00:00<00:00, 32.87it/s]
 8

Metric,validation,test
accuracy_score,0.746835,0.8
example_count,79.0,100.0
f1_score,0.615385,0.756098
false_negatives,10.0,10.0
false_positives,10.0,10.0
precision_score,0.615385,0.756098
recall_score,0.615385,0.756098
score,0.746835,0.8
true_negatives,43.0,49.0
true_positives,16.0,31.0

metric,greater_is_better,value,threshold,validated
accuracy_score,True,0.8,0.7,✅
f1_score,True,0.756098,0.5,✅


In [33]:
r.run("register")

2023/04/13 15:14:20 INFO mlflow.recipes.utils.execution: ingest, split, transform, train, evaluate: No changes. Skipping.


Run MLFlow Recipe step: register
2023/04/13 15:14:20 INFO mlflow.recipes.step: Running step register...
2023/04/13 15:14:21 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/04/13 15:14:21 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
2023/04/13 15:14:21 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/04/13 15:14:21 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
Registered model 'titanic-local' already exists. Creating a new version of this model...
2023/04/13 15:14:21 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for 

In [34]:
# Only works with Spark
# r.run("ingest_scoring")
# r.run("predict")