In [1]:
# Load IPython's autoreload extension and select mode 2, which the IPython
# documentation describes as reloading all modules before executing code, so
# edits to imported project modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

# MLflow Regression Recipe Notebook

This notebook runs the MLflow Regression Recipe on Databricks and inspects its results. For more information about the MLflow Regression Recipe, including usage examples, see the [Regression Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#regression-recipe) and the [Regression Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.regression.v1.recipe).

In [2]:
# Import the Recipe class used to construct and drive the recipe below.
from mlflow.recipes import Recipe

# Instantiate the recipe with the "local" profile. The log line emitted by this
# cell reports that a 'regression' recipe is created. Every later cell uses `r`.
r = Recipe(profile="local")


2022/11/21 14:23:20 INFO mlflow.recipes.recipe: Creating MLflow Recipe 'regression' with profile: 'local'


In [3]:
# Reset the recipe before the step-by-step run below. Called without a step
# name, Recipe.clean() is documented by MLflow to remove the cached outputs of
# all recipe steps.
r.clean()

In [4]:
# Show an overview of the whole recipe. Called without a step name,
# Recipe.inspect() is documented by MLflow to display the recipe's step graph
# rather than the results of a single step.
r.inspect()

In [8]:
# Run the "ingest" step. The step output shows the ingested dataset's schema
# (column names and types) followed by a preview of a few rows.
r.run("ingest")

2022/11/21 14:25:57 INFO mlflow.recipes.step: Running step ingest...


name,type
tpep_pickup_datetime,datetime
tpep_dropoff_datetime,datetime
trip_distance,number
fare_amount,number
pickup_zip,integer
dropoff_zip,integer

tpep_pickup_datetime,tpep_dropoff_datetime,trip_distance,fare_amount,pickup_zip,dropoff_zip
2016-02-13 21:47:53,2016-02-13 21:57:15,1.4,8.0,10103,10110
2016-02-13 18:29:09,2016-02-13 18:37:23,1.31,7.5,10023,10023
2016-02-06 19:40:58,2016-02-06 19:52:32,1.8,9.5,10001,10018
2016-02-12 19:06:43,2016-02-12 19:20:54,2.3,11.5,10044,10111
2016-02-23 10:27:56,2016-02-23 10:58:33,2.6,18.5,10199,10022


In [9]:
# Run the "split" step. As the log shows, the upstream "ingest" step is skipped
# because nothing changed since it last ran; only "split" is executed.
r.run("split")

2022/11/21 14:26:03 INFO mlflow.recipes.utils.execution: ingest: No changes. Skipping.


2022/11/21 14:26:05 INFO mlflow.recipes.step: Running step split...


In [10]:
# Run the "transform" step; unchanged upstream steps ("ingest", "split") are
# skipped per the log. The step output lists the input column types, the
# transformed column types, and a preview of the transformed rows.
r.run("transform")

2022/11/21 14:26:11 INFO mlflow.recipes.utils.execution: ingest, split: No changes. Skipping.


2022/11/21 14:26:13 INFO mlflow.recipes.step: Running step transform...


Name,Type
tpep_pickup_datetime,datetime64[ns]
tpep_dropoff_datetime,datetime64[ns]
trip_distance,float64
fare_amount,float64
pickup_zip,int32
dropoff_zip,int32

Name,Type
hour_encoder__pickup_hour_0,float64
hour_encoder__pickup_hour_1,float64
hour_encoder__pickup_hour_2,float64
hour_encoder__pickup_hour_3,float64
hour_encoder__pickup_hour_4,float64
hour_encoder__pickup_hour_5,float64
hour_encoder__pickup_hour_6,float64
hour_encoder__pickup_hour_7,float64
hour_encoder__pickup_hour_8,float64
hour_encoder__pickup_hour_9,float64

hour_encoder__pickup_hour_0,hour_encoder__pickup_hour_1,hour_encoder__pickup_hour_2,hour_encoder__pickup_hour_3,hour_encoder__pickup_hour_4,hour_encoder__pickup_hour_5,hour_encoder__pickup_hour_6,hour_encoder__pickup_hour_7,hour_encoder__pickup_hour_8,hour_encoder__pickup_hour_9,hour_encoder__pickup_hour_10,hour_encoder__pickup_hour_11,hour_encoder__pickup_hour_12,hour_encoder__pickup_hour_13,hour_encoder__pickup_hour_14,hour_encoder__pickup_hour_15,hour_encoder__pickup_hour_16,hour_encoder__pickup_hour_17,hour_encoder__pickup_hour_18,hour_encoder__pickup_hour_19,hour_encoder__pickup_hour_20,hour_encoder__pickup_hour_21,hour_encoder__pickup_hour_22,hour_encoder__pickup_hour_23,day_encoder__pickup_dow_0,day_encoder__pickup_dow_1,day_encoder__pickup_dow_2,day_encoder__pickup_dow_3,day_encoder__pickup_dow_4,day_encoder__pickup_dow_5,day_encoder__pickup_dow_6,trip_distance
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.4
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.8
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.3
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.6
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,10.15


In [11]:
# Run the "train" step; unchanged upstream steps are skipped per the log.
# The log shows a FLAML AutoML regression search minimizing rmse. The step
# output reports training/validation metrics, the model's input and output
# schema, the rows with the largest absolute error, and a leaderboard entry
# for the resulting run.
r.run("train")

2022/11/21 14:26:20 INFO mlflow.recipes.utils.execution: ingest, split, transform: No changes. Skipping.


2022/11/21 14:26:21 INFO mlflow.recipes.step: Running step train...
[flaml.automl: 11-21 14:26:30] {2599} INFO - task = regression
[flaml.automl: 11-21 14:26:30] {2601} INFO - Data split method: uniform
[flaml.automl: 11-21 14:26:30] {2604} INFO - Evaluation method: holdout
[flaml.automl: 11-21 14:26:30] {2726} INFO - Minimizing error metric: rmse
[flaml.automl: 11-21 14:26:30] {2870} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl: 11-21 14:26:30] {3166} INFO - iteration 0, current learner lgbm
[flaml.automl: 11-21 14:26:30] {3296} INFO - Estimated sufficient time budget=850s. Estimated necessary time budget=6s.
[flaml.automl: 11-21 14:26:31] {3343} INFO -  at 0.2s,	estimator lgbm's best error=3.8716,	best estimator lgbm's best error=3.8716
[flaml.automl: 11-21 14:26:31] {3166} INFO - iteration 1, current learner lgbm
[flaml.automl: 11-21 14:26:31] {3343} INFO -  at 0.9s,	estimator lgbm's best error=3.8716,	best estimat

Metric,training,validation
root_mean_squared_error,3.43453,3.86595
weighted_mean_squared_error,11.312,14.9799
example_count,8051.0,959.0
max_error,27.3936,20.823
mean_absolute_error,2.14017,2.35457
mean_absolute_percentage_error,47235900000000.0,54693600000000.0
mean_on_target,2.86689,3.13932
mean_squared_error,11.796,14.9456
r2_score,0.0277288,-0.0180791
score,0.0277288,-0.0180791

Name,Type
tpep_pickup_datetime,datetime
tpep_dropoff_datetime,datetime
fare_amount,double
pickup_zip,integer
dropoff_zip,integer

Name,Type
-,"Tensor('float32', (-1,))"

absolute_error,prediction,trip_distance,tpep_pickup_datetime,tpep_dropoff_datetime,fare_amount,pickup_zip,dropoff_zip
27.393573,3.206427,30.6,2016-02-22 21:17:27,2016-02-22 22:00:58,95.0,11371,7114
23.414336,2.385664,25.8,2016-02-06 12:18:51,2016-02-06 13:03:53,71.0,11422,11218
22.391628,3.158372,25.55,2016-02-16 22:30:58,2016-02-16 23:24:47,73.5,10009,11050
21.888149,2.611851,24.5,2016-01-05 16:07:58,2016-01-05 17:48:29,82.5,11422,11213
21.375736,3.124264,24.5,2016-01-16 23:05:32,2016-01-16 23:43:37,65.0,11422,10463
21.009275,3.490725,24.5,2016-01-21 00:58:34,2016-01-21 01:33:10,94.5,11371,10601
20.831038,4.168962,25.0,2016-02-21 03:31:58,2016-02-21 03:32:28,0.0,10502,10502
20.629068,5.670932,26.3,2016-01-07 04:07:58,2016-01-07 04:43:08,0.0,10018,10606
20.62731,3.57269,24.2,2016-02-01 01:01:57,2016-02-01 01:31:40,63.0,11422,10468
19.714052,2.585948,22.3,2016-01-19 12:46:55,2016-01-19 13:15:28,58.5,11422,10475

Unnamed: 0,Latest
Model Rank,> 0
root_mean_squared_error,3.86595
weighted_mean_squared_error,14.9799
max_error,20.823
mean_absolute_error,2.35457
mean_absolute_percentage_error,5.46936e+13
mean_squared_error,14.9456
Run Time,2022-11-21 14:26:28
Run ID,c01fb44745e948748c8964ea8a2839e5


In [12]:
# Run the "evaluate" step; unchanged upstream steps are skipped per the log.
# The step output compares validation and test metrics and then lists, for
# each validated metric, its value, its threshold, and whether it passed.
r.run("evaluate")

2022/11/21 14:28:41 INFO mlflow.recipes.utils.execution: ingest, split, transform, train: No changes. Skipping.


2022/11/21 14:28:43 INFO mlflow.recipes.step: Running step evaluate...
2022/11/21 14:28:49 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.
2022/11/21 14:29:10 INFO mlflow.models.evaluation.default_evaluator: Shap explainer _PatchedKernelExplainer is used.

  0%|          | 0/10 [00:00<?, ?it/s]
 10%|█         | 1/10 [00:00<00:01,  4.62it/s]
 40%|████      | 4/10 [00:00<00:00, 12.27it/s]
 60%|██████    | 6/10 [00:00<00:00, 13.77it/s]
 80%|████████  | 8/10 [00:00<00:00, 13.30it/s]
100%|██████████| 10/10 [00:00<00:00, 13.52it/s]
elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
2022/11/21 14:30:27 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.


Metric,validation,test
root_mean_squared_error,3.86595,3.478859
weighted_mean_squared_error,14.9799,11.770481
example_count,959.0,990.0
max_error,20.823,21.848149
mean_absolute_error,2.35457,2.147404
mean_absolute_percentage_error,54693600000000.0,38046733520674.17
mean_on_target,3.13932,2.83601
mean_squared_error,14.9456,12.102463
r2_score,-0.0180791,-0.001537
score,-0.0180791,-0.001537

metric,greater_is_better,value,threshold,validated
root_mean_squared_error,False,3.47886,10,✅
mean_absolute_error,False,2.1474,50,✅
weighted_mean_squared_error,False,11.7705,50,✅
max_error,False,21.8481,100,✅


In [10]:
# Run the "register" step. The log reports that the model is registered as
# 'taxi_fare_regressor' and that version 1 is created.
# NOTE(review): this cell's saved execution count (10) and log timestamps
# (2022/11/18) predate the preceding cells (2022/11/21), so its output is from
# an earlier session. Restart the kernel and run all cells to refresh it.
r.run("register")

2022/11/18 19:07:22 INFO mlflow.recipes.utils.execution: ingest, split, transform, train, evaluate: No changes. Skipping.


2022/11/18 19:07:30 INFO mlflow.recipes.step: Running step register...
Successfully registered model 'taxi_fare_regressor'.
2022/11/18 19:07:31 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: taxi_fare_regressor, version 1
Created version '1' of model 'taxi_fare_regressor'.


In [11]:
# Display the results summary of the "train" step. With a step name,
# Recipe.inspect() is documented by MLflow to show that step's results.
# NOTE(review): the saved output below is dated 2022-11-18 and appears to
# predict fare_amount, whereas the train cell above ran on 2022-11-21 and
# predicts trip_distance. The output looks stale; re-run to confirm.
r.inspect("train")

Metric,training,validation
root_mean_squared_error,4.03495,3.08811
weighted_mean_squared_error,3.86282,7.96016
example_count,8051.0,959.0
max_error,220.146,54.6742
mean_absolute_error,0.768177,0.930729
mean_absolute_percentage_error,57363300000000.0,0.079724
mean_on_target,12.3947,13.1517
mean_squared_error,16.2809,9.53643
r2_score,0.860647,0.91975
score,0.860647,0.91975

Name,Type
tpep_pickup_datetime,datetime
tpep_dropoff_datetime,datetime
trip_distance,double
pickup_zip,integer
dropoff_zip,integer

Name,Type
-,"Tensor('float64', (-1,))"

absolute_error,prediction,fare_amount,tpep_pickup_datetime,tpep_dropoff_datetime,trip_distance,pickup_zip,dropoff_zip
220.146456,39.853544,260.0,2016-02-29 12:16:16,2016-02-29 12:16:53,0.0,8876,8876
193.61122,81.38878,275.0,2016-02-12 20:55:19,2016-02-12 21:52:38,20.85,10013,7008
86.547994,18.452006,105.0,2016-01-16 18:09:15,2016-01-16 18:09:23,0.0,7310,7310
48.353499,48.353499,0.0,2016-01-07 04:07:58,2016-01-07 04:43:08,26.3,10018,10606
46.146501,48.353499,94.5,2016-01-21 00:58:34,2016-01-21 01:33:10,24.5,11371,10601
41.771659,13.228341,55.0,2016-02-28 04:50:41,2016-02-28 04:52:32,0.18,10115,10027
35.74171,35.74171,0.0,2016-02-21 03:31:58,2016-02-21 03:32:28,25.0,10502,10502
35.467842,59.532158,95.0,2016-02-22 21:17:27,2016-02-22 22:00:58,30.6,11371,7114
34.669571,50.330429,85.0,2016-02-11 17:52:13,2016-02-11 18:38:17,14.46,10282,7114
33.547994,18.452006,52.0,2016-01-24 20:57:37,2016-01-24 20:57:52,0.0,10162,10162

Unnamed: 0,Latest
Model Rank,> 0
root_mean_squared_error,3.08811
weighted_mean_squared_error,7.96016
max_error,54.6742
mean_absolute_error,0.930729
mean_absolute_percentage_error,0.079724
mean_squared_error,9.53643
Run Time,2022-11-18 18:50:22
Run ID,a41e16bbcfbc412fa018765763dbe35a


In [None]:
# Fetch the recipe artifact named "training_data" and display its summary
# statistics as the cell's output.
# Presumably this is a pandas DataFrame (it is used via .describe()) - confirm.
training_data = r.get_artifact("training_data")
training_data.describe()

In [None]:
# Fetch the recipe artifact named "model" and print its text representation.
trained_model = r.get_artifact("model")
print(trained_model)