In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# MLflow Regression Recipe Notebook

This notebook runs the MLflow Regression Recipe on Databricks and inspects its results. For more information about the MLflow Regression Recipe, including usage examples, see the [Regression Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#regression-recipe) and the [Regression Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.regression.v1.recipe).

In [4]:
from mlflow.recipes import Recipe

# Instantiate the recipe using the "local" profile; the log line emitted by this
# cell reports which recipe (here 'regression') and profile were selected.
r = Recipe(profile="local")


2022/11/15 19:45:56 INFO mlflow.recipes.recipe: Creating MLflow Recipe 'regression' with profile: 'local'


In [5]:
# NOTE(review): presumably clears previously cached step outputs so the steps
# below execute from scratch -- confirm against the MLflow Recipe.clean() docs.
r.clean()

In [6]:
# NOTE(review): called without a step name; presumably renders an overview of
# the whole recipe -- confirm against the MLflow Recipe.inspect() docs.
r.inspect()

In [7]:
# Run the "ingest" step. Its output shows the ingested dataset's column schema
# and a small sample of rows.
r.run("ingest")

2022/11/15 19:46:13 INFO mlflow.recipes.step: Running step ingest...


name,type
tpep_pickup_datetime,datetime
tpep_dropoff_datetime,datetime
trip_distance,number
fare_amount,number
pickup_zip,integer
dropoff_zip,integer

tpep_pickup_datetime,tpep_dropoff_datetime,trip_distance,fare_amount,pickup_zip,dropoff_zip
2016-02-13 21:47:53,2016-02-13 21:57:15,1.4,8.0,10103,10110
2016-02-13 18:29:09,2016-02-13 18:37:23,1.31,7.5,10023,10023
2016-02-06 19:40:58,2016-02-06 19:52:32,1.8,9.5,10001,10018
2016-02-12 19:06:43,2016-02-12 19:20:54,2.3,11.5,10044,10111
2016-02-23 10:27:56,2016-02-23 10:58:33,2.6,18.5,10199,10022


In [8]:
# Run the "split" step. Upstream steps with no changes are skipped rather than
# re-executed, as reported in the log output.
r.run("split")

2022/11/15 19:46:24 INFO mlflow.recipes.utils.execution: ingest: No changes. Skipping.


2022/11/15 19:46:26 INFO mlflow.recipes.step: Running step split...


In [9]:
# Run the "transform" step. Its output shows the input column types, the
# transformed (encoded/scaled) feature columns, and a preview of transformed rows.
r.run("transform")

2022/11/15 19:46:41 INFO mlflow.recipes.utils.execution: ingest, split: No changes. Skipping.


2022/11/15 19:46:42 INFO mlflow.recipes.step: Running step transform...


Name,Type
tpep_pickup_datetime,datetime64[ns]
tpep_dropoff_datetime,datetime64[ns]
trip_distance,float64
fare_amount,float64
pickup_zip,int32
dropoff_zip,int32

Name,Type
hour_encoder__pickup_hour_0,float64
hour_encoder__pickup_hour_1,float64
hour_encoder__pickup_hour_2,float64
hour_encoder__pickup_hour_3,float64
hour_encoder__pickup_hour_4,float64
hour_encoder__pickup_hour_5,float64
hour_encoder__pickup_hour_6,float64
hour_encoder__pickup_hour_7,float64
hour_encoder__pickup_hour_8,float64
hour_encoder__pickup_hour_9,float64

hour_encoder__pickup_hour_0,hour_encoder__pickup_hour_1,hour_encoder__pickup_hour_2,hour_encoder__pickup_hour_3,hour_encoder__pickup_hour_4,hour_encoder__pickup_hour_5,hour_encoder__pickup_hour_6,hour_encoder__pickup_hour_7,hour_encoder__pickup_hour_8,hour_encoder__pickup_hour_9,hour_encoder__pickup_hour_10,hour_encoder__pickup_hour_11,hour_encoder__pickup_hour_12,hour_encoder__pickup_hour_13,hour_encoder__pickup_hour_14,hour_encoder__pickup_hour_15,hour_encoder__pickup_hour_16,hour_encoder__pickup_hour_17,hour_encoder__pickup_hour_18,hour_encoder__pickup_hour_19,hour_encoder__pickup_hour_20,hour_encoder__pickup_hour_21,hour_encoder__pickup_hour_22,hour_encoder__pickup_hour_23,day_encoder__pickup_dow_0,day_encoder__pickup_dow_1,day_encoder__pickup_dow_2,day_encoder__pickup_dow_3,day_encoder__pickup_dow_4,day_encoder__pickup_dow_5,day_encoder__pickup_dow_6,std_scaler__trip_distance,std_scaler__trip_duration,fare_amount
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-0.421138,-0.111258,8.0
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-0.3063,-0.06703,9.5
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,-0.162753,-0.014425,11.5
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-0.076624,0.315945,18.5
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.090944,0.034829,28.5


In [11]:
# Run the "train" step. Its output reports training vs. validation metrics,
# the model's input/output schema, the rows with the largest absolute error,
# and the MLflow run ID of the training run.
r.run("train")

2022/11/15 19:47:54 INFO mlflow.recipes.utils.execution: ingest, split, transform: No changes. Skipping.


2022/11/15 19:47:57 INFO mlflow.recipes.step: Running step train...
2022/11/15 19:48:14 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.
2022/11/15 19:48:14 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.


Metric,training,validation
root_mean_squared_error,0.383831,2.95898
weighted_mean_squared_error,0.271219,10.5769
example_count,8051.0,959.0
max_error,7.59684,55.7623
mean_absolute_error,0.254683,0.654309
mean_absolute_percentage_error,657852000000.0,0.0426456
mean_on_target,12.3947,13.1517
mean_squared_error,0.147326,8.75558
r2_score,0.998739,0.926321
score,0.998739,0.926321

Name,Type
tpep_pickup_datetime,datetime
tpep_dropoff_datetime,datetime
trip_distance,double
pickup_zip,integer
dropoff_zip,integer

Name,Type
-,"Tensor('float32', (-1,))"

absolute_error,prediction,fare_amount,tpep_pickup_datetime,tpep_dropoff_datetime,trip_distance,pickup_zip,dropoff_zip
7.596836,37.403164,45.0,2016-02-06 23:18:10,2016-02-06 23:35:18,3.51,10013,7302
7.135166,44.864834,52.0,2016-02-26 16:54:41,2016-02-26 17:06:30,4.02,11371,11367
6.019024,45.980976,52.0,2016-01-16 00:12:20,2016-01-16 00:27:06,6.1,11378,10012
4.092111,0.092111,-4.0,2016-02-02 10:48:08,2016-02-02 10:51:28,0.46,10065,10021
2.881069,50.618931,53.5,2016-01-29 04:49:35,2016-01-29 05:07:52,13.0,10011,7201
2.856945,47.143055,50.0,2016-01-19 00:48:47,2016-01-19 01:14:02,9.0,10119,11371
2.822248,13.822248,11.0,2016-02-14 15:37:35,2016-02-14 16:02:09,2.28,10023,10167
2.801239,59.698761,62.5,2016-02-01 10:30:45,2016-02-01 10:56:12,16.1,10019,7114
2.628321,10.128321,7.5,2016-02-05 13:50:35,2016-02-05 14:06:00,1.24,10002,10013
2.613594,53.386406,56.0,2016-02-21 07:02:59,2016-02-21 07:26:25,13.68,10278,7114

Unnamed: 0,Latest
Model Rank,> 0
root_mean_squared_error,2.95898
weighted_mean_squared_error,10.5769
max_error,55.7623
mean_absolute_error,0.654309
mean_absolute_percentage_error,0.0426456
mean_squared_error,8.75558
Run Time,2022-11-15 19:48:02
Run ID,0bf5af4404b549a4a17c5c3497b0a036


In [12]:
# Run the "evaluate" step. Its output compares validation and test metrics and
# shows whether each thresholded metric passed validation.
r.run("evaluate")

2022/11/15 19:48:49 INFO mlflow.recipes.utils.execution: ingest, split, transform, train: No changes. Skipping.


2022/11/15 19:48:50 INFO mlflow.recipes.step: Running step evaluate...
2022/11/15 19:48:52 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.
2022/11/15 19:49:02 INFO mlflow.models.evaluation.default_evaluator: Shap explainer _PatchedKernelExplainer is used.

  0%|          | 0/10 [00:00<?, ?it/s]
 30%|███       | 3/10 [00:00<00:00, 25.66it/s]
 60%|██████    | 6/10 [00:00<00:00, 23.26it/s]
 90%|█████████ | 9/10 [00:00<00:00, 20.01it/s]
100%|██████████| 10/10 [00:00<00:00, 19.30it/s]
elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
2022/11/15 19:49:15 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.


Metric,validation,test
root_mean_squared_error,2.95898,1.373591
weighted_mean_squared_error,10.5769,0.656317
example_count,959.0,990.0
max_error,55.7623,22.210772
mean_absolute_error,0.654309,0.473753
mean_absolute_percentage_error,0.0426456,0.400966
mean_on_target,13.1517,12.15102
mean_squared_error,8.75558,1.886753
r2_score,0.926321,0.980462
score,0.926321,0.980462

metric,greater_is_better,value,threshold,validated
root_mean_squared_error,False,1.37359,10,✅
mean_absolute_error,False,0.473753,50,✅
weighted_mean_squared_error,False,0.656317,50,✅


In [13]:
# Run the "register" step. The log output reports the registered model name and
# the model version that was created.
r.run("register")

2022/11/15 19:49:50 INFO mlflow.recipes.utils.execution: ingest, split, transform, train, evaluate: No changes. Skipping.


2022/11/15 19:49:52 INFO mlflow.recipes.step: Running step register...
Successfully registered model 'taxi_fare_regressor'.
2022/11/15 19:49:52 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: taxi_fare_regressor, version 1
Created version '1' of model 'taxi_fare_regressor'.


In [14]:
# Re-display the "train" step's results; the output matches the tables shown
# when the step was run, with no new step-execution log lines.
r.inspect("train")

Metric,training,validation
root_mean_squared_error,0.383831,2.95898
weighted_mean_squared_error,0.271219,10.5769
example_count,8051.0,959.0
max_error,7.59684,55.7623
mean_absolute_error,0.254683,0.654309
mean_absolute_percentage_error,657852000000.0,0.0426456
mean_on_target,12.3947,13.1517
mean_squared_error,0.147326,8.75558
r2_score,0.998739,0.926321
score,0.998739,0.926321

Name,Type
tpep_pickup_datetime,datetime
tpep_dropoff_datetime,datetime
trip_distance,double
pickup_zip,integer
dropoff_zip,integer

Name,Type
-,"Tensor('float32', (-1,))"

absolute_error,prediction,fare_amount,tpep_pickup_datetime,tpep_dropoff_datetime,trip_distance,pickup_zip,dropoff_zip
7.596836,37.403164,45.0,2016-02-06 23:18:10,2016-02-06 23:35:18,3.51,10013,7302
7.135166,44.864834,52.0,2016-02-26 16:54:41,2016-02-26 17:06:30,4.02,11371,11367
6.019024,45.980976,52.0,2016-01-16 00:12:20,2016-01-16 00:27:06,6.1,11378,10012
4.092111,0.092111,-4.0,2016-02-02 10:48:08,2016-02-02 10:51:28,0.46,10065,10021
2.881069,50.618931,53.5,2016-01-29 04:49:35,2016-01-29 05:07:52,13.0,10011,7201
2.856945,47.143055,50.0,2016-01-19 00:48:47,2016-01-19 01:14:02,9.0,10119,11371
2.822248,13.822248,11.0,2016-02-14 15:37:35,2016-02-14 16:02:09,2.28,10023,10167
2.801239,59.698761,62.5,2016-02-01 10:30:45,2016-02-01 10:56:12,16.1,10019,7114
2.628321,10.128321,7.5,2016-02-05 13:50:35,2016-02-05 14:06:00,1.24,10002,10013
2.613594,53.386406,56.0,2016-02-21 07:02:59,2016-02-21 07:26:25,13.68,10278,7114

Unnamed: 0,Latest
Model Rank,> 0
root_mean_squared_error,2.95898
weighted_mean_squared_error,10.5769
max_error,55.7623
mean_absolute_error,0.654309
mean_absolute_percentage_error,0.0426456
mean_squared_error,8.75558
Run Time,2022-11-15 19:48:02
Run ID,0bf5af4404b549a4a17c5c3497b0a036


In [16]:
# Fetch the recipe's "training_data" artifact and show summary statistics for
# its numeric columns as the cell's output.
training_data = r.get_artifact("training_data")
training_data.describe()

Unnamed: 0,trip_distance,fare_amount,pickup_zip,dropoff_zip
count,8051.0,8051.0,8051.0,8051.0
mean,2.866895,12.394709,10138.588995,10175.038753
std,3.483383,10.809536,340.490906,410.985094
min,0.0,-8.0,7002.0,7002.0
25%,1.0,6.5,10012.0,10013.0
50%,1.7,9.0,10022.0,10023.0
75%,3.06,14.0,10110.0,10119.0
max,30.6,275.0,11436.0,11691.0


In [18]:
# Fetch the recipe's "model" artifact and print its summary; the printed text
# identifies the loaded pyfunc model's artifact path, flavor, and run ID.
trained_model = r.get_artifact("model")
print(trained_model)

mlflow.pyfunc.loaded_model:
  artifact_path: train/model
  flavor: mlflow.sklearn
  run_id: 0bf5af4404b549a4a17c5c3497b0a036

