In [102]:
# Turn on IPython's autoreload extension (mode 2) so edits to imported modules
# are picked up without restarting the kernel.
# NOTE(review): re-running this cell in a live kernel prints an
# "already loaded" notice; `%reload_ext autoreload` is the form that notice
# suggests for repeat runs.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [103]:
import pandas as pd

# MLflow Classification Recipe Notebook

This notebook runs the MLflow Classification Recipe on Databricks and inspects its results. For more information about the MLflow Classification Recipe, including usage examples, see the [Classification Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#classification-recipe) and the [Classification Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.classification.v1.recipe).

In [104]:
# Build the recipe object with the "local" profile. The cells below drive the
# individual recipe steps through this single `r` instance.
from mlflow.recipes import Recipe

r = Recipe(profile="local")

2024/12/10 16:02:07 INFO mlflow.recipes.recipe: Creating MLflow Recipe 'Naveen' with profile: 'local'


In [105]:
# Inspect the recipe as a whole before running any step.
# NOTE(review): no output was captured for this cell — presumably it renders
# an overview of the recipe's steps; confirm against the MLflow docs.
r.inspect()

In [106]:
# Run the "ingest" step. The captured output lists the ingested column schema
# and a preview of the first rows.
r.run("ingest")

2024/12/10 16:02:07 INFO mlflow.recipes.step: Running step ingest...

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 27235.74it/s]
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 8128.50it/s] 


name,type
Pregnant,integer
Glucose,number
Diastolic_BP,number
Skin_Fold,number
Serum_Insulin,number
BMI,number
Diabetes_Pedigree,number
Age,integer
Class,integer

Pregnant,Glucose,Diastolic_BP,Skin_Fold,Serum_Insulin,BMI,Diabetes_Pedigree,Age,Class
1,89.0,66.0,23.0,94.0,28.1,0.167,21,0
0,137.0,40.0,35.0,168.0,43.1,2.288,33,1
3,78.0,50.0,32.0,88.0,31.0,0.248,26,1
2,197.0,70.0,45.0,543.0,30.5,0.158,53,1
1,189.0,60.0,23.0,846.0,30.1,0.398,59,1


In [107]:
# Run the "split" step. Per the log, the upstream "ingest" step is skipped
# because nothing changed since it last ran.
r.run("split")

2024/12/10 16:02:08 INFO mlflow.recipes.utils.execution: ingest: No changes. Skipping.


Run MLflow Recipe step: split
2024/12/10 16:02:08 INFO mlflow.recipes.step: Running step split...
  return bound(*args, **kwds)


In [108]:
# Fetch the "training_data" artifact from the recipe and show summary
# statistics for it; the bare last expression is what renders as cell output.
# NOTE(review): the artifact appears to be a pandas DataFrame (it exposes
# `.describe()`) — confirm.
training_data = r.get_artifact("training_data")
training_data.describe()

Unnamed: 0,Pregnant,Glucose,Diastolic_BP,Skin_Fold,Serum_Insulin,BMI,Diabetes_Pedigree,Age,Class
count,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0
mean,3.173203,121.653595,71.133987,29.179739,150.169935,33.171569,0.526255,30.679739,0.326797
std,3.092996,31.151732,12.524796,10.57884,114.774921,7.151197,0.361407,10.130074,0.469811
min,0.0,56.0,24.0,7.0,14.0,18.2,0.085,21.0,0.0
25%,1.0,98.25,64.0,21.0,74.25,28.4,0.273,23.0,0.0
50%,2.0,117.5,70.0,29.0,120.0,33.2,0.4435,27.0,0.0
75%,5.0,141.75,80.0,36.0,183.75,36.8,0.681,35.0,1.0
max,15.0,198.0,110.0,63.0,744.0,67.1,2.42,81.0,1.0


In [109]:
# Run the "transform" step. The captured output shows the input and output
# column schemas plus a preview of the transformed rows.
r.run("transform")

2024/12/10 16:02:09 INFO mlflow.recipes.utils.execution: ingest, split: No changes. Skipping.


Run MLflow Recipe step: transform
2024/12/10 16:02:09 INFO mlflow.recipes.step: Running step transform...


Name,Type
Pregnant,int64
Glucose,float64
Diastolic_BP,float64
Skin_Fold,float64
Serum_Insulin,float64
BMI,float64
Diabetes_Pedigree,float64
Age,int64
Class,int64

Name,Type
Pregnant,int64
Glucose,float64
Diastolic_BP,float64
Skin_Fold,float64
Serum_Insulin,float64
BMI,float64
Diabetes_Pedigree,float64
Class,int64
Age,int64

Pregnant,Glucose,Diastolic_BP,Skin_Fold,Serum_Insulin,BMI,Diabetes_Pedigree,Class,Age
1,89.0,66.0,23.0,94.0,28.1,0.167,0,21
0,137.0,40.0,35.0,168.0,43.1,2.288,1,33
2,197.0,70.0,45.0,543.0,30.5,0.158,1,53
5,166.0,72.0,19.0,175.0,25.8,0.587,1,51
0,118.0,84.0,47.0,230.0,45.8,0.551,1,31


In [110]:
# Run the "train" step. The captured output reports training/validation
# metrics, the model's input/output schema, the worst-predicted rows, and a
# leaderboard entry for this run.
# NOTE(review): the reported metrics are regression metrics (RMSE, r2_score)
# and `mean_on_target` matches the mean of `Age`, so the recipe appears to
# predict `Age` rather than classify `Class`, despite the notebook's
# "Classification" title — confirm against the recipe configuration.
r.run("train")

2024/12/10 16:02:10 INFO mlflow.recipes.utils.execution: ingest, split, transform: No changes. Skipping.


Run MLflow Recipe step: train
2024/12/10 16:02:10 INFO mlflow.recipes.step: Running step train...

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]
Downloading artifacts:  20%|██        | 1/5 [00:00<00:00, 28728.11it/s]
Downloading artifacts:  40%|████      | 2/5 [00:00<00:00, 24105.20it/s]
Downloading artifacts:  60%|██████    | 3/5 [00:00<00:00, 25575.02it/s]
Downloading artifacts:  80%|████████  | 4/5 [00:00<00:00, 24209.55it/s]
Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 26116.46it/s]
Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 21822.60it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]
Downloading artifacts:  20%|██        | 1/5 [00:00<00:00, 19418.07it/s]
Downloading artifacts:  40%|████      | 2/5 [00:00<00:00, 15505.74it/s]
Downloading artifacts:  60%|██████    | 3/5 [00:00<00:00, 5660.33it/s] 
Downloading artifacts:  80%|████████  | 4/5 [00:00<00:00, 5399.81it/s]
Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 5

Metric,training,validation
root_mean_squared_error,3.01493,6.11521
example_count,306.0,38.0
max_error,18.4504,24.745
mean_absolute_error,2.09554,4.06987
mean_absolute_percentage_error,0.0671099,0.123442
mean_on_target,30.6797,30.8421
mean_squared_error,9.08979,37.3958
r2_score,0.911131,0.671895
score,0.911131,0.671895
sum_on_target,9388.0,1172.0

Name,Type
Pregnant,long
Glucose,double
Diastolic_BP,double
Skin_Fold,double
Serum_Insulin,double
BMI,double
Diabetes_Pedigree,double
Class,long

Name,Type
-,"Tensor('float64', (-1,))"

absolute_error,prediction,Age,Pregnant,Glucose,Diastolic_BP,Skin_Fold,Serum_Insulin,BMI,Diabetes_Pedigree,Class
18.450384615384607,62.54961538461539,81,9,134.0,74.0,33.0,60.0,25.9,0.46,0
13.895000000000003,39.105,53,2,197.0,70.0,45.0,543.0,30.5,0.158,1
9.634529291159431,32.36547070884057,42,1,100.0,66.0,29.0,196.0,32.0,0.444,0
9.218368208058251,40.78163179194175,50,1,164.0,82.0,43.0,67.0,32.8,0.341,0
8.899999999999999,49.1,58,0,173.0,78.0,32.0,265.0,46.5,1.159,0
8.408333333333339,52.59166666666666,61,7,142.0,60.0,33.0,190.0,28.8,0.687,0
7.610888213863109,28.38911178613689,36,1,95.0,74.0,21.0,73.0,25.9,0.673,0
7.340000000000003,55.66,63,10,101.0,76.0,48.0,180.0,32.9,0.171,0
7.122857142857143,32.12285714285714,25,0,180.0,78.0,63.0,14.0,59.4,2.42,1
7.109999999999999,30.11,23,6,93.0,50.0,30.0,64.0,28.7,0.356,0

Unnamed: 0,Latest
Model Rank,> 0
root_mean_squared_error,6.11521
max_error,24.745
mean_absolute_error,4.06987
mean_absolute_percentage_error,0.123442
mean_squared_error,37.3958
Run Time,2024-12-10 16:02:11
Run ID,8d0cae743f3e4a6e92deac0979cd0cd6


In [111]:
# Fetch the "model" artifact produced by the train step and print its summary
# (the captured output shows the artifact path, flavor, and run id).
trained_model = r.get_artifact("model")
print(trained_model)

Downloading artifacts: 100%|██████████| 15/15 [00:00<00:00, 509.52it/s] 

mlflow.pyfunc.loaded_model:
  artifact_path: train/model
  flavor: mlflow.pyfunc.model
  run_id: 8d0cae743f3e4a6e92deac0979cd0cd6






In [112]:
# Run the "evaluate" step. The captured output compares validation and test
# metrics and shows root_mean_squared_error validated against a threshold of 10.
r.run("evaluate")

2024/12/10 16:02:17 INFO mlflow.recipes.utils.execution: ingest, split, transform, train: No changes. Skipping.


Run MLflow Recipe step: evaluate
2024/12/10 16:02:18 INFO mlflow.recipes.step: Running step evaluate...
2024/12/10 16:02:19 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/12/10 16:02:19 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/12/10 16:02:19 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/12/10 16:02:19 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/12/10 16:02:19 INFO mlflow.tracking._tracking_service.client: 🏃 View run spiffy-bug-74 at: http://127.0.0.1:5000/#/experiments/569871241096661020/runs/8d0cae743f3e4a6e92deac0979cd0cd6.
2024/12/10 16:02:19 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/569871241096661020.


Metric,validation,test
root_mean_squared_error,6.11521,7.294343
example_count,38.0,48.0
max_error,24.745,29.114643
mean_absolute_error,4.06987,5.209878
mean_absolute_percentage_error,0.123442,0.156003
mean_on_target,30.8421,32.0625
mean_squared_error,37.3958,53.207446
r2_score,0.671895,0.486829
score,0.671895,0.486829
sum_on_target,1172.0,1539.0

metric,greater_is_better,value,threshold,validated
root_mean_squared_error,False,7.29434,10,✅
