# Start H2O

Import the h2o Python module and H2OAutoML class and initialize a local H2O cluster.


In [8]:
import h2o
from h2o.automl import H2OAutoML

# Initialize the local H2O cluster; the connection log and cluster summary are printed below
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.
Attempting to start a local H2O server...
  Java Version: java version "1.8.0_172"; Java(TM) SE Runtime Environment (build 1.8.0_172-b11); Java HotSpot(TM) 64-Bit Server VM (build 25.172-b11, mixed mode)
  Starting server from /Users/me/code/github-repos/automl-fall-school-2022/venv/lib/python3.10/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /var/folders/gj/cm0k4b_s42j30zs376cq_5hh0000gn/T/tmpj1xsz93w
  JVM stdout: /var/folders/gj/cm0k4b_s42j30zs376cq_5hh0000gn/T/tmpj1xsz93w/h2o_me_started_from_python.out
  JVM stderr: /var/folders/gj/cm0k4b_s42j30zs376cq_5hh0000gn/T/tmpj1xsz93w/h2o_me_started_from_python.err
  Server is running at http://127.0.0.1:54325
Connecting to H2O server at http://127.0.0.1:54325 ... successful.


0,1
H2O_cluster_uptime:,12 secs
H2O_cluster_timezone:,America/Los_Angeles
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.38.0.1
H2O_cluster_version_age:,18 days
H2O_cluster_name:,H2O_from_python_me_00sq7w
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.556 Gb
H2O_cluster_total_cores:,0
H2O_cluster_allowed_cores:,0


# Load Data

For the AutoML binary classification demo, we use a subset of the [Product Backorders](https://www.kaggle.com/tiredgeek/predict-bo-trial/data) dataset. The goal here is to predict whether or not a product will be put on backorder status, given a number of product metrics such as current inventory, transit time, demand forecasts and prior sales.

In [13]:
# Prefer the local copy of the dataset; fall back to the GitHub-hosted file
# when the notebook is run outside of the repository checkout.
import os

data_path = "data/product_backorders.csv"
if not os.path.isfile(data_path):
    data_path = "https://github.com/ledell/automl-fall-school-2022/raw/main/data/product_backorders.csv"


# Load data into H2O (accepts either the local path or the URL chosen above)
df = h2o.import_file(data_path)

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [14]:
# Per-column summary (type, min, mean, max, sigma, zeros, missing) followed by the first few rows
df.describe()

Unnamed: 0,sku,national_inv,lead_time,in_transit_qty,forecast_3_month,forecast_6_month,forecast_9_month,sales_1_month,sales_3_month,sales_6_month,sales_9_month,min_bank,potential_issue,pieces_past_due,perf_6_month_avg,perf_12_month_avg,local_bo_qty,deck_risk,oe_constraint,ppap_risk,stop_auto_buy,rev_stop,went_on_backorder
type,int,int,int,int,int,int,int,int,int,int,int,int,enum,int,real,real,int,enum,enum,enum,enum,enum,enum
mins,1111620.0,-1440.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,-99.0,-99.0,0.0,,,,,,
mean,2059552.760562641,376.36702881435997,7.706036161335188,48.27234556237863,182.9108276911772,344.7398309977431,497.79242114102766,56.11887891670601,168.53445651603428,333.53219965359784,504.25539285151945,48.84070750013119,,2.3114995013908577,-6.5198336220017845,-6.05393533826694,0.8917755734005144,,,,,,
maxs,3284775.0,730722.0,52.0,170920.0,479808.0,967776.0,1418208.0,186451.0,550609.0,1136154.0,1759152.0,85584.0,,13824.0,1.0,1.0,1440.0,,,,,,
sigma,663337.6456498681,7002.071628662684,6.7786650721241895,1465.9992102068286,4304.865591970627,8406.062155159249,12180.570042918363,1544.2177775482571,4581.340080221506,9294.566153218984,14184.14539565362,968.7738680675267,,110.24106014611976,25.975138766871872,25.18449715003253,23.033345417338797,,,,,,
zeros,0,1858,121,15432,12118,11136,10604,10278,8022,6864,6231,9909,,18601,474,401,18585,,,,,,
missing,0,0,1078,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,1113121.0,0.0,8.0,1.0,6.0,6.0,6.0,0.0,4.0,9.0,12.0,0.0,No,1.0,0.9,0.89,0.0,No,No,No,Yes,No,Yes
1,1113268.0,0.0,8.0,0.0,2.0,3.0,4.0,1.0,2.0,3.0,3.0,0.0,No,0.0,0.96,0.97,0.0,No,No,No,Yes,No,Yes
2,1113874.0,20.0,2.0,0.0,45.0,99.0,153.0,16.0,42.0,80.0,111.0,10.0,No,0.0,0.81,0.88,0.0,No,No,No,Yes,No,Yes


We will notice that the response column, `"went_on_backorder"`, is already encoded as "enum", so there's nothing we need to do here. If it were encoded as a 0/1 "int", then we'd have to convert the column as follows: `df[y] = df[y].asfactor()`

Next, let's identify the response and predictor columns by saving the response as `y` and the predictors as `x`. The `"sku"` column is a unique identifier, so we'll want to remove it from the set of predictors.

In [15]:
# Response column, then every other column except the identifier as predictors
y = "went_on_backorder"
x = list(df.columns)
for dropped in (y, "sku"):
    # list.remove raises ValueError if the column is not present
    x.remove(dropped)

In [16]:
# Frame dimensions as (rows, columns)
df.shape

(19053, 23)

# Run AutoML

Run AutoML, stopping after 10 models. The `max_models` argument specifies the number of individual (or "base") models, and does not include the ensemble models that are trained at subsequent stages of the modeling process and at the end.

In [17]:
# Cap the run at 10 base models and use a fixed seed; the final expression
# displays the model returned by train()
aml = H2OAutoML(max_models=10, seed=1)
aml.train(x=x, y=y, training_frame=df)

AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%


Unnamed: 0,No,Yes,Error,Rate
No,8661.0,179.0,0.0202,(179.0/8840.0)
Yes,209.0,999.0,0.173,(209.0/1208.0)
Total,8870.0,1178.0,0.0386,(388.0/10048.0)

metric,threshold,value,idx
max f1,0.4110276,0.8373847,183.0
max f2,0.1891488,0.8712295,259.0
max f0point5,0.5789914,0.8817595,136.0
max accuracy,0.4792011,0.9621815,163.0
max precision,0.9862383,1.0,0.0
max recall,0.0059877,1.0,386.0
max specificity,0.9862383,1.0,0.0
max absolute_mcc,0.4110276,0.8155632,183.0
max min_per_class_accuracy,0.1908311,0.9371041,258.0
max mean_per_class_accuracy,0.1891488,0.9392957,259.0

group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
1,0.0100518,0.9479834,8.3178808,8.3178808,1.0,0.9645717,1.0,0.9645717,0.0836093,0.0836093,731.7880795,731.7880795,0.0836093
2,0.020004,0.9238226,8.3178808,8.3178808,1.0,0.936876,1.0,0.9507927,0.0827815,0.1663907,731.7880795,731.7880795,0.1663907
3,0.0300557,0.8958297,8.3178808,8.3178808,1.0,0.9105404,1.0,0.9373309,0.0836093,0.25,731.7880795,731.7880795,0.25
4,0.040008,0.8591309,8.234702,8.2971895,0.99,0.877732,0.9975124,0.9225053,0.0819536,0.3319536,723.4701987,729.7189549,0.3318405
5,0.0500597,0.8189169,8.1531703,8.2682712,0.980198,0.8411089,0.9940358,0.9061613,0.0819536,0.4139073,715.3170284,726.8271168,0.4135679
6,0.1000199,0.525231,6.8431967,7.556443,0.8227092,0.6849037,0.9084577,0.7956426,0.3418874,0.7557947,584.3196749,655.6442951,0.7453875
7,0.1500796,0.2484236,2.9931142,6.0343247,0.359841,0.37331,0.7254642,0.6547717,0.1498344,0.9056291,199.3114163,503.4324661,0.8587966
8,0.2000398,0.1168825,1.2095723,4.8293368,0.1454183,0.1748009,0.580597,0.5348984,0.0604305,0.9660596,20.9572307,382.933676,0.8706976
9,0.3000597,0.0398103,0.2234655,3.2940463,0.0268657,0.0680671,0.3960199,0.379288,0.022351,0.9884106,-77.6534546,229.4046325,0.7824151
10,0.3999801,0.0194523,0.0994169,2.4959851,0.0119522,0.0277594,0.3000746,0.2914714,0.0099338,0.9983444,-90.0583098,149.598513,0.6801317

Unnamed: 0,No,Yes,Error,Rate
No,15925.0,862.0,0.0513,(862.0/16787.0)
Yes,537.0,1729.0,0.237,(537.0/2266.0)
Total,16462.0,2591.0,0.0734,(1399.0/19053.0)

metric,threshold,value,idx
max f1,0.3181446,0.7119621,211.0
max f2,0.1616645,0.7816092,270.0
max f0point5,0.5558429,0.7389698,136.0
max accuracy,0.4848417,0.9331339,155.0
max precision,0.9738037,0.9545455,2.0
max recall,0.000507,1.0,399.0
max specificity,0.9822884,0.9999404,0.0
max absolute_mcc,0.3181446,0.672078,211.0
max min_per_class_accuracy,0.1263262,0.8885447,286.0
max mean_per_class_accuracy,0.1015805,0.8916409,298.0

group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
1,0.0100247,0.9292545,7.5717897,7.5717897,0.9005236,0.9520574,0.9005236,0.9520574,0.0759047,0.0759047,657.178967,657.178967,0.0747728
2,0.0200493,0.8994517,7.5277676,7.5497787,0.895288,0.9137073,0.8979058,0.9328823,0.0754634,0.151368,652.7767637,654.9778654,0.1490448
3,0.0300215,0.866413,7.2133576,7.4380304,0.8578947,0.8821501,0.8846154,0.9160307,0.0719329,0.223301,621.3357644,643.8030416,0.2193694
4,0.0400462,0.8229852,7.3516795,7.4164144,0.8743455,0.8448925,0.8820446,0.8982229,0.0736981,0.2969991,635.1679505,641.6414395,0.2916378
5,0.0500184,0.777435,6.7265666,7.2788792,0.8,0.8012711,0.8656873,0.8788935,0.0670786,0.3640777,572.6566637,627.8879165,0.3564527
6,0.1000367,0.4929586,5.4878337,6.3833565,0.6526758,0.6458881,0.7591815,0.7623908,0.2744925,0.6385702,448.7833747,538.3356456,0.6112276
7,0.1500026,0.2620157,3.1089174,5.2926406,0.3697479,0.3686855,0.6294612,0.6312476,0.1553398,0.79391,210.8917353,429.2640562,0.7308254
8,0.200021,0.1311776,1.7998683,4.4192184,0.2140609,0.188946,0.5255838,0.5206432,0.0900265,0.8839365,79.9868303,341.9218373,0.7762341
9,0.3000052,0.0437492,0.6399949,3.1596976,0.0761155,0.0765543,0.3757873,0.3726395,0.0639894,0.9479259,-36.0005143,215.9697589,0.7353804
10,0.3999895,0.0214805,0.273653,2.4382811,0.0325459,0.0306531,0.2899882,0.2871541,0.027361,0.9752868,-72.6347027,143.8281109,0.6529541

Unnamed: 0,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
accuracy,0.9281002,0.0045972,0.9315069,0.9234412,0.9256390,0.9343315,0.9255826
auc,0.9520687,0.0028361,0.9555078,0.9504733,0.9481458,0.9527548,0.9534617
err,0.0718998,0.0045972,0.0684932,0.0765588,0.0743610,0.0656685,0.0744174
err_count,274.0,18.069311,260.0,291.0,288.0,250.0,281.0
f0point5,0.6916458,0.0214159,0.7142857,0.6737864,0.6767077,0.7158074,0.6776416
f1,0.7147025,0.0186224,0.7352342,0.7045685,0.6889849,0.7288503,0.7158746
f2,0.7397039,0.0230607,0.7574486,0.7382979,0.7017158,0.7423774,0.7586798
lift_top_group,7.582595,0.1574124,7.4867578,7.497041,7.741134,7.424884,7.763158
logloss,0.1680476,0.0035543,0.1630039,0.1711725,0.1715073,0.1662353,0.168319
max_per_class_error,0.2424039,0.0299208,0.2269807,0.2373626,0.2895323,0.2483221,0.2098214


# Leaderboard

Next, we will view the AutoML Leaderboard. Since we did not provide a `leaderboard_frame` in the `H2OAutoML.train()` method for scoring and ranking the models, the AutoML leaderboard uses cross-validation metrics (5-fold CV by default) to rank the models.

A default performance metric for each machine learning task (binary classification, multiclass classification, regression) is specified internally and the leaderboard will be sorted by that metric. In the case of binary classification, the default ranking metric is Area Under the ROC Curve (AUC). To rank the leaderboard by a different H2O metric, set the `sort_metric` argument when constructing `H2OAutoML`.

The leader model is stored at `aml.leader` and the leaderboard is stored at `aml.leaderboard`.

In [18]:
# Keep a handle on the leaderboard produced by the AutoML run
lb = aml.leaderboard

Now we will view a snapshot of the top models. Here we should see the two Stacked Ensembles at or near the top of the leaderboard. Stacked Ensembles can almost always outperform a single model.

In [19]:
# Preview the top rows of the leaderboard
lb.head()

model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse
StackedEnsemble_AllModels_1_AutoML_2_20221007_163621,0.9521,0.168135,0.753793,0.144165,0.222846,0.0496602
StackedEnsemble_BestOfFamily_1_AutoML_2_20221007_163621,0.951981,0.168935,0.750911,0.137889,0.223316,0.0498701
GBM_4_AutoML_2_20221007_163621,0.948606,0.173961,0.736134,0.158272,0.226527,0.0513145
GBM_1_AutoML_2_20221007_163621,0.946849,0.175838,0.736278,0.161556,0.228859,0.0523764
XGBoost_3_AutoML_2_20221007_163621,0.946818,0.176144,0.7357,0.158623,0.22797,0.0519703
GBM_3_AutoML_2_20221007_163621,0.946027,0.177371,0.732411,0.177962,0.228546,0.0522332
GBM_2_AutoML_2_20221007_163621,0.944746,0.179901,0.72553,0.151993,0.230201,0.0529923
XGBoost_1_AutoML_2_20221007_163621,0.944324,0.18062,0.723453,0.159076,0.229847,0.0528299
XGBoost_2_AutoML_2_20221007_163621,0.943639,0.18085,0.732783,0.167392,0.229929,0.0528675
XRT_1_AutoML_2_20221007_163621,0.942144,0.21219,0.726515,0.159676,0.24522,0.060133


To view the entire leaderboard, specify the rows argument of the `head()` method as the total number of rows:

In [20]:
# Ask head() for as many rows as the leaderboard has, so every model is shown
lb.head(rows=lb.nrows)

model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse
StackedEnsemble_AllModels_1_AutoML_2_20221007_163621,0.9521,0.168135,0.753793,0.144165,0.222846,0.0496602
StackedEnsemble_BestOfFamily_1_AutoML_2_20221007_163621,0.951981,0.168935,0.750911,0.137889,0.223316,0.0498701
GBM_4_AutoML_2_20221007_163621,0.948606,0.173961,0.736134,0.158272,0.226527,0.0513145
GBM_1_AutoML_2_20221007_163621,0.946849,0.175838,0.736278,0.161556,0.228859,0.0523764
XGBoost_3_AutoML_2_20221007_163621,0.946818,0.176144,0.7357,0.158623,0.22797,0.0519703
GBM_3_AutoML_2_20221007_163621,0.946027,0.177371,0.732411,0.177962,0.228546,0.0522332
GBM_2_AutoML_2_20221007_163621,0.944746,0.179901,0.72553,0.151993,0.230201,0.0529923
XGBoost_1_AutoML_2_20221007_163621,0.944324,0.18062,0.723453,0.159076,0.229847,0.0528299
XGBoost_2_AutoML_2_20221007_163621,0.943639,0.18085,0.732783,0.167392,0.229929,0.0528675
XRT_1_AutoML_2_20221007_163621,0.942144,0.21219,0.726515,0.159676,0.24522,0.060133


By default, the leaderboard will contain model metrics only.  If you want to see more metrics, such as training time or prediction speed, you can view the "extended leaderboard" as follows:

In [21]:
# Extended leaderboard: request all optional columns in addition to the model metrics
lb = h2o.automl.get_leaderboard(aml, extra_columns="ALL")
lb

model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse,training_time_ms,predict_time_per_row_ms,algo
StackedEnsemble_AllModels_1_AutoML_2_20221007_163621,0.9521,0.168135,0.753793,0.144165,0.222846,0.0496602,9798,0.040853,StackedEnsemble
StackedEnsemble_BestOfFamily_1_AutoML_2_20221007_163621,0.951981,0.168935,0.750911,0.137889,0.223316,0.0498701,7966,0.021225,StackedEnsemble
GBM_4_AutoML_2_20221007_163621,0.948606,0.173961,0.736134,0.158272,0.226527,0.0513145,1406,0.008633,GBM
GBM_1_AutoML_2_20221007_163621,0.946849,0.175838,0.736278,0.161556,0.228859,0.0523764,2680,0.011024,GBM
XGBoost_3_AutoML_2_20221007_163621,0.946818,0.176144,0.7357,0.158623,0.22797,0.0519703,1697,0.001782,XGBoost
GBM_3_AutoML_2_20221007_163621,0.946027,0.177371,0.732411,0.177962,0.228546,0.0522332,1391,0.006984,GBM
GBM_2_AutoML_2_20221007_163621,0.944746,0.179901,0.72553,0.151993,0.230201,0.0529923,1652,0.008156,GBM
XGBoost_1_AutoML_2_20221007_163621,0.944324,0.18062,0.723453,0.159076,0.229847,0.0528299,2369,0.001851,XGBoost
XGBoost_2_AutoML_2_20221007_163621,0.943639,0.18085,0.732783,0.167392,0.229929,0.0528675,2121,0.00161,XGBoost
XRT_1_AutoML_2_20221007_163621,0.942144,0.21219,0.726515,0.159676,0.24522,0.060133,1488,0.006935,DRF


# Examine models

To examine the trained models more closely, you can retrieve them either by model ID or through a convenience function that returns the best model of each model type (ranked by the default metric, or a metric of your choosing).

Get the best model, or the best model of a certain type:

In [23]:
# NOTE: `m` and `xgb` are reassigned several times below purely to demonstrate
# the different lookups; after this cell `m` holds the logloss-ranked model and
# `xgb` holds the logloss-ranked XGBoost model.

# Get the best model using the default sort metric (the leaderboard leader)
m = aml.leader
# this is equivalent to
m = aml.get_best_model()

# Get the best model using a non-default metric
m = aml.get_best_model(criterion="logloss")

# Get the best XGBoost model using default sort metric
xgb = aml.get_best_model(algorithm="xgboost")

# Get the best XGBoost model, ranked by logloss
xgb = aml.get_best_model(algorithm="xgboost", criterion="logloss")