In [9]:
import h2o
h2o.init(ip="localhost", port=54323)

Checking whether there is an H2O instance running at http://localhost:54323. connected.


0,1
H2O_cluster_uptime:,4 hours 11 mins
H2O_cluster_timezone:,Europe/Zurich
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.40.0.1
H2O_cluster_version_age:,22 days
H2O_cluster_name:,H2O_from_python_tahendry_3wssqm
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,29.97 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


In [10]:
# example form here: https://docs.h2o.ai/h2o/latest-stable/h2o-docs/automl.html#training

import h2o
from h2o.automl import H2OAutoML

# Start the H2O cluster (locally)
h2o.init()

# Import a sample binary outcome train/test set into H2O
train = h2o.import_file("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
test = h2o.import_file("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")

# Identify predictors and response
x = train.columns
y = "response"
x.remove(y)

# For binary classification, response should be a factor
train[y] = train[y].asfactor()
test[y] = test[y].asfactor()

# Run AutoML for 50 base models
aml = H2OAutoML(max_models=50, balance_classes = True, seed=1)
aml.train(x=x, y=y, training_frame=train)

# View the AutoML Leaderboard
lb = aml.leaderboard
lb.head(rows=lb.nrows)  # Print all rows instead of default (10 rows)

Checking whether there is an H2O instance running at http://localhost:54321. connected.


0,1
H2O_cluster_uptime:,4 hours 4 mins
H2O_cluster_timezone:,Europe/Zurich
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.40.0.1
H2O_cluster_version_age:,22 days
H2O_cluster_name:,H2O_from_python_tahendry_i2e7zr
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,29.93 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%


model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse
StackedEnsemble_AllModels_1_AutoML_2_20230303_160844,0.790226,0.548868,0.80763,0.318718,0.431442,0.186143
StackedEnsemble_BestOfFamily_1_AutoML_2_20230303_160844,0.788183,0.551252,0.8051,0.316826,0.432391,0.186962
GBM_1_AutoML_2_20230303_160844,0.782177,0.557734,0.800457,0.328058,0.435367,0.189544
GBM_grid_1_AutoML_2_20230303_160844_model_9,0.779738,0.562504,0.798372,0.333087,0.437156,0.191106
GBM_2_AutoML_2_20230303_160844,0.77971,0.560845,0.798129,0.332059,0.436651,0.190664
GBM_5_AutoML_2_20230303_160844,0.778805,0.561756,0.796787,0.334326,0.437108,0.191063
GBM_grid_1_AutoML_2_20230303_160844_model_2,0.77786,0.564655,0.795359,0.33376,0.438088,0.191921
XGBoost_grid_1_AutoML_2_20230303_160844_model_15,0.777608,0.562586,0.795975,0.333241,0.437559,0.191458
GBM_grid_1_AutoML_2_20230303_160844_model_6,0.777219,0.564288,0.795407,0.324841,0.438074,0.191909
GBM_grid_1_AutoML_2_20230303_160844_model_7,0.776443,0.570148,0.792348,0.333251,0.440213,0.193787


In [13]:
aml

<h2o.automl._estimator.H2OAutoML at 0x7ff0836733d0>

In [14]:
lb

model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse
StackedEnsemble_AllModels_1_AutoML_2_20230303_160844,0.790226,0.548868,0.80763,0.318718,0.431442,0.186143
StackedEnsemble_BestOfFamily_1_AutoML_2_20230303_160844,0.788183,0.551252,0.8051,0.316826,0.432391,0.186962
GBM_1_AutoML_2_20230303_160844,0.782177,0.557734,0.800457,0.328058,0.435367,0.189544
GBM_grid_1_AutoML_2_20230303_160844_model_9,0.779738,0.562504,0.798372,0.333087,0.437156,0.191106
GBM_2_AutoML_2_20230303_160844,0.77971,0.560845,0.798129,0.332059,0.436651,0.190664
GBM_5_AutoML_2_20230303_160844,0.778805,0.561756,0.796787,0.334326,0.437108,0.191063
GBM_grid_1_AutoML_2_20230303_160844_model_2,0.77786,0.564655,0.795359,0.33376,0.438088,0.191921
XGBoost_grid_1_AutoML_2_20230303_160844_model_15,0.777608,0.562586,0.795975,0.333241,0.437559,0.191458
GBM_grid_1_AutoML_2_20230303_160844_model_6,0.777219,0.564288,0.795407,0.324841,0.438074,0.191909
GBM_grid_1_AutoML_2_20230303_160844_model_7,0.776443,0.570148,0.792348,0.333251,0.440213,0.193787


In [15]:
# To generate predictions on a test set, you can make predictions
# directly on the `H2OAutoML` object ...
preds = aml.predict(test)
print(preds)

# ... or on the leader model object directly
preds_lead = aml.leader.predict(test)
print(preds_lead)

aml.model_performance()

stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%
  predict        p0        p1
        0  0.715834  0.284166
        1  0.634358  0.365642
        1  0.539452  0.460548
        1  0.353233  0.646767
        0  0.707339  0.292661
        1  0.22036   0.77964
        1  0.300842  0.699158
        1  0.592561  0.407439
        1  0.566501  0.433499
        0  0.794187  0.205813
[5000 rows x 3 columns]

stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%
  predict        p0        p1
        0  0.715834  0.284166
        1  0.634358  0.365642
        1  0.539452  0.460548
        1  0.353233  0.646767
        0  0.707339  0.292661
        1  0.22036   0.77964
        1  0.300842  0.699158
        1  0.592561  0.407439
        1  0.566501  0.433499
        0  0.794187  0.205813
[5000 rows x 3 columns]



AttributeError: 'H2OAutoML' object has no attribute 'model_performance'