In [35]:
#Import H2O and other libraries that will be used in this tutorial 
import h2o
import matplotlib as plt

#Import the Estimators
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.estimators import H2ORandomForestEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator

#Import h2o grid search 
import h2o.grid 
from h2o.grid.grid_search import H2OGridSearch

In [36]:
import h2o
h2o.init(max_mem_size=16)

Checking whether there is an H2O instance running at http://localhost:54321 . connected.


0,1
H2O cluster uptime:,4 days 20 hours 49 mins
H2O cluster timezone:,Etc/UTC
H2O data parsing timezone:,UTC
H2O cluster version:,3.28.0.2
H2O cluster version age:,1 month and 18 days
H2O cluster name:,H2O_from_python_unknownUser_b8im2o
H2O cluster total nodes:,1
H2O cluster free memory:,2.344 Gb
H2O cluster total cores:,4
H2O cluster allowed cores:,4


## Import a sample binary outcome train/test set into H2O
#### https://archive.ics.uci.edu/ml/datasets/HIGGS
#### The data has been produced using Monte Carlo simulations. The first 21 features (columns 2-22) are kinematic properties measured by the particle detectors in the accelerator. The last seven features are functions of the first 21 features; these are high-level features derived by physicists to help discriminate between the two classes. There is an interest in using deep learning methods to obviate the need for physicists to manually develop such features. Target variable is binary, 0 for background noise, 1 for signal

## AutoML
http://docs.h2o.ai/h2o/latest-stable/h2o-r/docs/reference/h2o.automl.html

#### The models are ranked by a default metric based on the problem type (the second column of the leaderboard). In binary classification problems, that metric is AUC, and in multiclass classification problems, the metric is mean per-class error. In regression problems, the default sort metric is deviance.

In [37]:
# Binary classification, AutoML finds the best algorithm and model

from h2o.automl import H2OAutoML, get_leaderboard


train = h2o.import_file("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
test = h2o.import_file("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")

# Identify predictors and response
x = train.columns
y = "response"
x.remove(y)

# For binary classification, response should be a factor
train[y] = train[y].asfactor()
test[y] = test[y].asfactor()

# Run AutoML for 20 base models (limited to 1 hour max runtime by default)
# max_runtime_secs - maximum runtime for automl, default is set to 1 hour
# balance_classes - Set to True to over or undersample the classes to balance it
# modeling_plan - can pass a list of models to try out, the result of aml.modeling_steps can be passed back to it during initialization
# sort_metric - 'auc', 'aucpr' etc.
# export_checkpoints_dir - provide the name of the folder to checkpoint the model
# include_algos - algorithm to use, "DRF", "DeepLearning", "XGBoost" etc
# exclude_algos - exclude certain algorithms

aml = H2OAutoML(max_models=20, seed=1, project_name="First", max_runtime_secs=1000)
aml.train(x=x, y=y, training_frame=train) # provide data and response column names from the train H20 dataframe

# AutoML Leaderboard
lb = aml.leaderboard

# Optionally edd extra model information to the leaderboard
lb = get_leaderboard(aml, extra_columns='ALL')

# Print all rows (instead of default 10 rows)
lb.head(rows=lb.nrows)


Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
AutoML progress: |████████████████████████████████████████████████████████| 100%


model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse,training_time_ms,predict_time_per_row_ms
StackedEnsemble_BestOfFamily_AutoML_20200309_235545,0.789356,0.551808,0.805424,0.313837,0.432294,0.186878,4996,0.405899
StackedEnsemble_AllModels_AutoML_20200309_235545,0.789334,0.552098,0.805773,0.318917,0.432399,0.186969,12697,1.16885
XGBoost_3_AutoML_20200309_235545,0.786708,0.555684,0.804735,0.317438,0.433971,0.188331,2490,0.005907
XGBoost_grid__1_AutoML_20200309_235545_model_3,0.785959,0.557668,0.803676,0.324007,0.434596,0.188874,2535,0.005942
XGBoost_grid__1_AutoML_20200309_235545_model_4,0.782749,0.558526,0.800769,0.311874,0.43555,0.189703,2045,0.006156
GBM_5_AutoML_20200309_235545,0.78219,0.558353,0.800234,0.319658,0.435512,0.18967,3448,0.13329
XGBoost_2_AutoML_20200309_235545,0.782155,0.557366,0.801728,0.337128,0.435331,0.189514,6736,0.007512
XGBoost_grid__1_AutoML_20200309_235545_model_1,0.782142,0.560727,0.799441,0.324183,0.436255,0.190319,3587,0.005926
XGBoost_1_AutoML_20200309_235545,0.7819,0.557833,0.801298,0.33284,0.435508,0.189667,4216,0.007356
GBM_2_AutoML_20200309_235545,0.777673,0.562514,0.796181,0.334056,0.437583,0.191479,2406,0.12835




In [55]:
train.describe()

Rows:10000
Cols:29




Unnamed: 0,response,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16,x17,x18,x19,x20,x21,x22,x23,x24,x25,x26,x27,x28
type,enum,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real
mins,,0.2750626504421234,-2.425236463546753,-1.7425082921981812,0.012355248443782328,-1.7437547445297241,0.15948815643787384,-2.9410083293914795,-1.7412374019622803,0.0,0.189988374710083,-2.910175323486328,-1.7423716783523557,0.0,0.26360762119293213,-2.727842330932617,-1.7420687675476072,0.0,0.36535415053367615,-2.496431827545166,-1.7421357631683347,0.0,0.1722409576177597,0.34246698021888733,0.46118336915969854,0.384410560131073,0.08098646253347397,0.38877949118614197,0.4449557662010193
mean,,0.9979236403375864,-0.016680663833495066,-0.0034857744504741277,0.9913848843651824,-0.009822067860461522,0.9920579565405846,-0.0014677957667277976,0.0037509448854136282,1.00493906686306,0.9877106610402465,-0.020984136478627995,-0.00016777858562709903,0.9810776063561439,0.9945834684580565,0.013843917558944673,0.007515283621835988,1.0034907880783084,0.97978388197124,0.0012221027551102396,-0.014788587855815424,0.9991417586565018,1.0291478910103438,1.021455474603176,1.0508769570112229,1.0125336340218787,0.9677125638335943,1.0312243017762897,0.9578644521445038
maxs,,6.695387840270996,2.4299979209899902,1.7432359457015991,5.824007034301758,1.7428184747695923,7.064657211303711,2.9696741104125977,1.741453766822815,2.1730761528015137,5.192512035369873,2.9093241691589355,1.7431747913360596,2.214872121810913,6.523279190063477,2.7272779941558842,1.741774320602417,2.548224449157715,6.067653179168701,2.4955105781555176,1.7428174018859863,3.101961374282837,13.098125457763672,7.3919677734375,3.6822597980499268,6.5831212997436515,8.255083084106445,4.749468803405762,4.3163652420043945
sigma,,0.5749652323165785,1.0029429816464286,1.0108375148072333,0.59546048128496,1.004827635002585,0.47740822779756953,1.0041150858680707,1.0159068205331792,1.0269649157432101,0.4899343832646299,1.009461647840542,1.0053419750816672,1.045674383534585,0.485816821742571,1.0169739644992715,0.9970667134235766,1.1953415446637299,0.4991268289853618,1.0054594863334043,1.0009981151815668,1.3969915502672692,0.6372254799960355,0.36962268165272505,0.1659393548346413,0.40492650011247683,0.523194789843795,0.36622010143683376,0.3133368626304032
zeros,,0,0,0,0,0,0,0,0,4870,0,0,0,5093,0,0,0,5688,0,0,0,6468,0,0,0,0,0,0,0
missing,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,1,0.869293212890625,-0.6350818276405334,0.22569026052951813,0.327470064163208,-0.6899932026863098,0.7542022466659546,-0.24857313930988315,-1.0920639038085938,0.0,1.3749921321868896,-0.6536741852760315,0.9303491115570068,1.1074360609054565,1.138904333114624,-1.5781983137130735,-1.046985387802124,0.0,0.6579295396804808,-0.010454569943249226,-0.0457671694457531,3.101961374282837,1.353760004043579,0.9795631170272827,0.978076159954071,0.9200048446655273,0.7216574549674988,0.9887509346008301,0.8766783475875854
1,1,0.9075421094894409,0.3291472792625427,0.3594118654727936,1.4979698657989502,-0.3130095303058624,1.09553062915802,-0.5575249195098877,-1.5882297754287722,2.1730761528015137,0.8125811815261841,-0.2136419266462326,1.2710145711898804,2.214872121810913,0.4999939501285553,-1.2614318132400513,0.7321561574935913,0.0,0.39870089292526245,-1.138930082321167,-0.0008191101951524615,0.0,0.3022198975086212,0.8330481648445129,0.9856996536254883,0.9780983924865723,0.7797321677207947,0.9923557639122009,0.7983425855636597
2,1,0.7988347411155701,1.4706387519836426,-1.6359747648239136,0.45377317070961,0.4256291687488556,1.104874610900879,1.2823222875595093,1.3816642761230469,0.0,0.8517372012138367,1.540658950805664,-0.8196895122528076,2.214872121810913,0.9934899210929871,0.3560801148414612,-0.2087775468826294,2.548224449157715,1.256954550743103,1.128847599029541,0.9004608392715454,0.0,0.9097532629966736,1.108330488204956,0.9856922030448914,0.9513312578201294,0.8032515048980713,0.8659244179725647,0.7801175713539124


In [39]:
train['response']

response
1
1
1
0
1
0
1
1
1
1




### Look at the object returned by AutoML

In [40]:
help(aml)

Help on H2OAutoML in module h2o.automl.autoh2o object:

class H2OAutoML(h2o.base.Keyed)
 |  Automatic Machine Learning
 |  
 |  The Automatic Machine Learning (AutoML) function automates the supervised machine learning model training process.
 |  The current version of AutoML trains and cross-validates 
 |  a Random Forest (DRF), 
 |  an Extremely-Randomized Forest (DRF/XRT),
 |  a random grid of Generalized Linear Models (GLM)
 |  a random grid of XGBoost (XGBoost),
 |  a random grid of Gradient Boosting Machines (GBM), 
 |  a random grid of Deep Neural Nets (DeepLearning), 
 |  and 2 Stacked Ensembles, one of all the models, and one of only the best models of each kind.
 |  
 |  :examples:
 |  >>> import h2o
 |  >>> from h2o.automl import H2OAutoML
 |  >>> h2o.init()
 |  >>> # Import a sample binary outcome train/test set into H2O
 |  >>> train = h2o.import_file("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
 |  >>> test = h2o.import_file("https://s3.amazonaws.com/er

In [56]:
aml.modeling_steps

[{'name': 'XGBoost',
  'steps': [{'id': 'def_1', 'weight': 10},
   {'id': 'def_2', 'weight': 10},
   {'id': 'def_3', 'weight': 10}]},
 {'name': 'GLM', 'steps': [{'id': 'def_1', 'weight': 10}]},
 {'name': 'DRF', 'steps': [{'id': 'def_1', 'weight': 10}]},
 {'name': 'GBM',
  'steps': [{'id': 'def_1', 'weight': 10},
   {'id': 'def_2', 'weight': 10},
   {'id': 'def_3', 'weight': 10},
   {'id': 'def_4', 'weight': 10},
   {'id': 'def_5', 'weight': 10}]},
 {'name': 'DeepLearning', 'steps': [{'id': 'def_1', 'weight': 10}]},
 {'name': 'DRF', 'steps': [{'id': 'XRT', 'weight': 10}]},
 {'name': 'XGBoost', 'steps': [{'id': 'grid_1', 'weight': 100}]},
 {'name': 'GBM', 'steps': [{'id': 'grid_1', 'weight': 60}]},
 {'name': 'DeepLearning',
  'steps': [{'id': 'grid_1', 'weight': 20}, {'id': 'grid_2', 'weight': 20}]},
 {'name': 'StackedEnsemble',
  'steps': [{'id': 'best', 'weight': 10}, {'id': 'all', 'weight': 10}]}]

In [57]:
aml.training_info

{'creation_epoch': '1583798145',
 'start_epoch': '1583798145',
 'stop_epoch': '1583798916',
 'duration_secs': '771'}

### The Leaderboard contains information about all the models that were trained

In [58]:
aml.leaderboard

model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse
StackedEnsemble_BestOfFamily_AutoML_20200309_235545,0.789356,0.551808,0.805424,0.313837,0.432294,0.186878
StackedEnsemble_AllModels_AutoML_20200309_235545,0.789334,0.552098,0.805773,0.318917,0.432399,0.186969
XGBoost_3_AutoML_20200309_235545,0.786708,0.555684,0.804735,0.317438,0.433971,0.188331
XGBoost_grid__1_AutoML_20200309_235545_model_3,0.785959,0.557668,0.803676,0.324007,0.434596,0.188874
XGBoost_grid__1_AutoML_20200309_235545_model_4,0.782749,0.558526,0.800769,0.311874,0.43555,0.189703
GBM_5_AutoML_20200309_235545,0.78219,0.558353,0.800234,0.319658,0.435512,0.18967
XGBoost_2_AutoML_20200309_235545,0.782155,0.557366,0.801728,0.337128,0.435331,0.189514
XGBoost_grid__1_AutoML_20200309_235545_model_1,0.782142,0.560727,0.799441,0.324183,0.436255,0.190319
XGBoost_1_AutoML_20200309_235545,0.7819,0.557833,0.801298,0.33284,0.435508,0.189667
GBM_2_AutoML_20200309_235545,0.777673,0.562514,0.796181,0.334056,0.437583,0.191479




In [59]:
# Get the leader or the best-performing model
aml.leader

Model Details
H2OStackedEnsembleEstimator :  Stacked Ensemble
Model Key:  StackedEnsemble_BestOfFamily_AutoML_20200309_235545

No model summary for this model

ModelMetricsBinomialGLM: stackedensemble
** Reported on train data. **

MSE: 0.11461026655514583
RMSE: 0.3385413808608127
LogLoss: 0.3819220544808018
Null degrees of freedom: 9999
Residual degrees of freedom: 9993
Null deviance: 13828.113387424273
Residual deviance: 7638.441089616036
AIC: 7652.441089616036
AUC: 0.9389420773713295
AUCPR: 0.9439458143413849
Gini: 0.877884154742659

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.4302687858474688: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,3706.0,999.0,0.2123,(999.0/4705.0)
1,1,459.0,4836.0,0.0867,(459.0/5295.0)
2,Total,4165.0,5835.0,0.1458,(1458.0/10000.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.430269,0.869003,229.0
1,max f2,0.299182,0.918775,282.0
2,max f0point5,0.588462,0.880602,162.0
3,max accuracy,0.468906,0.8573,213.0
4,max precision,0.947606,1.0,0.0
5,max recall,0.153232,1.0,352.0
6,max specificity,0.947606,1.0,0.0
7,max absolute_mcc,0.459177,0.713753,217.0
8,max min_per_class_accuracy,0.499435,0.853985,201.0
9,max mean_per_class_accuracy,0.487095,0.855685,206.0



Gains/Lift Table: Avg response rate: 52.95 %, avg score: 51.57 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.01,0.926374,1.888574,1.888574,1.0,0.933956,1.0,0.933956,0.018886,0.018886,88.857413,88.857413
1,,2,0.02,0.918744,1.888574,1.888574,1.0,0.922639,1.0,0.928297,0.018886,0.037771,88.857413,88.857413
2,,3,0.03,0.913301,1.888574,1.888574,1.0,0.9159,1.0,0.924165,0.018886,0.056657,88.857413,88.857413
3,,4,0.04,0.906326,1.888574,1.888574,1.0,0.909755,1.0,0.920562,0.018886,0.075543,88.857413,88.857413
4,,5,0.05,0.902132,1.888574,1.888574,1.0,0.904423,1.0,0.917335,0.018886,0.094429,88.857413,88.857413
5,,6,0.1,0.877205,1.888574,1.888574,1.0,0.889529,1.0,0.903432,0.094429,0.188857,88.857413,88.857413
6,,7,0.15,0.846018,1.877243,1.884797,0.994,0.862238,0.998,0.8897,0.093862,0.28272,87.724268,88.479698
7,,8,0.2,0.814223,1.862134,1.879131,0.986,0.830782,0.995,0.874971,0.093107,0.375826,86.213409,87.913126
8,,9,0.3,0.7318,1.724268,1.82751,0.913,0.775345,0.967667,0.841762,0.172427,0.548253,72.426818,82.751023
9,,10,0.4,0.637372,1.571294,1.763456,0.832,0.686046,0.93375,0.802833,0.157129,0.705382,57.129367,76.345609




ModelMetricsBinomialGLM: stackedensemble
** Reported on cross-validation data. **

MSE: 0.1868782838512945
RMSE: 0.43229420982855477
LogLoss: 0.5518079637132357
Null degrees of freedom: 9999
Residual degrees of freedom: 9993
Null deviance: 13829.717313823481
Residual deviance: 11036.159274264714
AIC: 11050.159274264714
AUC: 0.7893561888935384
AUCPR: 0.8054244861244769
Gini: 0.5787123777870768

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.3707663702927824: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,2420.0,2285.0,0.4857,(2285.0/4705.0)
1,1,752.0,4543.0,0.142,(752.0/5295.0)
2,Total,3172.0,6828.0,0.3037,(3037.0/10000.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.370766,0.749484,261.0
1,max f2,0.156872,0.860412,357.0
2,max f0point5,0.594584,0.739926,164.0
3,max accuracy,0.525082,0.7148,195.0
4,max precision,0.944007,1.0,0.0
5,max recall,0.064893,1.0,394.0
6,max specificity,0.944007,1.0,0.0
7,max absolute_mcc,0.548529,0.431204,184.0
8,max min_per_class_accuracy,0.526493,0.713126,194.0
9,max mean_per_class_accuracy,0.548529,0.715936,184.0



Gains/Lift Table: Avg response rate: 52.95 %, avg score: 52.95 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.01,0.920645,1.869688,1.869688,0.99,0.929483,0.99,0.929483,0.018697,0.018697,86.968839,86.968839
1,,2,0.02,0.91223,1.756374,1.813031,0.93,0.915982,0.96,0.922732,0.017564,0.036261,75.637394,81.303116
2,,3,0.03,0.90535,1.813031,1.813031,0.96,0.908621,0.96,0.918029,0.01813,0.054391,81.303116,81.303116
3,,4,0.04,0.898451,1.661945,1.77526,0.88,0.901792,0.94,0.913969,0.016619,0.07101,66.194523,77.525968
4,,5,0.05,0.892484,1.794145,1.779037,0.95,0.895495,0.942,0.910274,0.017941,0.088952,79.414542,77.903683
5,,6,0.1,0.862861,1.72238,1.750708,0.912,0.877101,0.927,0.893688,0.086119,0.175071,72.23796,75.070822
6,,7,0.15,0.83,1.627951,1.709789,0.862,0.846485,0.905333,0.877953,0.081398,0.256468,62.79509,70.978911
7,,8,0.2,0.796306,1.507082,1.659112,0.798,0.813035,0.8785,0.861724,0.075354,0.331822,50.708215,65.911237
8,,9,0.3,0.719065,1.414542,1.577589,0.749,0.759122,0.835333,0.827523,0.141454,0.473277,41.454202,57.758892
9,,10,0.4,0.632925,1.199245,1.483003,0.635,0.677859,0.78525,0.790107,0.119924,0.593201,19.924457,48.300283







In [60]:
# Get the performance of the leading model on the test set
aml.leader.model_performance(test)


ModelMetricsBinomialGLM: stackedensemble
** Reported on test data. **

MSE: 0.18647978578882982
RMSE: 0.43183305314534437
LogLoss: 0.5513586076589898
Null degrees of freedom: 4999
Residual degrees of freedom: 4993
Null deviance: 6905.1964032609185
Residual deviance: 5513.586076589898
AIC: 5527.586076589898
AUC: 0.7913170923979713
AUCPR: 0.8104999079432735
Gini: 0.5826341847959426

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.31401700574960095: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,1092.0,1223.0,0.5283,(1223.0/2315.0)
1,1,302.0,2383.0,0.1125,(302.0/2685.0)
2,Total,1394.0,3606.0,0.305,(1525.0/5000.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.314017,0.75759,284.0
1,max f2,0.150604,0.864069,360.0
2,max f0point5,0.600642,0.74908,153.0
3,max accuracy,0.510144,0.7146,192.0
4,max precision,0.943635,1.0,0.0
5,max recall,0.077094,1.0,389.0
6,max specificity,0.943635,1.0,0.0
7,max absolute_mcc,0.538633,0.433793,181.0
8,max min_per_class_accuracy,0.487641,0.712311,202.0
9,max mean_per_class_accuracy,0.528598,0.71709,185.0



Gains/Lift Table: Avg response rate: 53.70 %, avg score: 50.77 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.01,0.917617,1.787709,1.787709,0.96,0.924525,0.96,0.924525,0.017877,0.017877,78.77095,78.77095
1,,2,0.02,0.908409,1.787709,1.787709,0.96,0.912385,0.96,0.918455,0.017877,0.035754,78.77095,78.77095
2,,3,0.03,0.90089,1.824953,1.800124,0.98,0.904846,0.966667,0.913918,0.01825,0.054004,82.495345,80.012415
3,,4,0.04,0.894522,1.750466,1.787709,0.94,0.897743,0.96,0.909875,0.017505,0.071508,75.046555,78.77095
4,,5,0.05,0.886717,1.750466,1.780261,0.94,0.890564,0.956,0.906012,0.017505,0.089013,75.046555,78.026071
5,,6,0.1,0.855146,1.66108,1.72067,0.892,0.872134,0.924,0.889073,0.083054,0.172067,66.108007,72.067039
6,,7,0.15,0.81638,1.564246,1.668529,0.84,0.835638,0.896,0.871262,0.078212,0.250279,56.424581,66.852886
7,,8,0.2,0.77772,1.571695,1.64432,0.844,0.79776,0.883,0.852886,0.078585,0.328864,57.16946,64.43203
8,,9,0.3,0.690086,1.411546,1.566729,0.758,0.733966,0.841333,0.813246,0.141155,0.470019,41.154562,56.672874
9,,10,0.4,0.594116,1.255121,1.488827,0.674,0.642604,0.7995,0.770586,0.125512,0.595531,25.512104,48.882682







### What if you want to select a particular model?

In [61]:
model_id_list = aml.leaderboard['model_id'].as_data_frame()

In [62]:
# Select a model using the model id
model_id = model_id_list['model_id'].loc[1]
model_id_2 = model_id_list['model_id'].loc[2] 

# Get the model using the model_id from the list
h2o.get_model(model_id)

Model Details
H2OStackedEnsembleEstimator :  Stacked Ensemble
Model Key:  StackedEnsemble_AllModels_AutoML_20200309_235545

No model summary for this model

ModelMetricsBinomialGLM: stackedensemble
** Reported on train data. **

MSE: 0.11147227777050864
RMSE: 0.3338746437969027
LogLoss: 0.3757075166789931
Null degrees of freedom: 9999
Residual degrees of freedom: 9985
Null deviance: 13828.113387424273
Residual deviance: 7514.1503335798625
AIC: 7544.1503335798625
AUC: 0.9444303620904368
AUCPR: 0.9495456827779074
Gini: 0.8888607241808737

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.4502823666747347: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,3831.0,874.0,0.1858,(874.0/4705.0)
1,1,478.0,4817.0,0.0903,(478.0/5295.0)
2,Total,4309.0,5691.0,0.1352,(1352.0/10000.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.450282,0.876934,226.0
1,max f2,0.322503,0.923865,278.0
2,max f0point5,0.601357,0.886789,161.0
3,max accuracy,0.490114,0.8653,210.0
4,max precision,0.946113,1.0,0.0
5,max recall,0.152934,1.0,356.0
6,max specificity,0.946113,1.0,0.0
7,max absolute_mcc,0.452916,0.729882,225.0
8,max min_per_class_accuracy,0.506857,0.8644,203.0
9,max mean_per_class_accuracy,0.504325,0.864801,204.0



Gains/Lift Table: Avg response rate: 52.95 %, avg score: 51.77 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.01,0.922281,1.888574,1.888574,1.0,0.929802,1.0,0.929802,0.018886,0.018886,88.857413,88.857413
1,,2,0.02,0.915038,1.888574,1.888574,1.0,0.918705,1.0,0.924254,0.018886,0.037771,88.857413,88.857413
2,,3,0.03,0.909154,1.888574,1.888574,1.0,0.912152,1.0,0.92022,0.018886,0.056657,88.857413,88.857413
3,,4,0.04,0.903272,1.888574,1.888574,1.0,0.906242,1.0,0.916726,0.018886,0.075543,88.857413,88.857413
4,,5,0.05,0.898275,1.888574,1.888574,1.0,0.900821,1.0,0.913545,0.018886,0.094429,88.857413,88.857413
5,,6,0.1,0.874378,1.888574,1.888574,1.0,0.886626,1.0,0.900085,0.094429,0.188857,88.857413,88.857413
6,,7,0.15,0.845745,1.873466,1.883538,0.992,0.860147,0.997333,0.886773,0.093673,0.282531,87.346553,88.353793
7,,8,0.2,0.815116,1.850803,1.875354,0.98,0.830178,0.993,0.872624,0.09254,0.375071,85.080264,87.535411
8,,9,0.3,0.734876,1.739377,1.830028,0.921,0.777239,0.969,0.840829,0.173938,0.549008,73.937677,83.002833
9,,10,0.4,0.643179,1.610954,1.77526,0.853,0.691693,0.94,0.803545,0.161095,0.710104,61.095373,77.525968




ModelMetricsBinomialGLM: stackedensemble
** Reported on cross-validation data. **

MSE: 0.1869692963696618
RMSE: 0.43239946388688066
LogLoss: 0.5520975907496893
Null degrees of freedom: 9999
Residual degrees of freedom: 9987
Null deviance: 13829.717313823481
Residual deviance: 11041.951814993787
AIC: 11067.951814993787
AUC: 0.7893343729522467
AUCPR: 0.8057732518761764
Gini: 0.5786687459044935

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.3525454826028507: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,2310.0,2395.0,0.509,(2395.0/4705.0)
1,1,682.0,4613.0,0.1288,(682.0/5295.0)
2,Total,2992.0,7008.0,0.3077,(3077.0/10000.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.352545,0.749898,270.0
1,max f2,0.164008,0.860749,358.0
2,max f0point5,0.586601,0.739819,165.0
3,max accuracy,0.516152,0.7156,196.0
4,max precision,0.942189,1.0,0.0
5,max recall,0.069217,1.0,394.0
6,max specificity,0.942189,1.0,0.0
7,max absolute_mcc,0.542864,0.430824,184.0
8,max min_per_class_accuracy,0.524504,0.713709,192.0
9,max mean_per_class_accuracy,0.542864,0.715781,184.0



Gains/Lift Table: Avg response rate: 52.95 %, avg score: 52.93 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.01,0.918997,1.869688,1.869688,0.99,0.92718,0.99,0.92718,0.018697,0.018697,86.968839,86.968839
1,,2,0.02,0.91067,1.737488,1.803588,0.92,0.914701,0.955,0.920941,0.017375,0.036072,73.74882,80.358829
2,,3,0.03,0.903384,1.813031,1.806736,0.96,0.907129,0.956667,0.916337,0.01813,0.054202,81.303116,80.673591
3,,4,0.04,0.896745,1.794145,1.803588,0.95,0.900253,0.955,0.912316,0.017941,0.072144,79.414542,80.358829
4,,5,0.05,0.891607,1.699717,1.782814,0.9,0.894397,0.944,0.908732,0.016997,0.089141,69.971671,78.281398
5,,6,0.1,0.863899,1.69594,1.739377,0.898,0.877725,0.921,0.893229,0.084797,0.173938,69.593957,73.937677
6,,7,0.15,0.831145,1.639282,1.706012,0.868,0.848166,0.903333,0.878208,0.081964,0.255902,63.928234,70.601196
7,,8,0.2,0.799196,1.484419,1.650614,0.786,0.815434,0.874,0.862514,0.074221,0.330123,48.441926,65.061379
8,,9,0.3,0.722288,1.423985,1.575071,0.754,0.762369,0.834,0.829133,0.142398,0.472521,42.398489,57.507082
9,,10,0.4,0.636043,1.201133,1.481586,0.636,0.679797,0.7845,0.791799,0.120113,0.592635,20.113314,48.15864







In [48]:
model = h2o.get_model(model_id)
model2 = h2o.get_model(model_id_2)
help(model)

Help on H2OStackedEnsembleEstimator in module h2o.estimators.stackedensemble object:

ModelBase = class H2OStackedEnsembleEstimator(h2o.estimators.estimator_base.H2OEstimator)
 |  Stacked Ensemble
 |  
 |  Builds a stacked ensemble (aka "super learner") machine learning method that uses two
 |  or more H2O learning algorithms to improve predictive performance. It is a loss-based
 |  supervised learning method that finds the optimal combination of a collection of prediction
 |  algorithms.This method supports regression and binary classification.
 |  
 |  :examples:
 |  
 |  >>> import h2o
 |  >>> h2o.init()
 |  >>> from h2o.estimators.random_forest import H2ORandomForestEstimator
 |  >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
 |  >>> from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
 |  >>> col_types = ["numeric", "numeric", "numeric", "enum",
 |  ...              "enum", "numeric", "numeric", "numeric", "numeric"]
 |  >>> data = h2o.import_fil

In [63]:
import pprint

pprint.pprint(model.actual_params)

{'base_models': [{'URL': '/3/Models/XGBoost_3_AutoML_20200309_235545',
                  '__meta': {'schema_name': 'ModelKeyV3',
                             'schema_type': 'Key<Model>',
                             'schema_version': 3},
                  'name': 'XGBoost_3_AutoML_20200309_235545',
                  'type': 'Key<Model>'},
                 {'URL': '/3/Models/XGBoost_grid__1_AutoML_20200309_235545_model_3',
                  '__meta': {'schema_name': 'ModelKeyV3',
                             'schema_type': 'Key<Model>',
                             'schema_version': 3},
                  'name': 'XGBoost_grid__1_AutoML_20200309_235545_model_3',
                  'type': 'Key<Model>'},
                 {'URL': '/3/Models/XGBoost_grid__1_AutoML_20200309_235545_model_4',
                  '__meta': {'schema_name': 'ModelKeyV3',
                             'schema_type': 'Key<Model>',
                             'schema_version': 3},
                  'name': 'XGBoost_gri

In [64]:
pprint.pprint(model.full_parameters)

{'base_models': {'__meta': {'schema_name': 'ModelParameterSchemaV3',
                            'schema_type': 'Iced',
                            'schema_version': 3},
                 'actual_value': [{'URL': '/3/Models/XGBoost_3_AutoML_20200309_235545',
                                   '__meta': {'schema_name': 'ModelKeyV3',
                                              'schema_type': 'Key<Model>',
                                              'schema_version': 3},
                                   'name': 'XGBoost_3_AutoML_20200309_235545',
                                   'type': 'Key<Model>'},
                                  {'URL': '/3/Models/XGBoost_grid__1_AutoML_20200309_235545_model_3',
                                   '__meta': {'schema_name': 'ModelKeyV3',
                                              'schema_type': 'Key<Model>',
                                              'schema_version': 3},
                                   'name': 'XGBoost_grid__1_AutoML_

### Save model

In [65]:
# For MOJO only these are supported
# GBM (Gradient Boosting Machines)
# DRF (Distributed Random Forest)
# IRF (Isolation Random Forest)
# GLM (Generalized Linear Model)
# XGBoost

model2.download_mojo() # Won't work for StackedEnsemble

'/home/ubuntu/keras/XGBoost_3_AutoML_20200309_235545.zip'

In [67]:
h2o.save_model(model, 'saved2')

'/home/ubuntu/keras/saved2/StackedEnsemble_AllModels_AutoML_20200309_235545'

In [71]:
model_loaded = h2o.load_model('/home/ubuntu/keras/saved/StackedEnsemble_BestOfFamily_AutoML_20200304_162954')
model_loaded.predict(test_data=test)

stackedensemble prediction progress: |████████████████████████████████████| 100%


predict,p0,p1
0,0.742459,0.257541
0,0.66473,0.33527
1,0.636464,0.363536
1,0.353159,0.646841
0,0.702917,0.297083
1,0.247771,0.752229
1,0.258367,0.741633
1,0.626536,0.373464
1,0.631361,0.368639
0,0.752502,0.247498




In [72]:
mojo_loaded = h2o.import_mojo('XGBoost_3_AutoML_20200304_162954.zip')
mojo_loaded.predict(test_data=test)

generic Model Build progress: |███████████████████████████████████████████| 100%
Model Details
H2OGenericEstimator :  Import MOJO Model
Model Key:  Generic_model_python_1583377547966_3549


Model Summary: 


Unnamed: 0,Unnamed: 1,number_of_trees
0,,113.0




ModelMetricsBinomialGeneric: generic
** Reported on train data. **

MSE: 0.15625625666093138
RMSE: 0.39529262156145967
LogLoss: 0.48291363480005595
Mean Per-Class Error: 0.21430088136804226
AUC: 0.870783778332375
AUCPR: 0.8796833496562693
Gini: 0.7415675566647499

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.43120504220326744: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,3067.0,1638.0,0.3481,(1638.0/4705.0)
1,1,581.0,4714.0,0.1097,(581.0/5295.0)
2,Total,3648.0,6352.0,0.2219,(2219.0/10000.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.431205,0.809479,239.0
1,max f2,0.314405,0.882383,296.0
2,max f0point5,0.592485,0.813927,163.0
3,max accuracy,0.502079,0.7871,206.0
4,max precision,0.97314,1.0,0.0
5,max recall,0.109858,1.0,376.0
6,max specificity,0.97314,1.0,0.0
7,max absolute_mcc,0.502079,0.572293,206.0
8,max min_per_class_accuracy,0.52083,0.783947,197.0
9,max mean_per_class_accuracy,0.502079,0.785699,206.0



Gains/Lift Table: Avg response rate: 52.95 %, avg score: 52.99 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.01,0.924895,1.888574,1.888574,1.0,0.940394,1.0,0.940394,0.018886,0.018886,88.857413,88.857413
1,,2,0.02,0.910696,1.869688,1.879131,0.99,0.917663,0.995,0.929028,0.018697,0.037583,86.968839,87.913126
2,,3,0.03,0.899943,1.869688,1.875984,0.99,0.904679,0.993333,0.920912,0.018697,0.05628,86.968839,87.598363
3,,4,0.04,0.89005,1.869688,1.87441,0.99,0.894837,0.9925,0.914393,0.018697,0.074976,86.968839,87.440982
4,,5,0.05,0.88028,1.869688,1.873466,0.99,0.885259,0.992,0.908566,0.018697,0.093673,86.968839,87.346553
5,,6,0.1,0.836625,1.763928,1.818697,0.934,0.858198,0.963,0.883382,0.088196,0.18187,76.392823,81.869688
6,,7,0.15,0.792427,1.794145,1.810513,0.95,0.814465,0.958667,0.86041,0.089707,0.271577,79.414542,81.051306
7,,8,0.2,0.756815,1.654391,1.771483,0.876,0.775439,0.938,0.839167,0.08272,0.354297,65.439093,77.148253
8,,9,0.3,0.682644,1.582625,1.70853,0.838,0.719167,0.904667,0.799167,0.158263,0.512559,58.262512,70.853006
9,,10,0.4,0.607911,1.397545,1.630784,0.74,0.645727,0.8635,0.760807,0.139754,0.652314,39.754485,63.078376




ModelMetricsBinomialGeneric: generic
** Reported on cross-validation data. **

MSE: 0.18833066427664727
RMSE: 0.4339708103970211
LogLoss: 0.5556840365368239
Mean Per-Class Error: 0.284721314897157
AUC: 0.7867075489779923
AUCPR: 0.8047354058317235
Gini: 0.5734150979559847

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.404884185642004: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,2395.0,2310.0,0.491,(2310.0/4705.0)
1,1,762.0,4533.0,0.1439,(762.0/5295.0)
2,Total,3157.0,6843.0,0.3072,(3072.0/10000.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.404884,0.746911,257.0
1,max f2,0.20319,0.860697,347.0
2,max f0point5,0.578521,0.740008,174.0
3,max accuracy,0.555247,0.7126,185.0
4,max precision,0.969054,1.0,0.0
5,max recall,0.063909,1.0,390.0
6,max specificity,0.969054,1.0,0.0
7,max absolute_mcc,0.555247,0.430729,185.0
8,max min_per_class_accuracy,0.520935,0.709033,201.0
9,max mean_per_class_accuracy,0.555247,0.715279,185.0



Gains/Lift Table: Avg response rate: 52.95 %, avg score: 52.93 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.01,0.919137,1.869688,1.869688,0.99,0.935129,0.99,0.935129,0.018697,0.018697,86.968839,86.968839
1,,2,0.02,0.903936,1.831917,1.850803,0.97,0.91145,0.98,0.923289,0.018319,0.037016,83.19169,85.080264
2,,3,0.03,0.89169,1.794145,1.831917,0.95,0.898037,0.97,0.914872,0.017941,0.054958,79.414542,83.19169
3,,4,0.04,0.881699,1.718602,1.803588,0.91,0.886847,0.955,0.907866,0.017186,0.072144,71.860246,80.358829
4,,5,0.05,0.873016,1.718602,1.786591,0.91,0.876987,0.946,0.90169,0.017186,0.08933,71.860246,78.659112
5,,6,0.1,0.8285,1.699717,1.743154,0.9,0.850286,0.923,0.875988,0.084986,0.174315,69.971671,74.315392
6,,7,0.15,0.78604,1.631728,1.706012,0.864,0.805938,0.903333,0.852638,0.081586,0.255902,63.172805,70.601196
7,,8,0.2,0.747704,1.495751,1.653447,0.792,0.766231,0.8755,0.831036,0.074788,0.330689,49.575071,65.344665
8,,9,0.3,0.675239,1.391879,1.566257,0.737,0.711246,0.829333,0.791106,0.139188,0.469877,39.187913,56.625748
9,,10,0.4,0.604399,1.227573,1.481586,0.65,0.639951,0.7845,0.753317,0.122757,0.592635,22.757318,48.15864




Cross-Validation Metrics Summary: 


Unnamed: 0,Unnamed: 1,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
0,accuracy,0.6844,0.0128179565,0.691,0.6875,0.6625,0.6955,0.6855
1,auc,0.78686744,0.0067041004,0.7970181,0.7880977,0.7784664,0.7850508,0.7857043
2,aucpr,0.80503035,0.0087537365,0.81653607,0.80501264,0.80183905,0.8089297,0.7928344
3,err,0.3156,0.0128179565,0.309,0.3125,0.3375,0.3045,0.3145
4,err_count,631.2,25.635912,618.0,625.0,675.0,609.0,629.0
5,f0point5,0.68398035,0.011940547,0.6882146,0.6868898,0.67016554,0.7001769,0.6744548
6,f1,0.7498648,0.010262782,0.75240386,0.75915223,0.74690664,0.7572738,0.73358744
7,f2,0.83005923,0.017483376,0.8297985,0.84840655,0.8434959,0.8245096,0.80408543
8,lift_top_group,1.8711896,0.0767502,1.8975332,1.7757009,1.8535681,1.8450185,1.9841269
9,logloss,0.55568403,0.006567608,0.545614,0.5530586,0.5623815,0.5578491,0.55951697



See the whole table with table.as_data_frame()

Scoring History: 


Unnamed: 0,Unnamed: 1,timestamp,duration,number_of_trees,training_rmse,training_logloss,training_auc,training_pr_auc,training_lift,training_classification_error
0,,2020-03-04 16:32:21,31.776 sec,0.0,0.5,0.693147,0.5,0.0,1.0,0.4705
1,,2020-03-04 16:32:21,32.107 sec,5.0,0.482614,0.658839,0.786043,0.795905,1.780213,0.3069
2,,2020-03-04 16:32:21,32.234 sec,10.0,0.46973,0.633733,0.798669,0.809335,1.76729,0.2921
3,,2020-03-04 16:32:21,32.333 sec,15.0,0.458053,0.610914,0.806382,0.815637,1.813031,0.275
4,,2020-03-04 16:32:22,32.458 sec,20.0,0.449562,0.594067,0.811875,0.820995,1.869875,0.2741
5,,2020-03-04 16:32:22,32.617 sec,25.0,0.442306,0.579416,0.81583,0.825379,1.869688,0.2765
6,,2020-03-04 16:32:22,32.749 sec,30.0,0.436562,0.567691,0.821003,0.829769,1.888574,0.2729
7,,2020-03-04 16:32:22,32.893 sec,35.0,0.432161,0.558675,0.82532,0.835295,1.888574,0.2619
8,,2020-03-04 16:32:22,33.054 sec,40.0,0.428006,0.55005,0.829387,0.839608,1.888574,0.2597
9,,2020-03-04 16:32:22,33.182 sec,45.0,0.424485,0.542691,0.833368,0.843841,1.888574,0.2538



See the whole table with table.as_data_frame()

Variable Importances: 


Unnamed: 0,variable,relative_importance,scaled_importance,percentage
0,x26,8278.851562,1.0,0.217662
1,x28,4533.871582,0.547645,0.119201
2,x27,3890.611328,0.469946,0.102289
3,x25,3153.177002,0.380871,0.082901
4,x6,3151.825195,0.380708,0.082866
5,x23,2836.803467,0.342657,0.074583
6,x1,1661.282593,0.200666,0.043677
7,x4,1467.69458,0.177282,0.038588
8,x10,1120.439209,0.135338,0.029458
9,x22,1103.762451,0.133323,0.029019



See the whole table with table.as_data_frame()

generic prediction progress: |████████████████████████████████████████████| 100%


predict,p0,p1
0,0.746806,0.253194
0,0.63423,0.36577
0,0.631137,0.368863
1,0.359113,0.640887
0,0.676197,0.323803
1,0.314696,0.685304
1,0.337244,0.662756
0,0.560475,0.439525
0,0.602084,0.397916
0,0.671376,0.328624




### Modify the parameters of the AutoML object

In [73]:
# Binary classification, AutoML finds the best algorithm and model

from h2o.automl import H2OAutoML, get_leaderboard

# Import a sample binary outcome train/test set into H2O
train = h2o.import_file("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
test = h2o.import_file("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")

# Identify predictors and response
x = train.columns
y = "response"
x.remove(y)
# For binary classification, response should be a factor
train[y] = train[y].asfactor()
test[y] = test[y].asfactor()

# Run AutoML for 20 base models (limited to 1 hour max runtime by default)
# max_runtime_secs - maximum runtime for automl
# balance_classes - Set to True to over or undersample the classes to balance it
# modeling_plan - can pass a list of models to try out, the result of aml.modeling_plan can be passed back to it during initialization
# sort_metric - 'auc', 'aucpr' etc.
# export_checkpoints_dir - provide the name of the folder to checkpoint the model
# include_algos - algorithm to use, "DRF", "DeepLearning", "XGBoost" etc
# exclude_algos - exclude certain algorithms

# Pass the modeling plan from the last execution, set maximum runtime
aml = H2OAutoML(max_models=20, seed=1, 
                max_runtime_secs=100,
                balance_classes=True, # Usually a good idea to set this to True
                modeling_plan=aml.modeling_steps, # Modeling plan from the previous run
                export_checkpoints_dir='/home/ubuntu/H2Omodels' # The models created get saved to this folder
               )
aml.train(x=x, y=y, training_frame=train) # provide data and response column names from the train H20 dataframe

# AutoML Leaderboard, H20 dataframe
lb = aml.leaderboard

# Optionally edd extra model information to the leaderboard
lb = get_leaderboard(aml, extra_columns='ALL')

# Print all rows (instead of default 10 rows)
lb.head(rows=lb.nrows)


Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
AutoML progress: |████████████████████████████████████████████████████████| 100%


model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse,training_time_ms,predict_time_per_row_ms
StackedEnsemble_AllModels_AutoML_20200310_003256,0.788414,0.553296,0.805908,0.326925,0.433016,0.187502,851,0.022565
StackedEnsemble_BestOfFamily_AutoML_20200310_003256,0.786945,0.554656,0.802356,0.310221,0.43359,0.188,605,0.008582
XGBoost_3_AutoML_20200310_003256,0.786708,0.555684,0.804735,0.317438,0.433971,0.188331,2357,0.007846
XGBoost_2_AutoML_20200310_003256,0.782155,0.557366,0.801728,0.337128,0.435331,0.189514,7056,0.008772
XGBoost_1_AutoML_20200310_003256,0.7819,0.557833,0.801298,0.33284,0.435508,0.189667,4220,0.009546
GLM_1_AutoML_20200310_003256,0.682648,0.63852,0.680344,0.397234,0.472683,0.223429,535,0.00398




In [74]:
# Sort the leaderboard by a certain column
lb.sort(by='training_time_ms').head(rows=lb.nrows)

model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse,training_time_ms,predict_time_per_row_ms
GLM_1_AutoML_20200310_003256,0.682648,0.63852,0.680344,0.397234,0.472683,0.223429,535,0.00398
StackedEnsemble_BestOfFamily_AutoML_20200310_003256,0.786945,0.554656,0.802356,0.310221,0.43359,0.188,605,0.008582
StackedEnsemble_AllModels_AutoML_20200310_003256,0.788414,0.553296,0.805908,0.326925,0.433016,0.187502,851,0.022565
XGBoost_3_AutoML_20200310_003256,0.786708,0.555684,0.804735,0.317438,0.433971,0.188331,2357,0.007846
XGBoost_1_AutoML_20200310_003256,0.7819,0.557833,0.801298,0.33284,0.435508,0.189667,4220,0.009546
XGBoost_2_AutoML_20200310_003256,0.782155,0.557366,0.801728,0.337128,0.435331,0.189514,7056,0.008772




### Look at the modeling plan passed to AutoML and what was used

In [75]:
# Modeling plan passed to it from the previous execution
aml.modeling_plan

[{'name': 'XGBoost',
  'steps': [{'id': 'def_1', 'weight': 10},
   {'id': 'def_2', 'weight': 10},
   {'id': 'def_3', 'weight': 10}]},
 {'name': 'GLM', 'steps': [{'id': 'def_1', 'weight': 10}]},
 {'name': 'DRF', 'steps': [{'id': 'def_1', 'weight': 10}]},
 {'name': 'GBM',
  'steps': [{'id': 'def_1', 'weight': 10},
   {'id': 'def_2', 'weight': 10},
   {'id': 'def_3', 'weight': 10},
   {'id': 'def_4', 'weight': 10},
   {'id': 'def_5', 'weight': 10}]},
 {'name': 'DeepLearning', 'steps': [{'id': 'def_1', 'weight': 10}]},
 {'name': 'DRF', 'steps': [{'id': 'XRT', 'weight': 10}]},
 {'name': 'XGBoost', 'steps': [{'id': 'grid_1', 'weight': 100}]},
 {'name': 'GBM', 'steps': [{'id': 'grid_1', 'weight': 60}]},
 {'name': 'DeepLearning',
  'steps': [{'id': 'grid_1', 'weight': 20}, {'id': 'grid_2', 'weight': 20}]},
 {'name': 'StackedEnsemble',
  'steps': [{'id': 'best', 'weight': 10}, {'id': 'all', 'weight': 10}]}]

In [76]:
# The mpodeling steps that were actually executed
aml.modeling_steps

[{'name': 'XGBoost',
  'steps': [{'id': 'def_1', 'weight': 10},
   {'id': 'def_2', 'weight': 10},
   {'id': 'def_3', 'weight': 10}]},
 {'name': 'GLM', 'steps': [{'id': 'def_1', 'weight': 10}]},
 {'name': 'DRF', 'steps': [{'id': 'def_1', 'weight': 10}]},
 {'name': 'StackedEnsemble',
  'steps': [{'id': 'best', 'weight': 10}, {'id': 'all', 'weight': 10}]}]

In [77]:
lb.summary()

Unnamed: 0,model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse,training_time_ms,predict_time_per_row_ms
type,string,real,real,real,real,real,real,int,real
mins,,0.6826480980292398,0.5532956484799175,0.6803442006197458,0.310221280276643,0.4330155490191371,0.1875024656923447,535.0,0.00398
mean,,0.7681282912485027,0.5695590638398027,0.7827284328198147,0.33696435625746557,0.44068304991228224,0.1944071438845845,2604.0,0.010215166666666668
maxs,,0.7884140493056329,0.6385204577838607,0.8059082181349254,0.39723407180394954,0.4726827446476101,0.22342897708759785,7056.0,0.022565
sigma,,0.04196198639558059,0.03382596643575538,0.05019008460698926,0.03113431024719494,0.015707071194597355,0.014243104827651816,2601.635639362284,0.006359345639817565
zeros,0,0,0,0,0,0,0,0,0
missing,0,0,0,0,0,0,0,0,0
0,StackedEnsemble_AllModels_AutoML_20200310_003256,0.7884140493056329,0.5532956484799175,0.8059082181349254,0.32692502601556017,0.4330155490191371,0.1875024656923447,851.0,0.022565
1,StackedEnsemble_BestOfFamily_AutoML_20200310_003256,0.7869448751102589,0.5546559654467973,0.8023559568635561,0.310221280276643,0.43358987180047975,0.18800017692795645,605.0,0.008582
2,XGBoost_3_AutoML_20200310_003256,0.7867075489779923,0.5556840365368239,0.8047354058317235,0.3174382023824935,0.4339708103970211,0.18833066427664727,2357.0,0.007846


### Continue the AutoML run on top of the first one (22 models). The new one now has 24 models

In [78]:
# Run AutoML on top of the first run

from h2o.automl import H2OAutoML, get_leaderboard

# Import a sample binary outcome train/test set into H2O
train = h2o.import_file("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
test = h2o.import_file("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")

# Identify predictors and response
x = train.columns
y = "response"
x.remove(y)
# For binary classification, response should be a factor
train[y] = train[y].asfactor()
test[y] = test[y].asfactor()

# Run AutoML for 20 base models (limited to 1 hour max runtime by default)
# max_runtime_secs - maximum runtime for automl
# balance_classes - Set to True to over or undersample the classes to balance it
# modeling_plan - can pass a list of models to try out, the result of aml.modeling_plan can be passed back to it during initialization
# sort_metric - 'auc', 'aucpr' etc.
# export_checkpoints_dir - provide the name of the folder to checkpoint the model
# include_algos - algorithm to use, "DRF", "DeepLearning", "XGBoost" etc
# exclude_algos - exclude certain algorithms

# Pass the modeling plan from the last execution, set maximum runtime
aml = H2OAutoML(max_models=20, seed=1, 
                project_name="First", # Setting the project to 'First' from the first run, so it can start from there
                max_runtime_secs=100,
                balance_classes=True, # Usually a good idea to set this to True
                modeling_plan=aml.modeling_steps,
                export_checkpoints_dir='/home/ubuntu/H2Omodels' # The models created get saved to this folder
               )
aml.train(x=x, y=y, training_frame=train) # provide data and response column names from the train H20 dataframe

# AutoML Leaderboard, H20 dataframe
lb = aml.leaderboard

# Optionally edd extra model information to the leaderboard
lb = get_leaderboard(aml, extra_columns='ALL')

# Print all rows (instead of default 10 rows)
lb.head(rows=lb.nrows)


Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
AutoML progress: |
00:35:00.697: New models will be added to existing leaderboard First@@response (leaderboard frame=null) with already 22 models.

████████████████████████████████████████████████████████| 100%

00:36:43.52: StackedEnsemble_BestOfFamily_AutoML_20200310_003500 [StackedEnsemble best (built using top model from each algorithm type)] failed: water.exceptions.H2OIllegalArgumentException: Failed to find the xval predictions frame. . .  Looks like keep_cross_validation_predictions wasn't set when building the models, or the frame was deleted.
00:36:44.65: StackedEnsemble_AllModels_AutoML_20200310_003500 [StackedEnsemble all (built using all AutoML models)] failed: water.exceptions.H2OIllegalArgumentException: Failed to find the xval predictions frame. . .  Looks like keep_cross_validation_predictions wasn't set when 

model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse,training_time_ms,predict_time_per_row_ms
StackedEnsemble_BestOfFamily_AutoML_20200309_235545,0.789356,0.551808,0.805424,0.313837,0.432294,0.186878,4996,0.405899
StackedEnsemble_AllModels_AutoML_20200309_235545,0.789334,0.552098,0.805773,0.318917,0.432399,0.186969,12697,1.16885
XGBoost_3_AutoML_20200309_235545,0.786708,0.555684,0.804735,0.317438,0.433971,0.188331,2490,0.005907
XGBoost_3_AutoML_20200310_003500,0.786708,0.555684,0.804735,0.317438,0.433971,0.188331,2297,0.008706
XGBoost_grid__1_AutoML_20200309_235545_model_3,0.785959,0.557668,0.803676,0.324007,0.434596,0.188874,2535,0.005942
XGBoost_grid__1_AutoML_20200309_235545_model_4,0.782749,0.558526,0.800769,0.311874,0.43555,0.189703,2045,0.006156
GBM_5_AutoML_20200309_235545,0.78219,0.558353,0.800234,0.319658,0.435512,0.18967,3448,0.13329
XGBoost_2_AutoML_20200309_235545,0.782155,0.557366,0.801728,0.337128,0.435331,0.189514,6736,0.007512
XGBoost_2_AutoML_20200310_003500,0.782155,0.557366,0.801728,0.337128,0.435331,0.189514,5708,0.01135
XGBoost_grid__1_AutoML_20200309_235545_model_1,0.782142,0.560727,0.799441,0.324183,0.436255,0.190319,3587,0.005926




### Metalearners are ensembles trained using strong and diverse learners, also called superlearning or stacking
### Different from bagging and boosting because you use strong learners in metalearning instead of weak learners in bagging or boosting

In [79]:
metalearner = model.metalearner()
metalearner

{'__meta': {'schema_version': 3,
  'schema_name': 'ModelKeyV3',
  'schema_type': 'Key<Model>'},
 'name': 'metalearner_AUTO_StackedEnsemble_AllModels_AutoML_20200309_235545',
 'type': 'Key<Model>',
 'URL': '/3/Models/metalearner_AUTO_StackedEnsemble_AllModels_AutoML_20200309_235545'}

In [80]:
meta_model = h2o.get_model(metalearner['name'])
meta_model.coef()

{'Intercept': -3.162261092902133,
 'XGBoost_3_AutoML_20200309_235545': 1.5719912858889982,
 'XGBoost_grid__1_AutoML_20200309_235545_model_3': 0.7765396869087495,
 'XGBoost_grid__1_AutoML_20200309_235545_model_4': 0.14267664180604744,
 'GBM_5_AutoML_20200309_235545': 0.37712755115500585,
 'XGBoost_2_AutoML_20200309_235545': 0.30453512115670683,
 'XGBoost_grid__1_AutoML_20200309_235545_model_1': 0.0,
 'XGBoost_1_AutoML_20200309_235545': 0.43505747463915406,
 'GBM_2_AutoML_20200309_235545': 0.0,
 'GBM_1_AutoML_20200309_235545': 0.0,
 'GBM_3_AutoML_20200309_235545': 0.2073317493062996,
 'GBM_grid__1_AutoML_20200309_235545_model_1': 0.31258789200544834,
 'GBM_4_AutoML_20200309_235545': 0.19393158813130681,
 'XGBoost_grid__1_AutoML_20200309_235545_model_2': 0.0,
 'DRF_1_AutoML_20200309_235545': 0.39763322620070596,
 'XRT_1_AutoML_20200309_235545': 0.0,
 'GBM_grid__1_AutoML_20200309_235545_model_2': 0.13964863266626337,
 'DeepLearning_grid__2_AutoML_20200309_235545_model_1': 0.896144727160026

### Train using only family of algorithms, here that is Deep Learning

In [81]:
# Run only the DL algorithm
from h2o.automl import H2OAutoML, get_leaderboard

# Import a sample binary outcome train/test set into H2O
train = h2o.import_file("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
test = h2o.import_file("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")

# Identify predictors and response
x = train.columns
y = "response"
x.remove(y)
# For binary classification, response should be a factor
train[y] = train[y].asfactor()
test[y] = test[y].asfactor()

# Run AutoML for 20 base models (limited to 1 hour max runtime by default)
# max_runtime_secs - maximum runtime for automl
# balance_classes - Set to True to over or undersample the classes to balance it
# modeling_plan - can pass a list of models to try out, the result of aml.modeling_plan can be passed back to it during initialization
# sort_metric - 'auc', 'aucpr' etc.
# export_checkpoints_dir - provide the name of the folder to checkpoint the model
# include_algos - algorithm to use, "DRF", "DeepLearning", "XGBoost" etc
# exclude_algos - exclude certain algorithms

# Pass the modeling plan from the last execution, set maximum runtime
aml = H2OAutoML(max_models=20, seed=1, 
                max_runtime_secs=500,
                balance_classes=True, # Usually a good idea to set this to True
                include_algos=['DeepLearning'],
                export_checkpoints_dir='/home/ubuntu/H2Omodels' # The models created get saved to this folder
               )
aml.train(x=x, y=y, training_frame=train) # provide data and response column names from the train H20 dataframe

# AutoML Leaderboard, H20 dataframe
lb = aml.leaderboard

# Optionally edd extra model information to the leaderboard
lb = get_leaderboard(aml, extra_columns='ALL')

# Print all rows (instead of default 10 rows)
lb.head(rows=lb.nrows)


Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
AutoML progress: |████████████████████████████████████████████████████████| 100%


model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse,training_time_ms,predict_time_per_row_ms
DeepLearning_1_AutoML_20200310_003705,0.696432,0.635375,0.697367,0.386452,0.470137,0.221029,1322,0.008701
DeepLearning_grid__1_AutoML_20200310_003705_model_1,0.670126,0.688284,0.675721,0.431458,0.488227,0.238366,39592,0.023208
DeepLearning_grid__2_AutoML_20200310_003705_model_1,0.629596,0.99175,0.628609,0.457179,0.547097,0.299316,28904,0.18481




In [82]:
aml.leader

Model Details
H2ODeepLearningEstimator :  Deep Learning
Model Key:  DeepLearning_1_AutoML_20200310_003705


Status of Neuron Layers: predicting response, 2-class classification, bernoulli distribution, CrossEntropy loss, 532 weights/biases, 15.7 KB, 121,095 training samples, mini-batch size 1


Unnamed: 0,Unnamed: 1,layer,units,type,dropout,l1,l2,mean_rate,rate_rms,momentum,mean_weight,weight_rms,mean_bias,bias_rms
0,,1,28,Input,0.0,,,,,,,,,
1,,2,10,Rectifier,0.0,0.0,0.0,0.00181679,0.000603307,0.0,0.00450799,0.223388,0.364417,0.188309
2,,3,10,Rectifier,0.0,0.0,0.0,0.00146331,0.000893009,0.0,-0.0651652,0.282906,0.82938,0.136357
3,,4,10,Rectifier,0.0,0.0,0.0,0.0036623,0.00696624,0.0,-0.0330073,0.349944,0.983523,0.0832265
4,,5,2,Softmax,,0.0,0.0,0.00176001,0.000907807,0.0,-0.263394,1.76841,-0.0317449,0.0115728




ModelMetricsBinomial: deeplearning
** Reported on train data. **

MSE: 0.20710060027927094
RMSE: 0.45508306964692824
LogLoss: 0.6031286351139664
Mean Per-Class Error: 0.30977519772316187
AUC: 0.7492102914893893
AUCPR: 0.7278289658750663
Gini: 0.49842058297877867

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.43715026843338606: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,2439.0,2599.0,0.5159,(2599.0/5038.0)
1,1,757.0,4255.0,0.151,(757.0/5012.0)
2,Total,3196.0,6854.0,0.3339,(3356.0/10050.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.43715,0.717175,242.0
1,max f2,0.204762,0.839337,337.0
2,max f0point5,0.625677,0.692609,155.0
3,max accuracy,0.589055,0.690249,171.0
4,max precision,0.942572,0.923611,10.0
5,max recall,0.031257,1.0,395.0
6,max specificity,0.98354,0.999802,0.0
7,max absolute_mcc,0.552916,0.381722,189.0
8,max min_per_class_accuracy,0.58307,0.688547,174.0
9,max mean_per_class_accuracy,0.589055,0.690225,171.0



Gains/Lift Table: Avg response rate: 49.87 %, avg score: 55.55 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.01005,0.949458,1.826507,1.826507,0.910891,0.963567,0.910891,0.963567,0.018356,0.018356,82.650747,82.650747
1,,2,0.02,0.930274,1.784617,1.805666,0.89,0.939913,0.900498,0.951798,0.017757,0.036113,78.461692,80.56664
2,,3,0.03005,0.917409,1.528707,1.713041,0.762376,0.924033,0.854305,0.942513,0.015363,0.051476,52.870734,71.304102
3,,4,0.0401,0.905898,1.727241,1.7166,0.861386,0.911537,0.856079,0.93475,0.017358,0.068835,72.724076,71.659976
4,,5,0.05005,0.894326,1.644254,1.702217,0.82,0.899724,0.848907,0.927786,0.016361,0.085196,64.425379,70.221687
5,,6,0.1,0.854186,1.601753,1.652035,0.798805,0.873553,0.823881,0.900697,0.080008,0.165204,60.17534,65.203512
6,,7,0.15005,0.820315,1.506881,1.603618,0.751491,0.836993,0.799735,0.879448,0.075419,0.240623,50.68805,60.361816
7,,8,0.2,0.784041,1.398039,1.552275,0.697211,0.801758,0.774129,0.860045,0.069832,0.310455,39.803913,55.227454
8,,9,0.3,0.719296,1.388667,1.497739,0.692537,0.750841,0.746932,0.823644,0.138867,0.449322,38.86672,49.773876
9,,10,0.4,0.65296,1.310854,1.451018,0.653731,0.685692,0.723632,0.789156,0.131085,0.580407,31.085395,45.101756




ModelMetricsBinomial: deeplearning
** Reported on cross-validation data. **

MSE: 0.22102911283475132
RMSE: 0.4701373340150209
LogLoss: 0.6353745333848072
Mean Per-Class Error: 0.35778223997736114
AUC: 0.6964324413282637
AUCPR: 0.6973673670014411
Gini: 0.3928648826565273

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.3140322161698463: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,1503.0,3202.0,0.6806,(3202.0/4705.0)
1,1,489.0,4806.0,0.0924,(489.0/5295.0)
2,Total,1992.0,8008.0,0.3691,(3691.0/10000.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.314032,0.722544,290.0
1,max f2,0.13182,0.85391,359.0
2,max f0point5,0.530087,0.663383,190.0
3,max accuracy,0.439099,0.6467,234.0
4,max precision,0.968819,1.0,0.0
5,max recall,0.001149,1.0,399.0
6,max specificity,0.968819,1.0,0.0
7,max absolute_mcc,0.383822,0.292053,260.0
8,max min_per_class_accuracy,0.527924,0.641233,191.0
9,max mean_per_class_accuracy,0.530087,0.642218,190.0



Gains/Lift Table: Avg response rate: 52.95 %, avg score: 50.92 %


Unnamed: 0,Unnamed: 1,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
0,,1,0.01,0.907923,1.624174,1.624174,0.86,0.931513,0.86,0.931513,0.016242,0.016242,62.417375,62.417375
1,,2,0.02,0.880515,1.643059,1.633617,0.87,0.894116,0.865,0.912815,0.016431,0.032672,64.305949,63.361662
2,,3,0.03,0.860687,1.529745,1.598993,0.81,0.869733,0.846667,0.898454,0.015297,0.04797,52.974504,59.899276
3,,4,0.04,0.843773,1.378659,1.543909,0.73,0.852276,0.8175,0.886909,0.013787,0.061756,37.865911,54.390935
4,,5,0.05,0.829816,1.567517,1.548631,0.83,0.83647,0.82,0.876822,0.015675,0.077432,56.751653,54.863078
5,,6,0.1,0.779718,1.344665,1.446648,0.712,0.802388,0.766,0.839605,0.067233,0.144665,34.466478,44.664778
6,,7,0.15,0.738865,1.378659,1.423985,0.73,0.758826,0.754,0.812679,0.068933,0.213598,37.865911,42.398489
7,,8,0.2,0.704814,1.340888,1.403211,0.71,0.721803,0.743,0.78996,0.067044,0.280642,34.088763,40.321058
8,,9,0.3,0.64336,1.235127,1.347183,0.654,0.673404,0.713333,0.751108,0.123513,0.404155,23.512748,34.718288
9,,10,0.4,0.587802,1.182247,1.305949,0.626,0.615488,0.6915,0.717203,0.118225,0.52238,18.22474,30.594901




Cross-Validation Metrics Summary: 


Unnamed: 0,Unnamed: 1,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
0,accuracy,0.6359,0.020959485,0.673,0.6225,0.63,0.625,0.629
1,auc,0.7020999,0.01018764,0.7173769,0.6954201,0.6909995,0.70567477,0.70102835
2,aucpr,0.70241386,0.016060004,0.7042649,0.70126927,0.7052372,0.7231518,0.67814606
3,err,0.3641,0.020959485,0.327,0.3775,0.37,0.375,0.371
4,err_count,728.2,41.91897,654.0,755.0,740.0,750.0,742.0
5,f0point5,0.6481819,0.01830434,0.67867255,0.64002025,0.6474401,0.6448163,0.6299603
6,f1,0.72419155,0.010715324,0.7252101,0.7283195,0.7297297,0.73214287,0.70555556
7,f2,0.8216121,0.03011523,0.7785998,0.8448823,0.8359833,0.8468275,0.80176765
8,lift_top_group,1.6602457,0.10573612,1.5180266,1.682243,1.7608898,1.7527676,1.5873016
9,logloss,0.63537455,0.00779863,0.64148283,0.6308772,0.64141595,0.6238037,0.63929296



See the whole table with table.as_data_frame()

Scoring History: 


Unnamed: 0,Unnamed: 1,timestamp,duration,training_speed,epochs,iterations,samples,training_rmse,training_logloss,training_r2,training_auc,training_pr_auc,training_lift,training_classification_error
0,,2020-03-10 00:37:11,0.000 sec,,0.0,0,0.0,,,,,,,
1,,2020-03-10 00:37:12,6.930 sec,99369 obs/sec,1.038313,1,11030.0,0.521045,0.750979,-0.085959,0.631372,0.611151,1.647827,0.438607
2,,2020-03-10 00:37:13,7.993 sec,108410 obs/sec,11.399322,11,121095.0,0.455083,0.603129,0.171592,0.74921,0.727829,1.826507,0.33393



Variable Importances: 


Unnamed: 0,variable,relative_importance,scaled_importance,percentage
0,x28,1.0,1.0,0.088311
1,x26,0.761011,0.761011,0.067206
2,x27,0.758636,0.758636,0.066996
3,x1,0.677894,0.677894,0.059865
4,x6,0.632586,0.632586,0.055864
5,x23,0.580508,0.580508,0.051265
6,x9,0.487036,0.487036,0.043011
7,x13,0.413166,0.413166,0.036487
8,x4,0.37599,0.37599,0.033204
9,x10,0.368789,0.368789,0.032568



See the whole table with table.as_data_frame()


