In [1]:
import h2o
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: java version "11.0.1" 2018-10-16 LTS; Java(TM) SE Runtime Environment 18.9 (build 11.0.1+13-LTS); Java HotSpot(TM) 64-Bit Server VM 18.9 (build 11.0.1+13-LTS, mixed mode)
  Starting server from /Users/carmen/anaconda3/lib/python3.7/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /var/folders/mf/strl7mm53_dcwkr8qcwz_9fm0000gn/T/tmpzrlhpe02
  JVM stdout: /var/folders/mf/strl7mm53_dcwkr8qcwz_9fm0000gn/T/tmpzrlhpe02/h2o_carmen_started_from_python.out
  JVM stderr: /var/folders/mf/strl7mm53_dcwkr8qcwz_9fm0000gn/T/tmpzrlhpe02/h2o_carmen_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321... successful.


0,1
H2O cluster uptime:,01 secs
H2O cluster timezone:,America/Los_Angeles
H2O data parsing timezone:,UTC
H2O cluster version:,3.22.1.3
H2O cluster version age:,15 days
H2O cluster name:,H2O_from_python_carmen_nloyns
H2O cluster total nodes:,1
H2O cluster free memory:,4 Gb
H2O cluster total cores:,8
H2O cluster allowed cores:,8


In [2]:
print("Import approved and rejected loan requests...")
train = h2o.import_file(path = "data/train.csv")
test = h2o.import_file(path = "data/test.csv")

Import approved and rejected loan requests...
Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%


In [3]:
# Ensure objetive variable (bad_load) is categorical / factor
train["bad_loan"] = train["bad_loan"].asfactor()
test["bad_loan"] = test["bad_loan"].asfactor()

In [4]:
# Bad Loan Model

# myY = "bad_loan" objetive 
# myX = Dataset  predictor variables names

myY = "bad_loan"
myX = ["loan_amnt", "longest_credit_length", "revol_util", "emp_length",
       "home_ownership", "annual_inc", "purpose", "addr_state", "dti",
       "delinq_2yrs", "total_acc", "verification_status", "term"]

In [5]:
from h2o.automl import H2OAutoML

In [6]:
# Run AutoML for 5 base models (limited to 1 hour max runtime by default)
bad_loan_model = H2OAutoML(max_models=5,seed=1, nfolds=0)
bad_loan_model.train(x=myX, y=myY, training_frame=train, validation_frame=test,leaderboard_frame=test)

AutoML progress: |████████████████████████████████████████████████████████| 100%


In [7]:
# View the AutoML Leaderboard
lb = bad_loan_model.leaderboard
lb.head(rows=lb.nrows)  # Print all rows instead of default (10 rows)

model_id,auc,logloss,mean_per_class_error,rmse,mse
XGBoost_1_AutoML_20190210_133722,0.681544,0.444621,0.36952,0.373908,0.139807
XGBoost_2_AutoML_20190210_133722,0.680823,0.444955,0.370734,0.374215,0.140037
GLM_grid_1_AutoML_20190210_133722_model_1,0.674697,0.447057,0.372094,0.374849,0.140512
XRT_1_AutoML_20190210_133722,0.669451,0.449403,0.376828,0.375889,0.141293
DRF_1_AutoML_20190210_133722,0.661431,0.453165,0.380155,0.377059,0.142173




In [8]:
# Download generated MOJO for model
import os
if not os.path.exists("tmp"):
    os.makedirs("tmp")
bad_loan_model_file=bad_loan_model.download_mojo( path="tmp", get_genmodel_jar=True )


In [9]:
# Deploy the model
import shutil
shutil.copy('tmp/h2o-genmodel.jar', '../src/main/webapp/WEB-INF/lib/')
shutil.copy(bad_loan_model_file, '../src/main/webapp/BadLoanModel.zip')

'../src/main/webapp/BadLoanModel.zip'

In [10]:
# Interest Rate Model

# myY = "int_rate" objetive 
# myX = Dataset  predictor variables names

myY = "int_rate"
myX = ["loan_amnt", "longest_credit_length", "revol_util", "emp_length",
       "home_ownership", "annual_inc", "purpose", "addr_state", "dti",
       "delinq_2yrs", "total_acc", "verification_status", "term"]

In [11]:
from h2o.estimators.gbm import H2OGradientBoostingEstimator
interest_rate_model = H2OGradientBoostingEstimator(score_each_iteration = True,
                                     ntrees = 100,
                                     max_depth = 5,
                                     learn_rate = 0.05,
                                     model_id = "InterestRateModel")
interest_rate_model.train(x = myX, y = myY, training_frame = train, validation_frame = test)
print(interest_rate_model)

gbm Model Build progress: |███████████████████████████████████████████████| 100%
Model Details
H2OGradientBoostingEstimator :  Gradient Boosting Machine
Model Key:  InterestRateModel


ModelMetricsRegression: gbm
** Reported on train data. **

MSE: 10.86260198745982
RMSE: 3.2958461716924563
MAE: 2.636052939136392
RMSLE: 0.23805564679045385
Mean Residual Deviance: 10.86260198745982

ModelMetricsRegression: gbm
** Reported on validation data. **

MSE: 11.112677596397843
RMSE: 3.333568297845095
MAE: 2.6649262049291167
RMSLE: 0.24004987688404397
Mean Residual Deviance: 11.112677596397843
Scoring History: 


0,1,2,3,4,5,6,7,8,9
,timestamp,duration,number_of_trees,training_rmse,training_mae,training_deviance,validation_rmse,validation_mae,validation_deviance
,2019-02-10 13:38:34,0.002 sec,0.0,4.3924826,3.5256478,19.2939031,4.3896994,3.5205772,19.2694612
,2019-02-10 13:38:34,0.210 sec,1.0,4.3164406,3.4659350,18.6316591,4.3146846,3.4614585,18.6165035
,2019-02-10 13:38:34,0.349 sec,2.0,4.2465963,3.4111965,18.0335805,4.2458401,3.4072598,18.0271582
,2019-02-10 13:38:34,0.465 sec,3.0,4.1823269,3.3602462,17.4918585,4.1827113,3.3568970,17.4950738
,2019-02-10 13:38:34,0.569 sec,4.0,4.1231114,3.3135136,17.0000472,4.1243835,3.3105583,17.0105391
---,---,---,---,---,---,---,---,---,---
,2019-02-10 13:38:39,4.632 sec,96.0,3.3000945,2.6402393,10.8906239,3.3363991,2.6677566,11.1315587
,2019-02-10 13:38:39,4.670 sec,97.0,3.2989506,2.6391444,10.8830748,3.3356345,2.6670257,11.1264576
,2019-02-10 13:38:39,4.707 sec,98.0,3.2978330,2.6380627,10.8757026,3.3348888,2.6662941,11.1214834



See the whole table with table.as_data_frame()
Variable Importances: 


0,1,2,3
variable,relative_importance,scaled_importance,percentage
term,5188762.0,1.0,0.4571483
revol_util,3203326.2500000,0.6173585,0.2822244
purpose,598462.75,0.1153383,0.0527267
delinq_2yrs,492388.5,0.0948952,0.0433812
loan_amnt,416479.3750000,0.0802657,0.0366933
longest_credit_length,302441.2187500,0.0582877,0.0266461
verification_status,286500.5625000,0.0552156,0.0252417
home_ownership,249026.8281250,0.0479935,0.0219401
dti,190445.6875000,0.0367035,0.0167789





In [12]:
# View the AutoML Leaderboard
lb = interest_rate_model.leaderboard
lb.head(rows=lb.nrows)  # Print all rows instead of default (10 rows)

AttributeError: type object 'H2OGradientBoostingEstimator' has no attribute 'leaderboard'

In [None]:
# Download generated MOJO for model
interest_rate_model_file=interest_rate_model.download_mojo( path="tmp")


In [None]:
# Deploy the model
shutil.copy(interest_rate_model_file, '../src/main/webapp/InterestRateModel.zip')