# Introduction
H2O is an open-source platform for building and deploying machine learning models at scale.

It offers a range of AutoML tools, including automatic feature engineering, hyperparameter tuning, and model selection.

H2O supports several machine learning algorithms, including deep learning, gradient boosting, and generalized linear models. 

In [5]:
pip install h2O

Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.



In [6]:
pip install pandas

Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.



In [7]:
import h2o
import numpy
import pandas

In [8]:
# Initialisation of H2O cluster
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321. connected.


0,1
H2O_cluster_uptime:,2 mins 49 secs
H2O_cluster_timezone:,Asia/Kolkata
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.40.0.2
H2O_cluster_version_age:,"21 days, 17 hours and 25 minutes"
H2O_cluster_name:,H2O_from_python_Dell_eejxaj
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,1.710 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


In [9]:
from h2o.automl import H2OAutoML

In [10]:
# Load data 
data= h2o.import_file('student-por.csv')

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [11]:
# datatype for each column
data.types

{'school': 'enum',
 'sex': 'enum',
 'age': 'int',
 'address': 'enum',
 'famsize': 'enum',
 'Pstatus': 'enum',
 'Medu': 'int',
 'Fedu': 'int',
 'Mjob': 'enum',
 'Fjob': 'enum',
 'reason': 'enum',
 'guardian': 'enum',
 'traveltime': 'int',
 'studytime': 'int',
 'failures': 'int',
 'schoolsup': 'enum',
 'famsup': 'enum',
 'paid': 'enum',
 'activities': 'enum',
 'nursery': 'enum',
 'higher': 'enum',
 'internet': 'enum',
 'romantic': 'enum',
 'famrel': 'int',
 'freetime': 'int',
 'goout': 'int',
 'Dalc': 'int',
 'Walc': 'int',
 'health': 'int',
 'absences': 'int',
 'G1': 'int',
 'G2': 'int',
 'G3': 'int'}

In [12]:
# summarizes various statistics for each columns & total number of rows and columns
data.describe()


Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
type,enum,enum,int,enum,enum,enum,int,int,enum,enum,enum,enum,int,int,int,enum,enum,enum,enum,enum,enum,enum,enum,int,int,int,int,int,int,int,int,int,int
mins,,,15.0,,,,0.0,0.0,,,,,1.0,1.0,0.0,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
mean,,,16.7442218798151,,,,2.514637904468413,2.3066255778120186,,,,,1.568567026194145,1.9306625577812018,0.2218798151001541,,,,,,,,,3.930662557781202,3.180277349768875,3.184899845916795,1.50231124807396,2.280431432973806,3.536209553158706,3.6594761171032357,11.399075500770415,11.570107858243452,11.906009244992296
maxs,,,22.0,,,,4.0,4.0,,,,,4.0,4.0,3.0,,,,,,,,,5.0,5.0,5.0,5.0,5.0,5.0,32.0,19.0,19.0,19.0
sigma,,,1.218137639480064,,,,1.134551999953317,1.099930911140803,,,,,0.7486600863534161,0.829509574636615,0.5932351492649627,,,,,,,,,0.9557168913381863,1.051092575357063,1.175766113842312,0.9248344305673268,1.2843799719490245,1.4462590500910701,4.640758808397153,2.7452651284463725,2.913638664303867,3.230656242804805
zeros,,,0,,,,6,7,,,,,0,0,549,,,,,,,,,0,0,0,0,0,0,244,1,7,15
missing,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,GP,F,18.0,U,GT3,A,4.0,4.0,at_home,teacher,course,mother,2.0,2.0,0.0,yes,no,no,no,yes,yes,no,no,4.0,3.0,4.0,1.0,1.0,3.0,4.0,0.0,11.0,11.0
1,GP,F,17.0,U,GT3,T,1.0,1.0,at_home,other,course,father,1.0,2.0,0.0,no,yes,no,no,no,yes,yes,no,5.0,3.0,3.0,1.0,1.0,3.0,2.0,9.0,11.0,11.0
2,GP,F,15.0,U,LE3,T,1.0,1.0,at_home,other,other,mother,1.0,2.0,0.0,yes,no,no,no,yes,yes,yes,no,4.0,3.0,2.0,2.0,3.0,3.0,6.0,12.0,13.0,12.0


In [13]:
# Dividing the data into training ,testing and valid
data_train,data_test,data_valid = data.split_frame(ratios=[.7, .15])

In [14]:
# Printing training data
data_train


school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
GP,F,18,U,GT3,A,4,4,at_home,teacher,course,mother,2,2,0,yes,no,no,no,yes,yes,no,no,4,3,4,1,1,3,4,0,11,11
GP,F,15,U,LE3,T,1,1,at_home,other,other,mother,1,2,0,yes,no,no,no,yes,yes,yes,no,4,3,2,2,3,3,6,12,13,12
GP,M,16,U,LE3,T,4,3,services,other,reputation,mother,1,2,0,no,yes,no,yes,yes,yes,yes,no,5,4,2,1,2,5,6,12,12,13
GP,M,16,U,LE3,T,2,2,other,other,home,mother,1,2,0,no,no,no,no,yes,yes,yes,no,4,4,4,1,1,3,0,13,12,13
GP,F,17,U,GT3,A,4,4,other,teacher,home,mother,2,2,0,yes,yes,no,no,yes,yes,no,no,4,1,4,1,1,1,2,10,13,13
GP,M,15,U,LE3,A,3,2,services,other,home,mother,1,2,0,no,yes,no,no,yes,yes,yes,no,4,2,2,1,1,1,0,15,16,17
GP,F,15,U,GT3,T,4,4,teacher,health,reputation,mother,1,2,0,no,yes,no,no,yes,yes,yes,no,3,3,3,1,2,2,2,14,14,14
GP,F,15,U,GT3,T,2,1,services,other,reputation,father,3,3,0,no,yes,no,yes,yes,yes,yes,no,5,2,2,1,1,4,0,10,12,13
GP,M,15,U,LE3,T,4,4,health,services,course,father,1,1,0,no,yes,no,yes,yes,yes,yes,no,4,3,3,1,3,5,0,12,13,12
GP,M,15,U,GT3,T,4,3,teacher,other,course,mother,2,2,0,no,yes,no,no,yes,yes,yes,no,5,4,3,1,2,3,0,12,12,13


In [15]:
y = "address" 
x = data.columns #total number of columns in the dataset
x.remove(y)



In [16]:
# training models with specified feature columns and response variable
aml = H2OAutoML(max_models = 10, exclude_algos = ["StackedEnsemble", "DeepLearning"], verbosity="debug", nfolds=5)
aml
aml.train(x = x, y = y, training_frame = data_train)

AutoML progress: |█
08:43:18.237: Project: AutoML_1_20230331_84318
08:43:18.240: Setting stopping tolerance adaptively based on the training frame: 0.04617570965396102
08:43:18.240: Build control seed: -1 (random)
08:43:18.242: training frame: Frame key: AutoML_1_20230331_84318_training_py_2_sid_9038    cols: 33    rows: 469  chunks: 17    size: 177245  checksum: -766451213868
08:43:18.242: validation frame: NULL
08:43:18.243: leaderboard frame: NULL
08:43:18.243: blending frame: NULL
08:43:18.243: response column: address
08:43:18.243: fold column: null
08:43:18.244: weights column: null
08:43:18.285: AutoML: XGBoost is not available; skipping it.
08:43:18.299: Loading execution steps: [{XGBoost : [def_2 (1g, 10w), def_1 (2g, 10w), def_3 (3g, 10w), grid_1 (4g, 90w), lr_search (7g, 30w)]}, {GLM : [def_1 (1g, 10w)]}, {DRF : [def_1 (2g, 10w), XRT (3g, 10w)]}, {GBM : [def_5 (1g, 10w), def_2 (2g, 10w), def_3 (2g, 10w), def_4 (2g, 10w), def_1 (3g, 10w), grid_1 (4g, 60w), lr_annealing (7g, 1

██████████████████████████████████████████████████| (done) 100%


Unnamed: 0,number_of_trees,number_of_internal_trees,model_size_in_bytes,min_depth,max_depth,mean_depth,min_leaves,max_leaves,mean_leaves
,37.0,37.0,3172.0,1.0,2.0,1.1621622,2.0,3.0,2.162162

Unnamed: 0,R,U,Error,Rate
R,54.0,91.0,0.6276,(91.0/145.0)
U,25.0,299.0,0.0772,(25.0/324.0)
Total,79.0,390.0,0.2473,(116.0/469.0)

metric,threshold,value,idx
max f1,0.4411456,0.837535,242.0
max f2,0.3797646,0.9199319,277.0
max f0point5,0.7000265,0.8256173,134.0
max accuracy,0.6231756,0.7590618,214.0
max precision,0.8902089,1.0,0.0
max recall,0.3797646,1.0,277.0
max specificity,0.8902089,1.0,0.0
max absolute_mcc,0.7032981,0.4276429,128.0
max min_per_class_accuracy,0.6785896,0.712963,155.0
max mean_per_class_accuracy,0.7032981,0.7313325,128.0

group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
1,0.010661,0.8849429,1.4475309,1.4475309,1.0,0.8897833,1.0,0.8897833,0.0154321,0.0154321,44.7530864,44.7530864,0.0154321
2,0.0255864,0.8834661,1.4475309,1.4475309,1.0,0.8834661,1.0,0.8860983,0.0216049,0.037037,44.7530864,44.7530864,0.037037
3,0.0341151,0.881225,1.4475309,1.4475309,1.0,0.881225,1.0,0.8848799,0.0123457,0.0493827,44.7530864,44.7530864,0.0493827
4,0.0447761,0.8802971,1.4475309,1.4475309,1.0,0.8802971,1.0,0.8837888,0.0154321,0.0648148,44.7530864,44.7530864,0.0648148
5,0.0575693,0.8780035,1.4475309,1.4475309,1.0,0.8787437,1.0,0.8826677,0.0185185,0.0833333,44.7530864,44.7530864,0.0833333
6,0.1172708,0.8645678,1.2924383,1.3685746,0.8928571,0.8694617,0.9454545,0.8759446,0.0771605,0.1604938,29.2438272,36.8574635,0.1398042
7,0.1577825,0.861661,1.2189734,1.3301635,0.8421053,0.8627688,0.9189189,0.8725616,0.0493827,0.2098765,21.8973359,33.0163497,0.1684972
8,0.2004264,0.8549272,1.1580247,1.2935382,0.8,0.8583573,0.893617,0.8695394,0.0493827,0.2592593,15.8024691,29.3538219,0.1902937
9,0.3006397,0.8416552,1.3551353,1.3140706,0.9361702,0.8488749,0.9078014,0.8626513,0.1358025,0.3950617,35.5135277,31.4070572,0.3054066
10,0.4008529,0.8166868,1.3243367,1.3166371,0.9148936,0.8296726,0.9095745,0.8544066,0.132716,0.5277778,32.4336748,31.6637116,0.4105364

Unnamed: 0,R,U,Error,Rate
R,70.0,75.0,0.5172,(75.0/145.0)
U,39.0,285.0,0.1204,(39.0/324.0)
Total,109.0,360.0,0.2431,(114.0/469.0)

metric,threshold,value,idx
max f1,0.5538509,0.8333333,277.0
max f2,0.3458676,0.917847,351.0
max f0point5,0.6340683,0.8105701,259.0
max accuracy,0.5838137,0.7569296,275.0
max precision,0.8894598,1.0,0.0
max recall,0.3458676,1.0,351.0
max specificity,0.8894598,1.0,0.0
max absolute_mcc,0.5838137,0.3981713,275.0
max min_per_class_accuracy,0.6875587,0.691358,198.0
max mean_per_class_accuracy,0.7032211,0.6999361,180.0

group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
1,0.010661,0.8832067,1.1580247,1.1580247,0.8,0.8845859,0.8,0.8845859,0.0123457,0.0123457,15.8024691,15.8024691,0.0054491
2,0.0255864,0.8815692,1.4475309,1.3269033,1.0,0.8820155,0.9166667,0.8830865,0.0216049,0.0339506,44.7530864,32.6903292,0.0270541
3,0.0341151,0.8798778,1.4475309,1.3570602,1.0,0.8802401,0.9375,0.8823749,0.0123457,0.0462963,44.7530864,35.7060185,0.0393997
4,0.0405117,0.8772562,0.9650206,1.2951592,0.6666667,0.8789091,0.8947368,0.8818276,0.0061728,0.0524691,-3.4979424,29.5159194,0.038676
5,0.0511727,0.8747708,1.4475309,1.3269033,1.0,0.8758161,0.9166667,0.8805752,0.0154321,0.0679012,44.7530864,32.6903292,0.0541081
6,0.1002132,0.8698537,1.3845947,1.3551353,0.9565217,0.8718345,0.9361702,0.8762979,0.0679012,0.1358025,38.459474,35.5135277,0.1151128
7,0.1513859,0.864411,1.3269033,1.3455921,0.9166667,0.867201,0.9295775,0.8732229,0.0679012,0.2037037,32.6903292,34.5592071,0.1692209
8,0.2004264,0.8536149,1.1957864,1.3089375,0.826087,0.8592293,0.9042553,0.8697989,0.058642,0.2623457,19.5786366,30.8937484,0.2002767
9,0.3006397,0.8358955,1.2627397,1.2935382,0.8723404,0.8442162,0.893617,0.8612714,0.1265432,0.3888889,26.273969,29.3538219,0.2854406
10,0.4008529,0.8079299,1.2319412,1.278139,0.8510638,0.8233435,0.8829787,0.8517894,0.1234568,0.5123457,23.1941161,27.8138955,0.3606215

Unnamed: 0,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
accuracy,0.7653626,0.0413045,0.7446808,0.819149,0.7978724,0.7446808,0.7204301
auc,0.7682067,0.0473054,0.7537122,0.7924638,0.814881,0.7867944,0.6931818
err,0.2346374,0.0413045,0.2553191,0.1808511,0.2021277,0.2553191,0.2795699
err_count,22.0,3.8078866,24.0,17.0,19.0,24.0,26.0
f0point5,0.803069,0.0425501,0.7925072,0.8438287,0.8513514,0.7692308,0.758427
f1,0.8433164,0.0339111,0.8208955,0.8874172,0.8689655,0.8333333,0.8059701
f2,0.8886869,0.0348375,0.8513932,0.9357542,0.8873239,0.9090909,0.8598726
lift_top_group,1.4526737,0.0939072,1.4920635,1.3623189,1.3428571,1.516129,1.55
logloss,0.5265688,0.052009,0.5427651,0.4816821,0.4701688,0.5396063,0.598622
max_per_class_error,0.5819379,0.0759871,0.516129,0.6,0.5,0.6875,0.6060606

Unnamed: 0,timestamp,duration,number_of_trees,training_rmse,training_logloss,training_auc,training_pr_auc,training_lift,training_classification_error
,2023-03-31 08:43:35,0.593 sec,0.0,0.4621508,0.6184337,0.5,0.6908316,1.0,0.3091684
,2023-03-31 08:43:35,0.619 sec,5.0,0.4507295,0.5942018,0.7206152,0.838488,1.2987193,0.3091684
,2023-03-31 08:43:35,0.640 sec,10.0,0.4387503,0.569698,0.7491166,0.8533606,1.3096708,0.2579957
,2023-03-31 08:43:35,0.661 sec,15.0,0.4288754,0.5498905,0.7644849,0.8555287,1.2407407,0.2452026
,2023-03-31 08:43:35,0.683 sec,20.0,0.4216587,0.5349314,0.7717007,0.865412,1.4475309,0.2452026
,2023-03-31 08:43:35,0.706 sec,25.0,0.4174354,0.5256433,0.7716049,0.8668436,1.4475309,0.2452026
,2023-03-31 08:43:35,0.729 sec,30.0,0.4155406,0.5215445,0.7744679,0.8739167,1.4475309,0.2452026
,2023-03-31 08:43:35,0.753 sec,35.0,0.4133006,0.5164339,0.7809919,0.881911,1.4475309,0.2473348
,2023-03-31 08:43:35,0.767 sec,37.0,0.4127302,0.5151504,0.7807791,0.8819371,1.4475309,0.2473348

variable,relative_importance,scaled_importance,percentage
traveltime,47.3647461,1.0,0.4223353
school,47.1242676,0.9949228,0.4201910
reason,4.9641252,0.1048063,0.0442634
Medu,3.9783580,0.0839941,0.0354737
G2,2.3266835,0.0491227,0.0207462
G3,2.1934135,0.0463090,0.0195579
absences,1.5755904,0.0332650,0.0140490
G1,1.4323676,0.0302412,0.0127719
Mjob,0.6403601,0.0135198,0.0057099
activities,0.3730655,0.0078764,0.0033265


In [17]:
# ranks the models based on their performance
lb = aml.leaderboard

In [18]:
lb.head()


model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse
GBM_grid_1_AutoML_1_20230331_84318_model_1,0.757269,0.526415,0.858925,0.318806,0.418195,0.174887
GBM_3_AutoML_1_20230331_84318,0.755651,0.531577,0.862368,0.400915,0.421931,0.178025
GBM_grid_1_AutoML_1_20230331_84318_model_2,0.750819,0.529686,0.856569,0.372605,0.41978,0.176215
GBM_1_AutoML_1_20230331_84318,0.748957,0.527418,0.845475,0.318806,0.418292,0.174968
GBM_2_AutoML_1_20230331_84318,0.745126,0.537451,0.84765,0.379502,0.422456,0.178469
GBM_4_AutoML_1_20230331_84318,0.737761,0.541442,0.855657,0.392114,0.42542,0.180982
DRF_1_AutoML_1_20230331_84318,0.726032,0.543552,0.853033,0.451362,0.428656,0.183746
GLM_1_AutoML_1_20230331_84318,0.722393,0.551477,0.824638,0.413165,0.42959,0.184547
GBM_5_AutoML_1_20230331_84318,0.716603,0.563988,0.828014,0.385579,0.431707,0.186371
XRT_1_AutoML_1_20230331_84318,0.682514,0.578266,0.810439,0.466337,0.441893,0.195269


In [19]:
# predictions for checking if the data is overfit or not
data_pred=aml.leader.predict(data_test)

gbm prediction progress: |███████████████████████████████████████████████████████| (done) 100%


In [20]:
data_pred.head()


predict,R,U
U,0.116534,0.883466
U,0.14525,0.85475
U,0.171884,0.828116
U,0.31526,0.68474
U,0.129426,0.870574
U,0.157267,0.842733
U,0.147328,0.852672
U,0.171796,0.828204
U,0.133084,0.866916
U,0.135432,0.864568


In [21]:
# access the quality of the leader models predictions
aml.leader.model_performance(data_test)

Unnamed: 0,R,U,Error,Rate
R,5.0,16.0,0.7619,(16.0/21.0)
U,2.0,63.0,0.0308,(2.0/65.0)
Total,7.0,79.0,0.2093,(18.0/86.0)

metric,threshold,value,idx
max f1,0.4177388,0.875,69.0
max f2,0.3896575,0.9393064,74.0
max f0point5,0.6391189,0.8461538,56.0
max accuracy,0.4251568,0.7906977,68.0
max precision,0.8834661,1.0,0.0
max recall,0.3896575,1.0,74.0
max specificity,0.8834661,1.0,0.0
max absolute_mcc,0.6781355,0.380769,41.0
max min_per_class_accuracy,0.6781355,0.6769231,41.0
max mean_per_class_accuracy,0.6781355,0.7194139,41.0

group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
1,0.0232558,0.8834661,1.3230769,1.3230769,1.0,0.8834661,1.0,0.8834661,0.0307692,0.0307692,32.3076923,32.3076923,0.0307692
2,0.0232558,0.8812174,0.0,1.3230769,0.0,0.0,1.0,0.8834661,0.0,0.0307692,-100.0,32.3076923,0.0307692
3,0.0348837,0.879016,1.3230769,1.3230769,1.0,0.8802536,1.0,0.8823953,0.0153846,0.0461538,32.3076923,32.3076923,0.0461538
4,0.0465116,0.8750315,0.0,0.9923077,0.0,0.8780035,0.75,0.8812973,0.0,0.0461538,-100.0,-0.7692308,-0.0014652
5,0.0581395,0.8696593,1.3230769,1.0584615,1.0,0.8705735,0.8,0.8791526,0.0153846,0.0615385,32.3076923,5.8461538,0.0139194
6,0.127907,0.8645678,1.1025641,1.0825175,0.8333333,0.8654012,0.8181818,0.8716518,0.0769231,0.1384615,10.2564103,8.2517483,0.0432234
7,0.1511628,0.861778,1.3230769,1.1195266,1.0,0.8642144,0.8461538,0.8705076,0.0307692,0.1692308,32.3076923,11.9526627,0.0739927
8,0.2093023,0.8543408,1.3230769,1.1760684,1.0,0.8574312,0.8888889,0.8668753,0.0769231,0.2461538,32.3076923,17.6068376,0.1509158
9,0.3023256,0.832647,0.8269231,1.0686391,0.625,0.8451688,0.8076923,0.8601964,0.0769231,0.3230769,-17.3076923,6.8639053,0.0849817
10,0.4069767,0.7294684,1.3230769,1.1340659,1.0,0.8123312,0.8571429,0.8478882,0.1384615,0.4615385,32.3076923,13.4065934,0.2234432


In [22]:
# list of all the model ids 
model_ids = list(aml.leaderboard['model_id'].as_data_frame().iloc[:,0])

In [23]:
model_ids

['GBM_grid_1_AutoML_1_20230331_84318_model_1',
 'GBM_3_AutoML_1_20230331_84318',
 'GBM_grid_1_AutoML_1_20230331_84318_model_2',
 'GBM_1_AutoML_1_20230331_84318',
 'GBM_2_AutoML_1_20230331_84318',
 'GBM_4_AutoML_1_20230331_84318',
 'DRF_1_AutoML_1_20230331_84318',
 'GLM_1_AutoML_1_20230331_84318',
 'GBM_5_AutoML_1_20230331_84318',
 'XRT_1_AutoML_1_20230331_84318']

In [24]:
# further analysis through desired model
h2o.get_model([mid for mid in model_ids if "GBM_grid" in mid][0])

Unnamed: 0,number_of_trees,number_of_internal_trees,model_size_in_bytes,min_depth,max_depth,mean_depth,min_leaves,max_leaves,mean_leaves
,37.0,37.0,3172.0,1.0,2.0,1.1621622,2.0,3.0,2.162162

Unnamed: 0,R,U,Error,Rate
R,54.0,91.0,0.6276,(91.0/145.0)
U,25.0,299.0,0.0772,(25.0/324.0)
Total,79.0,390.0,0.2473,(116.0/469.0)

metric,threshold,value,idx
max f1,0.4411456,0.837535,242.0
max f2,0.3797646,0.9199319,277.0
max f0point5,0.7000265,0.8256173,134.0
max accuracy,0.6231756,0.7590618,214.0
max precision,0.8902089,1.0,0.0
max recall,0.3797646,1.0,277.0
max specificity,0.8902089,1.0,0.0
max absolute_mcc,0.7032981,0.4276429,128.0
max min_per_class_accuracy,0.6785896,0.712963,155.0
max mean_per_class_accuracy,0.7032981,0.7313325,128.0

group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
1,0.010661,0.8849429,1.4475309,1.4475309,1.0,0.8897833,1.0,0.8897833,0.0154321,0.0154321,44.7530864,44.7530864,0.0154321
2,0.0255864,0.8834661,1.4475309,1.4475309,1.0,0.8834661,1.0,0.8860983,0.0216049,0.037037,44.7530864,44.7530864,0.037037
3,0.0341151,0.881225,1.4475309,1.4475309,1.0,0.881225,1.0,0.8848799,0.0123457,0.0493827,44.7530864,44.7530864,0.0493827
4,0.0447761,0.8802971,1.4475309,1.4475309,1.0,0.8802971,1.0,0.8837888,0.0154321,0.0648148,44.7530864,44.7530864,0.0648148
5,0.0575693,0.8780035,1.4475309,1.4475309,1.0,0.8787437,1.0,0.8826677,0.0185185,0.0833333,44.7530864,44.7530864,0.0833333
6,0.1172708,0.8645678,1.2924383,1.3685746,0.8928571,0.8694617,0.9454545,0.8759446,0.0771605,0.1604938,29.2438272,36.8574635,0.1398042
7,0.1577825,0.861661,1.2189734,1.3301635,0.8421053,0.8627688,0.9189189,0.8725616,0.0493827,0.2098765,21.8973359,33.0163497,0.1684972
8,0.2004264,0.8549272,1.1580247,1.2935382,0.8,0.8583573,0.893617,0.8695394,0.0493827,0.2592593,15.8024691,29.3538219,0.1902937
9,0.3006397,0.8416552,1.3551353,1.3140706,0.9361702,0.8488749,0.9078014,0.8626513,0.1358025,0.3950617,35.5135277,31.4070572,0.3054066
10,0.4008529,0.8166868,1.3243367,1.3166371,0.9148936,0.8296726,0.9095745,0.8544066,0.132716,0.5277778,32.4336748,31.6637116,0.4105364

Unnamed: 0,R,U,Error,Rate
R,70.0,75.0,0.5172,(75.0/145.0)
U,39.0,285.0,0.1204,(39.0/324.0)
Total,109.0,360.0,0.2431,(114.0/469.0)

metric,threshold,value,idx
max f1,0.5538509,0.8333333,277.0
max f2,0.3458676,0.917847,351.0
max f0point5,0.6340683,0.8105701,259.0
max accuracy,0.5838137,0.7569296,275.0
max precision,0.8894598,1.0,0.0
max recall,0.3458676,1.0,351.0
max specificity,0.8894598,1.0,0.0
max absolute_mcc,0.5838137,0.3981713,275.0
max min_per_class_accuracy,0.6875587,0.691358,198.0
max mean_per_class_accuracy,0.7032211,0.6999361,180.0

group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
1,0.010661,0.8832067,1.1580247,1.1580247,0.8,0.8845859,0.8,0.8845859,0.0123457,0.0123457,15.8024691,15.8024691,0.0054491
2,0.0255864,0.8815692,1.4475309,1.3269033,1.0,0.8820155,0.9166667,0.8830865,0.0216049,0.0339506,44.7530864,32.6903292,0.0270541
3,0.0341151,0.8798778,1.4475309,1.3570602,1.0,0.8802401,0.9375,0.8823749,0.0123457,0.0462963,44.7530864,35.7060185,0.0393997
4,0.0405117,0.8772562,0.9650206,1.2951592,0.6666667,0.8789091,0.8947368,0.8818276,0.0061728,0.0524691,-3.4979424,29.5159194,0.038676
5,0.0511727,0.8747708,1.4475309,1.3269033,1.0,0.8758161,0.9166667,0.8805752,0.0154321,0.0679012,44.7530864,32.6903292,0.0541081
6,0.1002132,0.8698537,1.3845947,1.3551353,0.9565217,0.8718345,0.9361702,0.8762979,0.0679012,0.1358025,38.459474,35.5135277,0.1151128
7,0.1513859,0.864411,1.3269033,1.3455921,0.9166667,0.867201,0.9295775,0.8732229,0.0679012,0.2037037,32.6903292,34.5592071,0.1692209
8,0.2004264,0.8536149,1.1957864,1.3089375,0.826087,0.8592293,0.9042553,0.8697989,0.058642,0.2623457,19.5786366,30.8937484,0.2002767
9,0.3006397,0.8358955,1.2627397,1.2935382,0.8723404,0.8442162,0.893617,0.8612714,0.1265432,0.3888889,26.273969,29.3538219,0.2854406
10,0.4008529,0.8079299,1.2319412,1.278139,0.8510638,0.8233435,0.8829787,0.8517894,0.1234568,0.5123457,23.1941161,27.8138955,0.3606215

Unnamed: 0,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
accuracy,0.7653626,0.0413045,0.7446808,0.819149,0.7978724,0.7446808,0.7204301
auc,0.7682067,0.0473054,0.7537122,0.7924638,0.814881,0.7867944,0.6931818
err,0.2346374,0.0413045,0.2553191,0.1808511,0.2021277,0.2553191,0.2795699
err_count,22.0,3.8078866,24.0,17.0,19.0,24.0,26.0
f0point5,0.803069,0.0425501,0.7925072,0.8438287,0.8513514,0.7692308,0.758427
f1,0.8433164,0.0339111,0.8208955,0.8874172,0.8689655,0.8333333,0.8059701
f2,0.8886869,0.0348375,0.8513932,0.9357542,0.8873239,0.9090909,0.8598726
lift_top_group,1.4526737,0.0939072,1.4920635,1.3623189,1.3428571,1.516129,1.55
logloss,0.5265688,0.052009,0.5427651,0.4816821,0.4701688,0.5396063,0.598622
max_per_class_error,0.5819379,0.0759871,0.516129,0.6,0.5,0.6875,0.6060606

Unnamed: 0,timestamp,duration,number_of_trees,training_rmse,training_logloss,training_auc,training_pr_auc,training_lift,training_classification_error
,2023-03-31 08:43:35,0.593 sec,0.0,0.4621508,0.6184337,0.5,0.6908316,1.0,0.3091684
,2023-03-31 08:43:35,0.619 sec,5.0,0.4507295,0.5942018,0.7206152,0.838488,1.2987193,0.3091684
,2023-03-31 08:43:35,0.640 sec,10.0,0.4387503,0.569698,0.7491166,0.8533606,1.3096708,0.2579957
,2023-03-31 08:43:35,0.661 sec,15.0,0.4288754,0.5498905,0.7644849,0.8555287,1.2407407,0.2452026
,2023-03-31 08:43:35,0.683 sec,20.0,0.4216587,0.5349314,0.7717007,0.865412,1.4475309,0.2452026
,2023-03-31 08:43:35,0.706 sec,25.0,0.4174354,0.5256433,0.7716049,0.8668436,1.4475309,0.2452026
,2023-03-31 08:43:35,0.729 sec,30.0,0.4155406,0.5215445,0.7744679,0.8739167,1.4475309,0.2452026
,2023-03-31 08:43:35,0.753 sec,35.0,0.4133006,0.5164339,0.7809919,0.881911,1.4475309,0.2473348
,2023-03-31 08:43:35,0.767 sec,37.0,0.4127302,0.5151504,0.7807791,0.8819371,1.4475309,0.2473348

variable,relative_importance,scaled_importance,percentage
traveltime,47.3647461,1.0,0.4223353
school,47.1242676,0.9949228,0.4201910
reason,4.9641252,0.1048063,0.0442634
Medu,3.9783580,0.0839941,0.0354737
G2,2.3266835,0.0491227,0.0207462
G3,2.1934135,0.0463090,0.0195579
absences,1.5755904,0.0332650,0.0140490
G1,1.4323676,0.0302412,0.0127719
Mjob,0.6403601,0.0135198,0.0057099
activities,0.3730655,0.0078764,0.0033265
