## Installing and importing

In [1]:
!pip install -U h2o

Collecting h2o
  Downloading https://files.pythonhosted.org/packages/b0/e7/b7057e4a6832f3bec0cb36fda4913bf84a6dc610c92a2d3543442f4154a1/h2o-3.30.1.1.tar.gz (129.3MB)
Building wheels for collected packages: h2o
  Building wheel for h2o (setup.py): started
  Building wheel for h2o (setup.py): finished with status 'done'
  Created wheel for h2o: filename=h2o-3.30.1.1-py2.py3-none-any.whl size=129358610 sha256=53db6e7112beadc56e51d8ea7c1d826ab3ffb742f11cee401bce1f40f88ba060
  Stored in directory: C:\Users\HP\AppData\Local\pip\Cache\wheels\33\ac\52\165c35d747abdb629c3c9fb7e087f360c662d8cb58824caed8
Successfully built h2o
Installing collected packages: h2o
  Found existing installation: h2o 3.30.0.7
    Uninstalling h2o-3.30.0.7:
      Successfully uninstalled h2o-3.30.0.7
Successfully installed h2o-3.30.1.1


In [2]:
import h2o
from h2o.automl import H2OAutoML

from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.deeplearning import H2ODeepLearningEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator



In [3]:
# Connect to an H2O cluster; as the log below shows, a local server is started
# when no instance is already running on localhost.
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.
Attempting to start a local H2O server...
; Java HotSpot(TM) 64-Bit Server VM (build 13.0.2+8, mixed mode, sharing)
  Starting server from C:\Users\HP\Anaconda3\lib\site-packages\h2o\backend\bin\h2o.jar
  Ice root: C:\Users\HP\AppData\Local\Temp\tmpcovq1rva
  JVM stdout: C:\Users\HP\AppData\Local\Temp\tmpcovq1rva\h2o_HP_started_from_python.out
  JVM stderr: C:\Users\HP\AppData\Local\Temp\tmpcovq1rva\h2o_HP_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,02 secs
H2O_cluster_timezone:,Europe/Lisbon
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.30.1.1
H2O_cluster_version_age:,"7 days, 8 hours and 51 minutes"
H2O_cluster_name:,H2O_from_python_HP_oz63sn
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.973 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

In [5]:
# Print NumPy floating-point values with 3 digits of precision.
np.set_printoptions(precision=3)

In [6]:
# Load the raw customer table. The relative path means the CSV is looked up in
# the notebook's current working directory.
df_read = pd.read_csv('Classification.csv')

## Quick Data Engineering, Cleaning and Encoding

In [7]:
# List the available columns before cleaning and encoding them.
df_read.columns

Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [8]:
# Recode the target labels as integers: 'No' -> 0, 'Yes' -> 1.
df_read['Churn'] = df_read['Churn'].replace({'No': 0, 'Yes': 1})

In [9]:
# TotalCharges is read as text; values that cannot be parsed as numbers become NaN.
# Converting BEFORE dropping rows avoids assigning a column on the frame returned
# by dropna(), which can raise pandas' SettingWithCopyWarning.
df_read['TotalCharges'] = pd.to_numeric(df_read['TotalCharges'], errors='coerce')
# Discard every row that has a missing value in any column (including the
# TotalCharges entries that could not be parsed).
df_read = df_read.dropna()

In [10]:
# The customer identifier carries no predictive information, so remove that column.
df_read = df_read.drop(columns=['customerID'])

In [11]:

# Turn each categorical field into a 0/1 indicator: 1 when the row holds the value
# listed here, 0 for anything else.
# Notes:
#   * OnlineSecurity, OnlineBackup, DeviceProtection and TechSupport flag the 'No'
#     answer as 1, whereas the other Yes/No fields flag 'Yes'.
#   * PaymentMethod, InternetService and Contract are reduced to a single
#     one-vs-rest indicator for the listed category.
indicator_value = {
    'gender': 'Male',
    'DeviceProtection': 'No',
    'Partner': 'Yes',
    'SeniorCitizen': 1,
    'Dependents': 'Yes',
    'PhoneService': 'Yes',
    'PaperlessBilling': 'Yes',
    'StreamingMovies': 'Yes',
    'StreamingTV': 'Yes',
    'PaymentMethod': 'Electronic check',
    'MultipleLines': 'Yes',
    'InternetService': 'Fiber optic',
    'OnlineSecurity': 'No',
    'Contract': 'Month-to-month',
    'TechSupport': 'No',
    'OnlineBackup': 'No',
}

for column_name, flagged_value in indicator_value.items():
    is_flagged = df_read[column_name].isin([flagged_value])
    df_read[column_name] = np.where(is_flagged, 1, 0)

## Creating H2O Dataframe

In [12]:
# Upload the cleaned pandas DataFrame to the H2O cluster as an H2OFrame,
# then preview its first rows.
Churn = h2o.H2OFrame(df_read)
Churn.head()

Parse progress: |█████████████████████████████████████████████████████████| 100%


gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,29.85,29.85,0
1,0,0,0,34,1,0,0,0,1,0,1,0,0,0,0,0,56.95,1889.5,0
1,0,0,0,2,1,0,0,0,0,1,1,0,0,1,1,0,53.85,108.15,1
1,0,0,0,45,0,0,0,0,1,0,0,0,0,0,0,0,42.3,1840.75,0
0,0,0,0,2,1,0,1,1,1,1,1,0,0,1,1,1,70.7,151.65,1
0,0,0,0,8,1,1,1,1,1,0,1,1,1,1,1,1,99.65,820.5,1
1,0,0,1,22,1,1,1,1,0,1,1,1,0,1,1,0,89.1,1949.4,0
0,0,0,0,10,0,0,0,0,1,1,1,0,0,1,0,0,29.75,301.9,0
0,0,1,0,28,1,1,1,1,1,0,0,1,1,1,1,1,104.8,3046.05,1
1,0,0,1,62,1,0,0,0,0,1,1,0,0,0,0,0,56.15,3487.95,0




## As this is a classification problem, we need to convert the target column to a categorical (factor) type

In [13]:
# Convert the 0/1 target into a categorical (factor) column; the models below
# then report binomial classification metrics for it.
Churn['Churn']=Churn['Churn'].asfactor()

## Splitting dataset into train and test data

In [14]:
# Hold out roughly 10% of the rows for testing; the fixed seed makes the split repeatable.
# The resulting sizes are approximate (6315 train / 717 test rows in the outputs below).
Churn_train, Churn_test = Churn.split_frame(ratios=[0.9],seed=20111974)

## Defining the predictors by removing the target column

In [15]:
# Predictor names ("preditoras" is Portuguese for predictors): every column of the
# training frame except the target.
target = "Churn"
all_columns = list(Churn_train.columns)
target_position = all_columns.index(target)
preditoras = all_columns[:target_position] + all_columns[target_position + 1:]
preditoras

['gender',
 'SeniorCitizen',
 'Partner',
 'Dependents',
 'tenure',
 'PhoneService',
 'MultipleLines',
 'InternetService',
 'OnlineSecurity',
 'OnlineBackup',
 'DeviceProtection',
 'TechSupport',
 'StreamingTV',
 'StreamingMovies',
 'Contract',
 'PaperlessBilling',
 'PaymentMethod',
 'MonthlyCharges',
 'TotalCharges']

# GBM MODEL 

## Training model

In [16]:
# Fit a gradient boosting classifier with H2O's default hyper-parameters
# on the training split.
# NOTE(review): no seed is passed here, unlike the split and AutoML cells —
# confirm whether run-to-run reproducibility matters for this model.
gbm=H2OGradientBoostingEstimator()
gbm.train(x=preditoras, y=target, training_frame=Churn_train)

gbm Model Build progress: |███████████████████████████████████████████████| 100%


In [17]:
# Show the model summary, training metrics, scoring history and variable importances.
print(gbm)

Model Details
H2OGradientBoostingEstimator :  Gradient Boosting Machine
Model Key:  GBM_model_python_1597703066763_1


Model Summary: 


Unnamed: 0,Unnamed: 1,number_of_trees,number_of_internal_trees,model_size_in_bytes,min_depth,max_depth,mean_depth,min_leaves,max_leaves,mean_leaves
0,,50.0,50.0,20504.0,5.0,5.0,5.0,20.0,32.0,27.94




ModelMetricsBinomial: gbm
** Reported on train data. **

MSE: 0.11324556891472846
RMSE: 0.3365197897817132
LogLoss: 0.35723432680729805
Mean Per-Class Error: 0.18194187099297343
AUC: 0.8976742759194077
AUCPR: 0.7682224801067109
Gini: 0.7953485518388155

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.34811801146886223: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,3889.0,728.0,0.1577,(728.0/4617.0)
1,1,380.0,1318.0,0.2238,(380.0/1698.0)
2,Total,4269.0,2046.0,0.1755,(1108.0/6315.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.348118,0.70406,212.0
1,max f2,0.224648,0.797421,266.0
2,max f0point5,0.558898,0.719232,128.0
3,max accuracy,0.481987,0.838955,157.0
4,max precision,0.937335,1.0,0.0
5,max recall,0.024971,1.0,388.0
6,max specificity,0.937335,1.0,0.0
7,max absolute_mcc,0.348118,0.585994,212.0
8,max min_per_class_accuracy,0.312141,0.812132,226.0
9,max mean_per_class_accuracy,0.273268,0.818058,245.0



Gains/Lift Table: Avg response rate: 26,89 %, avg score: 26,91 %


Unnamed: 0,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
0,1,0.010293,0.873691,3.661865,3.661865,0.984615,0.891523,0.984615,0.891523,0.037691,0.037691,266.186464,266.186464,0.037475
1,2,0.020111,0.851995,3.599111,3.631229,0.967742,0.862589,0.976378,0.877398,0.035336,0.073027,259.911091,263.122896,0.072377
2,3,0.030087,0.833242,3.364883,3.542914,0.904762,0.841576,0.952632,0.86552,0.033569,0.106596,236.488306,254.291426,0.104647
3,4,0.040063,0.81444,3.423916,3.513282,0.920635,0.824798,0.944664,0.85538,0.034158,0.140754,242.391609,251.328231,0.137722
4,5,0.05004,0.783608,3.128751,3.436619,0.84127,0.800215,0.924051,0.844382,0.031213,0.171967,212.875091,243.66194,0.166769
5,6,0.100079,0.677187,3.048234,3.242427,0.81962,0.727161,0.871835,0.785772,0.152532,0.324499,204.823433,224.242687,0.306956
6,7,0.150119,0.598226,2.612772,3.032542,0.702532,0.636481,0.815401,0.736008,0.130742,0.455241,161.277229,203.254201,0.417338
7,8,0.2,0.535618,2.278675,2.844523,0.612698,0.568922,0.764846,0.694336,0.113663,0.568905,127.867519,184.452297,0.504577
8,9,0.300079,0.375875,1.671233,2.45322,0.449367,0.454393,0.659631,0.614313,0.167256,0.73616,67.123272,145.321984,0.596459
9,10,0.4,0.260044,1.261305,2.155477,0.339144,0.316536,0.579572,0.539928,0.126031,0.862191,26.13049,115.547703,0.632171




Scoring History: 


Unnamed: 0,Unnamed: 1,timestamp,duration,number_of_trees,training_rmse,training_logloss,training_auc,training_pr_auc,training_lift,training_classification_error
0,,2020-08-17 23:24:34,0.077 sec,0.0,0.443379,0.582145,0.5,0.268884,1.0,0.731116
1,,2020-08-17 23:24:35,0.346 sec,1.0,0.429408,0.551762,0.85074,0.666712,3.223204,0.226762
2,,2020-08-17 23:24:35,0.407 sec,2.0,0.417862,0.527899,0.854763,0.674719,3.223204,0.219319
3,,2020-08-17 23:24:35,0.449 sec,3.0,0.408263,0.508564,0.856451,0.676993,3.223204,0.202692
4,,2020-08-17 23:24:35,0.491 sec,4.0,0.400017,0.492062,0.860774,0.690543,3.444723,0.21441
5,,2020-08-17 23:24:35,0.532 sec,5.0,0.393224,0.478499,0.861098,0.691137,3.444723,0.21441
6,,2020-08-17 23:24:35,0.582 sec,6.0,0.387453,0.466933,0.862467,0.694699,3.444723,0.215202
7,,2020-08-17 23:24:35,0.634 sec,7.0,0.382441,0.456747,0.864218,0.698419,3.469059,0.214727
8,,2020-08-17 23:24:35,0.696 sec,8.0,0.378163,0.447928,0.865549,0.70046,3.412353,0.220586
9,,2020-08-17 23:24:35,0.742 sec,9.0,0.374457,0.440173,0.867138,0.702997,3.412353,0.205067



See the whole table with table.as_data_frame()

Variable Importances: 


Unnamed: 0,variable,relative_importance,scaled_importance,percentage
0,Contract,1070.82605,1.0,0.390051
1,tenure,381.158325,0.355948,0.138838
2,TotalCharges,271.245178,0.253305,0.098802
3,MonthlyCharges,271.233459,0.253294,0.098798
4,InternetService,201.522507,0.188194,0.073405
5,OnlineSecurity,140.838989,0.131524,0.051301
6,TechSupport,99.220512,0.092658,0.036141
7,PaymentMethod,65.103767,0.060798,0.023714
8,PaperlessBilling,53.793587,0.050236,0.019594
9,OnlineBackup,28.066605,0.02621,0.010223





In [18]:
# Evaluate the GBM on the held-out test split; compare with the training
# metrics printed above to gauge over-fitting.
perf=gbm.model_performance(Churn_test)
print(perf)


ModelMetricsBinomial: gbm
** Reported on test data. **

MSE: 0.13419722654244862
RMSE: 0.36632939623028976
LogLoss: 0.41281323190516217
Mean Per-Class Error: 0.23170104749052123
AUC: 0.832240858556648
AUCPR: 0.6046400770366149
Gini: 0.664481717113296

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.21390393936092603: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,376.0,170.0,0.3114,(170.0/546.0)
1,1,26.0,145.0,0.152,(26.0/171.0)
2,Total,402.0,315.0,0.2734,(196.0/717.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.213904,0.596708,229.0
1,max f2,0.179685,0.73913,250.0
2,max f0point5,0.552521,0.60698,95.0
3,max accuracy,0.552521,0.814505,95.0
4,max precision,0.884407,1.0,0.0
5,max recall,0.015239,1.0,394.0
6,max specificity,0.884407,1.0,0.0
7,max absolute_mcc,0.213904,0.46076,229.0
8,max min_per_class_accuracy,0.285558,0.747253,196.0
9,max mean_per_class_accuracy,0.213904,0.768299,229.0



Gains/Lift Table: Avg response rate: 23,85 %, avg score: 26,08 %


Unnamed: 0,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
0,1,0.011158,0.844831,3.66886,3.66886,0.875,0.864474,0.875,0.864474,0.040936,0.040936,266.885965,266.885965,0.039104
1,2,0.020921,0.813809,4.192982,3.91345,1.0,0.825135,0.933333,0.846116,0.040936,0.081871,319.298246,291.345029,0.08004
2,3,0.030683,0.784158,3.593985,3.811802,0.857143,0.799366,0.909091,0.831241,0.035088,0.116959,259.398496,281.180223,0.113296
3,4,0.040446,0.762799,2.39599,3.470054,0.571429,0.772404,0.827586,0.817039,0.023392,0.140351,139.598997,247.005445,0.131193
4,5,0.050209,0.74612,1.796992,3.144737,0.428571,0.755416,0.75,0.805057,0.017544,0.157895,79.699248,214.473684,0.141411
5,6,0.100418,0.633335,2.562378,2.853558,0.611111,0.686018,0.680556,0.745537,0.128655,0.28655,156.237817,185.35575,0.244425
6,7,0.150628,0.58502,2.329435,2.67885,0.555556,0.606925,0.638889,0.699333,0.116959,0.403509,132.94347,167.88499,0.33208
7,8,0.200837,0.509906,1.980019,2.504142,0.472222,0.546608,0.597222,0.661152,0.099415,0.502924,98.001949,150.41423,0.396697
8,9,0.299861,0.372156,1.594514,2.203754,0.380282,0.431417,0.525581,0.585286,0.157895,0.660819,59.451446,120.375357,0.474006
9,10,0.400279,0.256515,1.164717,1.943089,0.277778,0.308235,0.463415,0.515782,0.116959,0.777778,16.471735,94.308943,0.495726






# H2O AUTO MACHINE LEARNING

In [19]:
# Run H2O AutoML on the training split, bounded by a model count and a time budget.
# NOTE(review): this seed (2011974) differs from the seed used for split_frame
# (20111974) — confirm whether the missing digit is a typo.
h2O_Churn=H2OAutoML(max_models=10, max_runtime_secs=300,seed=2011974)
h2O_Churn.train(x=preditoras,y=target,training_frame=Churn_train)

# The search is limited to 10 models (max_models=10) and 300 seconds (max_runtime_secs=300).

AutoML progress: |
23:24:38.681: AutoML: XGBoost is not available; skipping it.

████████████████████████████████████████████████████████| 100%


In [20]:
# Retrieve the AutoML leaderboard and show its first rows.
Leader_Border=h2O_Churn.leaderboard
Leader_Border.head()
# Here we can assess the models that AutoML created. The Stacked Ensemble has the highest
# AUC, higher than the AUC the earlier GBM model achieved on the test set.
# NOTE(review): the leaderboard AUC matches the cross-validation AUC reported for the leader
# further down, so it is not measured on the same data as the GBM test metric.

model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse
StackedEnsemble_AllModels_AutoML_20200817_232438,0.84828,0.422639,0.666571,0.236078,0.368513,0.135801
StackedEnsemble_BestOfFamily_AutoML_20200817_232438,0.84827,0.422571,0.666312,0.237757,0.36854,0.135821
GBM_5_AutoML_20200817_232438,0.84606,0.417146,0.667514,0.23353,0.367847,0.135311
GLM_1_AutoML_20200817_232438,0.845062,0.419572,0.660366,0.230976,0.368683,0.135927
GBM_1_AutoML_20200817_232438,0.842513,0.422079,0.658136,0.242468,0.370083,0.136961
GBM_2_AutoML_20200817_232438,0.839972,0.425484,0.655241,0.242483,0.371693,0.138156
GBM_3_AutoML_20200817_232438,0.835864,0.43233,0.647215,0.245618,0.37442,0.14019
GBM_grid__1_AutoML_20200817_232438_model_1,0.835342,0.432839,0.646556,0.245624,0.374882,0.140536
DeepLearning_1_AutoML_20200817_232438,0.834227,0.432619,0.639629,0.242038,0.374198,0.140024
GBM_4_AutoML_20200817_232438,0.833592,0.438059,0.641318,0.251998,0.377536,0.142534




In [21]:
# Score the AutoML leader on the held-out test split.
h2O_Churn.leader.model_performance(Churn_test)


ModelMetricsBinomialGLM: stackedensemble
** Reported on test data. **

MSE: 0.13229633434902233
RMSE: 0.3637256305912773
LogLoss: 0.41292142664244796
Null degrees of freedom: 716
Residual degrees of freedom: 711
Null deviance: 791.2044330334966
Residual deviance: 592.1293258052702
AIC: 604.1293258052702
AUC: 0.837719298245614
AUCPR: 0.6084403988175298
Gini: 0.675438596491228

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.2579490511824605: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,425.0,121.0,0.2216,(121.0/546.0)
1,1,43.0,128.0,0.2515,(43.0/171.0)
2,Total,468.0,249.0,0.2287,(164.0/717.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.257949,0.609524,204.0
1,max f2,0.156017,0.735151,273.0
2,max f0point5,0.521501,0.619048,113.0
3,max accuracy,0.525478,0.818689,111.0
4,max precision,0.836287,0.9,8.0
5,max recall,0.059048,1.0,387.0
6,max specificity,0.873132,0.998168,0.0
7,max absolute_mcc,0.452627,0.477308,130.0
8,max min_per_class_accuracy,0.235787,0.75641,217.0
9,max mean_per_class_accuracy,0.167122,0.767913,264.0



Gains/Lift Table: Avg response rate: 23,85 %, avg score: 26,12 %


Unnamed: 0,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
0,1,0.011158,0.836987,3.66886,3.66886,0.875,0.853013,0.875,0.853013,0.040936,0.040936,266.885965,266.885965,0.039104
1,2,0.020921,0.824897,3.593985,3.633918,0.857143,0.831088,0.866667,0.842781,0.035088,0.076023,259.398496,263.391813,0.07236
2,3,0.030683,0.813556,3.593985,3.621212,0.857143,0.817628,0.863636,0.834778,0.035088,0.111111,259.398496,262.121212,0.105617
3,4,0.040446,0.805721,2.39599,3.325469,0.571429,0.808849,0.793103,0.82852,0.023392,0.134503,139.598997,232.546884,0.123514
4,5,0.050209,0.776776,3.593985,3.37768,0.857143,0.796511,0.805556,0.822296,0.035088,0.169591,259.398496,237.768031,0.15677
5,6,0.100418,0.690752,2.67885,3.028265,0.638889,0.726144,0.722222,0.77422,0.134503,0.304094,167.88499,202.826511,0.267464
6,7,0.150628,0.601014,2.329435,2.795322,0.555556,0.645621,0.666667,0.731353,0.116959,0.421053,132.94347,179.532164,0.355119
7,8,0.200837,0.505407,2.329435,2.67885,0.555556,0.559664,0.638889,0.688431,0.116959,0.538012,132.94347,167.88499,0.442774
8,9,0.299861,0.319259,1.299234,2.223256,0.309859,0.40641,0.530233,0.595298,0.128655,0.666667,29.9234,122.325581,0.481685
9,10,0.400279,0.1987,0.99001,1.91387,0.236111,0.258501,0.456446,0.510806,0.099415,0.766082,-0.999025,91.387004,0.480368







# H2O DEEP LEARNING

In [22]:
# Train a feed-forward neural network with two hidden layers of 50 units each
# for 30 epochs, using the adaptive learning rate.
# NOTE(review): no seed is set for this model — confirm whether reproducibility is needed.
h2o_Churn_DL=H2ODeepLearningEstimator(hidden=[50,50],epochs=30,adaptive_rate=True)
h2o_Churn_DL.train(x=preditoras, y=target,training_frame=Churn_train)

deeplearning Model Build progress: |██████████████████████████████████████| 100%


In [23]:
# Score the deep learning model on the held-out test split.
h2o_Churn_DL.model_performance(Churn_test)


ModelMetricsBinomial: deeplearning
** Reported on test data. **

MSE: 0.16344852955546937
RMSE: 0.40428768167663653
LogLoss: 0.5015152241613045
Mean Per-Class Error: 0.2772154745838956
AUC: 0.7789184499710815
AUCPR: 0.5110336631860659
Gini: 0.557836899942163

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.28122167926702524: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,371.0,175.0,0.3205,(175.0/546.0)
1,1,40.0,131.0,0.2339,(40.0/171.0)
2,Total,411.0,306.0,0.2999,(215.0/717.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.281222,0.549266,226.0
1,max f2,0.038907,0.690236,348.0
2,max f0point5,0.667897,0.508607,90.0
3,max accuracy,0.78767,0.783821,42.0
4,max precision,0.960756,1.0,0.0
5,max recall,0.001263,1.0,398.0
6,max specificity,0.960756,1.0,0.0
7,max absolute_mcc,0.281222,0.383909,226.0
8,max min_per_class_accuracy,0.329571,0.716117,203.0
9,max mean_per_class_accuracy,0.281222,0.722785,226.0



Gains/Lift Table: Avg response rate: 23,85 %, avg score: 28,51 %


Unnamed: 0,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
0,1,0.011158,0.920419,3.66886,3.66886,0.875,0.933608,0.875,0.933608,0.040936,0.040936,266.885965,266.885965,0.039104
1,2,0.020921,0.896267,1.796992,2.795322,0.428571,0.9059,0.666667,0.920678,0.017544,0.05848,79.699248,179.532164,0.049322
2,3,0.030683,0.857488,2.994987,2.858852,0.714286,0.875638,0.681818,0.906347,0.02924,0.087719,199.498747,185.885167,0.074899
3,4,0.040446,0.83076,3.593985,3.036298,0.857143,0.845642,0.724138,0.891694,0.035088,0.122807,259.398496,203.629764,0.108155
4,5,0.050209,0.813312,1.796992,2.795322,0.428571,0.823917,0.666667,0.878515,0.017544,0.140351,79.699248,179.532164,0.118373
5,6,0.100418,0.749343,1.747076,2.271199,0.416667,0.783137,0.541667,0.830826,0.087719,0.22807,74.707602,127.119883,0.167631
6,7,0.150628,0.687711,2.096491,2.212963,0.5,0.7173,0.527778,0.792984,0.105263,0.333333,109.649123,121.296296,0.239927
7,8,0.200837,0.572871,2.096491,2.183845,0.5,0.631455,0.520833,0.752602,0.105263,0.438596,109.649123,118.384503,0.312223
8,9,0.299861,0.426587,1.476402,1.950224,0.352113,0.499467,0.465116,0.669009,0.146199,0.584795,47.640227,95.02244,0.374173
9,10,0.400279,0.31161,1.455897,1.826212,0.347222,0.371503,0.43554,0.594373,0.146199,0.730994,45.589669,82.621187,0.434291







In [24]:
# NOTE(review): called without arguments, mse() appears to return the training MSE —
# the value shown below (~0.116) differs from the test-set MSE reported by
# model_performance(Churn_test) above (~0.163). Confirm which one is intended.
h2o_Churn_DL.mse()

0.11634733529650682

## The champion model was the Stacked Ensemble, so let's see its predictions

In [25]:
# Predict on the test split with the AutoML leader. The result holds the predicted
# class plus the class probabilities p0 and p1.
Y_Prediction=h2O_Churn.leader.predict(Churn_test)
Y_Prediction

stackedensemble prediction progress: |████████████████████████████████████| 100%


predict,p0,p1
1,0.596173,0.403827
0,0.937988,0.0620121
0,0.689467,0.310533
0,0.897513,0.102487
1,0.405414,0.594586
1,0.56906,0.43094
0,0.887162,0.112838
0,0.879779,0.120221
0,0.928138,0.0718619
1,0.329644,0.670356




In [28]:
# Keep the model id of the AutoML leader ("campeao" is Portuguese for champion).
ID_campeao=h2O_Churn.leader.model_id
ID_campeao

'StackedEnsemble_AllModels_AutoML_20200817_232438'

In [29]:
# Fetch the leader model from the H2O cluster by its id and display its details,
# including the training and cross-validation metrics.
Model_Parameter=h2o.get_model(ID_campeao)
Model_Parameter

Model Details
H2OStackedEnsembleEstimator :  Stacked Ensemble
Model Key:  StackedEnsemble_AllModels_AutoML_20200817_232438

No model summary for this model

ModelMetricsBinomialGLM: stackedensemble
** Reported on train data. **

MSE: 0.10895491173018024
RMSE: 0.33008318910568624
LogLoss: 0.35047365178227824
Null degrees of freedom: 6314
Residual degrees of freedom: 6309
Null deviance: 7352.495006004923
Residual deviance: 4426.482222010173
AIC: 4438.482222010173
AUC: 0.9110182755234725
AUCPR: 0.7972253631083965
Gini: 0.822036551046945

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.2662591032886267: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,3804.0,813.0,0.1761,(813.0/4617.0)
1,1,307.0,1391.0,0.1808,(307.0/1698.0)
2,Total,4111.0,2204.0,0.1774,(1120.0/6315.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.266259,0.712968,250.0
1,max f2,0.134971,0.814874,319.0
2,max f0point5,0.582204,0.733333,123.0
3,max accuracy,0.460657,0.845131,170.0
4,max precision,0.904449,1.0,0.0
5,max recall,0.080406,1.0,363.0
6,max specificity,0.904449,1.0,0.0
7,max absolute_mcc,0.266259,0.598212,250.0
8,max min_per_class_accuracy,0.264408,0.820377,251.0
9,max mean_per_class_accuracy,0.192772,0.823739,286.0



Gains/Lift Table: Avg response rate: 26,89 %, avg score: 26,92 %


Unnamed: 0,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
0,1,0.010135,0.864207,3.719081,3.719081,1.0,0.879371,1.0,0.879371,0.037691,0.037691,271.908127,271.908127,0.037691
1,2,0.020111,0.846889,3.660048,3.689797,0.984127,0.855825,0.992126,0.867691,0.036514,0.074205,266.004824,268.979717,0.073988
2,3,0.030087,0.833666,3.660048,3.679933,0.984127,0.840015,0.989474,0.858514,0.036514,0.110718,266.004824,267.993305,0.110285
3,4,0.040063,0.824127,3.660048,3.674981,0.984127,0.828707,0.988142,0.851092,0.036514,0.147232,266.004824,267.498149,0.146582
4,5,0.05004,0.811471,3.364883,3.613158,0.904762,0.817454,0.971519,0.844385,0.033569,0.180801,236.488306,261.315807,0.178852
5,6,0.100079,0.718798,3.142388,3.377773,0.844937,0.764425,0.908228,0.804405,0.157244,0.338045,214.238829,237.777318,0.325482
6,7,0.150119,0.62787,2.589234,3.114927,0.696203,0.674201,0.837553,0.761004,0.129564,0.467609,158.92338,211.492672,0.434254
7,8,0.2,0.531385,2.255062,2.900471,0.606349,0.5818,0.779889,0.716309,0.112485,0.580094,125.506198,190.047114,0.519882
8,9,0.300079,0.339456,1.694771,2.498359,0.455696,0.432018,0.671768,0.621496,0.169611,0.749706,69.477121,149.835908,0.614986
9,10,0.4,0.207601,1.231835,2.181979,0.33122,0.26782,0.586698,0.533147,0.123086,0.872792,23.183516,118.19788,0.646671




ModelMetricsBinomialGLM: stackedensemble
** Reported on cross-validation data. **

MSE: 0.13580146905250012
RMSE: 0.36851250867847096
LogLoss: 0.42263903311982504
Null degrees of freedom: 6314
Residual degrees of freedom: 6309
Null deviance: 7356.739768857438
Residual deviance: 5337.93098830339
AIC: 5349.93098830339
AUC: 0.8482796971197497
AUCPR: 0.6665711546853638
Gini: 0.6965593942394994

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.30500930156720935: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,3783.0,834.0,0.1806,(834.0/4617.0)
1,1,495.0,1203.0,0.2915,(495.0/1698.0)
2,Total,4278.0,2037.0,0.2105,(1329.0/6315.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.305009,0.644177,234.0
1,max f2,0.116105,0.750873,337.0
2,max f0point5,0.521812,0.641423,147.0
3,max accuracy,0.521812,0.805384,147.0
4,max precision,0.906656,1.0,0.0
5,max recall,0.054119,1.0,399.0
6,max specificity,0.906656,1.0,0.0
7,max absolute_mcc,0.368613,0.501301,209.0
8,max min_per_class_accuracy,0.233425,0.766784,266.0
9,max mean_per_class_accuracy,0.200804,0.770429,283.0



Gains/Lift Table: Avg response rate: 26,89 %, avg score: 26,86 %


Unnamed: 0,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
0,1,0.010135,0.863577,3.312307,3.312307,0.890625,0.87716,0.890625,0.87716,0.033569,0.033569,231.230676,231.230676,0.032053
1,2,0.020111,0.844495,2.951652,3.133399,0.793651,0.852943,0.84252,0.865147,0.029446,0.063015,195.16518,213.339918,0.058683
2,3,0.030087,0.832989,3.010685,3.09271,0.809524,0.837812,0.831579,0.856083,0.030035,0.093051,201.068484,209.270969,0.08612
3,4,0.040063,0.819571,3.364883,3.160484,0.904762,0.826653,0.849802,0.848755,0.033569,0.12662,236.488306,216.048408,0.118389
4,5,0.05004,0.802712,2.833586,3.095311,0.761905,0.810159,0.832278,0.84106,0.028269,0.154888,183.358573,209.531131,0.143409
5,6,0.100079,0.716085,2.624542,2.859926,0.705696,0.761348,0.768987,0.801204,0.131331,0.286219,162.454153,185.992642,0.254597
6,7,0.150119,0.619933,2.212618,2.644157,0.594937,0.667902,0.71097,0.75677,0.110718,0.396938,121.261797,164.415694,0.337592
7,8,0.2,0.523716,2.266869,2.550059,0.609524,0.572644,0.685669,0.710848,0.113074,0.510012,126.686858,155.005889,0.424025
8,9,0.300079,0.338539,1.671233,2.256962,0.449367,0.431045,0.60686,0.617531,0.167256,0.677267,67.123272,125.696225,0.515907
9,10,0.4,0.209118,1.14932,1.980271,0.309033,0.267788,0.532462,0.530165,0.114841,0.792108,14.931989,98.027091,0.536315





