In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import h2o

In [None]:
h2o.init()

In [None]:
train = pd.read_csv('../input/mnist-in-csv/mnist_train.csv')
test = pd.read_csv('../input/mnist-in-csv/mnist_test.csv')

In [None]:
train.head()

In [None]:
test.head()

In [None]:
# Upload both pandas DataFrames to the H2O cluster as H2OFrames.
trainh = h2o.H2OFrame(train)
testh = h2o.H2OFrame(test)

# The MNIST `label` column holds digit classes, not quantities. Coming from
# pandas it is uploaded as a numeric column, which would make every H2O model
# below fit a regression. Convert it to a categorical (factor) column so the
# models are trained as 10-class classifiers and the "multinomial"
# deep-learning distribution used later in the notebook is applicable.
trainh['label'] = trainh['label'].asfactor()
testh['label'] = testh['label'].asfactor()

In [None]:
train.head()

In [None]:
test.head()

In [None]:
# Column names of the H2O training frame.
trainh.names

In [None]:
# Per-column types as seen by H2O.
trainh.types

In [None]:
# Summary description of the H2O training frame.
trainh.describe()

In [None]:
# Randomly partition the uploaded training frame into three pieces: 60% for
# fitting (train_h), 20% for hold-out evaluation (test_h) and the remaining
# 20% for validation (valid_h). Note that test_h is a slice of the training
# file; the separately loaded `testh` frame is not involved here.
# A fixed seed makes the partition reproducible across kernel restarts.
train_h, test_h, valid_h = trainh.split_frame(ratios=[0.6, 0.2], seed=1)

In [None]:
y = 'label'

In [None]:
x = trainh.names

In [None]:
x.remove(y)

In [None]:
import math
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.grid.grid_search import H2OGridSearch

In [None]:
gbm = H2OGradientBoostingEstimator()

In [None]:
gbm.train(x=x,y=y,training_frame=train_h)

In [None]:
print(gbm)

In [None]:
rf = h2o.estimators.H2ORandomForestEstimator()

In [None]:
rf.train(x=x,y=y,training_frame=train_h,validation_frame=valid_h)

In [None]:
print(rf)

In [None]:
# 4-fold cross-validated GBM. Because cross-validation supplies its own
# hold-out folds, the train and validation splits are stacked row-wise and
# used together as the training frame.
train_plus_valid = train_h.rbind(valid_h)
cv_gbm = H2OGradientBoostingEstimator(
    seed=0xDECAF,
    nfolds=4,
)
cv_gbm.train(training_frame=train_plus_valid, x=x, y=y)

In [None]:
# Hyper-parameter space for the first (exhaustive) GBM grid:
# 2 learn rates x 3 depths x 2 row-sample rates x 3 column-sample rates.
gbm_params1 = dict(
    learn_rate=[0.01, 0.1],
    max_depth=[3, 5, 9],
    sample_rate=[0.8, 1.0],
    col_sample_rate=[0.2, 0.5, 1.0],
)

In [None]:
gbm_grid1 = H2OGridSearch(model=H2OGradientBoostingEstimator,
                          grid_id='gbm_grid1',
                          hyper_params=gbm_params1)


In [None]:
gbm_grid1.train(x=x, y=y,
                training_frame=train_h,
                validation_frame=valid_h,
                ntrees=100,stopping_metric = "AUC",
                seed=1)

In [None]:
print(gbm_grid1)

In [None]:
# Wider hyper-parameter space for the second, randomly sampled GBM grid.
gbm_params2 = {
    # 0.01, 0.02, ..., 0.10
    'learn_rate': [step * 0.01 for step in range(1, 11)],
    # 2, 3, ..., 10
    'max_depth': [depth for depth in range(2, 11)],
    # 0.5, 0.6, ..., 1.0
    'sample_rate': [tenth * 0.1 for tenth in range(5, 11)],
    # 0.1, 0.2, ..., 1.0
    'col_sample_rate': [tenth * 0.1 for tenth in range(1, 11)],
}

# Random discrete search capped at 36 models, seeded for repeatability.
search_criteria = dict(strategy='RandomDiscrete', max_models=36, seed=1)

# Train and validate a random grid of GBMs
gbm_grid2 = H2OGridSearch(model=H2OGradientBoostingEstimator,
                          grid_id='gbm_grid2',
                          hyper_params=gbm_params2,
                          search_criteria=search_criteria)
gbm_grid2.train(x=x, y=y,
                training_frame=train_h,
                validation_frame=valid_h,
                ntrees=100,
                seed=1)

In [None]:
print(gbm_grid2)

In [None]:
# The previous training cell already ran this exact random search (same
# hyper-parameter space, search criteria, grid_id and seed). Submitting it a
# second time would only repeat the work, so reuse the trained grid instead:
# fetch it in its default ranking and keep the top-ranked model for inspection.
gbm_grid2_ranked = gbm_grid2.get_grid()
best_gbm2 = gbm_grid2_ranked.models[0]
best_gbm2

In [None]:
from h2o.automl import H2OAutoML

In [None]:
aml = H2OAutoML(max_models=25, seed=1)
aml.train(x=x, y=y, training_frame=train_h)

In [None]:
lb = aml.leaderboard
lb

In [None]:
# Score the hold-out split with the leader model of the AutoML run.
preds = aml.leader.predict(test_h)

In [None]:
# Display the leader's predictions.
preds

In [None]:
# Predict through the AutoML object itself; this overwrites `preds`.
# NOTE(review): presumably this delegates to the leader and so duplicates the
# prediction above -- confirm against the H2O AutoML documentation.
preds = aml.predict(test_h)

In [None]:
# Display the predictions again.
preds

In [None]:
# Metric the AutoML run reports as its sort metric.
aml.sort_metric

In [None]:


# Leaderboard again (already shown via `lb` after training).
aml.leaderboard



In [None]:
# Fixed: the first import read "rom ..." (SyntaxError), and the random forest
# class was referenced by its bare name without ever being imported.
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator

# Number of cross-validation folds shared by every base model.
nfolds = 5

# There are a few ways to assemble a list of models to stack together:
# 1. Train individual models and put them in a list
# 2. Train a grid of models
# 3. Train several grids of models
# Note: All base models must have the same cross-validation folds and
# the cross-validated predicted values must be kept.


# 1. Generate a 2-model ensemble (GBM + RF)

# Train and cross-validate a GBM. "Modulo" fold assignment plus the same
# nfolds on both base models gives them identical folds, and the
# cross-validated predictions are kept for the ensemble to learn from.
my_gbm = H2OGradientBoostingEstimator(
                                      ntrees=10,
                                      max_depth=3,
                                      min_rows=2,
                                      learn_rate=0.2,
                                      nfolds=nfolds,
                                      fold_assignment="Modulo",
                                      keep_cross_validation_predictions=True,
                                      seed=1)
my_gbm.train(x=x, y=y, training_frame=train_h)


# Train and cross-validate a RF with the same fold settings as the GBM.
my_rf = H2ORandomForestEstimator(ntrees=50,
                                 nfolds=nfolds,
                                 fold_assignment="Modulo",
                                 keep_cross_validation_predictions=True,
                                 seed=1)
my_rf.train(x=x, y=y, training_frame=train_h)


# Train a stacked ensemble using the GBM and RF above
ensemble = H2OStackedEnsembleEstimator(model_id="my_ensemble_binomiale",
                                       base_models=[my_gbm, my_rf])
ensemble.train(x=x, y=y, training_frame=train_h)

# Eval ensemble performance on the test data
perf_stack_test = ensemble.model_performance(test_h)



In [None]:
# Compare to base learner performance on the test set
perf_gbm_test = my_gbm.model_performance(test_h)
perf_rf_test = my_rf.model_performance(test_h)
baselearner_best_auc_test = max(perf_gbm_test.mse(), perf_rf_test.mse())
stack_auc_test = perf_stack_test.mse()
print("Best Base-learner Test MSE:  {0}".format(baselearner_best_auc_test))
print("Ensemble Test MSE:  {0}".format(stack_auc_test))

# Generate predictions on a test set (if neccessary)
pred = ensemble.predict(test_h)

In [None]:
pred

In [None]:
from h2o.estimators import deeplearning
m = h2o.estimators.deeplearning.H2ODeepLearningEstimator()
m.train(x, y, train_h)
p = m.predict(test_h)


In [None]:
p

In [None]:
r2 = m.r2()
mse = m.mse()
rmse = m.rmse()

In [None]:
mse

In [None]:
from h2o.estimators.deeplearning import H2ODeepLearningEstimator

In [None]:
# Tuned deep-learning configuration: four hidden layers of 128 units using the
# rectifier-with-dropout activation, 20% input dropout, sparse data handling,
# a small L1 penalty, and 10 training epochs.
# NOTE(review): the multinomial distribution presumably requires `label` to be
# a categorical column in the training frame -- confirm before running.
model = H2ODeepLearningEstimator(
    epochs=10,
    hidden=[128] * 4,
    activation="RectifierWithDropout",
    input_dropout_ratio=0.2,
    l1=1e-5,
    sparse=True,
    distribution="multinomial",
)

In [None]:


model.train(
x=x,
y=y,
training_frame=train_h,
validation_frame=test_h)



In [None]:
model

In [None]:
model.predict(test_h)