In [None]:
import numpy as np 
import pandas as pd 

import h2o
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator

# Initialise H2O before any frame is imported or any estimator is trained;
# every later cell goes through the `h2o` module set up here.
h2o.init()

In [None]:
# Training set produced by the feature-engineering step, loaded as an H2OFrame.
train = h2o.import_file(path="../input/elo-feature-engineering/train.csv")

In [None]:
# Hold-out set to score for the submission, loaded as an H2OFrame.
test = h2o.import_file(path="../input/elo-feature-engineering/test.csv")

In [None]:
# Columns that are cast to H2O factors in both frames so the models treat them
# as categorical levels rather than numeric values.
categorical_columns = ["feature_1", "feature_2", "feature_3", "year", "weekofyear", "month"]
for frame in (train, test):
    for column in categorical_columns:
        frame[column] = frame[column].asfactor()

# Model inputs are every training column except the response itself and the
# two columns excluded by name below.
response = "target"
predictors = train.columns
for excluded in (response, "card_id", "first_active_month"):
    # list.remove raises ValueError if a column is unexpectedly missing.
    predictors.remove(excluded)

In [None]:
# Number of cross-validation folds; the same value is passed to the GLM,
# random forest and GBM estimators below.
nfolds = 6

In [None]:
# GLM base learner. The cross-validation settings (nfolds, Modulo fold
# assignment, retained CV predictions) mirror the other base models —
# presumably so the stacked ensemble can reuse the hold-out predictions.
glm_params = dict(
    nfolds=nfolds,
    fold_assignment="Modulo",
    keep_cross_validation_predictions=True,
    seed=42,
    # Regularisation settings.
    alpha=0.5,
    lambda_=0.5,
)
glm = H2OGeneralizedLinearEstimator(**glm_params)
glm.train(x=predictors, y=response, training_frame=train)
glm

In [None]:
# Random-forest base learner with default tree settings; only the
# cross-validation configuration and the seed are set explicitly.
rf_params = dict(
    nfolds=nfolds,
    fold_assignment="Modulo",
    keep_cross_validation_predictions=True,
    seed=42,
)
rf = H2ORandomForestEstimator(**rf_params)
rf.train(x=predictors, y=response, training_frame=train)
rf

In [None]:
# Gradient-boosting base learner: a large tree budget with a small, annealed
# learning rate, bounded by RMSE-based early stopping.
gbm_params = dict(
    # Cross-validation settings shared with the other base models.
    nfolds=nfolds,
    fold_assignment="Modulo",
    keep_cross_validation_predictions=True,
    seed=42,
    # Tree shape and learning schedule.
    max_depth=6,
    ntrees=10000,
    learn_rate=0.01,
    learn_rate_annealing=0.999,
    # Early stopping, with the model scored every 10 trees.
    stopping_rounds=5,
    stopping_tolerance=0.001,
    stopping_metric="rmse",
    score_tree_interval=10,
    # Row and column subsampling.
    sample_rate=0.8,
    col_sample_rate=0.8,
)
gbm = H2OGradientBoostingEstimator(**gbm_params)
gbm.train(x=predictors, y=response, training_frame=train)
gbm

In [None]:
# Stack the three cross-validated base learners into a single ensemble model.
base_learners = [glm, rf, gbm]
ensemble = H2OStackedEnsembleEstimator(base_models=base_learners)
ensemble.train(x=predictors, y=response, training_frame=train)
ensemble

In [None]:
# Score the hold-out frame with the stacked ensemble.
predictions = ensemble.predict(test_data=test)

In [None]:
# Build the submission file: one row per test card with its predicted target.
# Pull each H2O column into pandas explicitly instead of assigning a whole
# DataFrame to a single column, which only works while the prediction frame
# happens to contain exactly one column.
card_ids = test["card_id"].as_data_frame()["card_id"]
predicted_target = predictions.as_data_frame()["predict"]

# A length mismatch would otherwise be index-aligned silently and leave NaN
# targets in the submission.
assert len(card_ids) == len(predicted_target), "prediction count does not match test rows"

sub_df = pd.DataFrame({"card_id": card_ids, "target": predicted_target})
sub_df.to_csv("submit.csv", index=False)