In [None]:
#import libraries

import numpy as np
import pandas as pd

import h2o
from h2o.automl import H2OAutoML

In [None]:
# Load the competition files: the training set, the test set and the sample
# submission that is later filled in with the predictions.

train, test, sample = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-apr-2021/train.csv',
        '../input/tabular-playground-series-apr-2021/test.csv',
        '../input/tabular-playground-series-apr-2021/sample_submission.csv',
    )
)

In [None]:
# Initialise the H2O instance used by every later cell.
# Memory is capped at 16 GB; nthreads=-1 leaves the thread count unrestricted.

h2o.init(
    nthreads=-1,
    max_mem_size='16g',
)

In [None]:
# Copy the pandas data into H2O frames, then declare the target and the features.

y = 'Survived'

train_h2o = h2o.H2OFrame(train.copy())
test_h2o = h2o.H2OFrame(test.copy())

# Cast the target to a factor so it is treated as a categorical label.
train_h2o[y] = train_h2o[y].asfactor()

# Features: every column from the third one onwards.
# NOTE(review): presumably the first two columns are the row id and the
# target -- confirm against train.csv, since this selection is positional.
X = train_h2o.columns[2:]

In [None]:
# Configure AutoML and train it on the H2O training frame.

automl_settings = dict(
    nfolds=5,
    sort_metric="auc",
    stopping_metric="auc",
    max_runtime_secs=120 * 60,  # 7200 seconds, i.e. a two-hour budget
    seed=0,
    exclude_algos=["DeepLearning"],
)
aml = H2OAutoML(**automl_settings)

aml.train(x=X, y=y, training_frame=train_h2o)

In [None]:
# Display the complete AutoML leaderboard (all rows, not just the default head).

lb = aml.leaderboard
n_leaderboard_rows = lb.nrows
lb.head(rows=n_leaderboard_rows)

In [None]:
# Score the test frame with the leading AutoML model and pull the 'p1'
# prediction column out of H2O via h2o.as_list.

leader_model = aml.leader
output = leader_model.predict(test_h2o)
p1_column = output['p1']
y_pred = h2o.as_list(p1_column)

In [None]:
# Preview the first rows of the prediction frame returned by the leader model.
output.head()

In [None]:
# Create the submission: threshold the predicted 'p1' values at 0.5 to obtain
# hard 0/1 labels for the 'Survived' column.
#
# The raw probabilities are not written to the column first: that assignment
# was immediately overwritten by the thresholded labels, so it had no effect.

# Flatten to 1-D so a plain vector (not an (n, 1) array) is assigned to the column.
positive_prob = y_pred.values.ravel()
sample['Survived'] = np.where(positive_prob >= 0.5, 1, 0)
sample.head()

In [None]:
# Write the submission to CSV without the index column.
# NOTE(review): the file name says "1 hour", but the AutoML run in this
# notebook is budgeted for 7200 seconds (two hours) -- confirm which is intended.

sample.to_csv(
    path_or_buf="h2o automl 1 hour.csv",
    index=False,
)