# Directory settings

In [None]:
# ====================================================
# Directory settings
# ====================================================
import os

# Directory that holds files produced by this notebook.
OUTPUT_DIR = './'
# exist_ok=True makes creation idempotent and removes the check-then-create
# race of testing os.path.exists() before calling os.makedirs().
os.makedirs(OUTPUT_DIR, exist_ok=True)

# CFG

In [None]:
# ====================================================
# CFG
# ====================================================
class CFG:
    """Notebook-wide configuration constants."""

    # Seed handed to seed_everything() and to H2OAutoML.
    seed = 2021
    # Passed to H2OAutoML as ``nfolds``.
    n_fold = 5
    # Passed to H2OAutoML as ``max_models``.
    max_model = 20

# Import Libraries

In [None]:
import random

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Utils

In [None]:
# ====================================================
# Utils
# ====================================================
def seed_everything(seed=42):
    """Seed the random number generators used by this notebook.

    Seeds Python's ``random`` module and NumPy's global generator with
    ``seed``, and stores ``str(seed)`` in the ``PYTHONHASHSEED``
    environment variable.

    Args:
        seed: Value applied to every generator (defaults to 42).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Both seeders take the seed as their single positional argument.
    for apply_seed in (random.seed, np.random.seed):
        apply_seed(seed)

# Apply the notebook-wide seed from CFG before any data is loaded.
seed_everything(seed=CFG.seed)

# Data Loading

In [None]:
# Read the train, test and sample-submission CSVs (in that order) into
# pandas DataFrames.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/tabular-playground-series-apr-2021/train.csv',
        '/kaggle/input/tabular-playground-series-apr-2021/test.csv',
        '/kaggle/input/tabular-playground-series-apr-2021/sample_submission.csv',
    )
)

# Basic EDA

In [None]:
# Preview the first rows of the train and test frames.
display(train.head())
display(test.head())

In [None]:
# Show describe() summaries of the train and test frames for comparison.
display(train.describe())
display(test.describe())

In [None]:
# Show the (rows, columns) shape of each frame.
display(train.shape)
display(test.shape)

# H2O

In [None]:
import h2o
from h2o.automl import H2OAutoML

In [None]:
# Initialise H2O with a 16G memory cap and 16 threads.
# NOTE(review): both values are hard-coded — confirm the host actually has
# this much memory and this many cores available.
h2o.init(max_mem_size='16G', nthreads=16)

In [None]:
# Build an H2OFrame from a copy of the pandas training data.
train_df = h2o.H2OFrame(train.copy())

In [None]:
# Name of the target column.
y = 'Survived'
# Convert the target to a factor (categorical) column.
train_df[y] = train_df[y].asfactor()

# Features are every column from the third one onwards.
# NOTE(review): this positional slice assumes the first two columns are the
# row identifier and the target — confirm against the CSV header.
x = train_df.columns[2:]

In [None]:
# Preview the H2O training frame after the target conversion.
display(train_df.head())

In [None]:
# Configure the AutoML run:
#   - max_models / seed / nfolds come from CFG,
#   - max_runtime_secs=10800 is a 3-hour time budget,
#   - the "DeepLearning" algorithm is excluded from the search.
# NOTE(review): with both max_models and max_runtime_secs set, the run
# presumably stops at whichever limit is hit first — confirm in the H2O docs
# for the installed version.
aml = H2OAutoML(
    max_models=CFG.max_model, 
    seed=CFG.seed, 
    max_runtime_secs=10800,
    nfolds = CFG.n_fold,
    exclude_algos = ["DeepLearning"]
)
# Train on the feature columns in x to predict y.
aml.train(x=x, y=y, training_frame=train_df)

In [None]:
# Fetch the AutoML leaderboard and show every row (head() is given the
# leaderboard's full row count).
lb = aml.leaderboard 
lb.head(rows = lb.nrows)

# Make a submission

In [None]:
# Convert a copy of the test data to an H2OFrame and predict with the
# trained AutoML object.
preds = aml.predict(h2o.H2OFrame(test.copy()))
# Pull the H2O prediction frame back into Python.
# NOTE(review): presumably a pandas DataFrame, given the column indexing
# below — confirm for the installed H2O version.
preds_df = h2o.as_list(preds)

# Copy the 'predict' column into the submission's 'Survived' column.
submission['Survived'] = preds_df['predict']
# Write under OUTPUT_DIR so the configured output location is honoured;
# with OUTPUT_DIR = './' this resolves to the same file as before.
submission.to_csv(os.path.join(OUTPUT_DIR, 'submission.csv'), index=False)
display(submission.head())