In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

from tqdm import tqdm
import h2o
from h2o.automl import H2OAutoML


import os
# Print the full path of every file found anywhere under the Kaggle input folder.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

In [None]:
# Read the competition train/test CSVs, then cast the `date_time` column of
# each frame to datetime64[ns] so it is no longer a plain string column.
df_train = pd.read_csv('/kaggle/input/tabular-playground-series-jul-2021/train.csv')
df_test = pd.read_csv('/kaggle/input/tabular-playground-series-jul-2021/test.csv')
for frame in (df_train, df_test):
    frame['date_time'] = frame['date_time'].astype('datetime64[ns]')

In [None]:
# Start (or connect to) the H2O backend used by every AutoML run below.
# nthreads=-1 asks H2O to use all available CPU cores (per the h2o.init docs).
h2o.init(nthreads=-1)

In [None]:
# Wrap the pandas frames as H2OFrames (train first, then test); these are the
# objects handed to H2O AutoML for training and prediction later on.
hf_train, hf_test = (h2o.H2OFrame(frame) for frame in (df_train, df_test))

## Predict Carbon monoxide

In [None]:
## run model for carbon monoxide
# Predictors: every column of the training frame except the three targets.
target_columns = ['target_carbon_monoxide', 'target_benzene', 'target_nitrogen_oxides']
features = [col for col in hf_train.columns if col not in target_columns]

target_carbon_monoxide = H2OAutoML(
    max_runtime_secs=360,      # overall time budget for this AutoML run, in seconds
    stopping_metric='RMSLE',   # metric used for early stopping
    sort_metric='RMSLE',       # metric used to rank the leaderboard
    # Fixed seed so reruns are as repeatable as possible. A time-budgeted run
    # can still train a different number of models from one run to the next.
    seed=42,
)

target_carbon_monoxide.train(x=features, y='target_carbon_monoxide', training_frame=hf_train)

In [None]:
## check leaderboard for carbon monoxide
# Displays the models from the carbon monoxide AutoML run; RMSLE was requested
# as the sort metric when that AutoML object was created.
target_carbon_monoxide.leaderboard

## Preict Benzene

In [None]:
# AutoML run for the benzene target, reusing the `features` predictor list
# built in the carbon monoxide cell.
target_benzene = H2OAutoML(
    max_runtime_secs=360,      # overall time budget for this AutoML run, in seconds
    stopping_metric='RMSLE',   # metric used for early stopping
    sort_metric='RMSLE',       # metric used to rank the leaderboard
    # Fixed seed so reruns are as repeatable as possible. A time-budgeted run
    # can still train a different number of models from one run to the next.
    seed=42,
)

target_benzene.train(x=features, y='target_benzene', training_frame=hf_train)

In [None]:
# check leaderboard for benzene
# Displays the models from the benzene AutoML run; RMSLE was requested as the
# sort metric when that AutoML object was created.
target_benzene.leaderboard

## Predict Nitrogen Oxides

In [None]:
# AutoML run for the nitrogen oxides target, reusing the `features` predictor
# list built in the carbon monoxide cell. This run gets twice the time budget
# of the other two targets (720 s vs 360 s).
target_nitrogen_oxides = H2OAutoML(
    max_runtime_secs=720,      # overall time budget for this AutoML run, in seconds
    stopping_metric='RMSLE',   # metric used for early stopping
    sort_metric='RMSLE',       # metric used to rank the leaderboard
    # Fixed seed so reruns are as repeatable as possible. A time-budgeted run
    # can still train a different number of models from one run to the next.
    seed=42,
)

target_nitrogen_oxides.train(x=features, y='target_nitrogen_oxides', training_frame=hf_train)

In [None]:
# check leaderboard for nitrogen oxides
# Displays the models from the nitrogen oxides AutoML run; RMSLE was requested
# as the sort metric when that AutoML object was created.
target_nitrogen_oxides.leaderboard

In [None]:
# Load the competition's sample submission file as the template that the
# prediction columns are written into further down, then display it.
submission = pd.read_csv('/kaggle/input/tabular-playground-series-jul-2021/sample_submission.csv')
submission

In [None]:
# Fill each target column with the predictions of the matching AutoML leader.
# The regression output's `predict` column is selected explicitly and assigned
# by position, so a row-count mismatch raises instead of silently yielding NaN
# through index alignment.
automl_by_target = {
    'target_carbon_monoxide': target_carbon_monoxide,
    'target_benzene': target_benzene,
    'target_nitrogen_oxides': target_nitrogen_oxides,
}
for target_name, automl in automl_by_target.items():
    predictions = automl.leader.predict(hf_test).as_data_frame()
    submission[target_name] = predictions['predict'].to_numpy()
submission




In [None]:
# Summary statistics of the filled-in submission, as a quick sanity check of
# the predicted value ranges before writing the file.
submission.describe()

In [None]:
# Write the submission to a CSV file in the working directory; index=False
# keeps the pandas row index out of the file.
submission.to_csv('h2o_base_submission.csv',index=False)