# Packages

In [None]:
!pip install lightautoml

In [None]:
# Data handling
import pandas as pd 
import numpy as np

# AutoML
from lightautoml.automl.base import AutoML
from lightautoml.ml_algo.boost_lgbm import BoostLGBM
from lightautoml.ml_algo.tuning.optuna import OptunaTuner
from lightautoml.pipelines.features.lgb_pipeline import LGBSimpleFeatures
from lightautoml.pipelines.ml.base import MLPipeline
from lightautoml.pipelines.selection.importance_based import ImportanceCutoffSelector, ModelBasedImportanceEstimator
from lightautoml.reader.base import PandasToPandasReader
from lightautoml.tasks import Task

In [None]:
# Load the competition training set from the Kaggle input directory.
# The trailing expression previews the first five rows in the cell output.
df_train = pd.read_csv(
    filepath_or_buffer="../input/tabular-playground-series-oct-2021/train.csv",
)
df_train.head(n=5)

# Model Development

In [None]:
# Task definition: "binary" task name with "auc" as the metric.
task = Task("binary", metric="auc")

# Reader over pandas input, configured with cv=5 and a fixed random_state
# so repeated runs use the same seed.
reader = PandasToPandasReader(
    task,
    random_state=100,
    cv=5,
)

In [None]:
# Build the feature pre-selection step. The selector gets its own feature
# pipeline, its own LightGBM model and a model-based importance estimator,
# and is created with cutoff=0.
pipe_0 = LGBSimpleFeatures()
mbie = ModelBasedImportanceEstimator()
model_0 = BoostLGBM(
    default_params=dict(
        learning_rate=0.05,
        num_leaves=150,
        seed=45,
        num_threads=4,
    ),
)
selector = ImportanceCutoffSelector(
    pipe_0,
    model_0,
    mbie,
    cutoff=0,
)

In [None]:
# First AutoML level: two LightGBM models sharing one feature pipeline.
pipe = LGBSimpleFeatures()

# Optuna tuner limited to 50 trials / timeout=30; it is paired with model_1 only.
params_tuner1 = OptunaTuner(timeout=30, n_trials=50)

# model_1 is tuned via params_tuner1; model_2 is passed without a tuner.
model_1 = BoostLGBM(
    default_params=dict(learning_rate=0.05, num_leaves=150, seed=1, num_threads=4),
)
model_2 = BoostLGBM(
    default_params=dict(learning_rate=0.025, num_leaves=50, seed=1, num_threads=4),
)

# Assemble the level-1 pipeline (nothing is trained here): the shared
# `selector` is used for pre-selection and no post-selection is applied.
pipeline_level1 = MLPipeline(
    [
        (model_1, params_tuner1),
        model_2,
    ],
    pre_selection=selector,
    features_pipeline=pipe,
    post_selection=None,
)

In [None]:
# Second AutoML level: a single LightGBM model with its own feature pipeline.
pipe_1 = LGBSimpleFeatures()

# freeze_defaults=True is passed together with the explicit default_params.
model = BoostLGBM(
    default_params=dict(learning_rate=0.05, num_leaves=50, seed=1, num_threads=4),
    freeze_defaults=True,
)

# No pre- or post-selection at this level.
pipeline_level2 = MLPipeline(
    [model],
    features_pipeline=pipe_1,
    pre_selection=None,
    post_selection=None,
)

In [None]:
# Combine the reader with the two pipeline levels (one pipeline per level).
# skip_conn is explicitly set to False.
autoML = AutoML(
    reader,
    [
        [pipeline_level1],
        [pipeline_level2],
    ],
    skip_conn=False,
)

In [None]:
# Fit the AutoML model on the training frame and keep the returned
# predictions (presumably out-of-fold, per the variable name).
# Roles: "target" is the target column and "id" is assigned the "drop" role.
oof_pred = autoML.fit_predict(
    df_train,
    roles={
        "target": "target",
        "drop": "id",
    },
)

# Submission

In [None]:
# Load the competition test set from the Kaggle input directory.
# The trailing expression previews the first five rows in the cell output.
test_df = pd.read_csv(
    filepath_or_buffer="../input/tabular-playground-series-oct-2021/test.csv",
)
test_df.head(n=5)

In [None]:
# Score the test frame with the fitted AutoML model.
test_predictions = autoML.predict(
    test_df,
)

In [None]:
# Build the submission table: the test ids alongside the first column of
# the prediction array, then write it to CSV without the index column.
output = pd.DataFrame(
    data={
        "id": test_df["id"],
        "target": test_predictions.data[:, 0],
    },
)
output.to_csv(path_or_buf="submission.csv", index=False)