In [None]:
!pip install lightautoml

In [None]:
# Data Handling 
import pandas as pd 
import numpy as np 
import datatable as dt

# AutoML
from sklearn.model_selection import train_test_split
from lightautoml.automl.base import AutoML
from lightautoml.ml_algo.boost_lgbm import BoostLGBM
from lightautoml.ml_algo.tuning.optuna import OptunaTuner
from lightautoml.pipelines.features.lgb_pipeline import LGBSimpleFeatures
from lightautoml.pipelines.ml.base import MLPipeline
from lightautoml.pipelines.selection.importance_based import ImportanceCutoffSelector, ModelBasedImportanceEstimator
from lightautoml.reader.base import PandasToPandasReader
from lightautoml.tasks import Task
from lightautoml.automl.blend import WeightedBlender
from lightautoml.dataset.roles import DatetimeRole
from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML

# Evaluation
from sklearn.metrics import roc_auc_score

# Preprocessing

In [None]:
# Read the competition training CSV and preview its first rows
df_train = pd.read_csv(
    filepath_or_buffer="../input/tabular-playground-series-sep-2021/train.csv"
)
df_train.head(n=5)

In [None]:
# Hold out 30% of the rows; the fixed seed keeps the split repeatable
train, test = train_test_split(
    df_train,
    random_state=0,
    test_size=0.3,
)
train.head(n=5)

In [None]:
# Remove the "id" column from both splits.
# The names are rebound instead of using inplace=True so the frames returned by
# train_test_split are not mutated (an in-place drop on them can trigger
# SettingWithCopyWarning), and errors="ignore" keeps the cell re-runnable:
# executing it a second time no longer raises KeyError for the missing column.
train = train.drop(columns=["id"], errors="ignore")
test = test.drop(columns=["id"], errors="ignore")

In [None]:
# Task definition ("binary") that is passed to the AutoML preset below
task = Task("binary")

# NOTE(review): `reader` is not referenced by any later cell shown in this
# notebook; TabularAutoML is configured through `reader_params` instead.
reader = PandasToPandasReader(
    task,
    random_state=0,
    cv=5,
)

# Model Development

In [None]:
# Configure the LightAutoML tabular preset: restrict it to the listed
# algorithms, use 5-fold CV with a fixed seed, cap hyper-parameter tuning,
# and run LightGBM with 8 threads.
autoML = TabularAutoML(
    task=task,
    general_params={
        "nested_cv": False,
        "use_algos": [["linear_l1", "lgb", "lgb_tuned"]],
    },
    reader_params={"cv": 5, "random_state": 0},
    tuning_params={"max_tuning_iter": 20, "max_tuning_time": 30},
    lgb_params={"default_params": {"num_threads": 8}},
)

# Fit on the training split with "claim" as the target.
# "id" was already removed from `train` in an earlier cell; the "drop" role is
# left in place unchanged.
oof_pred = autoML.fit_predict(train, roles={"target": "claim", "drop": "id"})

# Evaluate the fit. Previously the 30% hold-out split and the imported
# roc_auc_score were never used, so the notebook produced a submission without
# any quality estimate.
oof_scores = oof_pred.data[:, 0]
# Guard against rows that received no out-of-fold prediction (NaN), if any.
has_oof = ~np.isnan(oof_scores)
oof_auc = roc_auc_score(train["claim"].values[has_oof], oof_scores[has_oof])
print(f"OOF ROC AUC: {oof_auc:.5f}")

# Score the rows that were held out by train_test_split and never seen in fit.
holdout_pred = autoML.predict(test)
holdout_auc = roc_auc_score(test["claim"].values, holdout_pred.data[:, 0])
print(f"Hold-out ROC AUC: {holdout_auc:.5f}")

# Submission

In [None]:
# Read the competition test CSV (the rows to be scored for submission)
test_df = pd.read_csv(
    filepath_or_buffer="../input/tabular-playground-series-sep-2021/test.csv"
)
test_df.head(n=5)

In [None]:
# Feature-only copy of the test set; `test_df` keeps "id" for the submission file
test_data = test_df.drop(labels=["id"], axis="columns")

In [None]:
# Predict on the competition test features with the fitted AutoML object
test_predictions = autoML.predict(test_data)

In [None]:
# Assemble the submission table: one row per test id with the first
# prediction column as "claim"
submission_columns = {
    "id": test_df["id"],
    "claim": test_predictions.data[:, 0],
}
output = pd.DataFrame(submission_columns)
output

In [None]:
# Write the submission file without the DataFrame index column
output.to_csv(path_or_buf="submission.csv", index=False)