# Libraries

In [None]:
# Install AutoGluon into the notebook environment; the next cell imports from autogluon.tabular.
!pip install autogluon

In [None]:
import numpy as np
import pandas as pd
import os
from autogluon.tabular import TabularPredictor, TabularDataset
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the train/test CSVs without their 'id' column, then preview the first training rows.
train = pd.read_csv("/kaggle/input/tabular-playground-series-sep-2021/train.csv")
train = train.drop(columns='id')
test = pd.read_csv('/kaggle/input/tabular-playground-series-sep-2021/test.csv')
test = test.drop(columns='id')
train.head()

# Missing Values and Feature Engineering

In [None]:
#Preprocessing pipeline: mean-impute missing values first, then standardize with StandardScaler.
pipeline = Pipeline(
    steps=[
        ('impute', SimpleImputer(strategy='mean')),
        ('scale', StandardScaler()),
    ]
)

In [None]:
#First, set the target aside and remove it from the feature frame.
#DataFrame.drop returns a new frame instead of modifying `train` in place, so
#the result must be assigned back; otherwise 'claim' stays in `train` and leaks
#into the row-wise statistics and the imputer/scaler applied further down.
temp = train['claim']
train = train.drop(columns=['claim'])

In [None]:
#Credit to https://www.kaggle.com/mlanhenke/tps-09-single-catboostclassifier for this idea.
#Add in some useful features: row-wise summaries of the raw feature columns.
_STAT_COLUMNS = ['min', 'max', 'sum', 'mean', 'std']


def _add_row_stats(frame, feature_cols):
    """Append row-wise summary columns computed from ``feature_cols`` only.

    Every statistic is taken from the same snapshot of the selected columns,
    so a freshly added summary column (or any column outside ``feature_cols``,
    such as the target) never feeds into another statistic.

    Args:
        frame: DataFrame to extend in place.
        feature_cols: names of the columns to summarise.

    Returns:
        The same ``frame`` with 'min', 'max', 'sum', 'mean' and 'std' added.
    """
    raw = frame[feature_cols]  # selecting with a list copies, so the inserts below don't alter it
    frame['min'] = raw.min(axis=1)
    frame['max'] = raw.max(axis=1)
    # NOTE: despite its name, 'sum' holds the per-row count of missing values.
    frame['sum'] = raw.isna().sum(axis=1)
    frame['mean'] = raw.mean(axis=1)
    frame['std'] = raw.std(axis=1)
    return frame


#test is assumed to hold only feature columns (no 'claim'), so its columns
#define the feature set shared by both frames. Skipping the summary names
#keeps this cell safe to re-run.
raw_feature_cols = [col for col in test.columns if col not in _STAT_COLUMNS]
train = _add_row_stats(train, raw_feature_cols)
test = _add_row_stats(test, raw_feature_cols)

In [None]:
#Lastly, impute and scale the data.
#Only the columns shared with test go through the pipeline, so the target is
#never imputed/scaled and both frames end up with the same feature layout.
model_cols = list(test.columns)
#Fit the imputer and scaler on train only and reuse those statistics for test;
#re-fitting on test would put the two sets on different scales.
train = pd.DataFrame(columns=model_cols, data=pipeline.fit_transform(train[model_cols]))
test = pd.DataFrame(columns=model_cols, data=pipeline.transform(test[model_cols]))
#Re-attach the untouched target saved earlier in `temp`.
train['claim'] = temp
train.head()

# Modeling

In [None]:
# Train an AutoGluon tabular predictor for the 'claim' label, scored by ROC AUC,
# using the 'best_quality' preset under a fixed wall-clock budget.
SEVEN_HOURS_IN_SECONDS = 7 * 3600
model = TabularPredictor(label='claim', eval_metric='roc_auc')
model.fit(
    train_data=train,
    presets='best_quality',
    time_limit=SEVEN_HOURS_IN_SECONDS,
)

In [None]:
# Show the fitted predictor's leaderboard; as the last expression in the cell, its return value is rendered as output.
model.leaderboard()

# Submission

In [None]:
# Predict class probabilities for the test set; reset_index() turns the row
# index into a regular column before previewing the first rows.
predictions_autogluon = model.predict_proba(test).reset_index()
predictions_autogluon.head()

In [None]:
# Fill the sample submission's 'claim' column with the prediction column labelled 1
# (presumably the probability of claim == 1 — confirm against the predictor's class labels).
sample_solution_path = '/kaggle/input/tabular-playground-series-sep-2021/sample_solution.csv'
sample_solution = pd.read_csv(sample_solution_path)
claim_probability = predictions_autogluon[1]
sample_solution['claim'] = claim_probability
sample_solution.head()

In [None]:
# Write the submission to 'submission.csv' (relative path); index=False leaves the DataFrame index out of the file.
sample_solution.to_csv('submission.csv', index=False)