In [None]:
!pip install -U kaggle xgboost lightgbm flaml sweetviz catboost imbalanced-learn scikit-learn

In [None]:
import pandas as pd

# Load the train and test tables, using the `id` column as the row index.
df = pd.read_csv(
    '../input/tabular-playground-series-mar-2021/train.csv',
    index_col='id',
)
df_test = pd.read_csv(
    '../input/tabular-playground-series-mar-2021/test.csv',
    index_col='id',
)

# Report the shape of each table, then of the two stacked row-wise.
print(df.shape, df_test.shape, sep='\n')
res = pd.concat([df, df_test])
print(res.shape)

In [None]:
import numpy as np
import pandas as pd
from flaml import AutoML

from sklearn import compose as comp
from sklearn import preprocessing as prep
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import make_column_selector as selector

from imblearn.pipeline import Pipeline
from imblearn import over_sampling as over
from imblearn import under_sampling as under

# --- Feature preprocessing -------------------------------------------------
# Non-object columns: replace missing values with the sentinel -999.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value=-999)),
    #('scaler', prep.MinMaxScaler())
])

# Object columns: replace NaN and None with the literal 'missing', then map
# every category to an integer code.
categorical_transformer = Pipeline(steps=[
    ('imputer_nan', SimpleImputer(missing_values=np.nan, strategy='constant', fill_value='missing')),
    ('imputer_none', SimpleImputer(missing_values=None, strategy='constant', fill_value='missing')),
    # OrdinalEncoder only accepts handle_unknown='error' or 'use_encoded_value'
    # ('ignore' belongs to OneHotEncoder and is rejected at fit time by current
    # scikit-learn). Categories not seen during fit are encoded as -1.
    ('ordinal', prep.OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)),
])

# Route each column to the matching transformer based on its dtype.
preprocessor = comp.ColumnTransformer(transformers=[
    ('num', numeric_transformer, selector(dtype_exclude="object")),
    ('cat', categorical_transformer, selector(dtype_include="object"))
])

# Pipeline wrapper around the preprocessor; the resampling and classifier
# steps below are disabled.
pipe = Pipeline(steps=[
    ('preprocessor', preprocessor),
    #('under', under.RandomUnderSampler(sampling_strategy=0.5, random_state=42)),
    #('over', over.SMOTE(random_state=42)),
    #('classifier', automl)
])


# --- Build the design matrix ----------------------------------------------
X = df.drop(['target'], axis=1)
y = df.target

# Fit the preprocessor on train + test features so that every category value
# present in either table receives an ordinal code.
X_all = pd.concat([X, df_test])
preprocessor.fit(X_all)
X = preprocessor.transform(X)

# Hold out the validation fold BEFORE oversampling. Running SMOTE first would
# place synthetic points interpolated from validation rows into the training
# fold (leakage) and make the validation score optimistic.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,  # keep the class ratio identical in both folds
)

# Oversample the minority class in the training fold only. `n_jobs` is not
# passed: the parameter was deprecated and later removed from SMOTE.
X_train, y_train = over.SMOTE(random_state=42).fit_resample(X_train, y_train)
print(X_train.shape)
print(y_train.value_counts())

# --- Model search ----------------------------------------------------------
# NOTE(review): the search is configured as regression/MSE although the target
# is resampled as class labels and scored with ROC AUC in a later cell; the
# continuous predictions are used as ranking scores there. Confirm this is
# intended before switching to a classification task.
automl = AutoML()
automl_settings = {
    "metric": 'mse',
    "task": 'regression',
    "estimator_list": ['lgbm', 'xgboost', 'catboost'],
    "n_jobs": -1,
}
automl.fit(X_train, y_train, **automl_settings)

In [None]:
from sklearn.metrics import roc_auc_score

# ROC AUC of the fitted model's predictions: validation fold first, then the
# training fold.
val_scores = automl.predict(X_val)
print(roc_auc_score(y_val, val_scores, average="micro"))

train_scores = automl.predict(X_train)
print(roc_auc_score(y_train, train_scores, average="micro"))

In [None]:
# Read the test table into its own name. Reassigning `df_test` here would
# replace the id-indexed frame that the preprocessor-fitting cell concatenates
# with the training features, so re-running that cell afterwards would see
# extra `id`/`target` columns.
test_raw = pd.read_csv('../input/tabular-playground-series-mar-2021/test.csv')

# `id` is an identifier, not a feature: drop it before applying the fitted
# preprocessor.
X_test = preprocessor.transform(test_raw.drop(['id'], axis=1))

# One row per test id with the model's prediction.
sub = pd.DataFrame({
    'id': test_raw['id'],
    'target': automl.predict(X_test),
})
sub.to_csv('submission.csv', index=False)

# Last expression of the cell so the preview is actually rendered.
sub.head()