In [None]:
!pip install pycaret

# Import Libraries

**PyCaret has quickly become my favourite AutoML/StructuredML library, and in this notebook I want to share it with you. I still believe the library has a way to go, especially in optimizing runtime and in its documentation (it's so bad), but it has come a long way!**

In [None]:
import pandas as pd
import numpy as np

from pycaret.classification import *

import gc
# Relative paths to the train and test CSV files of the
# "tabular-playground-series-jun-2021" dataset directory.
train_path = '../input/tabular-playground-series-jun-2021/train.csv'
test_path = '../input/tabular-playground-series-jun-2021/test.csv'
# NOTE(review): presumably the random seed ("random state") for the run —
# confirm where it is consumed.
RS = 69420

# Preprocess & Setup Data Pipeline

In [None]:
from sklearn.preprocessing import LabelEncoder

# Read the training table, using the first CSV column as the index.
df = pd.read_csv(train_path, index_col=0)

# Replace the 'target' column with integer codes; `le` keeps the fitted
# encoder so the class <-> code mapping stays available afterwards.
le = LabelEncoder()
df = df.assign(target=le.fit_transform(df['target']))

df

In [None]:
%%time
# Fixing Imbalance takes ages
clf = setup(data=df,
            target='target',
            use_gpu=True,
            silent=True)

In [None]:
# Run a garbage-collection pass; as the cell's last expression, the value
# returned by gc.collect() is displayed as the cell output.
gc.collect()

# Tuned LGBMClassifier

In [None]:
%%time
lgbm = tune_model(create_model('lightgbm'),
                  optimize='AUC',
                  early_stopping=True,
                  early_stopping_max_iters=50,
                  choose_better=True,
                  search_library='optuna')

In [None]:
# Default diagnostic plot for the tuned LightGBM model (no `plot` type given).
plot_model(estimator=lgbm)

In [None]:
# Confusion matrix for the tuned LightGBM model.
plot_model(estimator=lgbm, plot='confusion_matrix')

# Tuned XGBClassifier

In [None]:
%%time
xgb = tune_model(create_model('xgboost'),
                  optimize='AUC',
                  early_stopping=True,
                  early_stopping_max_iters=10,
                  choose_better=True,
                  search_library='optuna')

In [None]:
# Default diagnostic plot for the tuned XGBoost model (no `plot` type given).
plot_model(estimator=xgb)

In [None]:
# Confusion matrix for the tuned XGBoost model.
plot_model(estimator=xgb, plot='confusion_matrix')

# Tuned CatboostClassifier

In [None]:
%%time
cat = tune_model(create_model('catboost'),
                 optimize='AUC',
                 early_stopping=True,
                 early_stopping_max_iters=50,
                 choose_better=True)

In [None]:
# Default diagnostic plot for the tuned CatBoost model (no `plot` type given).
plot_model(estimator=cat)

In [None]:
# Confusion matrix for the tuned CatBoost model.
plot_model(estimator=cat, plot='confusion_matrix')

# Ensemble the Tuned Models

In [None]:
# The three tuned estimators (LightGBM, XGBoost, CatBoost) to combine in the blend.
models = [lgbm, xgb, cat]

In [None]:
%%time
blend = blend_models(estimator_list=models,
                     method='soft',
                     optimize='AUC')

# Submit Predictions

In [None]:
# Read the test table, using the first CSV column as the index; that index is
# reused later as the submission's 'id' column.
test = pd.read_csv(test_path, index_col=0)
# Display the frame as the cell output.
test

In [None]:
# Score the test rows with the blended model; raw_score=True requests scores
# for every class rather than only for the predicted label.
# NOTE(review): `blend` is used exactly as returned by blend_models — consider
# whether a finalize_model refit on the full data was intended before predicting.
test_preds = predict_model(estimator=blend, raw_score=True, data=test)
test_preds

In [None]:
# Convert the prediction frame to a plain NumPy array for positional slicing.
# np.asarray (instead of `.values`) keeps this cell idempotent: re-running it
# when `test_preds` is already an ndarray is a no-op rather than an
# AttributeError.
test_preds = np.asarray(test_preds)

In [None]:
# Build the submission: one 'id' column plus one score column per class.
# Column names and count come from the fitted LabelEncoder, so they follow the
# encoding actually used for training instead of nine hand-written indices.
class_names = list(le.classes_)
n_classes = len(class_names)

# The per-class scores are taken from the trailing `n_classes` columns of the
# prediction array, in encoded-label order (same positions the original
# negative indices -9 .. -1 addressed).
assert test_preds.shape[1] >= n_classes, "prediction array has fewer columns than classes"
class_scores = test_preds[:, -n_classes:]

output = pd.DataFrame({
    'id': test.index,
    **{name: class_scores[:, pos] for pos, name in enumerate(class_names)},
})

output.to_csv('submission_tps.csv', index=False)

In [None]:
# Preview the first rows of the submission frame as a sanity check.
output.head()