In [None]:
# Shell out to report the version of the `python` executable found on PATH.
# NOTE(review): this is not guaranteed to be the kernel's own interpreter — confirm if it matters.
!python --version

In [None]:
!pip install pycaret

In [None]:
import pandas as pd
from pycaret.classification import *

# Dataset

In [None]:
# Load the competition training set into a DataFrame.
# NOTE(review): the relative path presumably matches the Kaggle kernel layout
# (../input/<dataset>/) — adjust when running elsewhere.
train = pd.read_csv("../input/santander-customer-satisfaction/train.csv")

In [None]:
# Preview the first rows to check that the file parsed as expected.
train.head()

In [None]:
# Summarise the frame: row count, column dtypes and memory usage.
train.info()

In [None]:
# Count rows per value of the TARGET column to see the class balance.
train["TARGET"].value_counts()

In [None]:
# Total number of missing cells across the whole training frame.
train.isna().to_numpy().sum()

In [None]:
def process_feat(df):
    """Return a new frame equal to ``df`` without its ``ID`` column.

    The input frame is not modified. A ``KeyError`` is raised when ``df``
    has no ``ID`` column.
    """
    return df.drop(columns=["ID"])

In [None]:
# Drop the identifier column from the training frame.
# Guarded so the cell can be re-run safely: `process_feat` raises KeyError
# once "ID" has already been removed from `train`.
if "ID" in train.columns:
    train = process_feat(train)

# Classification

In [None]:
# Initialise the PyCaret classification experiment on the training frame.
#   session_id       fixed seed so the train/hold-out split, resampling and CV
#                    results are reproducible across "Restart & Run All".
#   imputation_type  iterative imputation for any missing values.
#   fold             5-fold cross-validation for every later step.
#   fix_imbalance    resample the training data to counter the skewed TARGET.
#   use_gpu          use GPU-enabled estimators where available.
#   silent           skip the interactive dtype-confirmation prompt.
setup(
    data=train,
    target='TARGET',
    session_id=42,
    imputation_type="iterative",
    fold=5,
    fix_imbalance=True,
    use_gpu=True,
    silent=True
)

In [None]:
# Inspect the training feature matrix held by the experiment
# (presumably after setup()'s preprocessing — verify against the PyCaret docs).
get_config("X_train")

In [None]:
# Cross-validate the available estimators, rank them by AUC and keep the five best.
top5 = compare_models(n_select=5, sort="AUC")

In [None]:
# Display the list of selected estimators.
top5

In [None]:
# Disabled experiment, kept for reference: train a single XGBoost model with 5-fold CV.
# xgboost = create_model('xgboost', fold=5)

In [None]:
# Hyperparameter-tune each of the selected models for AUC with 5-fold CV,
# preserving the ranking order of `top5`.
tuned_top5 = [
    tune_model(candidate, fold=5, optimize="AUC")
    for candidate in top5
]

In [None]:
# ROC/AUC curve (plot_model's default plot) for the tuned version of the top-ranked model.
plot_model(estimator=tuned_top5[0], plot="auc")

In [None]:
# Class prediction error plot for the tuned version of the top-ranked model.
plot_model(estimator=tuned_top5[0], plot="error")

In [None]:
# Confusion matrix for the tuned version of the top-ranked model.
plot_model(estimator=tuned_top5[0], plot="confusion_matrix")

In [None]:
# Learning curve (5-fold CV) for the tuned version of the top-ranked model.
plot_model(estimator=tuned_top5[0], plot="learning", fold=5)

In [None]:
# Feature importance plot for the tuned version of the top-ranked model.
plot_model(estimator=tuned_top5[0], plot="feature")

In [None]:
# Decision boundary plot for the tuned version of the top-ranked model.
plot_model(estimator=tuned_top5[0], plot="boundary")

In [None]:
# Manifold-learning projection plot for the tuned version of the top-ranked model.
plot_model(estimator=tuned_top5[0], plot="manifold", fold=5)

In [None]:
# Soft-voting ensemble of the five tuned models, evaluated with 5-fold CV.
blended = blend_models(
    tuned_top5,
    method='soft',
    optimize="AUC",
    fold=5,
)

In [None]:
# Stacked ensemble: the base learners are the blend plus tuned models 2-5,
# and the first tuned model acts as the meta-learner. With restack=False the
# meta-learner is presumably fed only the base predictions — verify in the PyCaret docs.
stacked = stack_models(
    [blended, *tuned_top5[1:]],
    meta_model=tuned_top5[0],
    restack=False,
    optimize="AUC",
    fold=5,
)

In [None]:
# Calibrate the stacked model's predicted probabilities using 5-fold CV.
# NOTE(review): the submission cells further down predict with `blended`,
# not with this calibrated model — confirm that is intended.
calibrated_stacked = calibrate_model(estimator=stacked, fold=5)

# Submission

In [None]:
# Load the competition test set from the same dataset directory as the training data.
test = pd.read_csv("../input/santander-customer-satisfaction/test.csv")

In [None]:
# Apply the same feature processing as for the training frame (drops "ID").
# Guarded so the cell can be re-run safely: `process_feat` raises KeyError
# once "ID" has already been removed from `test`.
if "ID" in test.columns:
    test = process_feat(test)

In [None]:
# Score the test set with the blended ensemble; raw_score=True asks for
# per-class score columns in the returned frame.
# NOTE(review): the model is used as fitted inside the experiment, with no
# finalize_model() refit on the full training data — confirm that is intended.
predictions = predict_model(estimator=blended, data=test, raw_score=True)
predictions.head()

In [None]:
# Load the submission template provided with the competition data.
sample_submission = pd.read_csv("../input/santander-customer-satisfaction/sample_submission.csv")

In [None]:
# Fill the template with the positive-class score and write the submission file.
# Values are assigned by position (not by index label) so that a mismatched
# index cannot silently produce NaN rows; a row-count mismatch fails loudly.
# NOTE(review): this assumes test.csv and sample_submission.csv list rows in
# the same order — confirm.
assert len(predictions) == len(sample_submission), "prediction/submission row count mismatch"
sample_submission['TARGET'] = predictions['Score_1'].to_numpy()
sample_submission.to_csv('submission.csv', index=False)

In [None]:
! kaggle competitions submit -c santander-customer-satisfaction -f submission.csv