In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Classifiers
from catboost import CatBoostClassifier
from catboost import Pool, cv

from sklearn.preprocessing import LabelEncoder, StandardScaler
# Model selection
from sklearn.model_selection import StratifiedKFold

In [None]:
# Read the three competition CSVs in one pass: the training rows, the test
# rows to score, and the sample submission that is later used as the output
# template.
train, test, sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/tabular-playground-series-mar-2021/train.csv',
        '/kaggle/input/tabular-playground-series-mar-2021/test.csv',
        '/kaggle/input/tabular-playground-series-mar-2021/sample_submission.csv',
    )
)

In [None]:
# Column names are generated from their numeric suffix:
# cat0..cat18 for the categorical features, cont0..cont10 for the continuous ones.
categorical_cols = [f'cat{idx}' for idx in range(19)]
continous_cols = [f'cont{idx}' for idx in range(11)]

In [None]:
# Replace each categorical column with integer codes. The encoder is fitted on
# the train and test values together so that a value present in only one of
# the two frames still gets a code and transform() does not fail on it.
for col in categorical_cols:
    print(col)
    train_values = train[col].values.tolist()
    test_values = test[col].values.tolist()
    encoder = LabelEncoder().fit(train_values + test_values)
    train[col] = encoder.transform(train_values)
    test[col] = encoder.transform(test_values)

In [None]:
# Standardise the continuous features. The scaler's statistics are learned
# from the training frame only and then applied unchanged to both frames.
scaler = StandardScaler().fit(train[continous_cols])
train[continous_cols] = scaler.transform(train[continous_cols])
test[continous_cols] = scaler.transform(test[continous_cols])

In [None]:
# Name of the label column and the full list of model inputs
# (categorical columns first, then continuous ones).
target_col = 'target'
features = [*categorical_cols, *continous_cols]

# Seeded, shuffled 5-fold stratified splitter.
# NOTE(review): no later cell visible here references `skf`; the CatBoost cv
# call does its own fold splitting — confirm whether this is still needed.
skf = StratifiedKFold(random_state=0, shuffle=True, n_splits=5)

In [None]:
# Wrap the training features/labels in a CatBoost Pool, declaring which
# columns are categorical so CatBoost applies its own categorical handling.
cv_dataset = Pool(data=train[features],
                  label=train[target_col],
                  cat_features=categorical_cols)

# Training configuration used for cross-validation: GPU training, Logloss as
# the optimised objective, AUC tracked as an additional metric, and balanced
# class weights.
# NOTE(review): the final-model cell further down trains with iterations=2000
# and without auto_class_weights, so this CV does not evaluate exactly that
# configuration — confirm which setting is intended.
params = {"task_type": "GPU",
          "loss_function": "Logloss",
          "custom_metric": "AUC",
          "iterations": 5000,
          "auto_class_weights": "Balanced",
          "verbose": False}

# 5-fold stratified cross-validation with early stopping after 50 rounds.
# `plot` is a boolean flag: the original passed the string "True", which only
# worked because any non-empty string is truthy (the string "False" would
# have enabled the plot as well).
scores = cv(cv_dataset,
            params,
            fold_count=5,
            early_stopping_rounds=50,
            stratified=True,
            plot=True)

In [None]:
# `scores` has one row per boosting iteration, so taking .mean() of this
# column would average the fold-mean AUC over *all* iterations, including the
# weak early ones, and understate the model's cross-validated AUC.
# Report the best fold-mean AUC reached during training instead.
scores['test-AUC-mean'].max()

In [None]:
from catboost import CatBoostClassifier

# Train the final classifier on the complete training frame (no hold-out),
# using 2000 boosting iterations on GPU with the device string '0:1'.
model = CatBoostClassifier(
    task_type="GPU",
    devices='0:1',
    iterations=2000,
)

train_data, train_labels = train[features], train[target_col]

# Categorical columns are declared at fit time; per-iteration logging is off.
model.fit(
    train_data,
    train_labels,
    verbose=False,
    cat_features=categorical_cols,
)

In [None]:
# Score the test rows and keep the second column of the predict_proba output
# as the submitted value, then write the submission file without the index.
test_probabilities = model.predict_proba(test[features])
sub['target'] = test_probabilities[:, 1]
sub.to_csv('submission.csv', index=False)