In [None]:
from catboost import CatBoostClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import pandas as pd
import numpy as np

# Read the training and test tables from their CSV files.
train_path = '../input/tabular-playground-series-mar-2021/train.csv'
test_path = '../input/tabular-playground-series-mar-2021/test.csv'
train_df, test_df = pd.read_csv(train_path), pd.read_csv(test_path)

In [None]:
# Detach the 'target' column as the label vector; train_df keeps only the
# remaining columns from here on.
y = train_df.pop('target')

In [None]:
# Columns whose name starts with 'cat' are treated as categorical.
categorical_columns = train_df.filter(regex=r'^cat').columns

# Stack train on top of test so each encoder is fitted on the values of both
# tables; remember where the training rows end so the stack can be split again.
train_len = len(train_df)
concat_df = pd.concat([train_df, test_df], axis=0)

for col_name in categorical_columns:
    # A fresh encoder per column: fit on the column, replace it with integer codes.
    encoder = LabelEncoder()
    concat_df[col_name] = encoder.fit_transform(concat_df[col_name])

# Positional split back into the two original tables.
train_df = concat_df.iloc[:train_len]
test_df = concat_df.iloc[train_len:]

In [None]:
# Hold out 20% of the training rows; the fixed random_state keeps the split
# the same across runs.
X_train, X_test, y_train, y_test = train_test_split(
    train_df,
    y,
    test_size=0.2,
    random_state=2021,
)

In [None]:
# Positional indices (within X_train's columns) of the categorical features.
is_categorical = X_train.columns.isin(categorical_columns)
cat_features_index = np.flatnonzero(is_categorical).tolist()

# Validation model: the held-out split is passed as eval_set, alongside
# early_stopping_rounds and the AUC eval metric.
model_params = {
    'n_estimators': 1000,
    'eval_metric': 'AUC',
    'early_stopping_rounds': 30,
    'cat_features': cat_features_index,
}
cb = CatBoostClassifier(**model_params)
cb.fit(X_train, y_train, eval_set=(X_test, y_test), verbose=False)

In [None]:
# Second column of predict_proba, for the fitting rows and the held-out rows.
train_predictions = cb.predict_proba(X_train)[:, 1]
nn_test_predictions = cb.predict_proba(X_test)[:, 1]

# Report ROC AUC on both splits.
scored_splits = (
    ('TRAIN', y_train, train_predictions),
    ('TEST', y_test, nn_test_predictions),
)
for split_label, truth, scores in scored_splits:
    print(f'{split_label} ROC: {roc_auc_score(truth, scores)}')

In [None]:
# Final model: refit on every labelled row before scoring the test table.
#
# No eval_set is passed to this fit, so `early_stopping_rounds` is dropped:
# CatBoost's overfitting detector needs validation data to monitor, and keeping
# the option would wrongly suggest this fit can stop early. All 1000 trees are
# trained, exactly as before.
cb = CatBoostClassifier(
    n_estimators=1000,
    eval_metric='AUC',
    cat_features=cat_features_index,
)
# Log every 100th iteration rather than all 1000 to keep the cell output readable.
cb.fit(train_df, y, verbose=100)

In [None]:
# Per-class probabilities for every test row; keep the second class column.
test_proba = cb.predict_proba(test_df)
predictions = test_proba[:, 1]

In [None]:
# Two-column table: the test ids next to the predicted probabilities,
# written to CSV without the row index.
sub = (
    test_df[['id']]
    .reset_index(drop=True)
    .assign(target=predictions)
)
sub.to_csv('sub.csv', index=False)