In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import random
import time
import os

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sns

from tqdm import tqdm

from catboost import CatBoostRegressor, CatBoostClassifier
from xgboost import XGBRegressor, XGBClassifier
from lightgbm import LGBMClassifier, LGBMRegressor

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler, StandardScaler, MinMaxScaler, QuantileTransformer
from sklearn.metrics import mean_squared_error, roc_auc_score
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.ensemble import StackingRegressor, StackingClassifier

import optuna
from optuna.samplers import TPESampler

In [None]:
# --- Notebook-wide configuration -------------------------------------------

# Reproducibility / cross-validation
SEED = 4000            # passed to seed_everything, KFold and the model's random_seed
N_SPLITS = 10          # number of KFold folds

# Data
TARGET = "target"      # column name written to the submission file

# Model
N_ESTIMATORS = 12000   # NOTE(review): not referenced by the visible cells — confirm it is still needed
LOSS = 'CrossEntropy'  # forwarded as the model's loss_function
EVAL_METRIC = "AUC"    # forwarded as the model's eval_metric

def seed_everything(seed=4000):
    """Seed the random number sources this notebook relies on.

    Sets the ``PYTHONHASHSEED`` environment variable to ``str(seed)`` and
    seeds both Python's ``random`` module and NumPy's global generator.

    Parameters
    ----------
    seed : int, default 4000
        Value handed to every seeding call.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

# Seed the RNGs once, up front, with the notebook-wide SEED.
seed_everything(SEED)

In [None]:
# Load the train/test CSVs; the first CSV column becomes the DataFrame index.
df_train = pd.read_csv(r"../input/tabular-playground-series-oct-2021/train.csv", index_col=0)
df_test = pd.read_csv(r"../input/tabular-playground-series-oct-2021/test.csv", index_col=0)

# Features are every column but the last; the last column is the label.
# .copy() gives X its own data, so adding a column below does not assign into
# a slice of df_train (which raises pandas' SettingWithCopyWarning).
X = df_train.iloc[:, :-1].copy()
y = df_train.iloc[:, -1]
# NOTE: X_test is the same object as df_test, so the column added below is
# visible through both names.
X_test = df_test

# Basic preprocessing: per-row count of missing values as an extra feature.
X["n_na"] = X.isna().sum(axis=1)
X_test["n_na"] = X_test.isna().sum(axis=1)

In [None]:
def run_kfold(model, test_data=None):
    """Cross-validate ``model`` on the notebook-level ``X`` / ``y``.

    A shuffled ``KFold`` (``N_SPLITS`` folds, ``random_state=SEED``) splits
    ``X``. On every fold the same ``model`` object is fitted on the training
    part with the held-out part as ``eval_set``, scored with ROC AUC on the
    held-out part and, when ``test_data`` is given, used to predict it.

    Parameters
    ----------
    model : estimator
        Must expose ``fit(X, y, eval_set=..., verbose=..., early_stopping_rounds=...)``,
        ``predict_proba``, ``get_param`` and ``get_best_iteration``. Its
        ``learning_rate`` parameter must be set, because the early-stopping
        patience is derived from it.
    test_data : DataFrame-like, optional
        Features to predict after each fold. Skipped when ``None``.

    Returns
    -------
    scores : numpy.ndarray
        ROC AUC of each fold, in fold order.
    y_pred : dict
        Maps fold index to column 1 of ``model.predict_proba(test_data)``;
        empty when ``test_data`` is ``None``.
    """
    kf = KFold(n_splits=N_SPLITS, random_state=SEED, shuffle=True)

    # Early-stopping patience is inversely proportional to the learning rate.
    # It does not change between folds, so compute it once.
    patience = int(10 / model.get_param("learning_rate"))

    scores = []
    y_pred = dict()
    n_trees = []

    for i_fold, (train_idx, test_idx) in enumerate(kf.split(X)):
        print(25*"=" + f" Fold {i_fold} " + 25*"=")
        # kf.split yields *positional* indices, so X and y must both be
        # indexed with .iloc. Plain y[idx] would do a label lookup on the
        # Series index and only matches when the index is exactly 0..n-1.
        X_train = X.iloc[train_idx, :]
        y_train = y.iloc[train_idx]

        X_val = X.iloc[test_idx, :]
        y_val = y.iloc[test_idx]
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_val, y_val)],
            verbose=1000,
            early_stopping_rounds=patience,
        )

        y_oof = model.predict_proba(X_val)

        fold_score = roc_auc_score(y_val, y_oof[:, 1])
        scores.append(fold_score)
        n_trees.append(model.get_best_iteration())
        print(f"*** Fold {i_fold} score :", fold_score, " ***")

        if test_data is not None:
            y_pred[i_fold] = model.predict_proba(test_data)[:, 1]

    scores = np.array(scores)
    # Average best iteration across folds; reported only, not returned.
    n_trees = int(np.mean(n_trees))

    print('N trees : ', n_trees)
    print('CV auc scores: ', scores.mean(), " +/- ", scores.std())
    return scores, y_pred

In [None]:
# Hyperparameters unpacked into the classifier constructor below.
cat_param = dict(
    learning_rate=0.0025,
    iterations=10000,
    depth=5,
    l2_leaf_reg=12.09463399692516,
    random_strength=3.5400249636744014,
)

# Fixed settings come from the notebook constants; the rest from cat_param.
model = CatBoostClassifier(
    loss_function=LOSS,
    eval_metric=EVAL_METRIC,
    grow_policy='Depthwise',
    leaf_estimation_method='Newton',
    bootstrap_type='Bernoulli',
    task_type='GPU',
    random_seed=SEED,
    silent=True,
    **cat_param,
)

# Cross-validate and collect one test-set prediction vector per fold.
scores, y_pred = run_kfold(model, X_test)

## save submission
# Stack the per-fold predictions (one row per fold) and average them per test row.
fold_predictions = np.vstack(list(y_pred.values()))
submission = pd.DataFrame(
    fold_predictions.mean(axis=0),
    index=X_test.index,
    columns=[TARGET],
)
submission.to_csv("cat_submission.csv")

**I have been working with this kind of model lately.**
**Please feel free to share your valuable feedback below!**
