# Imports

In [None]:
import numpy as np 
import pandas as pd
import datatable as dt
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import roc_auc_score
from catboost import CatBoostClassifier
from sklearn.preprocessing import MinMaxScaler

# Load dataset

In [None]:
%%time
# Read the three competition CSVs with datatable's fread and convert each
# result to a pandas DataFrame: training data, test data, submission template.
train, test, sub = (
    dt.fread(csv_path).to_pandas()
    for csv_path in (
        '../input/tabular-playground-series-nov-2021/train.csv',
        '../input/tabular-playground-series-nov-2021/test.csv',
        '../input/tabular-playground-series-nov-2021/sample_submission.csv',
    )
)

In [None]:
# Remove the 'id' column from both frames in place so it is not fed to the
# model as a feature. Note: this cell mutates `train`/`test` and will raise
# if re-run without reloading the data.
train.drop(columns=['id'], inplace=True)
test.drop(columns=['id'], inplace=True)

In [None]:
# Detach the label column: pop() returns the 'target' Series and removes it
# from `train` in place, leaving only feature columns behind.
y = train.pop('target')

# Model hyperparameters

In [None]:
# CatBoost hyperparameters; the whole mapping is unpacked into
# CatBoostClassifier(**cat_params) for every fold.
cat_params = dict(
    iterations=296,
    od_wait=3385,
    learning_rate=0.15574579120098908,
    reg_lambda=0.32139709692279206,
    subsample=0.8442605943226449,
    random_strength=22.468752639603235,
    depth=7,
    min_data_in_leaf=31,
    leaf_estimation_iterations=15,
    task_type="GPU",
    bootstrap_type='Poisson',
)

# 5-fold model training and prediction

In [None]:
# 5-fold cross-validation. Each fold trains a fresh CatBoost model, stores
# out-of-fold (OOF) probabilities for its validation rows, and adds
# 1/n_splits of its test-set probabilities to the running average.
folds = KFold(n_splits = 5, random_state = 102021, shuffle = True)

predictions = np.zeros(len(test))    # fold-averaged test probabilities
cat_oof = np.zeros(train.shape[0])   # one OOF probability per training row

for fold, (trn_idx, val_idx) in enumerate(folds.split(train)):

    # NOTE: X_test / y_test are this fold's *validation* split, not the
    # competition test set (that one lives in `test`).
    X_train, X_test = train.iloc[trn_idx], train.iloc[val_idx]
    y_train, y_test = y.iloc[trn_idx], y.iloc[val_idx]

    # The scaler is fitted on the fold's training rows only and then applied
    # to the validation rows, so validation statistics never leak into it.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    model = CatBoostClassifier(**cat_params)
    print(f"model fit started for Fold: {fold}")
    model.fit(X_train,
              y_train,
              eval_set=[(X_test, y_test)],
              early_stopping_rounds=400,
              verbose=False)

    # Column 1 of predict_proba is the probability of the class at index 1.
    pred = model.predict_proba(X_test)[:,1]
    cat_oof[val_idx] = pred
    roc = roc_auc_score(y_test, pred)
    print(f" roc_auc_score: {roc}")
    print("-"*50)

    # Scale the raw test frame into a separate variable. Rebinding `test`
    # here would feed already-scaled data into the next fold's scaler
    # (cumulative re-scaling), corrupting the test features for every fold
    # after the first. `test` itself must stay untouched across folds.
    test_scaled = scaler.transform(test)
    predictions += model.predict_proba(test_scaled)[:,1] / folds.n_splits

# Store submission and out-of-fold (OOF) predictions

In [None]:
# Write the fold-averaged test probabilities into the submission template
# and save it as CSV without the pandas index column.
sub['target'] = predictions
sub.to_csv('submission_catboost_1.csv', index=False)
# Keep the out-of-fold predictions as well; an array passed positionally to
# savez_compressed is stored under numpy's default key 'arr_0'.
np.savez_compressed('oof_catboost.npz', cat_oof)