### 1. [Load data](#section-one)
### 2. [Scale features](#section-two)
### 3. [Randomized search of parameters for CatBoost classifier](#section-twoB)
### 4. [Train CatBoostClassifier](#section-three)

In [None]:
import pandas as pd
from sklearn.preprocessing import RobustScaler
import numpy as np
from sklearn.model_selection import StratifiedKFold
from catboost import CatBoostClassifier
from sklearn.metrics import roc_auc_score
from scipy.stats import uniform

### Load data
<a id="section-one"></a>

In [None]:
# Read the competition's train and test tables into DataFrames.
# The paths are relative to the notebook's working directory.
train_csv_path = '../input/tabular-playground-series-oct-2021/train.csv'
test_csv_path = '../input/tabular-playground-series-oct-2021/test.csv'
data_train = pd.read_csv(train_csv_path)
data_test = pd.read_csv(test_csv_path)

In [None]:
# Split the feature names f0..f284 into two groups.
# f22 and f43 fall inside the f0..f241 index range but are grouped with
# f242..f284 (the group this notebook calls "int" columns), so they are
# excluded from the float group and appended to the int group instead.
int_exceptions = ['f22', 'f43']
float_columns = [
    f'f{i}' for i in range(242) if f'f{i}' not in int_exceptions
]
int_columns = [f'f{i}' for i in range(242, 285)] + int_exceptions
# All feature names: float group first, then int group.
cols = float_columns + int_columns

### Scale features 
<a id="section-two"></a>

In [None]:
# Scale only the float feature columns. The scaler's statistics are learned
# from the training table and then applied unchanged to both tables, so the
# test table never influences the fitted scaling.
scaler = RobustScaler()
scaler.fit(data_train[float_columns])
data_train[float_columns] = scaler.transform(data_train[float_columns])
data_test[float_columns] = scaler.transform(data_test[float_columns])

### Perform randomized search of parameters for CatBoost classifier
<a id="section-twoB"></a>

In [None]:
# Disabled hyperparameter search: every line below is commented out, so this
# cell does nothing when run. It is kept as a record of how the search was set up.
# NOTE(review): the np.random.randint(...) calls draw the candidate values at
# random and no seed is set in this cell, so re-enabling the search will
# probably not reproduce the same candidates -- confirm before relying on it.
# NOTE(review): X and y are selected by position (columns 1..285 as features,
# column 286 as the label); verify this matches the layout of train.csv.
#model = CatBoostClassifier(loss_function='CrossEntropy',
#                           eval_metric = 'AUC',
#                           random_state=0)
#distributions = dict(max_depth = np.random.randint(4, 10, 4), l2_leaf_reg = uniform(loc=0, scale=4),
#                    iterations = np.random.randint(100, 500, 5), 
#                     min_data_in_leaf = np.random.randint(50, 250, 5),
#                     learning_rate = uniform(loc=0, scale=1)
#                    )
#randomized_search_result = model.randomized_search(param_distributions = distributions,
#                                                   X=data_train.iloc[:,1:286],
#                                                   y=data_train.iloc[:, 286],
#                                                   cv = 5, refit = True,  n_iter = 10, 
#                                                   partition_random_seed=2)

In [None]:
# Hyperparameters obtained from an earlier randomized-search run, hard-coded
# here so the search does not have to be repeated
# (originally: params = randomized_search_result['params']).
tuned_params = {
    'min_data_in_leaf': 116,
    'depth': 4,
    'iterations': 284,
    'learning_rate': 0.5307391048885213,
    'l2_leaf_reg': 3.766159322596347,
}

# Settings that were fixed rather than searched over.
fixed_params = {
    'loss_function': 'CrossEntropy',
    'eval_metric': 'AUC',
}

# Tuned values first, fixed settings last (same key order as before).
params = {**tuned_params, **fixed_params}

### Train CatBoostClassifier with 5 stratified folds
<a id="section-three"></a>

In [None]:
# Train with the parameters above on 5 stratified folds. For every fold the
# model is fitted on the training rows, evaluated on the held-out rows, and
# its test-set probabilities are added to `prediction` with weight 1/n_folds,
# so `prediction` ends up as the mean over folds.
model = CatBoostClassifier(**params)
skf = StratifiedKFold(n_splits=5)
prediction = np.zeros(data_test.shape[0])

# Positional selections that do not change between folds.
train_features = data_train.iloc[:, 1:286]   # columns 1..285 of the train table
train_target = data_train.iloc[:, 286]       # column 286 of the train table
test_features = data_test.iloc[:, 1:]        # every test column except the first
n_folds = skf.get_n_splits()

for train_index, test_index in skf.split(train_features, train_target):
    # `test_index` holds the rows held out for validation in this fold.
    x, y = train_features.iloc[train_index], train_target.iloc[train_index]
    x_val, y_val = train_features.iloc[test_index], train_target.iloc[test_index]

    # The held-out fold is passed as eval_set together with
    # use_best_model=True (see the CatBoost docs for the exact semantics).
    model.fit(x, y, eval_set=(x_val, y_val.values), use_best_model=True)

    # Column 1 of predict_proba is used as the score for each test row.
    fold_test_proba = model.predict_proba(test_features)[:, 1]
    prediction += fold_test_proba / n_folds

    # Report ROC AUC on this fold's held-out rows.
    roc_auc = roc_auc_score(y_val.values, model.predict_proba(x_val)[:, 1])
    print(f'AUC score = {roc_auc}')

In [None]:
# Build the submission table from the test ids and the values in
# `prediction`, then write it to submission.csv with a header row and
# without the DataFrame index.
submission_columns = {'id': data_test['id'], 'target': prediction}
submis = pd.DataFrame(submission_columns)
submis.to_csv('submission.csv', index=False, header=True)