In [49]:
import pandas as pd
import numpy as np

import warnings
# Suppress every warning for the rest of the kernel session (no category or
# module filter is given). NOTE(review): this also hides deprecation and
# convergence warnings from the libraries used here — confirm that is intended.
warnings.filterwarnings('ignore')

from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from lightgbm import LGBMClassifier

In [40]:
# Read the three input tables from the current working directory, in the
# order: labelled training data, unlabelled test data, submission template.
_csv_names = ('train.csv', 'test.csv', 'sample_submission.csv')
train, test, submission = (pd.read_csv(csv_name) for csv_name in _csv_names)

In [31]:
# Labels come from the `target` column of the training table.
y = train['target']

# Training features: every column except the first and the last one.
# Presumably column 0 is a row id and the last column is `target` — TODO confirm.
X = train.iloc[:, 1:train.shape[1] - 1]

# NOTE: despite its name, `target` holds the *test-set feature matrix*
# (all test columns except the first), not labels.
target = test.iloc[:, 1:]

In [32]:
# Stratified 10-fold splitter; shuffling is seeded so the folds are reproducible.
skf = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)

In [45]:
# Cross-validated CatBoost training.
#
# For every fold produced by `skf`, a fresh CatBoostClassifier is fitted on the
# training split and early-stopped on the validation split. Each fold adds
# 1 / n_splits of its test-set class probabilities to `cb_pred`, so after the
# loop `cb_pred` is the fold-averaged probability matrix for `target`.
#
# Results left in the namespace:
#   cb_acc  - list with one validation accuracy per fold
#   cb_pred - array of shape (n_test_rows, n_classes) with averaged probabilities
#   cb      - the model fitted on the last fold
#
# Caveat: the validation split is used both to pick the best iteration
# (eval_set + use_best_model) and to compute the reported accuracy, so the
# printed accuracies are optimistic estimates.

# Width of the probability matrix is derived from the labels rather than
# hard-coded, so the cell keeps working if the number of classes changes.
n_classes = y.nunique()

cb_acc = []
cb_pred = np.zeros((target.shape[0], n_classes))
for i, (tr_idx, val_idx) in enumerate(skf.split(X, y)) :
    print(f'{i + 1} Fold Training.....')
    tr_x, tr_y = X.iloc[tr_idx], y.iloc[tr_idx]
    val_x, val_y = X.iloc[val_idx], y.iloc[val_idx]
    
    tr_data = Pool(data = tr_x, label = tr_y)
    val_data = Pool(data = val_x, label = val_y)
    
    cb = CatBoostClassifier(random_state = 42, n_estimators = 1500, learning_rate = 0.03, max_depth = 5, use_best_model = True, loss_function = 'MultiClass')
    cb.fit(tr_data, eval_set = val_data, early_stopping_rounds = 250, verbose = 250)
    val_pred = cb.predict_proba(val_data)
    # argmax yields a *column index*; translate it to the real class label via
    # the model's class order so the comparison with `val_y` is correct even
    # when the labels are not exactly 0..n_classes-1.
    val_cls = np.asarray(cb.classes_)[np.argmax(val_pred, axis = 1)]
    fold_val_acc = accuracy_score(val_y, val_cls)
    cb_acc.append(fold_val_acc)
    print(f'{i + 1} Fold ACCURACY of Validation = {fold_val_acc}\n')
    
    # Pre-divide by the number of folds so the running sum ends up as the mean.
    fold_pred = cb.predict_proba(target) / skf.n_splits
    cb_pred += fold_pred
print(np.mean(cb_acc))

1 Fold Training.....
0:	learn: 1.3756879	test: 1.3768443	best: 1.3768443 (0)	total: 10.3ms	remaining: 15.4s
250:	learn: 0.6716020	test: 0.7741736	best: 0.7741736 (250)	total: 726ms	remaining: 3.61s
500:	learn: 0.4368695	test: 0.5951924	best: 0.5951924 (500)	total: 1.52s	remaining: 3.04s
750:	learn: 0.3168457	test: 0.5275679	best: 0.5275679 (750)	total: 2.24s	remaining: 2.24s
1000:	learn: 0.2439541	test: 0.4909851	best: 0.4906415 (995)	total: 3.02s	remaining: 1.51s
1250:	learn: 0.1927972	test: 0.4738375	best: 0.4738375 (1250)	total: 3.74s	remaining: 744ms
1499:	learn: 0.1570263	test: 0.4605899	best: 0.4603414 (1497)	total: 4.53s	remaining: 0us

bestTest = 0.4603413614
bestIteration = 1497

Shrink model to first 1498 iterations.
1 Fold ACCURACY of Validation = 0.8418803418803419

2 Fold Training.....
0:	learn: 1.3766256	test: 1.3762224	best: 1.3762224 (0)	total: 7.46ms	remaining: 11.2s
250:	learn: 0.6621034	test: 0.7858659	best: 0.7858659 (250)	total: 769ms	remaining: 3.83s
500:	learn: 0

In [46]:
# The columns of `cb_pred` follow the model's class order, so the winning
# column index is mapped back to the actual class label through `cb.classes_`
# (`cb` is the model left over from the last CV fold). For labels 0..K-1 this
# is identical to the raw argmax.
# Assumes every fold saw the same set of classes — TODO confirm.
submission['target'] = np.asarray(cb.classes_)[np.argmax(cb_pred, axis = 1)]

In [47]:
# Write the submission to disk; the DataFrame index is not written as a column.
submission.to_csv(path_or_buf='1st.csv', index=False)

In [48]:
# Sanity check: how many test rows were assigned to each predicted class.
submission['target'].value_counts()

2    2524
1    2495
0    2227
3    2097
Name: target, dtype: int64