In [1]:
import gc
import joblib

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from catboost import CatBoostClassifier, Pool
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

from sklearn.model_selection import GroupKFold, GroupShuffleSplit, StratifiedGroupKFold
from sklearn.metrics import precision_recall_curve, auc

from collections import defaultdict
from tqdm import tqdm

In [None]:
MAIN_PATH = 'avito-train-combined/data/avito-merged-dataset/'
ECOM_PRETRAIN = 'avito-train-combined/data/clip-marqofashionsiglip-marqoecom-top2kaggle/'
RESNET_PATH = 'avito-train-combined/data/resnet-cossim/'
KAGGLE_TOP5 = 'avito-train-combined/data/top5-kaggle/'
BERTA_PATH = 'avito-train-combined/data/berta-pretrained-cossims/'
RUBERT_TINY_OOF_PATH = 'avito-train-combined/data/rubert-folds/'
RUBERT_TINY_PREDS_PATH = 'avito-train-combined/data/rubert-test-preds/'
E5LARGE_OOF_PATH = 'avito-train-combined/data/avito-e5-large-pretrain/'
E5LARGE_PREDS_PATH = 'avito-train-combined/data/avito-e5-large-test/'
REV_RUBERT_TINY_OOF_PATH = 'avito-train-combined/data/name_desc_bert_oof_rev/'
REV_RUBERT_TINY_PREDS_PATH = 'avito-train-combined/data/name-desc-bert-preds-rev/'
USERBGE_COSSIMS_PATH = 'avito-train-combined/data/userbge-cossims/'
RUBERT_BASE_TEST_PREDS_PATH = 'avito-train-combined/data/rubert-fixed-test-preds/'
RUBERT_BASE_TEST_PREDS_REV_PATH = 'avito-train-combined/data/rubert-fixed-test-preds-rev/'
FT_PREDS_PATH = 'avito-train-combined/data/ft-preds2/'
RUBERT_BASE_OOF_PATH = 'avito-train-combined/data/trained-rubert-base-preds/'
RUBERT_BASE_OOF_REV_PATH = 'avito-train-combined/data/trained-rubert-base-preds-rev/'

USE_MEAN_BASE_AND_REV = False

In [3]:
train = pd.read_parquet(MAIN_PATH + 'train_df.parquet')
test = pd.read_parquet(MAIN_PATH + 'test_df.parquet')

In [4]:
to_drop = [
    'category_level_1_1', 'category_level_1_2',
    'category_level_2_1', 'category_level_2_2',
    'category_level_3_1', 'category_level_3_2',
    'category_level_4_1', 'category_level_4_2',
]

train[to_drop] = train[to_drop].fillna('none')

train['unique_cat_1'] = train['category_level_1_1'] + '_' + train['category_level_1_2']
train['unique_cat_2'] = train['category_level_2_1'] + '_' + train['category_level_2_2']
train['unique_cat_3'] = train['category_level_3_1'] + '_' + train['category_level_3_2']
train['unique_cat_4'] = train['category_level_4_1'] + '_' + train['category_level_4_2']

test[to_drop] = test[to_drop].fillna('none')

test['unique_cat_1'] = test['category_level_1_1'] + '_' + test['category_level_1_2']
test['unique_cat_2'] = test['category_level_2_1'] + '_' + test['category_level_2_2']
test['unique_cat_3'] = test['category_level_3_1'] + '_' + test['category_level_3_2']
test['unique_cat_4'] = test['category_level_4_1'] + '_' + test['category_level_4_2']

train.drop(columns=to_drop, axis=1, inplace=True)
gc.collect()

test.drop(columns=to_drop, axis=1, inplace=True)
gc.collect()

0

In [5]:
def reduce_mem_usage(df):
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                    gc.collect()
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                    gc.collect()
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                    gc.collect()
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
                    gc.collect()
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                    gc.collect()
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                    gc.collect()
                else:
                    df[col] = df[col].astype(np.float64)
                    gc.collect()
        else:
            if df[col].nunique() == 2:
                df[col] = df[col].astype('bool')
            gc.collect()

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

    gc.collect()
    
    return df

In [6]:
train = reduce_mem_usage(train)

Memory usage of dataframe is 6521.05 MB
Memory usage after optimization is: 1842.67 MB
Decreased by 71.7%


In [7]:
test = reduce_mem_usage(test)

Memory usage of dataframe is 1728.06 MB
Memory usage after optimization is: 490.67 MB
Decreased by 71.6%


In [8]:
train = train.sort_values(by=['variantid_1', 'variantid_2'])
test = test.sort_values(by=['variantid_1', 'variantid_2'])

In [None]:
# --- IMG FEATURES ---

# pretrain clip
train_clip = pd.read_parquet(ECOM_PRETRAIN + 'cossim_final_embeddings_train_CLIP.parquet')
test_clip = pd.read_parquet(ECOM_PRETRAIN + 'cossim_final_embeddings_test_CLIP.parquet')

train_clip = train_clip.sort_values(by=['variantid_1', 'variantid_2'])
test_clip = test_clip.sort_values(by=['variantid_1', 'variantid_2'])
train['clip_cosine_sim'] = train_clip['cosine_sim']
test['clip_cosine_sim'] = test_clip['cosine_sim']
del train_clip, test_clip

# pretrain fashion siglip
train_fashionsiglip = pd.read_parquet(ECOM_PRETRAIN + 'cossim_final_embeddings_fashion_clip_train.parquet')
test_fashionsiglip = pd.read_parquet(ECOM_PRETRAIN + 'cossim_final_embeddings_fashion_clip_test.parquet')

train_fashionsiglip = train_fashionsiglip.sort_values(by=['variantid_1', 'variantid_2'])
test_fashionsiglip = test_fashionsiglip.sort_values(by=['variantid_1', 'variantid_2'])
train['fashionsiglip_cosine_sim'] = train_fashionsiglip['cosine_sim']
test['fashionsiglip_cosine_sim'] = test_fashionsiglip['cosine_sim']
del train_fashionsiglip, test_fashionsiglip

# pretrain marqo ecom
train_ecom = pd.read_parquet(ECOM_PRETRAIN + 'cossim_final_embeddings_ecomm_train.parquet')
test_ecom = pd.read_parquet(ECOM_PRETRAIN + 'cossim_final_embeddings_ecomm_test.parquet')

train_ecom = train_ecom.sort_values(by=['variantid_1', 'variantid_2'])
test_ecom = test_ecom.sort_values(by=['variantid_1', 'variantid_2'])
train['ecom_cosine_sim'] = train_ecom['cosine_sim']
test['ecom_cosine_sim'] = test_ecom['cosine_sim']
del train_ecom, test_ecom

# kaggle top2 model, bugged
# train_top2kaggle = pd.read_parquet(ECOM_PRETRAIN + 'cossim_final_top2_kaggle_train.parquet')
# test_top2kaggle = pd.read_parquet(ECOM_PRETRAIN + 'cossim_final_top2_kaggle_test.parquet')

# train_top2kaggle = train_top2kaggle.sort_values(by=['variantid_1', 'variantid_2'])
# test_top2kaggle = test_top2kaggle.sort_values(by=['variantid_1', 'variantid_2'])
# train['top2kaggle_cosine_sim'] = train_top2kaggle['cosine_sim']
# test['top2kaggle_cosine_sim'] = test_top2kaggle['cosine_sim']
# del train_top2kaggle, test_top2kaggle

# kaggle top5 model
train_top5kaggle = pd.read_parquet(KAGGLE_TOP5 + 'cossim_final_concat_train.parquet')
test_top5kaggle = pd.read_parquet(KAGGLE_TOP5 + 'cossim_final_concat_test.parquet')

train_top5kaggle = train_top5kaggle.sort_values(by=['variantid_1', 'variantid_2'])
test_top5kaggle = test_top5kaggle.sort_values(by=['variantid_1', 'variantid_2'])
train['top5kaggle_cosine_sim'] = train_top5kaggle['cosine_sim']
test['top5kaggle_cosine_sim'] = test_top5kaggle['cosine_sim']
del train_top5kaggle, test_top5kaggle

# trained resnet
train_resnet = pd.read_parquet(RESNET_PATH + 'train_resnet_cossim.parquet')
test_resnet = pd.read_parquet(RESNET_PATH + 'test_resnet_cossim.parquet')

train_resnet = train_resnet.sort_values(by=['variantid_1', 'variantid_2']).reset_index(drop=True)
test_resnet = test_resnet.sort_values(by=['variantid_1', 'variantid_2']).reset_index(drop=True)
train['resnet_cosine_sim'] = train_resnet['cossims_resnet']
test['resnet_cosine_sim'] = test_resnet['cossims_resnet']
del train_resnet, test_resnet

gc.collect()

# --- TEXT FEATURES ---

# pretrain berta
berta_cossims_train_part1 = pd.read_parquet(BERTA_PATH + 'berta_cossims_train_part1.parquet')
berta_cossims_train_part2 = pd.read_parquet(BERTA_PATH + 'berta_cossims_train_part2.parquet')
berta_cossims_train = pd.concat([berta_cossims_train_part1, berta_cossims_train_part2])
berta_cossims_test = pd.read_parquet(BERTA_PATH + 'berta_cossims_test.parquet')

berta_cossims_train = berta_cossims_train.sort_values(by=['variantid_1', 'variantid_2'])
berta_cossims_test = berta_cossims_test.sort_values(by=['variantid_1', 'variantid_2'])
train['berta_cossim'] = berta_cossims_train['berta_cossim']
test['berta_cossim'] = berta_cossims_test['berta_cossim']

del berta_cossims_train_part1, berta_cossims_train_part2, berta_cossims_train, berta_cossims_test
gc.collect()

# trained rubert (cherez zhopu rukamu obuchen)
rubert_oof_fold0 = pd.read_parquet(RUBERT_TINY_OOF_PATH + 'name_desc_bert_fold0.parquet')
rubert_oof_fold1 = pd.read_parquet(RUBERT_TINY_OOF_PATH + 'name_desc_bert_fold1.parquet')
rubert_oof_fold2 = pd.read_parquet(RUBERT_TINY_OOF_PATH + 'name_desc_bert_fold2.parquet')
rubert_oof_fold3 = pd.read_parquet(RUBERT_TINY_OOF_PATH + 'name_desc_bert_fold3.parquet')
rubert_oof_fold4 = pd.read_parquet(RUBERT_TINY_OOF_PATH + 'name_desc_bert_fold4.parquet')

rubert_test_pred_fold0 = pd.read_parquet(RUBERT_TINY_PREDS_PATH + 'name_desc_rubert_tiny_turbo_2048_wce_0.parquet')
rubert_test_pred_fold1 = pd.read_parquet(RUBERT_TINY_PREDS_PATH + 'name_desc_rubert_tiny_turbo_2048_wce_1.parquet')
rubert_test_pred_fold2 = pd.read_parquet(RUBERT_TINY_PREDS_PATH + 'name_desc_rubert_tiny_turbo_2048_wce_2.parquet')
rubert_test_pred_fold3 = pd.read_parquet(RUBERT_TINY_PREDS_PATH + 'name_desc_rubert_tiny_turbo_2048_wce_3.parquet')
rubert_test_pred_fold4 = pd.read_parquet(RUBERT_TINY_PREDS_PATH + 'name_desc_rubert_tiny_turbo_2048_wce_4.parquet')

rubert_oof_fold0.rename(columns={'name_desc_bert_oof1': 'name_desc_rubert_tiny_turbo_2048_wce'}, inplace=True)
rubert_oof_fold1.rename(columns={'name_desc_bert_oof4': 'name_desc_rubert_tiny_turbo_2048_wce'}, inplace=True)
rubert_oof_fold2.rename(columns={'name_desc_bert_oof4': 'name_desc_rubert_tiny_turbo_2048_wce'}, inplace=True)
rubert_oof_fold3.rename(columns={'name_desc_bert_oof4': 'name_desc_rubert_tiny_turbo_2048_wce'}, inplace=True)
rubert_oof_fold4.rename(columns={'name_desc_bert_oof4': 'name_desc_rubert_tiny_turbo_2048_wce'}, inplace=True)

rubert_oof_fold0 = rubert_oof_fold0.sort_values(by=['variantid_1', 'variantid_2'])
rubert_oof_fold1 = rubert_oof_fold1.sort_values(by=['variantid_1', 'variantid_2'])
rubert_oof_fold2 = rubert_oof_fold2.sort_values(by=['variantid_1', 'variantid_2'])
rubert_oof_fold3 = rubert_oof_fold3.sort_values(by=['variantid_1', 'variantid_2'])
rubert_oof_fold4 = rubert_oof_fold4.sort_values(by=['variantid_1', 'variantid_2'])

rubert_oof = rubert_oof_fold0['name_desc_rubert_tiny_turbo_2048_wce'] + rubert_oof_fold1['name_desc_rubert_tiny_turbo_2048_wce'] + rubert_oof_fold2['name_desc_rubert_tiny_turbo_2048_wce'] + rubert_oof_fold3['name_desc_rubert_tiny_turbo_2048_wce'] + rubert_oof_fold4['name_desc_rubert_tiny_turbo_2048_wce']

rubert_test_pred_fold0 = rubert_test_pred_fold0.sort_values(by=['variantid_1', 'variantid_2'])
rubert_test_pred_fold1 = rubert_test_pred_fold1.sort_values(by=['variantid_1', 'variantid_2'])
rubert_test_pred_fold2 = rubert_test_pred_fold2.sort_values(by=['variantid_1', 'variantid_2'])
rubert_test_pred_fold3 = rubert_test_pred_fold3.sort_values(by=['variantid_1', 'variantid_2'])
rubert_test_pred_fold4 = rubert_test_pred_fold4.sort_values(by=['variantid_1', 'variantid_2'])

rubert_preds = (rubert_test_pred_fold0['name_desc_rubert_tiny_turbo_2048_wce_0'] + rubert_test_pred_fold1['name_desc_rubert_tiny_turbo_2048_wce_1'] + rubert_test_pred_fold2['name_desc_rubert_tiny_turbo_2048_wce_2'] + rubert_test_pred_fold3['name_desc_rubert_tiny_turbo_2048_wce_3'] + rubert_test_pred_fold4['name_desc_rubert_tiny_turbo_2048_wce_4']) / 5

train['name_desc_rubert_tiny_turbo_2048_wce'] = rubert_oof
test['name_desc_rubert_tiny_turbo_2048_wce'] = rubert_preds

del rubert_oof_fold0, rubert_oof_fold1, rubert_oof_fold2, rubert_oof_fold3, rubert_oof_fold4
del rubert_oof
del rubert_test_pred_fold0, rubert_test_pred_fold1, rubert_test_pred_fold2, rubert_test_pred_fold3, rubert_test_pred_fold4
del rubert_preds
gc.collect()

# pretrain e5large
e5large_cossims_train_part1 = pd.read_parquet(E5LARGE_OOF_PATH + 'e5large_cossims_fold0.parquet')
e5large_cossims_train_part2 = pd.read_parquet(E5LARGE_OOF_PATH + 'e5large_cossims_fold1.parquet')
e5large_cossims_train_part3 = pd.read_parquet(E5LARGE_OOF_PATH + 'e5large_cossims_fold2.parquet')
e5large_cossims_train_part4 = pd.read_parquet(E5LARGE_OOF_PATH + 'e5large_cossims_fold3.parquet')
e5large_cossims_train_part5 = pd.read_parquet(E5LARGE_OOF_PATH + 'e5large_cossims_fold4.parquet')

e5large_cossims_train = pd.concat([
    e5large_cossims_train_part1,
    e5large_cossims_train_part2,
    e5large_cossims_train_part3,
    e5large_cossims_train_part4,
    e5large_cossims_train_part5
])
e5large_cossims_train = e5large_cossims_train.sort_values(by=['variantid_1', 'variantid_2'])

del e5large_cossims_train_part1, e5large_cossims_train_part2, e5large_cossims_train_part3, e5large_cossims_train_part4, e5large_cossims_train_part5
gc.collect()

e5large_cossims_test_part1 = pd.read_parquet(E5LARGE_PREDS_PATH + 'e5large_cossims_part1.parquet')
e5large_cossims_test_part2 = pd.read_parquet(E5LARGE_PREDS_PATH + 'e5large_cossims_part2.parquet')

e5large_cossims_test = pd.concat([
    e5large_cossims_test_part1,
    e5large_cossims_test_part2,
])
e5large_cossims_test = e5large_cossims_test.sort_values(by=['variantid_1', 'variantid_2'])

del e5large_cossims_test_part1, e5large_cossims_test_part2
gc.collect()

train['e5large_cossim'] = e5large_cossims_train['e5large_cossim']
test['e5large_cossim'] = e5large_cossims_test['e5large_cossim']

del e5large_cossims_train, e5large_cossims_test
gc.collect()

# rev trained rubert (cherez zhopu rukamu obuchen)
rubert_oof_rev = pd.read_parquet(REV_RUBERT_TINY_OOF_PATH + 'name_desc_bert_oof_rev.parquet')
rubert_preds_rev = pd.read_parquet(REV_RUBERT_TINY_PREDS_PATH + 'name_desc_bert_preds_rev.parquet')

rubert_oof_rev = rubert_oof_rev.sort_values(by=['variantid_1', 'variantid_2'])
rubert_preds_rev = rubert_preds_rev.sort_values(by=['variantid_1', 'variantid_2'])
train['name_desc_rubert_tiny_turbo_2048_wce_rev'] = rubert_oof_rev['name_desc_bert_oof_rev']
test['name_desc_rubert_tiny_turbo_2048_wce_rev'] = rubert_preds_rev['name_desc_bert_preds_rev']
del rubert_oof_rev, rubert_preds_rev

gc.collect()

# rubert tta
if USE_MEAN_BASE_AND_REV:
    train['name_desc_rubert_tiny_turbo_2048_wce_tta'] = (train['name_desc_rubert_tiny_turbo_2048_wce'] + train['name_desc_rubert_tiny_turbo_2048_wce_rev']) / 2
    test['name_desc_rubert_tiny_turbo_2048_wce_tta'] = (test['name_desc_rubert_tiny_turbo_2048_wce'] + test['name_desc_rubert_tiny_turbo_2048_wce_rev']) / 2
    del train['name_desc_rubert_tiny_turbo_2048_wce'], train['name_desc_rubert_tiny_turbo_2048_wce_rev']
    del test['name_desc_rubert_tiny_turbo_2048_wce'], test['name_desc_rubert_tiny_turbo_2048_wce_rev']
    gc.collect()

# pretrain userbge
userbge_cossims_train = pd.DataFrame()
userbge_cossims_test = pd.DataFrame()

for i in range(1, 16):
    curr_df = pd.read_parquet(USERBGE_COSSIMS_PATH + f'userbge_cossims_train_part{i}.parquet')
    userbge_cossims_train = pd.concat([userbge_cossims_train, curr_df])
    print(f'{userbge_cossims_train.shape=}')
userbge_cossims_train = userbge_cossims_train.sort_values(by=['variantid_1', 'variantid_2'])

for i in range(1, 5):
    curr_df = pd.read_parquet(USERBGE_COSSIMS_PATH + f'/userbge_cossims_test_part{i}.parquet')
    userbge_cossims_test = pd.concat([userbge_cossims_test, curr_df])
    print(f'{userbge_cossims_test.shape=}')
userbge_cossims_test = userbge_cossims_test.sort_values(by=['variantid_1', 'variantid_2'])

del curr_df
gc.collect()

train['userbge_cossim'] = userbge_cossims_train['userbge_cossim']
test['userbge_cossim'] = userbge_cossims_test['userbge_cossim']

del userbge_cossims_train, userbge_cossims_test
gc.collect()

# --- тут именно до сэмпла! ---

# trained rubert-base test preds
rubert_base_pred = pd.read_csv(
    RUBERT_BASE_TEST_PREDS_PATH + 'rubert_ZAEBAL_SUKA.csv'
).rename(columns={'base_id': 'variantid_1', 'cand_id': 'variantid_2'})
rubert_base_pred = rubert_base_pred.sort_values(by=['variantid_1', 'variantid_2']).reset_index(drop=True)
test['rubert_base_trained'] = rubert_base_pred['probability']

rubert_base_pred_rev = pd.read_csv(
    RUBERT_BASE_TEST_PREDS_REV_PATH + 'rubert_ZAEBAL_SUKA_REV.csv'
).rename(columns={'base_id': 'variantid_1', 'cand_id': 'variantid_2'})
rubert_base_pred_rev = rubert_base_pred_rev.sort_values(by=['variantid_1', 'variantid_2']).reset_index(drop=True)
test['rubert_base_trained_rev'] = rubert_base_pred_rev['probability']

del rubert_base_pred, rubert_base_pred_rev
gc.collect()

# rubert base test tta
if USE_MEAN_BASE_AND_REV:
    test['rubert_base_trained_tta'] = (test['rubert_base_trained'] + test['rubert_base_trained_rev']) / 2
    del test['rubert_base_trained'],  test['rubert_base_trained_rev']
    gc.collect()

# --- sample!!! ---
train = train.sample(len(train), random_state=42)

# trained ft (именно после сэмпла! тут уже все карты в нужном порядке разложены)
fasttext_train = joblib.load(FT_PREDS_PATH + 'oof_preds.pkl')
fasttext_test = joblib.load(FT_PREDS_PATH + 'test_preds.pkl')
fasttext_train_rev = joblib.load(FT_PREDS_PATH + 'oof_preds_rev.pkl')
fasttext_test_rev = joblib.load(FT_PREDS_PATH + 'test_preds_rev.pkl')

train['fasttext'] = fasttext_train
test['fasttext'] = fasttext_test
train['fasttext_rev'] = fasttext_train_rev
test['fasttext_rev'] = fasttext_test_rev

del fasttext_train, fasttext_test, fasttext_train_rev, fasttext_test_rev
gc.collect()

# fasttext tta
if USE_MEAN_BASE_AND_REV:
    train['fasttext_tta'] = (train['fasttext'] + train['fasttext_rev']) / 2
    test['fasttext_tta'] = (test['fasttext'] + test['fasttext_rev']) / 2
    del train['fasttext'], train['fasttext_rev']
    del test['fasttext'], test['fasttext_rev']
    gc.collect()

# trained rubert-base train (именно после сэмпла! тут уже все карты в нужном порядке разложены)
rubert_base_oof = joblib.load(RUBERT_BASE_OOF_PATH + 'oof_preds_rubert_base.joblib')
train['rubert_base_trained'] = rubert_base_oof

rubert_base_oof_rev = joblib.load(RUBERT_BASE_OOF_REV_PATH + 'oof_preds_rubert_base_rev.joblib')
train['rubert_base_trained_rev'] = rubert_base_oof_rev

del rubert_base_oof, rubert_base_oof_rev
gc.collect()

# rubert train test tta
if USE_MEAN_BASE_AND_REV:
    train['rubert_base_trained_tta'] = (train['rubert_base_trained'] + train['rubert_base_trained_rev']) / 2
    del train['rubert_base_trained'],  train['rubert_base_trained_rev']
    gc.collect()

# add rouge features (w/ tta)

train_rouge = pd.read_csv('rouge-avito/train_rouge.csv')
test_rouge = pd.read_csv('/rouge-avito/test_rouge.csv')

train_rouge = train_rouge.sort_values(by=['variantid_1', 'variantid_2']).reset_index(drop=True)
test_rouge = test_rouge.sort_values(by=['variantid_1', 'variantid_2']).reset_index(drop=True)

train['rouge_1'] = train_rouge['rouge_1']
train['rouge_2'] = train_rouge['rouge_2']
train['rouge_3'] = train_rouge['rouge_3']
train['rouge_4'] = train_rouge['rouge_4']
train['rouge_s4'] = train_rouge['rouge_s4']
train['rouge_su4'] = train_rouge['rouge_su4']

test['rouge_1'] = test_rouge['rouge_1']
test['rouge_2'] = test_rouge['rouge_2']
test['rouge_3'] = test_rouge['rouge_3']
test['rouge_4'] = test_rouge['rouge_4']
test['rouge_s4'] = test_rouge['rouge_s4']
test['rouge_su4'] = test_rouge['rouge_su4']

userbge_cossims_train.shape=(125303, 3)
userbge_cossims_train.shape=(250606, 3)
userbge_cossims_train.shape=(375909, 3)
userbge_cossims_train.shape=(501212, 3)
userbge_cossims_train.shape=(626515, 3)
userbge_cossims_train.shape=(751818, 3)
userbge_cossims_train.shape=(877121, 3)
userbge_cossims_train.shape=(1002424, 3)
userbge_cossims_train.shape=(1127727, 3)
userbge_cossims_train.shape=(1253030, 3)
userbge_cossims_train.shape=(1378333, 3)
userbge_cossims_train.shape=(1503636, 3)
userbge_cossims_train.shape=(1628939, 3)
userbge_cossims_train.shape=(1754242, 3)
userbge_cossims_train.shape=(1879555, 3)
userbge_cossims_test.shape=(125000, 3)
userbge_cossims_test.shape=(250000, 3)
userbge_cossims_test.shape=(375000, 3)
userbge_cossims_test.shape=(500000, 3)


In [10]:
# pl_test = test.copy()
# pl_test = pl_test.sort_values(by=['variantid_1', 'variantid_2'])

# top_submit = pd.read_csv('/kaggle/input/top-sub-19-05/zhiv_el_trans_coef_1_25_models_0_5baselama_0_5(0_3catlamas_0_7catcbs)(1).csv')
# top_submit = top_submit.sort_values(by=['base_id', 'cand_id'])

# pl_test = pl_test.reset_index(drop=True)
# top_submit = top_submit.reset_index(drop=True)

# pl_test['is_double'] = top_submit['probability']

# pl_test = pl_test[pl_test['unique_cat_1'].isin(['Услуги_Услуги', 'Недвижимость_Недвижимость', 'Работа_Работа'])]

# pl_test = pl_test[~pl_test['is_double'].between(0.02, 0.98)]
# pl_test['is_double'] = np.where(
#     pl_test['is_double'] < 0.02, 0, 
#     np.where(pl_test['is_double'] > 0.98, 1, pl_test['is_double'])
# )
# pl_test['group_id'] = 777777

# train = pd.concat([train, pl_test], axis=0).reset_index(drop=True)

In [11]:
train.drop(columns=['variantid_1', 'variantid_2', 'base_title_image', 'cand_title_image'], axis=1, inplace=True)
test.drop(columns=['variantid_1', 'variantid_2', 'base_title_image', 'cand_title_image'], axis=1, inplace=True)
train.drop(columns=['action_date'], inplace=True)

In [12]:
cat_features = [
    'is_same_location',
    'is_same_region',
    'category_level_1_match',
    'category_level_2_match',
    'category_level_3_match',
    'category_level_4_match',
    'category_level_3_fillness',
    'category_level_4_fillness',
    'n_images_fillness',
    'unique_cat_1',
    'unique_cat_2',
    'unique_cat_3',
    'unique_cat_4',
]

train[cat_features] = train[cat_features].astype(str)
test[cat_features] = test[cat_features].astype(str)

In [13]:
# cat_mapping = {
#     '1.0': 'True',
#     '0.0': 'False'
# }

In [14]:
# test['category_level_1_match'] = test['category_level_1_match'].map(cat_mapping)
# test['category_level_2_match'] = test['category_level_2_match'].map(cat_mapping)

In [15]:
features = [col for col in train.columns if col not in ['group_id', 'is_double']]
target = 'is_double'

In [16]:
gc.collect()

0

In [17]:
len(features)

470

In [18]:
# # обычный кб

# gkf = StratifiedGroupKFold(n_splits=5) # GroupKFold(n_splits=5)

# oof_preds_cb = np.zeros(len(train))

# for fold, (train_idx, val_idx) in enumerate(gkf.split(train, train[target], groups=train['group_id'])):
#     print(f"fold {fold+1}...")
    
#     X_train, X_val = train.iloc[train_idx][features], train.iloc[val_idx][features]
#     y_train, y_val = train.iloc[train_idx][target], train.iloc[val_idx][target]

#     model_cb = CatBoostClassifier(
#         iterations=50000,
#         learning_rate=0.1,
#         cat_features=cat_features,
#         # auto_class_weights='Balanced',
#         # loss_function='Focal:focal_alpha=0.75;focal_gamma=2',
#         eval_metric='PRAUC',
#         random_seed=42+fold,
#         early_stopping_rounds=2500,
#         task_type='GPU',
#         verbose=100
#     )
    
#     model_cb.fit(X_train, y_train, eval_set=(X_val, y_val))
#     oof_preds_cb[val_idx] = model_cb.predict_proba(X_val)[:, 1]

#     joblib.dump(model_cb, f"catboost_model_fold_{fold+1}.joblib")

#     del model_cb, X_train, X_val, y_train, y_val
#     gc.collect()

# joblib.dump(oof_preds_cb, "oof_preds_cb_5folds.joblib")

In [19]:
# обычный кб но 5 сидов

# gkf = StratifiedGroupKFold(n_splits=5)
# oof_preds_cb = np.zeros(len(train))

# for fold, (train_idx, val_idx) in enumerate(gkf.split(train, train[target], groups=train['group_id'])):
#     print(f"Fold {fold+1}...")
    
#     X_train, X_val = train.iloc[train_idx][features], train.iloc[val_idx][features]
#     y_train, y_val = train.iloc[train_idx][target], train.iloc[val_idx][target]
    
#     fold_val_preds = np.zeros(len(X_val))

#     for seed in range(5):
#         print(f"  Seed {seed+1}...")
#         model_cb = CatBoostClassifier(
#             iterations=50000,
#             learning_rate=0.1,
#             cat_features=cat_features,
#             eval_metric='PRAUC',
#             random_seed=42+fold*10+seed, 
#             early_stopping_rounds=2500,
#             task_type='GPU',
#             verbose=100
#         )
        
#         model_cb.fit(X_train, y_train, eval_set=(X_val, y_val))
#         fold_val_preds += model_cb.predict_proba(X_val)[:, 1]
        
#         joblib.dump(model_cb, f"catboost_model_fold_{fold+1}_seed_{seed+1}.joblib")
        
#         del model_cb
#         gc.collect()
    
#     oof_preds_cb[val_idx] = fold_val_preds / 5

# joblib.dump(oof_preds_cb, "oof_preds_cb_5folds_5seeds.joblib")

In [20]:
# precision, recall, thresholds = precision_recall_curve(train[target], oof_preds_cb)
# oof_prauc_cb = auc(recall, precision)
# oof_prauc_cb

In [21]:
def predict_ensemble(test_data, models, features):
    predictions = np.zeros((len(test_data), len(models)))
    
    for i, model in tqdm(enumerate(models)):
        predictions[:, i] = model.predict_proba(test_data[features])[:, 1]
    
    ensemble_pred = predictions.mean(axis=1)
    
    return ensemble_pred

In [22]:
# models_cb = [joblib.load(f'/kaggle/working/catboost_model_fold_{fold+1}.joblib') for fold in range(5)]
# models_cb = []
# for fold in range(5):
#     for seed in range(5):
#         model_path = f'/kaggle/input/catboost-123-models/catboost_model_fold_{fold+1}_seed_{seed+1}.joblib'
#         models_cb.append(joblib.load(model_path))

In [23]:
# ensemble_predictions_cb = predict_ensemble(test, models_cb, features)

In [24]:
# joblib.dump(ensemble_predictions_cb, f"ensemble_predictions_cb.joblib")

In [25]:
# fig, axes = plt.subplots(5, 1, figsize=(12, 36))

# for i, model in enumerate(models_cb):
#     feature_importances = model.get_feature_importance()
#     feature_names = features
#     sorted_idx = np.argsort(feature_importances)[::-1][:50]
#     top_features = np.array(feature_names)[sorted_idx]
#     top_importances = feature_importances[sorted_idx]
    
#     axes[i].barh(top_features[::-1], top_importances[::-1])
#     axes[i].set_title(f'Feature Importance Top 50 - Fold {i+1}')
#     axes[i].set_xlabel('Importance')
#     axes[i].set_ylabel('Feature')

# plt.tight_layout()
# plt.show()

In [26]:
# кб по категориям 1 уровня

categories = train['unique_cat_1'].unique()
categories = [cat.split('_')[0] for cat in categories if cat.split('_')[0] == cat.split('_')[1]]

gkf = StratifiedGroupKFold(n_splits=5) # GroupKFold(n_splits=5)

cat_oof_preds_cb = {}
oof_metrics_cat = {}

progress_bar = tqdm(categories)

for category in progress_bar:
    progress_bar.set_description(f"{category=}")

    train_category = train[train['unique_cat_1'] == f'{category}_{category}']
    test_category = test[test['unique_cat_1'] == f'{category}_{category}']

    cat_oof_preds_cb[category] = np.zeros(len(train_category))

    for fold, (train_idx, val_idx) in enumerate(gkf.split(train_category, train_category[target], groups=train_category['group_id'])):
        print(f"fold {fold+1}...")
    
        X_train_cat, X_val_cat = train_category.iloc[train_idx][features], train_category.iloc[val_idx][features]
        y_train_cat, y_val_cat = train_category.iloc[train_idx][target], train_category.iloc[val_idx][target]

        model_fold_cb = CatBoostClassifier(
            iterations=50000,
            # learning_rate=0.1,
            cat_features=cat_features,
            # auto_class_weights='Balanced',
            eval_metric='PRAUC',
            random_seed=69+fold,
            early_stopping_rounds=1000,
            task_type='GPU',
            verbose=100
        )

        model_fold_cb.fit(X_train_cat, y_train_cat, eval_set=(X_val_cat, y_val_cat))
        cat_oof_preds_cb[category][val_idx] = model_fold_cb.predict_proba(X_val_cat)[:, 1]

        joblib.dump(model_fold_cb, f"{category}_catboost_model_fold_{fold+1}.joblib")

        del model_fold_cb
        gc.collect()

    precision, recall, thresholds = precision_recall_curve(train_category[target], cat_oof_preds_cb[category])
    cat_oof_prauc_cb = auc(recall, precision)
    print(f'{cat_oof_prauc_cb=}')

    oof_metrics_cat[category] = {'catboost': cat_oof_prauc_cb}

    curr_cat_models = []
    for fold in range(5):
        model_cb = joblib.load(f"{category}_catboost_model_fold_{fold+1}.joblib")
        curr_cat_models.append(model_cb)

    ensemble_predictions_cb = predict_ensemble(test_category, curr_cat_models, features)
    joblib.dump(ensemble_predictions_cb, f"{category}_ensemble_predictions_cb.joblib")

    del ensemble_predictions_cb, model_cb, curr_cat_models
    gc.collect()

joblib.dump(cat_oof_preds_cb, "oof_preds_cb_5folds_cat1lvl.joblib")

category='Готовый бизнес и оборудование':   0%|          | 0/7 [00:00<?, ?it/s]

fold 1...
Learning rate set to 0.009044


Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.4627636	test: 0.7385600	best: 0.7385600 (0)	total: 407ms	remaining: 5h 38m 52s
100:	learn: 0.5947639	test: 0.7856841	best: 0.7856841 (100)	total: 3.38s	remaining: 27m 50s
200:	learn: 0.6320305	test: 0.7888772	best: 0.7889414 (198)	total: 6.42s	remaining: 26m 30s
300:	learn: 0.6568757	test: 0.7911281	best: 0.7913776 (275)	total: 9.41s	remaining: 25m 54s
400:	learn: 0.6756496	test: 0.7912112	best: 0.7914934 (339)	total: 12.4s	remaining: 25m 29s
500:	learn: 0.6901884	test: 0.7908997	best: 0.7916216 (428)	total: 15.3s	remaining: 25m 11s
600:	learn: 0.7014128	test: 0.7907505	best: 0.7916216 (428)	total: 18.4s	remaining: 25m 8s
700:	learn: 0.7112560	test: 0.7917445	best: 0.7918474 (694)	total: 21.7s	remaining: 25m 23s
800:	learn: 0.7199480	test: 0.7926989	best: 0.7927471 (798)	total: 24.8s	remaining: 25m 25s
900:	learn: 0.7268824	test: 0.7932790	best: 0.7932828 (890)	total: 28s	remaining: 25m 26s
1000:	learn: 0.7329153	test: 0.7940448	best: 0.7940885 (996)	total: 31.1s	remaining:

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.5672228	test: 0.4533943	best: 0.4533943 (0)	total: 48.9ms	remaining: 40m 43s
100:	learn: 0.6439662	test: 0.4973976	best: 0.4974665 (98)	total: 3.15s	remaining: 25m 56s
200:	learn: 0.6765060	test: 0.5170552	best: 0.5171921 (199)	total: 6.33s	remaining: 26m 9s
300:	learn: 0.6985274	test: 0.5248396	best: 0.5248396 (300)	total: 9.62s	remaining: 26m 28s
400:	learn: 0.7156800	test: 0.5273569	best: 0.5279917 (390)	total: 13s	remaining: 26m 51s
500:	learn: 0.7294381	test: 0.5289335	best: 0.5289428 (499)	total: 16.2s	remaining: 26m 42s
600:	learn: 0.7398397	test: 0.5310780	best: 0.5310780 (600)	total: 19.4s	remaining: 26m 33s
700:	learn: 0.7483359	test: 0.5305722	best: 0.5314076 (664)	total: 22.5s	remaining: 26m 23s
800:	learn: 0.7559947	test: 0.5314099	best: 0.5314675 (779)	total: 25.4s	remaining: 26m 2s
900:	learn: 0.7624092	test: 0.5318372	best: 0.5321942 (862)	total: 28.3s	remaining: 25m 44s
1000:	learn: 0.7679679	test: 0.5316476	best: 0.5321942 (862)	total: 31.3s	remaining: 25m

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.5713073	test: 0.4467728	best: 0.4467728 (0)	total: 55.9ms	remaining: 46m 33s
100:	learn: 0.6595376	test: 0.4654086	best: 0.4702318 (88)	total: 3.12s	remaining: 25m 39s
200:	learn: 0.6896342	test: 0.4960958	best: 0.4961067 (198)	total: 6.17s	remaining: 25m 28s
300:	learn: 0.7094209	test: 0.5040710	best: 0.5040710 (300)	total: 9.32s	remaining: 25m 38s
400:	learn: 0.7240231	test: 0.5104233	best: 0.5105095 (397)	total: 12.5s	remaining: 25m 47s
500:	learn: 0.7356343	test: 0.5182118	best: 0.5182118 (500)	total: 15.7s	remaining: 25m 48s
600:	learn: 0.7448487	test: 0.5254666	best: 0.5254666 (600)	total: 18.9s	remaining: 25m 49s
700:	learn: 0.7529245	test: 0.5299854	best: 0.5299854 (700)	total: 21.8s	remaining: 25m 30s
800:	learn: 0.7599789	test: 0.5346331	best: 0.5346331 (800)	total: 24.7s	remaining: 25m 14s
900:	learn: 0.7665987	test: 0.5384328	best: 0.5386463 (894)	total: 27.7s	remaining: 25m 9s
1000:	learn: 0.7719843	test: 0.5418965	best: 0.5420487 (996)	total: 31s	remaining: 25

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.5704197	test: 0.3575032	best: 0.3575032 (0)	total: 47.5ms	remaining: 39m 35s
100:	learn: 0.6716778	test: 0.4496903	best: 0.4496903 (100)	total: 3.09s	remaining: 25m 26s
200:	learn: 0.7040559	test: 0.4585108	best: 0.4588034 (196)	total: 6.3s	remaining: 25m 59s
300:	learn: 0.7230555	test: 0.4602139	best: 0.4611478 (257)	total: 9.54s	remaining: 26m 15s
400:	learn: 0.7377798	test: 0.4623895	best: 0.4627170 (391)	total: 12.8s	remaining: 26m 23s
500:	learn: 0.7489995	test: 0.4644106	best: 0.4644120 (498)	total: 16s	remaining: 26m 18s
600:	learn: 0.7587066	test: 0.4664429	best: 0.4666421 (599)	total: 19.2s	remaining: 26m 19s
700:	learn: 0.7665361	test: 0.4687888	best: 0.4689982 (690)	total: 22.2s	remaining: 25m 59s
800:	learn: 0.7731453	test: 0.4711408	best: 0.4711408 (800)	total: 25.2s	remaining: 25m 47s
900:	learn: 0.7786636	test: 0.4732031	best: 0.4732602 (894)	total: 28.3s	remaining: 25m 44s
1000:	learn: 0.7837310	test: 0.4749897	best: 0.4750547 (999)	total: 31.5s	remaining: 2

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.5259169	test: 0.3116763	best: 0.3116763 (0)	total: 48.4ms	remaining: 40m 21s
100:	learn: 0.6608010	test: 0.4543478	best: 0.4546195 (98)	total: 3.16s	remaining: 25m 59s
200:	learn: 0.6886373	test: 0.4655841	best: 0.4657497 (199)	total: 6.24s	remaining: 25m 46s
300:	learn: 0.7084014	test: 0.4699348	best: 0.4700175 (299)	total: 9.21s	remaining: 25m 20s
400:	learn: 0.7232112	test: 0.4735651	best: 0.4735651 (400)	total: 12.2s	remaining: 25m 5s
500:	learn: 0.7348684	test: 0.4763755	best: 0.4767043 (485)	total: 15.2s	remaining: 24m 58s
600:	learn: 0.7440693	test: 0.4788864	best: 0.4788864 (600)	total: 18.3s	remaining: 25m 6s
700:	learn: 0.7522609	test: 0.4834308	best: 0.4834308 (700)	total: 21.5s	remaining: 25m 11s
800:	learn: 0.7599077	test: 0.4876345	best: 0.4877640 (794)	total: 24.7s	remaining: 25m 14s
900:	learn: 0.7661634	test: 0.4895645	best: 0.4895853 (899)	total: 27.9s	remaining: 25m 19s
1000:	learn: 0.7717175	test: 0.4911438	best: 0.4912085 (999)	total: 31s	remaining: 25m


0it [00:00, ?it/s][A
1it [00:02,  2.95s/it][A
2it [00:05,  2.85s/it][A
3it [00:08,  2.99s/it][A
4it [00:12,  3.10s/it][A
5it [00:15,  3.02s/it]
category='Для дома и дачи':  14%|█▍        | 1/7 [28:18<2:49:53, 1698.89s/it]              

fold 1...
Learning rate set to 0.008555


Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.5489233	test: 0.6062254	best: 0.6062254 (0)	total: 93.9ms	remaining: 1h 18m 17s
100:	learn: 0.6011040	test: 0.6657857	best: 0.6658529 (99)	total: 5.03s	remaining: 41m 25s
200:	learn: 0.6202160	test: 0.6742062	best: 0.6742062 (200)	total: 10s	remaining: 41m 25s
300:	learn: 0.6343464	test: 0.6805459	best: 0.6806573 (297)	total: 15.2s	remaining: 41m 55s
400:	learn: 0.6450617	test: 0.6852127	best: 0.6852127 (400)	total: 20.6s	remaining: 42m 24s
500:	learn: 0.6535340	test: 0.6885696	best: 0.6885696 (500)	total: 25.8s	remaining: 42m 27s
600:	learn: 0.6614945	test: 0.6921546	best: 0.6922315 (598)	total: 31.1s	remaining: 42m 34s
700:	learn: 0.6678342	test: 0.6932938	best: 0.6932938 (700)	total: 36.3s	remaining: 42m 31s
800:	learn: 0.6737511	test: 0.6947999	best: 0.6947999 (800)	total: 41.5s	remaining: 42m 31s
900:	learn: 0.6792564	test: 0.6962634	best: 0.6963066 (896)	total: 46.8s	remaining: 42m 31s
1000:	learn: 0.6838169	test: 0.6969902	best: 0.6969902 (1000)	total: 52s	remaining:

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.5556658	test: 0.5708051	best: 0.5708051 (0)	total: 78.2ms	remaining: 1h 5m 9s
100:	learn: 0.6219119	test: 0.6131897	best: 0.6131897 (100)	total: 4.98s	remaining: 41m 2s
200:	learn: 0.6456874	test: 0.6251482	best: 0.6251797 (199)	total: 10.2s	remaining: 42m 3s
300:	learn: 0.6600763	test: 0.6334362	best: 0.6334362 (300)	total: 15.5s	remaining: 42m 36s
400:	learn: 0.6715287	test: 0.6389194	best: 0.6389194 (400)	total: 20.9s	remaining: 43m 9s
500:	learn: 0.6797403	test: 0.6417114	best: 0.6417834 (498)	total: 26.3s	remaining: 43m 18s
600:	learn: 0.6868930	test: 0.6436736	best: 0.6436736 (600)	total: 31.5s	remaining: 43m 8s
700:	learn: 0.6935572	test: 0.6453859	best: 0.6453859 (700)	total: 36.5s	remaining: 42m 48s
800:	learn: 0.6993773	test: 0.6468127	best: 0.6468127 (800)	total: 41.8s	remaining: 42m 46s
900:	learn: 0.7044973	test: 0.6473602	best: 0.6474305 (883)	total: 46.7s	remaining: 42m 22s
1000:	learn: 0.7092037	test: 0.6483501	best: 0.6484176 (986)	total: 51.6s	remaining: 4

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.5538060	test: 0.5060615	best: 0.5060615 (0)	total: 66.4ms	remaining: 55m 19s
100:	learn: 0.6292088	test: 0.5744958	best: 0.5744958 (100)	total: 5.18s	remaining: 42m 38s
200:	learn: 0.6542003	test: 0.5828078	best: 0.5828078 (200)	total: 10.1s	remaining: 41m 51s
300:	learn: 0.6698165	test: 0.5866911	best: 0.5867951 (296)	total: 15.6s	remaining: 42m 53s
400:	learn: 0.6806280	test: 0.5889326	best: 0.5889556 (399)	total: 20.9s	remaining: 43m 4s
500:	learn: 0.6892364	test: 0.5906372	best: 0.5906372 (500)	total: 26.3s	remaining: 43m 14s
600:	learn: 0.6961544	test: 0.5925477	best: 0.5925477 (600)	total: 31.6s	remaining: 43m 14s
700:	learn: 0.7019631	test: 0.5943100	best: 0.5943100 (700)	total: 36.8s	remaining: 43m 6s
800:	learn: 0.7073612	test: 0.5955385	best: 0.5955431 (799)	total: 42s	remaining: 42m 59s
900:	learn: 0.7124231	test: 0.5971339	best: 0.5971339 (900)	total: 47.1s	remaining: 42m 48s
1000:	learn: 0.7166294	test: 0.5982727	best: 0.5982727 (1000)	total: 52.2s	remaining: 4

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.5571427	test: 0.5118571	best: 0.5118571 (0)	total: 60.8ms	remaining: 50m 38s
100:	learn: 0.6245956	test: 0.5721153	best: 0.5721957 (99)	total: 4.93s	remaining: 40m 35s
200:	learn: 0.6473960	test: 0.5838366	best: 0.5838366 (200)	total: 10.1s	remaining: 41m 42s
300:	learn: 0.6625407	test: 0.5890364	best: 0.5890364 (300)	total: 15.4s	remaining: 42m 26s
400:	learn: 0.6725361	test: 0.5941848	best: 0.5942408 (399)	total: 20.6s	remaining: 42m 26s
500:	learn: 0.6807939	test: 0.5992956	best: 0.5992956 (500)	total: 26s	remaining: 42m 45s
600:	learn: 0.6880484	test: 0.6029661	best: 0.6029661 (600)	total: 31.2s	remaining: 42m 46s
700:	learn: 0.6938724	test: 0.6052412	best: 0.6053712 (691)	total: 36.3s	remaining: 42m 31s
800:	learn: 0.6991661	test: 0.6080041	best: 0.6080041 (800)	total: 40.9s	remaining: 41m 53s
900:	learn: 0.7040340	test: 0.6110649	best: 0.6110649 (900)	total: 46.1s	remaining: 41m 52s
1000:	learn: 0.7084884	test: 0.6134438	best: 0.6135339 (997)	total: 51.2s	remaining: 4

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.5555032	test: 0.4883733	best: 0.4883733 (0)	total: 64.9ms	remaining: 54m 5s
100:	learn: 0.6260012	test: 0.5621995	best: 0.5621995 (100)	total: 4.94s	remaining: 40m 42s
200:	learn: 0.6507792	test: 0.5716254	best: 0.5716254 (200)	total: 10.1s	remaining: 41m 37s
300:	learn: 0.6664257	test: 0.5778845	best: 0.5778845 (300)	total: 15.4s	remaining: 42m 21s
400:	learn: 0.6777044	test: 0.5829174	best: 0.5829174 (400)	total: 20.8s	remaining: 42m 48s
500:	learn: 0.6858672	test: 0.5866012	best: 0.5866012 (500)	total: 26s	remaining: 42m 50s
600:	learn: 0.6930719	test: 0.5896911	best: 0.5896911 (600)	total: 30.8s	remaining: 42m 8s
700:	learn: 0.6998145	test: 0.5910428	best: 0.5911213 (677)	total: 35.6s	remaining: 41m 40s
800:	learn: 0.7055735	test: 0.5926264	best: 0.5926264 (800)	total: 40.7s	remaining: 41m 38s
900:	learn: 0.7107645	test: 0.5940706	best: 0.5942244 (881)	total: 45.7s	remaining: 41m 31s
1000:	learn: 0.7156386	test: 0.5950054	best: 0.5950905 (987)	total: 50.9s	remaining: 41


0it [00:00, ?it/s][A
1it [00:14, 14.69s/it][A
2it [00:27, 13.42s/it][A
3it [00:39, 13.01s/it][A
4it [00:54, 13.68s/it][A
5it [01:07, 13.56s/it]
category='Личные вещи':  29%|██▊       | 2/7 [1:54:32<5:11:55, 3743.11s/it]    

fold 1...
Learning rate set to 0.008499


Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.3943392	test: 0.3508704	best: 0.3508704 (0)	total: 62.9ms	remaining: 52m 22s
100:	learn: 0.5576797	test: 0.5329775	best: 0.5350434 (55)	total: 4.79s	remaining: 39m 29s
200:	learn: 0.5819795	test: 0.5387492	best: 0.5391305 (194)	total: 10.1s	remaining: 41m 43s
300:	learn: 0.5983119	test: 0.5472726	best: 0.5472760 (299)	total: 15.3s	remaining: 42m 10s
400:	learn: 0.6106934	test: 0.5582688	best: 0.5582688 (400)	total: 20.7s	remaining: 42m 42s
500:	learn: 0.6203110	test: 0.5672934	best: 0.5674572 (495)	total: 26s	remaining: 42m 46s
600:	learn: 0.6291691	test: 0.5715093	best: 0.5715602 (598)	total: 31.3s	remaining: 42m 55s
700:	learn: 0.6366938	test: 0.5766684	best: 0.5766684 (700)	total: 36.4s	remaining: 42m 38s
800:	learn: 0.6433013	test: 0.5801228	best: 0.5801542 (798)	total: 41.6s	remaining: 42m 33s
900:	learn: 0.6495134	test: 0.5819487	best: 0.5819752 (896)	total: 46.7s	remaining: 42m 27s
1000:	learn: 0.6553863	test: 0.5844739	best: 0.5845004 (998)	total: 51.6s	remaining: 4

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.4751445	test: 0.4391512	best: 0.4391512 (0)	total: 111ms	remaining: 1h 32m 12s
100:	learn: 0.5690729	test: 0.5152994	best: 0.5152994 (100)	total: 5.22s	remaining: 42m 58s
200:	learn: 0.5936932	test: 0.5317763	best: 0.5317763 (200)	total: 10.5s	remaining: 43m 23s
300:	learn: 0.6084596	test: 0.5383925	best: 0.5384252 (299)	total: 15.9s	remaining: 43m 44s
400:	learn: 0.6195904	test: 0.5423912	best: 0.5423912 (400)	total: 21.3s	remaining: 43m 54s
500:	learn: 0.6290778	test: 0.5459038	best: 0.5459038 (500)	total: 26.6s	remaining: 43m 52s
600:	learn: 0.6369521	test: 0.5478477	best: 0.5478477 (600)	total: 32s	remaining: 43m 47s
700:	learn: 0.6440548	test: 0.5496646	best: 0.5496646 (700)	total: 37.3s	remaining: 43m 46s
800:	learn: 0.6504269	test: 0.5515792	best: 0.5515792 (800)	total: 42.7s	remaining: 43m 40s
900:	learn: 0.6558820	test: 0.5529517	best: 0.5529517 (900)	total: 47.9s	remaining: 43m 31s
1000:	learn: 0.6607868	test: 0.5537757	best: 0.5538619 (983)	total: 53.1s	remaining

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.4317589	test: 0.4144482	best: 0.4144482 (0)	total: 87.1ms	remaining: 1h 12m 35s
100:	learn: 0.5576650	test: 0.5390575	best: 0.5390575 (100)	total: 5.1s	remaining: 41m 59s
200:	learn: 0.5829931	test: 0.5576357	best: 0.5576357 (200)	total: 9.92s	remaining: 40m 58s
300:	learn: 0.5992238	test: 0.5673207	best: 0.5673207 (300)	total: 15.2s	remaining: 41m 47s
400:	learn: 0.6117189	test: 0.5731832	best: 0.5731832 (400)	total: 20.6s	remaining: 42m 31s
500:	learn: 0.6219253	test: 0.5763965	best: 0.5763965 (500)	total: 26s	remaining: 42m 50s
600:	learn: 0.6306688	test: 0.5786295	best: 0.5786295 (600)	total: 31.5s	remaining: 43m 5s
700:	learn: 0.6382190	test: 0.5806690	best: 0.5806810 (699)	total: 36.8s	remaining: 43m 9s
800:	learn: 0.6452813	test: 0.5821880	best: 0.5821880 (800)	total: 42.2s	remaining: 43m 9s
900:	learn: 0.6514034	test: 0.5837054	best: 0.5837054 (900)	total: 47.5s	remaining: 43m 6s
1000:	learn: 0.6570821	test: 0.5848222	best: 0.5848222 (1000)	total: 52.8s	remaining: 4

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.4404034	test: 0.4034160	best: 0.4034160 (0)	total: 70.1ms	remaining: 58m 23s
100:	learn: 0.5451798	test: 0.5531998	best: 0.5531998 (100)	total: 5.01s	remaining: 41m 16s
200:	learn: 0.5684936	test: 0.5807778	best: 0.5807778 (200)	total: 10.2s	remaining: 42m 15s
300:	learn: 0.5835474	test: 0.5993436	best: 0.5993436 (300)	total: 15.3s	remaining: 42m 11s
400:	learn: 0.5948631	test: 0.6113076	best: 0.6113076 (400)	total: 20.6s	remaining: 42m 28s
500:	learn: 0.6039791	test: 0.6191507	best: 0.6191507 (500)	total: 25.8s	remaining: 42m 26s
600:	learn: 0.6122780	test: 0.6253049	best: 0.6253049 (600)	total: 31s	remaining: 42m 28s
700:	learn: 0.6194178	test: 0.6304605	best: 0.6304605 (700)	total: 35.9s	remaining: 42m 4s
800:	learn: 0.6259025	test: 0.6342049	best: 0.6342049 (800)	total: 41.1s	remaining: 42m 4s
900:	learn: 0.6319960	test: 0.6375892	best: 0.6375892 (900)	total: 46.4s	remaining: 42m 8s
1000:	learn: 0.6379469	test: 0.6404929	best: 0.6404929 (1000)	total: 51.6s	remaining: 42

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.4249159	test: 0.3773828	best: 0.3773828 (0)	total: 70.5ms	remaining: 58m 44s
100:	learn: 0.5714514	test: 0.4993688	best: 0.4993688 (100)	total: 4.83s	remaining: 39m 44s
200:	learn: 0.5960792	test: 0.5173231	best: 0.5173231 (200)	total: 10s	remaining: 41m 27s
300:	learn: 0.6119119	test: 0.5249204	best: 0.5249204 (300)	total: 15.4s	remaining: 42m 20s
400:	learn: 0.6236412	test: 0.5293410	best: 0.5293637 (399)	total: 20.7s	remaining: 42m 34s
500:	learn: 0.6332773	test: 0.5331157	best: 0.5331157 (500)	total: 25.9s	remaining: 42m 37s
600:	learn: 0.6412764	test: 0.5359749	best: 0.5359749 (600)	total: 31.1s	remaining: 42m 39s
700:	learn: 0.6481760	test: 0.5376344	best: 0.5376424 (699)	total: 36.4s	remaining: 42m 36s
800:	learn: 0.6543655	test: 0.5392619	best: 0.5392743 (799)	total: 41.6s	remaining: 42m 32s
900:	learn: 0.6599804	test: 0.5409461	best: 0.5409461 (900)	total: 46.8s	remaining: 42m 28s
1000:	learn: 0.6648882	test: 0.5421545	best: 0.5421545 (1000)	total: 52s	remaining: 4


0it [00:00, ?it/s][A
1it [00:18, 18.48s/it][A
2it [00:37, 19.03s/it][A
3it [00:56, 18.67s/it][A
4it [01:13, 18.26s/it][A
5it [01:31, 18.38s/it]
category='Электроника':  43%|████▎     | 3/7 [2:55:18<4:06:34, 3698.60s/it]

fold 1...
Learning rate set to 0.008948


Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.3272864	test: 0.2392880	best: 0.2392880 (0)	total: 52.6ms	remaining: 43m 48s
100:	learn: 0.5427836	test: 0.4035666	best: 0.4046363 (75)	total: 3.49s	remaining: 28m 44s
200:	learn: 0.5725288	test: 0.4145424	best: 0.4147758 (199)	total: 6.92s	remaining: 28m 35s
300:	learn: 0.5922212	test: 0.4198130	best: 0.4198130 (300)	total: 10.4s	remaining: 28m 29s
400:	learn: 0.6072270	test: 0.4259727	best: 0.4259727 (400)	total: 13.8s	remaining: 28m 26s
500:	learn: 0.6189420	test: 0.4302861	best: 0.4302861 (500)	total: 17s	remaining: 27m 58s
600:	learn: 0.6289496	test: 0.4346163	best: 0.4346163 (600)	total: 20.2s	remaining: 27m 38s
700:	learn: 0.6372397	test: 0.4379484	best: 0.4379484 (700)	total: 23.5s	remaining: 27m 29s
800:	learn: 0.6446947	test: 0.4409641	best: 0.4409641 (800)	total: 26.9s	remaining: 27m 31s
900:	learn: 0.6516725	test: 0.4435353	best: 0.4435353 (900)	total: 30.4s	remaining: 27m 36s
1000:	learn: 0.6587119	test: 0.4455840	best: 0.4456080 (993)	total: 34s	remaining: 27m

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.3995530	test: 0.4044019	best: 0.4044019 (0)	total: 48.6ms	remaining: 40m 31s
100:	learn: 0.5135233	test: 0.5184759	best: 0.5184759 (100)	total: 3.51s	remaining: 28m 54s
200:	learn: 0.5414652	test: 0.5371448	best: 0.5371448 (200)	total: 6.95s	remaining: 28m 40s
300:	learn: 0.5599076	test: 0.5475751	best: 0.5475751 (300)	total: 10.4s	remaining: 28m 32s
400:	learn: 0.5743831	test: 0.5540552	best: 0.5540881 (399)	total: 13.8s	remaining: 28m 22s
500:	learn: 0.5854651	test: 0.5591942	best: 0.5592346 (499)	total: 17.3s	remaining: 28m 25s
600:	learn: 0.5955814	test: 0.5634950	best: 0.5635156 (599)	total: 20.7s	remaining: 28m 22s
700:	learn: 0.6047980	test: 0.5667310	best: 0.5667991 (698)	total: 24.1s	remaining: 28m 13s
800:	learn: 0.6125581	test: 0.5690139	best: 0.5690139 (800)	total: 27.3s	remaining: 27m 59s
900:	learn: 0.6193350	test: 0.5706571	best: 0.5706607 (897)	total: 30.8s	remaining: 27m 57s
1000:	learn: 0.6257839	test: 0.5726435	best: 0.5726749 (994)	total: 34.3s	remaining

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.3669946	test: 0.3974519	best: 0.3974519 (0)	total: 52.1ms	remaining: 43m 24s
100:	learn: 0.5051442	test: 0.5215009	best: 0.5215009 (100)	total: 3.36s	remaining: 27m 39s
200:	learn: 0.5314086	test: 0.5344590	best: 0.5344590 (200)	total: 6.75s	remaining: 27m 52s
300:	learn: 0.5486492	test: 0.5414583	best: 0.5414583 (300)	total: 10.2s	remaining: 28m 11s
400:	learn: 0.5621749	test: 0.5463574	best: 0.5463574 (400)	total: 13.7s	remaining: 28m 12s
500:	learn: 0.5726311	test: 0.5476076	best: 0.5476076 (500)	total: 17.1s	remaining: 28m 10s
600:	learn: 0.5823108	test: 0.5498003	best: 0.5498003 (600)	total: 20.5s	remaining: 28m 7s
700:	learn: 0.5904580	test: 0.5518150	best: 0.5518176 (699)	total: 24s	remaining: 28m 10s
800:	learn: 0.5978000	test: 0.5545502	best: 0.5545557 (799)	total: 27.5s	remaining: 28m 9s
900:	learn: 0.6046722	test: 0.5565112	best: 0.5565537 (887)	total: 31s	remaining: 28m 7s
1000:	learn: 0.6109328	test: 0.5592936	best: 0.5595039 (998)	total: 34.3s	remaining: 28m 1

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.3641482	test: 0.3914269	best: 0.3914269 (0)	total: 48.5ms	remaining: 40m 23s
100:	learn: 0.5271494	test: 0.4671164	best: 0.4672272 (98)	total: 3.6s	remaining: 29m 40s
200:	learn: 0.5541324	test: 0.4815269	best: 0.4816148 (199)	total: 7.08s	remaining: 29m 14s
300:	learn: 0.5729026	test: 0.4858959	best: 0.4861561 (284)	total: 10.6s	remaining: 29m 9s
400:	learn: 0.5868342	test: 0.4895050	best: 0.4895050 (400)	total: 14s	remaining: 28m 47s
500:	learn: 0.5984465	test: 0.4914420	best: 0.4915199 (496)	total: 17.2s	remaining: 28m 15s
600:	learn: 0.6078915	test: 0.4925950	best: 0.4927852 (592)	total: 20.4s	remaining: 27m 53s
700:	learn: 0.6162608	test: 0.4936319	best: 0.4936416 (697)	total: 23.9s	remaining: 28m
800:	learn: 0.6237427	test: 0.4946753	best: 0.4947534 (798)	total: 27.3s	remaining: 27m 59s
900:	learn: 0.6305134	test: 0.4959117	best: 0.4961029 (897)	total: 30.8s	remaining: 27m 58s
1000:	learn: 0.6368055	test: 0.4972190	best: 0.4972190 (1000)	total: 34s	remaining: 27m 46s


Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.3283432	test: 0.3372665	best: 0.3372665 (0)	total: 50.8ms	remaining: 42m 17s
100:	learn: 0.5157024	test: 0.5276756	best: 0.5277499 (99)	total: 3.47s	remaining: 28m 32s
200:	learn: 0.5446366	test: 0.5418418	best: 0.5418418 (200)	total: 6.83s	remaining: 28m 11s
300:	learn: 0.5640372	test: 0.5510081	best: 0.5510339 (298)	total: 10.1s	remaining: 27m 45s
400:	learn: 0.5788584	test: 0.5583687	best: 0.5583687 (400)	total: 13.5s	remaining: 27m 47s
500:	learn: 0.5902897	test: 0.5625602	best: 0.5625602 (500)	total: 16.9s	remaining: 27m 48s
600:	learn: 0.5999923	test: 0.5671123	best: 0.5671123 (600)	total: 20.3s	remaining: 27m 52s
700:	learn: 0.6086240	test: 0.5699529	best: 0.5699529 (700)	total: 23.8s	remaining: 27m 54s
800:	learn: 0.6167357	test: 0.5732578	best: 0.5732578 (800)	total: 27.4s	remaining: 28m 3s
900:	learn: 0.6237725	test: 0.5761116	best: 0.5761116 (900)	total: 30.9s	remaining: 28m 1s
1000:	learn: 0.6305045	test: 0.5779327	best: 0.5780034 (991)	total: 34.3s	remaining: 2


0it [00:00, ?it/s][A
1it [00:08,  8.75s/it][A
2it [00:17,  8.66s/it][A
3it [00:26,  9.03s/it][A
4it [00:35,  9.05s/it][A
5it [00:44,  8.98s/it]
category='Хобби и отдых':  57%|█████▋    | 4/7 [3:24:52<2:26:56, 2938.86s/it]

fold 1...
Learning rate set to 0.00927


Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.4514917	test: 0.4587897	best: 0.4587897 (0)	total: 53.3ms	remaining: 44m 26s
100:	learn: 0.6047289	test: 0.6249909	best: 0.6250195 (98)	total: 2.93s	remaining: 24m 8s
200:	learn: 0.6350857	test: 0.6355086	best: 0.6355086 (200)	total: 5.81s	remaining: 24m
300:	learn: 0.6547407	test: 0.6413509	best: 0.6413509 (300)	total: 8.66s	remaining: 23m 50s
400:	learn: 0.6683287	test: 0.6456299	best: 0.6456399 (383)	total: 11.4s	remaining: 23m 28s
500:	learn: 0.6800313	test: 0.6489886	best: 0.6489886 (500)	total: 14.1s	remaining: 23m 12s
600:	learn: 0.6901491	test: 0.6503299	best: 0.6503299 (600)	total: 16.8s	remaining: 22m 58s
700:	learn: 0.6987244	test: 0.6512300	best: 0.6513865 (690)	total: 19.5s	remaining: 22m 50s
800:	learn: 0.7060926	test: 0.6529213	best: 0.6529239 (798)	total: 22.2s	remaining: 22m 44s
900:	learn: 0.7124726	test: 0.6538483	best: 0.6539752 (898)	total: 25.1s	remaining: 22m 47s
1000:	learn: 0.7182141	test: 0.6548814	best: 0.6548826 (999)	total: 28s	remaining: 22m 48

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.5354715	test: 0.4395836	best: 0.4395836 (0)	total: 43.3ms	remaining: 36m 5s
100:	learn: 0.6280517	test: 0.5434208	best: 0.5434460 (99)	total: 3.11s	remaining: 25m 35s
200:	learn: 0.6536227	test: 0.5526806	best: 0.5527477 (198)	total: 5.97s	remaining: 24m 38s
300:	learn: 0.6693799	test: 0.5563367	best: 0.5563367 (300)	total: 8.85s	remaining: 24m 21s
400:	learn: 0.6809804	test: 0.5599316	best: 0.5599414 (399)	total: 11.8s	remaining: 24m 14s
500:	learn: 0.6903503	test: 0.5617386	best: 0.5617386 (500)	total: 14.7s	remaining: 24m 9s
600:	learn: 0.6981878	test: 0.5633621	best: 0.5633707 (595)	total: 17.5s	remaining: 24m 1s
700:	learn: 0.7056751	test: 0.5631911	best: 0.5634320 (602)	total: 20.4s	remaining: 23m 52s
800:	learn: 0.7116526	test: 0.5642189	best: 0.5644877 (780)	total: 23.2s	remaining: 23m 47s
900:	learn: 0.7168060	test: 0.5652976	best: 0.5652976 (900)	total: 26s	remaining: 23m 36s
1000:	learn: 0.7220228	test: 0.5661572	best: 0.5661572 (1000)	total: 28.7s	remaining: 23m

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.4590361	test: 0.4074480	best: 0.4074480 (0)	total: 44.7ms	remaining: 37m 17s
100:	learn: 0.5913617	test: 0.6575770	best: 0.6575770 (100)	total: 3s	remaining: 24m 43s
200:	learn: 0.6195884	test: 0.6640147	best: 0.6640147 (200)	total: 5.93s	remaining: 24m 30s
300:	learn: 0.6380464	test: 0.6675162	best: 0.6675162 (300)	total: 8.81s	remaining: 24m 15s
400:	learn: 0.6528685	test: 0.6721724	best: 0.6722846 (398)	total: 11.6s	remaining: 23m 59s
500:	learn: 0.6652147	test: 0.6755021	best: 0.6755021 (500)	total: 14.3s	remaining: 23m 34s
600:	learn: 0.6743723	test: 0.6773009	best: 0.6773229 (592)	total: 17.2s	remaining: 23m 36s
700:	learn: 0.6830161	test: 0.6792019	best: 0.6792172 (699)	total: 20s	remaining: 23m 29s
800:	learn: 0.6909582	test: 0.6796865	best: 0.6797373 (795)	total: 22.7s	remaining: 23m 16s
900:	learn: 0.6976147	test: 0.6803267	best: 0.6804989 (896)	total: 25.5s	remaining: 23m 11s
1000:	learn: 0.7037825	test: 0.6798718	best: 0.6804989 (896)	total: 28.5s	remaining: 23m

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.4691460	test: 0.2870397	best: 0.2870397 (0)	total: 46.6ms	remaining: 38m 50s
100:	learn: 0.6376854	test: 0.4468718	best: 0.4469350 (99)	total: 2.88s	remaining: 23m 44s
200:	learn: 0.6619415	test: 0.4609222	best: 0.4609222 (200)	total: 5.94s	remaining: 24m 31s
300:	learn: 0.6780173	test: 0.4718329	best: 0.4718329 (300)	total: 8.73s	remaining: 24m 2s
400:	learn: 0.6903037	test: 0.4784113	best: 0.4784113 (400)	total: 11.5s	remaining: 23m 36s
500:	learn: 0.7000726	test: 0.4836591	best: 0.4836591 (500)	total: 14.1s	remaining: 23m 17s
600:	learn: 0.7086889	test: 0.4872684	best: 0.4873145 (594)	total: 16.9s	remaining: 23m 5s
700:	learn: 0.7152439	test: 0.4897398	best: 0.4897717 (699)	total: 19.6s	remaining: 22m 55s
800:	learn: 0.7215316	test: 0.4914131	best: 0.4914495 (799)	total: 22.2s	remaining: 22m 45s
900:	learn: 0.7272186	test: 0.4928230	best: 0.4928823 (889)	total: 24.9s	remaining: 22m 38s
1000:	learn: 0.7321922	test: 0.4945627	best: 0.4945842 (997)	total: 27.7s	remaining: 2

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.4650879	test: 0.4596571	best: 0.4596571 (0)	total: 58.8ms	remaining: 49m 2s
100:	learn: 0.6104985	test: 0.5977570	best: 0.5977570 (100)	total: 2.94s	remaining: 24m 12s
200:	learn: 0.6406977	test: 0.6108213	best: 0.6108213 (200)	total: 5.83s	remaining: 24m 5s
300:	learn: 0.6601452	test: 0.6171541	best: 0.6171541 (300)	total: 8.73s	remaining: 24m 2s
400:	learn: 0.6736419	test: 0.6200478	best: 0.6201098 (389)	total: 11.6s	remaining: 23m 53s
500:	learn: 0.6843396	test: 0.6227749	best: 0.6229468 (490)	total: 14.5s	remaining: 23m 50s
600:	learn: 0.6936531	test: 0.6251782	best: 0.6252084 (596)	total: 17.4s	remaining: 23m 48s
700:	learn: 0.7010685	test: 0.6269048	best: 0.6269048 (700)	total: 20.3s	remaining: 23m 48s
800:	learn: 0.7085812	test: 0.6293582	best: 0.6294826 (794)	total: 23.2s	remaining: 23m 43s
900:	learn: 0.7150585	test: 0.6312674	best: 0.6312825 (889)	total: 26.3s	remaining: 23m 54s
1000:	learn: 0.7204905	test: 0.6334678	best: 0.6336166 (993)	total: 29.2s	remaining: 2


0it [00:00, ?it/s][A
1it [00:09,  9.19s/it][A
2it [00:18,  9.47s/it][A
3it [00:27,  9.16s/it][A
4it [00:37,  9.51s/it][A
5it [00:46,  9.36s/it]
category='Транспорт':  71%|███████▏  | 5/7 [3:47:57<1:19:16, 2378.40s/it]    

fold 1...
Learning rate set to 0.009017


Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.6309551	test: 0.4178618	best: 0.4178618 (0)	total: 52.9ms	remaining: 44m 5s
100:	learn: 0.7023719	test: 0.5924830	best: 0.5924830 (100)	total: 3.26s	remaining: 26m 48s
200:	learn: 0.7252630	test: 0.6121453	best: 0.6121453 (200)	total: 6.42s	remaining: 26m 31s
300:	learn: 0.7371424	test: 0.6188749	best: 0.6188923 (297)	total: 9.6s	remaining: 26m 24s
400:	learn: 0.7450700	test: 0.6228032	best: 0.6228178 (398)	total: 12.8s	remaining: 26m 20s
500:	learn: 0.7526558	test: 0.6267221	best: 0.6267221 (500)	total: 15.9s	remaining: 26m 13s
600:	learn: 0.7585020	test: 0.6293947	best: 0.6293947 (600)	total: 18.9s	remaining: 25m 56s
700:	learn: 0.7635956	test: 0.6309752	best: 0.6310142 (696)	total: 21.9s	remaining: 25m 37s
800:	learn: 0.7682054	test: 0.6319027	best: 0.6319144 (798)	total: 24.8s	remaining: 25m 21s
900:	learn: 0.7721820	test: 0.6326442	best: 0.6326442 (900)	total: 27.8s	remaining: 25m 17s
1000:	learn: 0.7759649	test: 0.6325801	best: 0.6327531 (964)	total: 31s	remaining: 25

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.6165324	test: 0.7533919	best: 0.7533919 (0)	total: 47.1ms	remaining: 39m 13s
100:	learn: 0.6609666	test: 0.7734961	best: 0.7736364 (23)	total: 3.19s	remaining: 26m 16s
200:	learn: 0.6852606	test: 0.7854568	best: 0.7854568 (200)	total: 6.29s	remaining: 25m 57s
300:	learn: 0.6981859	test: 0.7904236	best: 0.7904236 (300)	total: 9.5s	remaining: 26m 8s
400:	learn: 0.7070719	test: 0.7922960	best: 0.7924393 (398)	total: 12.7s	remaining: 26m 14s
500:	learn: 0.7138260	test: 0.7939636	best: 0.7939636 (500)	total: 15.9s	remaining: 26m 8s
600:	learn: 0.7196819	test: 0.7945978	best: 0.7945978 (600)	total: 18.9s	remaining: 25m 52s
700:	learn: 0.7246392	test: 0.7952874	best: 0.7952874 (700)	total: 21.9s	remaining: 25m 42s
800:	learn: 0.7289580	test: 0.7961670	best: 0.7962416 (791)	total: 24.9s	remaining: 25m 27s
900:	learn: 0.7328918	test: 0.7964878	best: 0.7966607 (889)	total: 28s	remaining: 25m 26s
1000:	learn: 0.7368029	test: 0.7972771	best: 0.7972852 (983)	total: 31.2s	remaining: 25m 

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.5894790	test: 0.5643819	best: 0.5643819 (0)	total: 46.1ms	remaining: 38m 25s
100:	learn: 0.6877120	test: 0.6182141	best: 0.6182141 (100)	total: 2.94s	remaining: 24m 13s
200:	learn: 0.7061488	test: 0.6308355	best: 0.6308355 (200)	total: 5.83s	remaining: 24m 4s
300:	learn: 0.7174043	test: 0.6366410	best: 0.6366935 (299)	total: 8.85s	remaining: 24m 21s
400:	learn: 0.7259733	test: 0.6405701	best: 0.6405701 (400)	total: 12.1s	remaining: 24m 54s
500:	learn: 0.7325429	test: 0.6447775	best: 0.6449725 (497)	total: 15.3s	remaining: 25m 7s
600:	learn: 0.7382631	test: 0.6486090	best: 0.6486928 (597)	total: 18.4s	remaining: 25m 11s
700:	learn: 0.7433073	test: 0.6516440	best: 0.6516573 (699)	total: 21.6s	remaining: 25m 18s
800:	learn: 0.7476607	test: 0.6537899	best: 0.6538114 (799)	total: 24.8s	remaining: 25m 20s
900:	learn: 0.7515727	test: 0.6569062	best: 0.6569633 (898)	total: 27.9s	remaining: 25m 20s
1000:	learn: 0.7551771	test: 0.6587236	best: 0.6587364 (999)	total: 31.2s	remaining: 

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.6489828	test: 0.5232709	best: 0.5232709 (0)	total: 47ms	remaining: 39m 11s
100:	learn: 0.7137394	test: 0.5911317	best: 0.5911317 (100)	total: 3.09s	remaining: 25m 26s
200:	learn: 0.7343477	test: 0.6006508	best: 0.6006508 (200)	total: 6.2s	remaining: 25m 35s
300:	learn: 0.7470938	test: 0.6033341	best: 0.6033341 (300)	total: 9.16s	remaining: 25m 12s
400:	learn: 0.7565667	test: 0.6047340	best: 0.6047843 (398)	total: 12.2s	remaining: 25m 10s
500:	learn: 0.7641178	test: 0.6056520	best: 0.6056520 (500)	total: 15.6s	remaining: 25m 39s
600:	learn: 0.7696196	test: 0.6053927	best: 0.6058652 (526)	total: 18.5s	remaining: 25m 23s
700:	learn: 0.7745050	test: 0.6052049	best: 0.6058652 (526)	total: 21.5s	remaining: 25m 9s
800:	learn: 0.7792247	test: 0.6054456	best: 0.6058652 (526)	total: 24.4s	remaining: 24m 57s
900:	learn: 0.7836191	test: 0.6055488	best: 0.6058652 (526)	total: 27.5s	remaining: 24m 59s
1000:	learn: 0.7873233	test: 0.6055511	best: 0.6058652 (526)	total: 30.7s	remaining: 25

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.6123984	test: 0.6395150	best: 0.6395150 (0)	total: 65.4ms	remaining: 54m 32s
100:	learn: 0.6859466	test: 0.7015696	best: 0.7016750 (94)	total: 3.1s	remaining: 25m 34s
200:	learn: 0.7060550	test: 0.7152949	best: 0.7152949 (200)	total: 6.25s	remaining: 25m 47s
300:	learn: 0.7189534	test: 0.7219254	best: 0.7219254 (300)	total: 9.47s	remaining: 26m 3s
400:	learn: 0.7283035	test: 0.7256909	best: 0.7256909 (400)	total: 12.7s	remaining: 26m 5s
500:	learn: 0.7359119	test: 0.7276984	best: 0.7277391 (499)	total: 15.8s	remaining: 26m 2s
600:	learn: 0.7417650	test: 0.7289208	best: 0.7289208 (600)	total: 19s	remaining: 26m 3s
700:	learn: 0.7468685	test: 0.7303451	best: 0.7303451 (700)	total: 22s	remaining: 25m 46s
800:	learn: 0.7515371	test: 0.7313496	best: 0.7313496 (800)	total: 25.2s	remaining: 25m 46s
900:	learn: 0.7554707	test: 0.7319965	best: 0.7320043 (896)	total: 28.2s	remaining: 25m 34s
1000:	learn: 0.7593105	test: 0.7326545	best: 0.7326545 (1000)	total: 31.3s	remaining: 25m 30s


0it [00:00, ?it/s][A
1it [00:05,  5.86s/it][A
2it [00:11,  5.97s/it][A
3it [00:18,  6.45s/it][A
4it [00:24,  6.15s/it][A
5it [00:30,  6.11s/it]
category='Животные':  86%|████████▌ | 6/7 [4:13:18<34:46, 2086.96s/it] 

fold 1...
Learning rate set to 0.010408


Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.2870915	test: 0.1903728	best: 0.1903728 (0)	total: 93.7ms	remaining: 1h 18m 4s
100:	learn: 0.4678264	test: 0.3114868	best: 0.3180462 (24)	total: 5.29s	remaining: 43m 33s
200:	learn: 0.4987678	test: 0.3228617	best: 0.3229745 (198)	total: 10.5s	remaining: 43m 32s
300:	learn: 0.5175472	test: 0.3316988	best: 0.3317786 (299)	total: 15.8s	remaining: 43m 36s
400:	learn: 0.5321413	test: 0.3355936	best: 0.3355936 (400)	total: 21.3s	remaining: 43m 53s
500:	learn: 0.5453479	test: 0.3369387	best: 0.3375334 (436)	total: 26.6s	remaining: 43m 46s
600:	learn: 0.5570102	test: 0.3407528	best: 0.3408692 (597)	total: 31.9s	remaining: 43m 38s
700:	learn: 0.5675706	test: 0.3431137	best: 0.3431674 (698)	total: 37.2s	remaining: 43m 32s
800:	learn: 0.5781490	test: 0.3465866	best: 0.3467098 (791)	total: 42.4s	remaining: 43m 26s
900:	learn: 0.5868691	test: 0.3479464	best: 0.3479464 (900)	total: 47.6s	remaining: 43m 14s
1000:	learn: 0.5959174	test: 0.3526798	best: 0.3529365 (968)	total: 53.1s	remainin

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


100:	learn: 0.4329714	test: 0.4586356	best: 0.4601698 (87)	total: 5.28s	remaining: 43m 28s
200:	learn: 0.4671403	test: 0.4606996	best: 0.4618224 (193)	total: 10.6s	remaining: 43m 53s
300:	learn: 0.4902006	test: 0.4627279	best: 0.4649278 (265)	total: 15.9s	remaining: 43m 39s
400:	learn: 0.5065133	test: 0.4629767	best: 0.4649278 (265)	total: 21s	remaining: 43m 22s
500:	learn: 0.5198218	test: 0.4686220	best: 0.4694462 (483)	total: 26.4s	remaining: 43m 26s
600:	learn: 0.5320497	test: 0.4708041	best: 0.4712270 (593)	total: 31.6s	remaining: 43m 19s
700:	learn: 0.5433697	test: 0.4736959	best: 0.4738452 (692)	total: 36.9s	remaining: 43m 14s
800:	learn: 0.5527914	test: 0.4746111	best: 0.4748600 (766)	total: 42.3s	remaining: 43m 16s
900:	learn: 0.5622353	test: 0.4773652	best: 0.4773652 (900)	total: 47.6s	remaining: 43m 11s
1000:	learn: 0.5725052	test: 0.4779691	best: 0.4781062 (997)	total: 52.8s	remaining: 43m 4s
1100:	learn: 0.5833786	test: 0.4792380	best: 0.4801488 (1084)	total: 58.1s	remainin

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	learn: 0.2873978	test: 0.1989241	best: 0.1989241 (0)	total: 97ms	remaining: 1h 20m 51s
100:	learn: 0.4526366	test: 0.4010447	best: 0.4010447 (100)	total: 5.28s	remaining: 43m 28s
200:	learn: 0.4842945	test: 0.4289999	best: 0.4290356 (199)	total: 10.5s	remaining: 43m 23s
300:	learn: 0.5040805	test: 0.4365479	best: 0.4365479 (300)	total: 16s	remaining: 43m 53s
400:	learn: 0.5194041	test: 0.4388699	best: 0.4389251 (398)	total: 21.3s	remaining: 43m 49s
500:	learn: 0.5333873	test: 0.4411207	best: 0.4415305 (487)	total: 26.6s	remaining: 43m 49s
600:	learn: 0.5463155	test: 0.4433319	best: 0.4435955 (598)	total: 32s	remaining: 43m 47s
700:	learn: 0.5578604	test: 0.4436985	best: 0.4437768 (699)	total: 37.3s	remaining: 43m 42s
800:	learn: 0.5668013	test: 0.4448941	best: 0.4450794 (794)	total: 42.6s	remaining: 43m 37s
900:	learn: 0.5774362	test: 0.4473079	best: 0.4473079 (900)	total: 48s	remaining: 43m 34s
1000:	learn: 0.5887340	test: 0.4491772	best: 0.4491772 (1000)	total: 53.3s	remaining: 43

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


100:	learn: 0.4612075	test: 0.4065386	best: 0.4065386 (100)	total: 5.22s	remaining: 43m
200:	learn: 0.4904948	test: 0.4108786	best: 0.4111138 (199)	total: 10.6s	remaining: 43m 43s
300:	learn: 0.5105297	test: 0.4076879	best: 0.4111138 (199)	total: 15.8s	remaining: 43m 35s
400:	learn: 0.5252225	test: 0.4069528	best: 0.4111138 (199)	total: 21.1s	remaining: 43m 27s
500:	learn: 0.5372766	test: 0.4065139	best: 0.4111138 (199)	total: 26.4s	remaining: 43m 28s
600:	learn: 0.5495169	test: 0.4049029	best: 0.4111138 (199)	total: 31.7s	remaining: 43m 28s
700:	learn: 0.5603737	test: 0.4040437	best: 0.4111138 (199)	total: 37s	remaining: 43m 24s
800:	learn: 0.5713882	test: 0.4022146	best: 0.4111138 (199)	total: 42.5s	remaining: 43m 32s
900:	learn: 0.5810877	test: 0.4013412	best: 0.4111138 (199)	total: 47.9s	remaining: 43m 31s
1000:	learn: 0.5912793	test: 0.4001764	best: 0.4111138 (199)	total: 53.2s	remaining: 43m 24s
1100:	learn: 0.6019987	test: 0.3988009	best: 0.4111138 (199)	total: 58.5s	remaining: 

Default metric period is 5 because PRAUC is/are not implemented for GPU
Metric PRAUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


100:	learn: 0.4562806	test: 0.3985692	best: 0.3985692 (100)	total: 5.4s	remaining: 44m 28s
200:	learn: 0.4888981	test: 0.4214360	best: 0.4214929 (199)	total: 10.6s	remaining: 43m 48s
300:	learn: 0.5113080	test: 0.4302917	best: 0.4302917 (300)	total: 15.9s	remaining: 43m 45s
400:	learn: 0.5259517	test: 0.4351537	best: 0.4353422 (396)	total: 21.2s	remaining: 43m 41s
500:	learn: 0.5392405	test: 0.4401704	best: 0.4401704 (500)	total: 26.5s	remaining: 43m 42s
600:	learn: 0.5516400	test: 0.4436260	best: 0.4438179 (596)	total: 31.9s	remaining: 43m 40s
700:	learn: 0.5646785	test: 0.4440407	best: 0.4440652 (690)	total: 37.5s	remaining: 43m 54s
800:	learn: 0.5757685	test: 0.4462745	best: 0.4464061 (799)	total: 42.8s	remaining: 43m 46s
900:	learn: 0.5871766	test: 0.4474502	best: 0.4476353 (889)	total: 48.1s	remaining: 43m 42s
1000:	learn: 0.5990742	test: 0.4486024	best: 0.4486884 (986)	total: 53.4s	remaining: 43m 35s
1100:	learn: 0.6102335	test: 0.4486025	best: 0.4490004 (1080)	total: 58.8s	remai


0it [00:00, ?it/s][A
1it [00:02,  2.45s/it][A
2it [00:04,  2.43s/it][A
3it [00:07,  2.43s/it][A
4it [00:09,  2.31s/it][A
5it [00:11,  2.33s/it]
category='Животные': 100%|██████████| 7/7 [4:31:21<00:00, 2325.86s/it]


['oof_preds_cb_5folds_cat1lvl.joblib']

In [27]:
# # кб по категориям 2 уровня

# categories = train['unique_cat_2'].unique()
# categories = [cat.split('_')[0] for cat in categories if cat.split('_')[0] == cat.split('_')[1]]

# gkf = StratifiedGroupKFold(n_splits=5) # GroupKFold(n_splits=5)

# cat_oof_preds_cb = {}
# oof_metrics_cat = {}

# progress_bar = tqdm(categories)

# for category in progress_bar:
#     progress_bar.set_description(f"{category=}")

#     train_category = train[train['unique_cat_2'] == f'{category}_{category}']
#     test_category = test[test['unique_cat_2'] == f'{category}_{category}']

#     print(f'{len(train_category)=}, {len(test_category)=}')

#     if len(train_category) < 40000:
#         print('ээ ну маловато чет! соси')
#         continue
        
#     cat_oof_preds_cb[category] = np.zeros(len(train_category))

#     for fold, (train_idx, val_idx) in enumerate(gkf.split(train_category, train_category[target], groups=train_category['group_id'])):
#         print(f"fold {fold+1}...")
    
#         X_train_cat, X_val_cat = train_category.iloc[train_idx][features], train_category.iloc[val_idx][features]
#         y_train_cat, y_val_cat = train_category.iloc[train_idx][target], train_category.iloc[val_idx][target]

#         model_fold_cb = CatBoostClassifier(
#             iterations=50000,
#             # learning_rate=0.1,
#             cat_features=cat_features,
#             # auto_class_weights='Balanced',
#             eval_metric='PRAUC',
#             random_seed=3*42+fold,
#             early_stopping_rounds=1000,
#             task_type='GPU',
#             verbose=100
#         )

#         model_fold_cb.fit(X_train_cat, y_train_cat, eval_set=(X_val_cat, y_val_cat))
#         cat_oof_preds_cb[category][val_idx] = model_fold_cb.predict_proba(X_val_cat)[:, 1]

#         joblib.dump(model_fold_cb, f"{category}_catboost_model_fold_{fold+1}.joblib")

#         del model_fold_cb
#         gc.collect()

#     precision, recall, thresholds = precision_recall_curve(train_category[target], cat_oof_preds_cb[category])
#     cat_oof_prauc_cb = auc(recall, precision)
#     print(f'{cat_oof_prauc_cb=}')

#     oof_metrics_cat[category] = {'catboost': cat_oof_prauc_cb}

#     curr_cat_models = []
#     for fold in range(5):
#         model_cb = joblib.load(f"{category}_catboost_model_fold_{fold+1}.joblib")
#         curr_cat_models.append(model_cb)

#     ensemble_predictions_cb = predict_ensemble(test_category, curr_cat_models, features)
#     joblib.dump(ensemble_predictions_cb, f"{category}_ensemble_predictions_cb.joblib")

#     del ensemble_predictions_cb, model_cb, curr_cat_models
#     gc.collect()

# joblib.dump(cat_oof_preds_cb, "oof_preds_cb_5folds_cat2lvl.joblib")