In [1]:
from ozon_matching.kopatych_solution.utils import read_parquet, extract_category_levels
import polars as pl
from sklearn.model_selection import StratifiedKFold
from lightgbm import LGBMClassifier
from sklearn.metrics import roc_auc_score
from tqdm.auto import tqdm
import numpy as np

In [2]:
data = pl.concat(
    [
        read_parquet('../data/test_data.parquet', columns=['variantid', 'categories']),
        read_parquet('../data/train_data.parquet', columns=['variantid', 'categories']),
    ]
)
data = data.unique(subset=['variantid'])

[32m2023-05-23 11:31:17.916[0m | [1mINFO    [0m | [36mozon_matching.kopatych_solution.utils[0m:[36mread_parquet[0m:[36m54[0m - [1mRead Parquet from ../data/test_data.parquet[0m
[32m2023-05-23 11:31:17.927[0m | [1mINFO    [0m | [36mozon_matching.kopatych_solution.utils[0m:[36mread_parquet[0m:[36m56[0m - [1mN Rows - 35730, N Cols - 2[0m
[32m2023-05-23 11:31:17.928[0m | [1mINFO    [0m | [36mozon_matching.kopatych_solution.utils[0m:[36mread_parquet[0m:[36m54[0m - [1mRead Parquet from ../data/train_data.parquet[0m
[32m2023-05-23 11:31:18.047[0m | [1mINFO    [0m | [36mozon_matching.kopatych_solution.utils[0m:[36mread_parquet[0m:[36m56[0m - [1mN Rows - 457063, N Cols - 2[0m


In [3]:
data = extract_category_levels(data, [3, 4])
data = data.select(pl.col(['variantid', 'category_level_3', 'category_level_4']))

In [4]:
data = data.join(
    (
        data
        .select(pl.col('category_level_3'))
        .unique()
        .with_row_count(name='category_level_3_id')
    ),
    on=['category_level_3']
)

data = data.join(
    (
        data
        .select(pl.col('category_level_4'))
        .unique()
        .with_row_count(name='category_level_4_id')
    ),
    on=['category_level_4']
)

data = data.drop(['category_level_3', 'category_level_4'])

In [5]:
train = read_parquet('../data/train_pairs.parquet')
train = train.drop(['target']).with_columns([pl.lit(1).cast(pl.Int8).alias('is_train')])

test = read_parquet('../data/test_pairs_wo_target.parquet')
test = test.drop(['__index_level_0__']).with_columns([pl.lit(0).cast(pl.Int8).alias('is_train')])

[32m2023-05-23 11:31:18.912[0m | [1mINFO    [0m | [36mozon_matching.kopatych_solution.utils[0m:[36mread_parquet[0m:[36m54[0m - [1mRead Parquet from ../data/train_pairs.parquet[0m
[32m2023-05-23 11:31:18.922[0m | [1mINFO    [0m | [36mozon_matching.kopatych_solution.utils[0m:[36mread_parquet[0m:[36m56[0m - [1mN Rows - 306540, N Cols - 3[0m
[32m2023-05-23 11:31:18.923[0m | [1mINFO    [0m | [36mozon_matching.kopatych_solution.utils[0m:[36mread_parquet[0m:[36m54[0m - [1mRead Parquet from ../data/test_pairs_wo_target.parquet[0m
[32m2023-05-23 11:31:18.926[0m | [1mINFO    [0m | [36mozon_matching.kopatych_solution.utils[0m:[36mread_parquet[0m:[36m56[0m - [1mN Rows - 18084, N Cols - 3[0m


In [6]:
pairs = pl.concat([train, test])

In [7]:
pairs = pairs.join(
    data.rename(
        {
            'variantid': 'variantid1',
            'category_level_3_id': 'category_level_3_id_1',
            'category_level_4_id': 'category_level_4_id_1',
        }
    ),
    on=['variantid1']
).join(
    data.rename(
        {
            'variantid': 'variantid2',
            'category_level_3_id': 'category_level_3_id_2',
            'category_level_4_id': 'category_level_4_id_2',
        }
    ),
    on=['variantid2']
).drop(['variantid1', 'variantid2']).to_pandas()

In [10]:

cv1 = StratifiedKFold(n_splits=5, random_state=13, shuffle=True)
cv2 = StratifiedKFold(n_splits=3, random_state=13, shuffle=True)

X = pairs.drop(columns=['is_train']).values
y = pairs['is_train'].values

scores_cv = []

for cv_index, holdout_index in tqdm(cv1.split(X, y)):
    X_cv, X_holdout = X[cv_index], X[holdout_index]
    y_cv, y_holdout = y[cv_index], y[holdout_index]
    for train_index, valid_index in tqdm(cv2.split(X_cv, y_cv)):
        X_train, X_valid = X_cv[train_index], X_cv[valid_index]
        y_train, y_valid = y_cv[train_index], y_cv[valid_index]

    
        model = LGBMClassifier(
            n_estimators=1000,
        )
        model.fit(
            X=X_train,
            y=y_train,
            eval_set=[(X_valid, y_valid)],
            eval_metric=['auc'],
            categorical_feature=[0,1,2,3],
            early_stopping_rounds=50
        )
        scores_cv.append(roc_auc_score(y_holdout, model.predict_proba(X_holdout)[:, 1]))

0it [00:00, ?it/s]

0it [00:00, ?it/s]

New categorical_feature is [0, 1, 2, 3]


[1]	valid_0's auc: 0.78224	valid_0's binary_logloss: 0.207761
[2]	valid_0's auc: 0.784512	valid_0's binary_logloss: 0.202928
[3]	valid_0's auc: 0.785514	valid_0's binary_logloss: 0.199343
[4]	valid_0's auc: 0.785763	valid_0's binary_logloss: 0.196546
[5]	valid_0's auc: 0.785699	valid_0's binary_logloss: 0.194334
[6]	valid_0's auc: 0.785663	valid_0's binary_logloss: 0.192543
[7]	valid_0's auc: 0.785923	valid_0's binary_logloss: 0.191055
[8]	valid_0's auc: 0.786015	valid_0's binary_logloss: 0.189815
[9]	valid_0's auc: 0.786224	valid_0's binary_logloss: 0.188745
[10]	valid_0's auc: 0.786124	valid_0's binary_logloss: 0.18786
[11]	valid_0's auc: 0.786112	valid_0's binary_logloss: 0.187101
[12]	valid_0's auc: 0.786036	valid_0's binary_logloss: 0.18646
[13]	valid_0's auc: 0.786193	valid_0's binary_logloss: 0.185895
[14]	valid_0's auc: 0.786314	valid_0's binary_logloss: 0.185401
[15]	valid_0's auc: 0.786272	valid_0's binary_logloss: 0.184974
[16]	valid_0's auc: 0.786267	valid_0's binary_loglos

New categorical_feature is [0, 1, 2, 3]


[13]	valid_0's auc: 0.78512	valid_0's binary_logloss: 0.185719
[14]	valid_0's auc: 0.785223	valid_0's binary_logloss: 0.185202
[15]	valid_0's auc: 0.785292	valid_0's binary_logloss: 0.184769
[16]	valid_0's auc: 0.785408	valid_0's binary_logloss: 0.184395
[17]	valid_0's auc: 0.785401	valid_0's binary_logloss: 0.184067
[18]	valid_0's auc: 0.785469	valid_0's binary_logloss: 0.18379
[19]	valid_0's auc: 0.785492	valid_0's binary_logloss: 0.18354
[20]	valid_0's auc: 0.785452	valid_0's binary_logloss: 0.18333
[21]	valid_0's auc: 0.785424	valid_0's binary_logloss: 0.183137
[22]	valid_0's auc: 0.78546	valid_0's binary_logloss: 0.182969
[23]	valid_0's auc: 0.785287	valid_0's binary_logloss: 0.182826
[24]	valid_0's auc: 0.785399	valid_0's binary_logloss: 0.182687
[25]	valid_0's auc: 0.785535	valid_0's binary_logloss: 0.182562
[26]	valid_0's auc: 0.785626	valid_0's binary_logloss: 0.182452
[27]	valid_0's auc: 0.785612	valid_0's binary_logloss: 0.18237
[28]	valid_0's auc: 0.785657	valid_0's binary_

New categorical_feature is [0, 1, 2, 3]


[13]	valid_0's auc: 0.777638	valid_0's binary_logloss: 0.187718
[14]	valid_0's auc: 0.777787	valid_0's binary_logloss: 0.187274
[15]	valid_0's auc: 0.777889	valid_0's binary_logloss: 0.1869
[16]	valid_0's auc: 0.777921	valid_0's binary_logloss: 0.186575
[17]	valid_0's auc: 0.777916	valid_0's binary_logloss: 0.186294
[18]	valid_0's auc: 0.777987	valid_0's binary_logloss: 0.186037
[19]	valid_0's auc: 0.778081	valid_0's binary_logloss: 0.185822
[20]	valid_0's auc: 0.778054	valid_0's binary_logloss: 0.185644
[21]	valid_0's auc: 0.778024	valid_0's binary_logloss: 0.185478
[22]	valid_0's auc: 0.777923	valid_0's binary_logloss: 0.18533
[23]	valid_0's auc: 0.777955	valid_0's binary_logloss: 0.185206
[24]	valid_0's auc: 0.778075	valid_0's binary_logloss: 0.185102
[25]	valid_0's auc: 0.778121	valid_0's binary_logloss: 0.185002
[26]	valid_0's auc: 0.778194	valid_0's binary_logloss: 0.184892
[27]	valid_0's auc: 0.778248	valid_0's binary_logloss: 0.184797
[28]	valid_0's auc: 0.778334	valid_0's bina

0it [00:00, ?it/s]

[1]	valid_0's auc: 0.780484	valid_0's binary_logloss: 0.207573
[2]	valid_0's auc: 0.782468	valid_0's binary_logloss: 0.202659
[3]	valid_0's auc: 0.782731	valid_0's binary_logloss: 0.199024
[4]	valid_0's auc: 0.782908	valid_0's binary_logloss: 0.196245
[5]	valid_0's auc: 0.78271	valid_0's binary_logloss: 0.194057
[6]	valid_0's auc: 0.7829	valid_0's binary_logloss: 0.192277
[7]	valid_0's auc: 0.782951	valid_0's binary_logloss: 0.1908
[8]	valid_0's auc: 0.783303	valid_0's binary_logloss: 0.189543
[9]	valid_0's auc: 0.783399	valid_0's binary_logloss: 0.18849
[10]	valid_0's auc: 0.783491	valid_0's binary_logloss: 0.18761


New categorical_feature is [0, 1, 2, 3]


[11]	valid_0's auc: 0.783536	valid_0's binary_logloss: 0.186864
[12]	valid_0's auc: 0.783623	valid_0's binary_logloss: 0.186242
[13]	valid_0's auc: 0.783534	valid_0's binary_logloss: 0.185706
[14]	valid_0's auc: 0.783607	valid_0's binary_logloss: 0.185221
[15]	valid_0's auc: 0.783506	valid_0's binary_logloss: 0.184833
[16]	valid_0's auc: 0.783446	valid_0's binary_logloss: 0.184509
[17]	valid_0's auc: 0.783475	valid_0's binary_logloss: 0.184219
[18]	valid_0's auc: 0.783555	valid_0's binary_logloss: 0.183948
[19]	valid_0's auc: 0.783518	valid_0's binary_logloss: 0.183731
[20]	valid_0's auc: 0.783516	valid_0's binary_logloss: 0.183532
[21]	valid_0's auc: 0.78355	valid_0's binary_logloss: 0.183374
[22]	valid_0's auc: 0.783501	valid_0's binary_logloss: 0.183223
[23]	valid_0's auc: 0.78356	valid_0's binary_logloss: 0.183102
[24]	valid_0's auc: 0.783657	valid_0's binary_logloss: 0.182988
[25]	valid_0's auc: 0.783715	valid_0's binary_logloss: 0.1829
[26]	valid_0's auc: 0.783585	valid_0's binar

New categorical_feature is [0, 1, 2, 3]


[15]	valid_0's auc: 0.783671	valid_0's binary_logloss: 0.185341
[16]	valid_0's auc: 0.783672	valid_0's binary_logloss: 0.184985
[17]	valid_0's auc: 0.783707	valid_0's binary_logloss: 0.184684
[18]	valid_0's auc: 0.783738	valid_0's binary_logloss: 0.184428
[19]	valid_0's auc: 0.78383	valid_0's binary_logloss: 0.184196
[20]	valid_0's auc: 0.783843	valid_0's binary_logloss: 0.184004
[21]	valid_0's auc: 0.783917	valid_0's binary_logloss: 0.183824
[22]	valid_0's auc: 0.783859	valid_0's binary_logloss: 0.183667
[23]	valid_0's auc: 0.784004	valid_0's binary_logloss: 0.183518
[24]	valid_0's auc: 0.784106	valid_0's binary_logloss: 0.183386
[25]	valid_0's auc: 0.784126	valid_0's binary_logloss: 0.183266
[26]	valid_0's auc: 0.784277	valid_0's binary_logloss: 0.183159
[27]	valid_0's auc: 0.784212	valid_0's binary_logloss: 0.183081
[28]	valid_0's auc: 0.78429	valid_0's binary_logloss: 0.182986
[29]	valid_0's auc: 0.784296	valid_0's binary_logloss: 0.182918
[30]	valid_0's auc: 0.784218	valid_0's bin

New categorical_feature is [0, 1, 2, 3]


[19]	valid_0's auc: 0.78733	valid_0's binary_logloss: 0.183673
[20]	valid_0's auc: 0.787414	valid_0's binary_logloss: 0.183447
[21]	valid_0's auc: 0.787469	valid_0's binary_logloss: 0.183248
[22]	valid_0's auc: 0.787499	valid_0's binary_logloss: 0.183062
[23]	valid_0's auc: 0.787563	valid_0's binary_logloss: 0.182904
[24]	valid_0's auc: 0.787548	valid_0's binary_logloss: 0.182768
[25]	valid_0's auc: 0.787477	valid_0's binary_logloss: 0.182643
[26]	valid_0's auc: 0.787531	valid_0's binary_logloss: 0.182533
[27]	valid_0's auc: 0.787536	valid_0's binary_logloss: 0.182435
[28]	valid_0's auc: 0.78753	valid_0's binary_logloss: 0.182354
[29]	valid_0's auc: 0.787495	valid_0's binary_logloss: 0.182269
[30]	valid_0's auc: 0.787483	valid_0's binary_logloss: 0.182199
[31]	valid_0's auc: 0.787469	valid_0's binary_logloss: 0.182119
[32]	valid_0's auc: 0.787503	valid_0's binary_logloss: 0.182053
[33]	valid_0's auc: 0.787524	valid_0's binary_logloss: 0.182007
[34]	valid_0's auc: 0.787525	valid_0's bin

0it [00:00, ?it/s]

New categorical_feature is [0, 1, 2, 3]


[1]	valid_0's auc: 0.775974	valid_0's binary_logloss: 0.207866
[2]	valid_0's auc: 0.776675	valid_0's binary_logloss: 0.203178
[3]	valid_0's auc: 0.777452	valid_0's binary_logloss: 0.199702
[4]	valid_0's auc: 0.777442	valid_0's binary_logloss: 0.197002
[5]	valid_0's auc: 0.77754	valid_0's binary_logloss: 0.194889
[6]	valid_0's auc: 0.77757	valid_0's binary_logloss: 0.193172
[7]	valid_0's auc: 0.777809	valid_0's binary_logloss: 0.191761
[8]	valid_0's auc: 0.777958	valid_0's binary_logloss: 0.190558
[9]	valid_0's auc: 0.777952	valid_0's binary_logloss: 0.189559
[10]	valid_0's auc: 0.778082	valid_0's binary_logloss: 0.188734
[11]	valid_0's auc: 0.778143	valid_0's binary_logloss: 0.188018
[12]	valid_0's auc: 0.778071	valid_0's binary_logloss: 0.187427
[13]	valid_0's auc: 0.77829	valid_0's binary_logloss: 0.186919
[14]	valid_0's auc: 0.778349	valid_0's binary_logloss: 0.186474
[15]	valid_0's auc: 0.778203	valid_0's binary_logloss: 0.186092
[16]	valid_0's auc: 0.778282	valid_0's binary_loglos

New categorical_feature is [0, 1, 2, 3]


[20]	valid_0's auc: 0.78135	valid_0's binary_logloss: 0.184158
[21]	valid_0's auc: 0.781409	valid_0's binary_logloss: 0.183978
[22]	valid_0's auc: 0.781386	valid_0's binary_logloss: 0.18382
[23]	valid_0's auc: 0.781413	valid_0's binary_logloss: 0.18367
[24]	valid_0's auc: 0.781393	valid_0's binary_logloss: 0.183543
[25]	valid_0's auc: 0.781411	valid_0's binary_logloss: 0.183428
[26]	valid_0's auc: 0.781444	valid_0's binary_logloss: 0.183334
[27]	valid_0's auc: 0.781494	valid_0's binary_logloss: 0.183249
[28]	valid_0's auc: 0.781445	valid_0's binary_logloss: 0.183188
[29]	valid_0's auc: 0.781386	valid_0's binary_logloss: 0.183125
[30]	valid_0's auc: 0.781365	valid_0's binary_logloss: 0.183075
[31]	valid_0's auc: 0.781267	valid_0's binary_logloss: 0.183012
[32]	valid_0's auc: 0.781281	valid_0's binary_logloss: 0.182976
[33]	valid_0's auc: 0.781226	valid_0's binary_logloss: 0.182943
[34]	valid_0's auc: 0.781322	valid_0's binary_logloss: 0.182894
[35]	valid_0's auc: 0.781347	valid_0's bina

New categorical_feature is [0, 1, 2, 3]


[14]	valid_0's auc: 0.784526	valid_0's binary_logloss: 0.185375
[15]	valid_0's auc: 0.784494	valid_0's binary_logloss: 0.184951
[16]	valid_0's auc: 0.784409	valid_0's binary_logloss: 0.184577
[17]	valid_0's auc: 0.784319	valid_0's binary_logloss: 0.184254
[18]	valid_0's auc: 0.784365	valid_0's binary_logloss: 0.183979
[19]	valid_0's auc: 0.784442	valid_0's binary_logloss: 0.183745
[20]	valid_0's auc: 0.784493	valid_0's binary_logloss: 0.183528
[21]	valid_0's auc: 0.784497	valid_0's binary_logloss: 0.18335
[22]	valid_0's auc: 0.784542	valid_0's binary_logloss: 0.183169
[23]	valid_0's auc: 0.784531	valid_0's binary_logloss: 0.183018
[24]	valid_0's auc: 0.784503	valid_0's binary_logloss: 0.182887
[25]	valid_0's auc: 0.784376	valid_0's binary_logloss: 0.182788
[26]	valid_0's auc: 0.784347	valid_0's binary_logloss: 0.182698
[27]	valid_0's auc: 0.78435	valid_0's binary_logloss: 0.182608
[28]	valid_0's auc: 0.784377	valid_0's binary_logloss: 0.182532
[29]	valid_0's auc: 0.784306	valid_0's bin

0it [00:00, ?it/s]

New categorical_feature is [0, 1, 2, 3]


[1]	valid_0's auc: 0.777737	valid_0's binary_logloss: 0.207633
[2]	valid_0's auc: 0.778909	valid_0's binary_logloss: 0.202878
[3]	valid_0's auc: 0.779388	valid_0's binary_logloss: 0.199393
[4]	valid_0's auc: 0.780235	valid_0's binary_logloss: 0.19669
[5]	valid_0's auc: 0.780284	valid_0's binary_logloss: 0.194563
[6]	valid_0's auc: 0.780469	valid_0's binary_logloss: 0.192837
[7]	valid_0's auc: 0.780365	valid_0's binary_logloss: 0.19144
[8]	valid_0's auc: 0.780687	valid_0's binary_logloss: 0.190258
[9]	valid_0's auc: 0.780754	valid_0's binary_logloss: 0.189287
[10]	valid_0's auc: 0.780792	valid_0's binary_logloss: 0.188449
[11]	valid_0's auc: 0.781216	valid_0's binary_logloss: 0.187722
[12]	valid_0's auc: 0.781364	valid_0's binary_logloss: 0.187101
[13]	valid_0's auc: 0.781328	valid_0's binary_logloss: 0.186575
[14]	valid_0's auc: 0.781263	valid_0's binary_logloss: 0.186118
[15]	valid_0's auc: 0.781276	valid_0's binary_logloss: 0.185718
[16]	valid_0's auc: 0.781248	valid_0's binary_loglo

New categorical_feature is [0, 1, 2, 3]


[5]	valid_0's auc: 0.785332	valid_0's binary_logloss: 0.194073
[6]	valid_0's auc: 0.785688	valid_0's binary_logloss: 0.192291
[7]	valid_0's auc: 0.785895	valid_0's binary_logloss: 0.19081
[8]	valid_0's auc: 0.78607	valid_0's binary_logloss: 0.189584
[9]	valid_0's auc: 0.786264	valid_0's binary_logloss: 0.188539
[10]	valid_0's auc: 0.786197	valid_0's binary_logloss: 0.187654
[11]	valid_0's auc: 0.786188	valid_0's binary_logloss: 0.186904
[12]	valid_0's auc: 0.786182	valid_0's binary_logloss: 0.186247
[13]	valid_0's auc: 0.786246	valid_0's binary_logloss: 0.18569
[14]	valid_0's auc: 0.786345	valid_0's binary_logloss: 0.185206
[15]	valid_0's auc: 0.786369	valid_0's binary_logloss: 0.184789
[16]	valid_0's auc: 0.78637	valid_0's binary_logloss: 0.184419
[17]	valid_0's auc: 0.78641	valid_0's binary_logloss: 0.184105
[18]	valid_0's auc: 0.786552	valid_0's binary_logloss: 0.183833
[19]	valid_0's auc: 0.786688	valid_0's binary_logloss: 0.183591
[20]	valid_0's auc: 0.786609	valid_0's binary_logl

New categorical_feature is [0, 1, 2, 3]


[15]	valid_0's auc: 0.787599	valid_0's binary_logloss: 0.184122
[16]	valid_0's auc: 0.787646	valid_0's binary_logloss: 0.183746
[17]	valid_0's auc: 0.787571	valid_0's binary_logloss: 0.183425
[18]	valid_0's auc: 0.787508	valid_0's binary_logloss: 0.183132
[19]	valid_0's auc: 0.787698	valid_0's binary_logloss: 0.182875
[20]	valid_0's auc: 0.787718	valid_0's binary_logloss: 0.182653
[21]	valid_0's auc: 0.787702	valid_0's binary_logloss: 0.182467
[22]	valid_0's auc: 0.787654	valid_0's binary_logloss: 0.182293
[23]	valid_0's auc: 0.787625	valid_0's binary_logloss: 0.182156
[24]	valid_0's auc: 0.787538	valid_0's binary_logloss: 0.182034
[25]	valid_0's auc: 0.78758	valid_0's binary_logloss: 0.181928
[26]	valid_0's auc: 0.787569	valid_0's binary_logloss: 0.181816
[27]	valid_0's auc: 0.787615	valid_0's binary_logloss: 0.181732
[28]	valid_0's auc: 0.787625	valid_0's binary_logloss: 0.18165
[29]	valid_0's auc: 0.787586	valid_0's binary_logloss: 0.181576
[30]	valid_0's auc: 0.787511	valid_0's bin

0it [00:00, ?it/s]

New categorical_feature is [0, 1, 2, 3]


[1]	valid_0's auc: 0.779909	valid_0's binary_logloss: 0.207739
[2]	valid_0's auc: 0.781427	valid_0's binary_logloss: 0.20297
[3]	valid_0's auc: 0.781571	valid_0's binary_logloss: 0.199475
[4]	valid_0's auc: 0.78189	valid_0's binary_logloss: 0.196778
[5]	valid_0's auc: 0.782034	valid_0's binary_logloss: 0.19462
[6]	valid_0's auc: 0.782194	valid_0's binary_logloss: 0.192852
[7]	valid_0's auc: 0.782631	valid_0's binary_logloss: 0.191416
[8]	valid_0's auc: 0.782874	valid_0's binary_logloss: 0.190207
[9]	valid_0's auc: 0.782955	valid_0's binary_logloss: 0.18918
[10]	valid_0's auc: 0.783124	valid_0's binary_logloss: 0.188323
[11]	valid_0's auc: 0.783271	valid_0's binary_logloss: 0.187593
[12]	valid_0's auc: 0.78346	valid_0's binary_logloss: 0.186929
[13]	valid_0's auc: 0.78348	valid_0's binary_logloss: 0.186374
[14]	valid_0's auc: 0.783534	valid_0's binary_logloss: 0.185909
[15]	valid_0's auc: 0.783583	valid_0's binary_logloss: 0.185499
[16]	valid_0's auc: 0.783677	valid_0's binary_logloss: 

New categorical_feature is [0, 1, 2, 3]


[14]	valid_0's auc: 0.790437	valid_0's binary_logloss: 0.184748
[15]	valid_0's auc: 0.79056	valid_0's binary_logloss: 0.184322
[16]	valid_0's auc: 0.790621	valid_0's binary_logloss: 0.183935
[17]	valid_0's auc: 0.790733	valid_0's binary_logloss: 0.183609
[18]	valid_0's auc: 0.790655	valid_0's binary_logloss: 0.183326
[19]	valid_0's auc: 0.790593	valid_0's binary_logloss: 0.183068
[20]	valid_0's auc: 0.790746	valid_0's binary_logloss: 0.18285
[21]	valid_0's auc: 0.790709	valid_0's binary_logloss: 0.182656
[22]	valid_0's auc: 0.790685	valid_0's binary_logloss: 0.182491
[23]	valid_0's auc: 0.790647	valid_0's binary_logloss: 0.18234
[24]	valid_0's auc: 0.79064	valid_0's binary_logloss: 0.18221
[25]	valid_0's auc: 0.790696	valid_0's binary_logloss: 0.182099
[26]	valid_0's auc: 0.790681	valid_0's binary_logloss: 0.181997
[27]	valid_0's auc: 0.790503	valid_0's binary_logloss: 0.181915
[28]	valid_0's auc: 0.79045	valid_0's binary_logloss: 0.181844
[29]	valid_0's auc: 0.790311	valid_0's binary_

New categorical_feature is [0, 1, 2, 3]


[1]	valid_0's auc: 0.777747	valid_0's binary_logloss: 0.207712
[2]	valid_0's auc: 0.778499	valid_0's binary_logloss: 0.203008
[3]	valid_0's auc: 0.7789	valid_0's binary_logloss: 0.199572
[4]	valid_0's auc: 0.778905	valid_0's binary_logloss: 0.196929
[5]	valid_0's auc: 0.778633	valid_0's binary_logloss: 0.194854
[6]	valid_0's auc: 0.778653	valid_0's binary_logloss: 0.193145
[7]	valid_0's auc: 0.778516	valid_0's binary_logloss: 0.191761
[8]	valid_0's auc: 0.779283	valid_0's binary_logloss: 0.190586
[9]	valid_0's auc: 0.779543	valid_0's binary_logloss: 0.189611
[10]	valid_0's auc: 0.779576	valid_0's binary_logloss: 0.188787
[11]	valid_0's auc: 0.779718	valid_0's binary_logloss: 0.188095
[12]	valid_0's auc: 0.779651	valid_0's binary_logloss: 0.187483
[13]	valid_0's auc: 0.779767	valid_0's binary_logloss: 0.186954
[14]	valid_0's auc: 0.779805	valid_0's binary_logloss: 0.186512
[15]	valid_0's auc: 0.779715	valid_0's binary_logloss: 0.186117
[16]	valid_0's auc: 0.779776	valid_0's binary_loglo

In [11]:
scores_cv

[0.7876221624506864,
 0.7867872373773261,
 0.786519549789161,
 0.7797414551876096,
 0.7806722783473263,
 0.7792948011300385,
 0.7913433107929201,
 0.7907979311537467,
 0.7912293564211352,
 0.777790702181883,
 0.7783926835859292,
 0.7787539490909074,
 0.7825834469876747,
 0.7820520037807075,
 0.7832959901507028]

In [13]:
np.mean(scores_cv)

0.7837917905618502