In [1]:
import scipy
import string
import pandas as pd
import numpy as np

In [2]:
df_train = pd.read_csv('train.tsv', sep='\t', index_col=0)
df_test = pd.read_csv('test_nolabel.tsv', sep='\t', index_col=0)

In [3]:
dates = sorted(set(df_train['date_created']) | set(df_test['date_created']))
date2week = dict(zip(dates, np.arange(365) % 7))

df_train['week'] = df_train['date_created'].apply(lambda x: date2week[x])
df_test['week'] = df_test['date_created'].apply(lambda x: date2week[x])

In [4]:
def get_prop_text(data):
    prop_col = []
    for item_prop in data['properties'].values:
        prop_text = []
        for text in item_prop.split('}')[:-1]:
            props = text[text.find('{'):].split(',')
            name = props[1][props[1].find(': '):].translate(str.maketrans('','',string.punctuation))
            val = props[2][props[2].find(': '):].translate(str.maketrans('','',string.punctuation))
            if 'Нет' not in val.lower():
#                 prop_text.append(name)
                if 'Есть' not in val.lower():
                    prop_text.append(val)
        prop_col.append(' '.join(prop_text))
    return prop_col

df_train['prop_text'] = get_prop_text(df_train)
df_test['prop_text'] = get_prop_text(df_test)

In [5]:
X = df_train.drop(['sold_fast', 'properties', 'product_id', 'owner_id'], axis=1)
y = df_train['sold_fast']

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.5, random_state=42, shuffle=False)

# Preprocessing

In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import Normalizer, OneHotEncoder

In [8]:
name_text = X_train['name_text']
vectorizer_name_text = TfidfVectorizer(max_features=200, decode_error='ignore')
vectorizer_name_text.fit(name_text)

desc_text = X_train['desc_text']
vectorizer_desc_text = TfidfVectorizer(max_features=200, decode_error='ignore')
vectorizer_desc_text.fit(desc_text)
             
prop_text = X_train['prop_text']
vectorizer_prop_text = TfidfVectorizer(max_features=200, decode_error='ignore')
vectorizer_prop_text.fit(desc_text)

TfidfVectorizer(analyzer='word', binary=False, decode_error='ignore',
        dtype=<class 'numpy.float64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=200, min_df=1,
        ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,
        stop_words=None, strip_accents=None, sublinear_tf=False,
        token_pattern='(?u)\\b\\w\\w+\\b', tokenizer=None, use_idf=True,
        vocabulary=None)

In [9]:
onehot_preprocess = ColumnTransformer([("dummy_col", OneHotEncoder(
    handle_unknown='ignore',
    sparse=False,
    categories=[
        sorted(set(X_train['region']) & set(df_test['region'])),
        sorted(set(X_train['category_id']) & set(df_test['category_id'])),
        sorted(set(X_train['subcategory_id']) & set(df_test['subcategory_id'])),
        sorted(set(X_train['sold_mode']) & set(df_test['sold_mode'])),
        sorted(set(X_train['product_type']) & set(df_test['product_type']))
    ]), ['region', 'category_id', 'subcategory_id', 'sold_mode', 'product_type'])])

onehot_preprocess.fit(X_train)

ColumnTransformer(n_jobs=None, remainder='drop', sparse_threshold=0.3,
         transformer_weights=None,
         transformers=[('dummy_col', OneHotEncoder(categorical_features=None,
       categories=[['Адыгея', 'Алтайский край', 'Амурская область', 'Архангельская область', 'Астраханская область', 'Башкортостан', 'Белгородская область', 'Брянская область', 'Владимирская область', 'Волгоградская область', 'Воло...lues=None, sparse=False), ['region', 'category_id', 'subcategory_id', 'sold_mode', 'product_type'])])

In [10]:
def preprocessing(data):
    return np.concatenate([
        data[['week', 'category_id', 'lat', 'long', 'price', 'product_type',
                  'sold_mode', 'subcategory_id', 'img_num']].values,
        data[['payment_available', 'delivery_available']].values * 1.,
        onehot_preprocess.transform(data),
        vectorizer_name_text.transform(data['name_text']).toarray(),
        vectorizer_desc_text.transform(data['desc_text']).toarray(),
        vectorizer_desc_text.transform(data['prop_text']).toarray()
    ], axis=1)

# Model

In [11]:
from catboost import CatBoostClassifier
from sklearn.metrics import roc_auc_score

In [12]:
X_train = preprocessing(X_train)
X_val = preprocessing(X_val)
X_train.shape, X_val.shape

((175640, 990), (175641, 990))

In [13]:
cb = CatBoostClassifier(learning_rate=0.1, iterations=2000, depth=4)
cb.fit(X_train, y_train)

0:	learn: 0.6624654	total: 238ms	remaining: 7m 55s
1:	learn: 0.6379506	total: 334ms	remaining: 5m 33s
2:	learn: 0.6182442	total: 434ms	remaining: 4m 49s
3:	learn: 0.6020743	total: 565ms	remaining: 4m 41s
4:	learn: 0.5889718	total: 665ms	remaining: 4m 25s
5:	learn: 0.5789558	total: 770ms	remaining: 4m 15s
6:	learn: 0.5706205	total: 885ms	remaining: 4m 11s
7:	learn: 0.5639686	total: 999ms	remaining: 4m 8s
8:	learn: 0.5583274	total: 1.11s	remaining: 4m 5s
9:	learn: 0.5535298	total: 1.24s	remaining: 4m 5s
10:	learn: 0.5500536	total: 1.33s	remaining: 4m 1s
11:	learn: 0.5468826	total: 1.44s	remaining: 3m 58s
12:	learn: 0.5443482	total: 1.55s	remaining: 3m 57s
13:	learn: 0.5420949	total: 1.67s	remaining: 3m 57s
14:	learn: 0.5403234	total: 1.77s	remaining: 3m 55s
15:	learn: 0.5387592	total: 1.89s	remaining: 3m 54s
16:	learn: 0.5373334	total: 2s	remaining: 3m 53s
17:	learn: 0.5362803	total: 2.13s	remaining: 3m 54s
18:	learn: 0.5353527	total: 2.24s	remaining: 3m 53s
19:	learn: 0.5345621	total: 2

161:	learn: 0.5238139	total: 19.9s	remaining: 3m 45s
162:	learn: 0.5237831	total: 20s	remaining: 3m 45s
163:	learn: 0.5237579	total: 20.1s	remaining: 3m 45s
164:	learn: 0.5237009	total: 20.3s	remaining: 3m 45s
165:	learn: 0.5236713	total: 20.4s	remaining: 3m 45s
166:	learn: 0.5236533	total: 20.5s	remaining: 3m 45s
167:	learn: 0.5236355	total: 20.7s	remaining: 3m 45s
168:	learn: 0.5236013	total: 20.8s	remaining: 3m 45s
169:	learn: 0.5235836	total: 20.9s	remaining: 3m 45s
170:	learn: 0.5235550	total: 21s	remaining: 3m 45s
171:	learn: 0.5235301	total: 21.2s	remaining: 3m 45s
172:	learn: 0.5234965	total: 21.3s	remaining: 3m 45s
173:	learn: 0.5234639	total: 21.5s	remaining: 3m 45s
174:	learn: 0.5234352	total: 21.6s	remaining: 3m 45s
175:	learn: 0.5234012	total: 21.7s	remaining: 3m 45s
176:	learn: 0.5233735	total: 21.9s	remaining: 3m 45s
177:	learn: 0.5233541	total: 22s	remaining: 3m 45s
178:	learn: 0.5233001	total: 22.2s	remaining: 3m 45s
179:	learn: 0.5232696	total: 22.3s	remaining: 3m 45s

318:	learn: 0.5203554	total: 39.4s	remaining: 3m 27s
319:	learn: 0.5203353	total: 39.5s	remaining: 3m 27s
320:	learn: 0.5203155	total: 39.6s	remaining: 3m 27s
321:	learn: 0.5202887	total: 39.8s	remaining: 3m 27s
322:	learn: 0.5202752	total: 39.9s	remaining: 3m 27s
323:	learn: 0.5202642	total: 40s	remaining: 3m 26s
324:	learn: 0.5202492	total: 40.1s	remaining: 3m 26s
325:	learn: 0.5202155	total: 40.2s	remaining: 3m 26s
326:	learn: 0.5201878	total: 40.3s	remaining: 3m 26s
327:	learn: 0.5201635	total: 40.4s	remaining: 3m 26s
328:	learn: 0.5201432	total: 40.6s	remaining: 3m 25s
329:	learn: 0.5201246	total: 40.7s	remaining: 3m 25s
330:	learn: 0.5201085	total: 40.8s	remaining: 3m 25s
331:	learn: 0.5200835	total: 40.9s	remaining: 3m 25s
332:	learn: 0.5200686	total: 41s	remaining: 3m 25s
333:	learn: 0.5200464	total: 41.1s	remaining: 3m 24s
334:	learn: 0.5200158	total: 41.2s	remaining: 3m 24s
335:	learn: 0.5199974	total: 41.3s	remaining: 3m 24s
336:	learn: 0.5199851	total: 41.4s	remaining: 3m 2

475:	learn: 0.5177162	total: 58.8s	remaining: 3m 8s
476:	learn: 0.5177044	total: 58.9s	remaining: 3m 8s
477:	learn: 0.5176861	total: 59s	remaining: 3m 8s
478:	learn: 0.5176611	total: 59.2s	remaining: 3m 7s
479:	learn: 0.5176418	total: 59.3s	remaining: 3m 7s
480:	learn: 0.5176334	total: 59.4s	remaining: 3m 7s
481:	learn: 0.5176205	total: 59.5s	remaining: 3m 7s
482:	learn: 0.5176094	total: 59.6s	remaining: 3m 7s
483:	learn: 0.5175821	total: 59.8s	remaining: 3m 7s
484:	learn: 0.5175558	total: 59.9s	remaining: 3m 7s
485:	learn: 0.5175404	total: 60s	remaining: 3m 6s
486:	learn: 0.5175301	total: 1m	remaining: 3m 6s
487:	learn: 0.5175102	total: 1m	remaining: 3m 6s
488:	learn: 0.5174866	total: 1m	remaining: 3m 6s
489:	learn: 0.5174710	total: 1m	remaining: 3m 6s
490:	learn: 0.5174529	total: 1m	remaining: 3m 6s
491:	learn: 0.5174422	total: 1m	remaining: 3m 5s
492:	learn: 0.5174246	total: 1m	remaining: 3m 5s
493:	learn: 0.5174075	total: 1m	remaining: 3m 5s
494:	learn: 0.5173947	total: 1m	remainin

631:	learn: 0.5153850	total: 1m 17s	remaining: 2m 47s
632:	learn: 0.5153689	total: 1m 17s	remaining: 2m 47s
633:	learn: 0.5153521	total: 1m 17s	remaining: 2m 47s
634:	learn: 0.5153452	total: 1m 17s	remaining: 2m 46s
635:	learn: 0.5153368	total: 1m 17s	remaining: 2m 46s
636:	learn: 0.5153208	total: 1m 17s	remaining: 2m 46s
637:	learn: 0.5153102	total: 1m 17s	remaining: 2m 46s
638:	learn: 0.5153016	total: 1m 18s	remaining: 2m 46s
639:	learn: 0.5152842	total: 1m 18s	remaining: 2m 46s
640:	learn: 0.5152671	total: 1m 18s	remaining: 2m 46s
641:	learn: 0.5152580	total: 1m 18s	remaining: 2m 45s
642:	learn: 0.5152396	total: 1m 18s	remaining: 2m 45s
643:	learn: 0.5152229	total: 1m 18s	remaining: 2m 45s
644:	learn: 0.5152152	total: 1m 18s	remaining: 2m 45s
645:	learn: 0.5151972	total: 1m 18s	remaining: 2m 45s
646:	learn: 0.5151762	total: 1m 19s	remaining: 2m 45s
647:	learn: 0.5151651	total: 1m 19s	remaining: 2m 45s
648:	learn: 0.5151538	total: 1m 19s	remaining: 2m 45s
649:	learn: 0.5151418	total:

784:	learn: 0.5133258	total: 1m 35s	remaining: 2m 28s
785:	learn: 0.5133121	total: 1m 36s	remaining: 2m 28s
786:	learn: 0.5133009	total: 1m 36s	remaining: 2m 28s
787:	learn: 0.5132916	total: 1m 36s	remaining: 2m 28s
788:	learn: 0.5132728	total: 1m 36s	remaining: 2m 27s
789:	learn: 0.5132581	total: 1m 36s	remaining: 2m 27s
790:	learn: 0.5132409	total: 1m 36s	remaining: 2m 27s
791:	learn: 0.5132241	total: 1m 36s	remaining: 2m 27s
792:	learn: 0.5132205	total: 1m 36s	remaining: 2m 27s
793:	learn: 0.5132106	total: 1m 36s	remaining: 2m 27s
794:	learn: 0.5131974	total: 1m 36s	remaining: 2m 26s
795:	learn: 0.5131904	total: 1m 37s	remaining: 2m 26s
796:	learn: 0.5131814	total: 1m 37s	remaining: 2m 26s
797:	learn: 0.5131688	total: 1m 37s	remaining: 2m 26s
798:	learn: 0.5131641	total: 1m 37s	remaining: 2m 26s
799:	learn: 0.5131543	total: 1m 37s	remaining: 2m 26s
800:	learn: 0.5131429	total: 1m 37s	remaining: 2m 26s
801:	learn: 0.5131276	total: 1m 37s	remaining: 2m 26s
802:	learn: 0.5131173	total:

937:	learn: 0.5114083	total: 1m 54s	remaining: 2m 9s
938:	learn: 0.5113909	total: 1m 54s	remaining: 2m 9s
939:	learn: 0.5113727	total: 1m 54s	remaining: 2m 9s
940:	learn: 0.5113555	total: 1m 55s	remaining: 2m 9s
941:	learn: 0.5113478	total: 1m 55s	remaining: 2m 9s
942:	learn: 0.5113339	total: 1m 55s	remaining: 2m 9s
943:	learn: 0.5113277	total: 1m 55s	remaining: 2m 9s
944:	learn: 0.5113150	total: 1m 55s	remaining: 2m 8s
945:	learn: 0.5113006	total: 1m 55s	remaining: 2m 8s
946:	learn: 0.5112914	total: 1m 55s	remaining: 2m 8s
947:	learn: 0.5112815	total: 1m 55s	remaining: 2m 8s
948:	learn: 0.5112707	total: 1m 56s	remaining: 2m 8s
949:	learn: 0.5112605	total: 1m 56s	remaining: 2m 8s
950:	learn: 0.5112485	total: 1m 56s	remaining: 2m 8s
951:	learn: 0.5112387	total: 1m 56s	remaining: 2m 8s
952:	learn: 0.5112275	total: 1m 56s	remaining: 2m 8s
953:	learn: 0.5112154	total: 1m 56s	remaining: 2m 7s
954:	learn: 0.5112001	total: 1m 56s	remaining: 2m 7s
955:	learn: 0.5111873	total: 1m 56s	remaining:

1091:	learn: 0.5095182	total: 2m 13s	remaining: 1m 51s
1092:	learn: 0.5095032	total: 2m 13s	remaining: 1m 51s
1093:	learn: 0.5094879	total: 2m 13s	remaining: 1m 50s
1094:	learn: 0.5094795	total: 2m 14s	remaining: 1m 50s
1095:	learn: 0.5094655	total: 2m 14s	remaining: 1m 50s
1096:	learn: 0.5094552	total: 2m 14s	remaining: 1m 50s
1097:	learn: 0.5094477	total: 2m 14s	remaining: 1m 50s
1098:	learn: 0.5094362	total: 2m 14s	remaining: 1m 50s
1099:	learn: 0.5094289	total: 2m 14s	remaining: 1m 50s
1100:	learn: 0.5094142	total: 2m 14s	remaining: 1m 50s
1101:	learn: 0.5094070	total: 2m 14s	remaining: 1m 49s
1102:	learn: 0.5093979	total: 2m 14s	remaining: 1m 49s
1103:	learn: 0.5093917	total: 2m 15s	remaining: 1m 49s
1104:	learn: 0.5093729	total: 2m 15s	remaining: 1m 49s
1105:	learn: 0.5093591	total: 2m 15s	remaining: 1m 49s
1106:	learn: 0.5093449	total: 2m 15s	remaining: 1m 49s
1107:	learn: 0.5093341	total: 2m 15s	remaining: 1m 49s
1108:	learn: 0.5093230	total: 2m 15s	remaining: 1m 48s
1109:	lear

1240:	learn: 0.5076860	total: 2m 31s	remaining: 1m 32s
1241:	learn: 0.5076744	total: 2m 31s	remaining: 1m 32s
1242:	learn: 0.5076704	total: 2m 32s	remaining: 1m 32s
1243:	learn: 0.5076532	total: 2m 32s	remaining: 1m 32s
1244:	learn: 0.5076441	total: 2m 32s	remaining: 1m 32s
1245:	learn: 0.5076304	total: 2m 32s	remaining: 1m 32s
1246:	learn: 0.5076212	total: 2m 32s	remaining: 1m 32s
1247:	learn: 0.5076122	total: 2m 32s	remaining: 1m 31s
1248:	learn: 0.5075994	total: 2m 32s	remaining: 1m 31s
1249:	learn: 0.5075939	total: 2m 32s	remaining: 1m 31s
1250:	learn: 0.5075856	total: 2m 32s	remaining: 1m 31s
1251:	learn: 0.5075699	total: 2m 33s	remaining: 1m 31s
1252:	learn: 0.5075592	total: 2m 33s	remaining: 1m 31s
1253:	learn: 0.5075450	total: 2m 33s	remaining: 1m 31s
1254:	learn: 0.5075304	total: 2m 33s	remaining: 1m 31s
1255:	learn: 0.5075212	total: 2m 33s	remaining: 1m 31s
1256:	learn: 0.5075104	total: 2m 33s	remaining: 1m 30s
1257:	learn: 0.5074996	total: 2m 33s	remaining: 1m 30s
1258:	lear

1390:	learn: 0.5059977	total: 2m 50s	remaining: 1m 14s
1391:	learn: 0.5059921	total: 2m 50s	remaining: 1m 14s
1392:	learn: 0.5059774	total: 2m 50s	remaining: 1m 14s
1393:	learn: 0.5059713	total: 2m 50s	remaining: 1m 14s
1394:	learn: 0.5059584	total: 2m 50s	remaining: 1m 14s
1395:	learn: 0.5059473	total: 2m 51s	remaining: 1m 14s
1396:	learn: 0.5059378	total: 2m 51s	remaining: 1m 13s
1397:	learn: 0.5059249	total: 2m 51s	remaining: 1m 13s
1398:	learn: 0.5059116	total: 2m 51s	remaining: 1m 13s
1399:	learn: 0.5059016	total: 2m 51s	remaining: 1m 13s
1400:	learn: 0.5058945	total: 2m 51s	remaining: 1m 13s
1401:	learn: 0.5058890	total: 2m 51s	remaining: 1m 13s
1402:	learn: 0.5058748	total: 2m 52s	remaining: 1m 13s
1403:	learn: 0.5058640	total: 2m 52s	remaining: 1m 13s
1404:	learn: 0.5058534	total: 2m 52s	remaining: 1m 12s
1405:	learn: 0.5058404	total: 2m 52s	remaining: 1m 12s
1406:	learn: 0.5058304	total: 2m 52s	remaining: 1m 12s
1407:	learn: 0.5058151	total: 2m 52s	remaining: 1m 12s
1408:	lear

1545:	learn: 0.5043427	total: 3m 10s	remaining: 55.8s
1546:	learn: 0.5043265	total: 3m 10s	remaining: 55.7s
1547:	learn: 0.5043201	total: 3m 10s	remaining: 55.6s
1548:	learn: 0.5043067	total: 3m 10s	remaining: 55.5s
1549:	learn: 0.5042952	total: 3m 10s	remaining: 55.3s
1550:	learn: 0.5042886	total: 3m 10s	remaining: 55.2s
1551:	learn: 0.5042819	total: 3m 10s	remaining: 55.1s
1552:	learn: 0.5042685	total: 3m 10s	remaining: 54.9s
1553:	learn: 0.5042609	total: 3m 10s	remaining: 54.8s
1554:	learn: 0.5042556	total: 3m 11s	remaining: 54.7s
1555:	learn: 0.5042489	total: 3m 11s	remaining: 54.6s
1556:	learn: 0.5042366	total: 3m 11s	remaining: 54.4s
1557:	learn: 0.5042213	total: 3m 11s	remaining: 54.3s
1558:	learn: 0.5042096	total: 3m 11s	remaining: 54.2s
1559:	learn: 0.5041883	total: 3m 11s	remaining: 54.1s
1560:	learn: 0.5041832	total: 3m 11s	remaining: 53.9s
1561:	learn: 0.5041620	total: 3m 11s	remaining: 53.8s
1562:	learn: 0.5041541	total: 3m 12s	remaining: 53.7s
1563:	learn: 0.5041452	total

1699:	learn: 0.5027181	total: 3m 29s	remaining: 36.9s
1700:	learn: 0.5027097	total: 3m 29s	remaining: 36.8s
1701:	learn: 0.5027051	total: 3m 29s	remaining: 36.6s
1702:	learn: 0.5026964	total: 3m 29s	remaining: 36.5s
1703:	learn: 0.5026867	total: 3m 29s	remaining: 36.4s
1704:	learn: 0.5026667	total: 3m 29s	remaining: 36.3s
1705:	learn: 0.5026619	total: 3m 29s	remaining: 36.2s
1706:	learn: 0.5026536	total: 3m 29s	remaining: 36s
1707:	learn: 0.5026467	total: 3m 30s	remaining: 35.9s
1708:	learn: 0.5026333	total: 3m 30s	remaining: 35.8s
1709:	learn: 0.5026264	total: 3m 30s	remaining: 35.7s
1710:	learn: 0.5026219	total: 3m 30s	remaining: 35.5s
1711:	learn: 0.5026035	total: 3m 30s	remaining: 35.4s
1712:	learn: 0.5025946	total: 3m 30s	remaining: 35.3s
1713:	learn: 0.5025795	total: 3m 30s	remaining: 35.2s
1714:	learn: 0.5025586	total: 3m 30s	remaining: 35.1s
1715:	learn: 0.5025398	total: 3m 31s	remaining: 34.9s
1716:	learn: 0.5025261	total: 3m 31s	remaining: 34.8s
1717:	learn: 0.5025060	total: 

1852:	learn: 0.5010084	total: 3m 49s	remaining: 18.2s
1853:	learn: 0.5009945	total: 3m 49s	remaining: 18s
1854:	learn: 0.5009816	total: 3m 49s	remaining: 17.9s
1855:	learn: 0.5009729	total: 3m 49s	remaining: 17.8s
1856:	learn: 0.5009591	total: 3m 49s	remaining: 17.7s
1857:	learn: 0.5009440	total: 3m 49s	remaining: 17.6s
1858:	learn: 0.5009365	total: 3m 49s	remaining: 17.4s
1859:	learn: 0.5009289	total: 3m 49s	remaining: 17.3s
1860:	learn: 0.5009183	total: 3m 50s	remaining: 17.2s
1861:	learn: 0.5009038	total: 3m 50s	remaining: 17.1s
1862:	learn: 0.5008986	total: 3m 50s	remaining: 16.9s
1863:	learn: 0.5008826	total: 3m 50s	remaining: 16.8s
1864:	learn: 0.5008733	total: 3m 50s	remaining: 16.7s
1865:	learn: 0.5008691	total: 3m 50s	remaining: 16.6s
1866:	learn: 0.5008599	total: 3m 50s	remaining: 16.4s
1867:	learn: 0.5008456	total: 3m 51s	remaining: 16.3s
1868:	learn: 0.5008348	total: 3m 51s	remaining: 16.2s
1869:	learn: 0.5008297	total: 3m 51s	remaining: 16.1s
1870:	learn: 0.5008143	total: 

<catboost.core.CatBoostClassifier at 0x7f87283e1cf8>

In [14]:
print("train AUC: ", roc_auc_score(y_train, cb.predict_proba(X_train)[:, 1]))
print("val AUC: ", roc_auc_score(y_val, cb.predict_proba(X_val)[:, 1]))

train AUC:  0.6955546730688066
val AUC:  0.6232308590796508


# Submittion

In [15]:
X = preprocessing(X)
X.shape

(351281, 990)

In [16]:
cb.fit(X, y)

0:	learn: 0.6620660	total: 218ms	remaining: 7m 14s
1:	learn: 0.6365984	total: 412ms	remaining: 6m 51s
2:	learn: 0.6162150	total: 614ms	remaining: 6m 48s
3:	learn: 0.5996935	total: 849ms	remaining: 7m 3s
4:	learn: 0.5870720	total: 1.03s	remaining: 6m 50s
5:	learn: 0.5765380	total: 1.24s	remaining: 6m 51s
6:	learn: 0.5677110	total: 1.44s	remaining: 6m 48s
7:	learn: 0.5611944	total: 1.64s	remaining: 6m 47s
8:	learn: 0.5555832	total: 1.83s	remaining: 6m 44s
9:	learn: 0.5512290	total: 2.03s	remaining: 6m 44s
10:	learn: 0.5474102	total: 2.29s	remaining: 6m 53s
11:	learn: 0.5445813	total: 2.54s	remaining: 7m 1s
12:	learn: 0.5422935	total: 2.8s	remaining: 7m 7s
13:	learn: 0.5405782	total: 3.02s	remaining: 7m 8s
14:	learn: 0.5390553	total: 3.28s	remaining: 7m 14s
15:	learn: 0.5375435	total: 3.54s	remaining: 7m 19s
16:	learn: 0.5362892	total: 3.79s	remaining: 7m 22s
17:	learn: 0.5352135	total: 4.04s	remaining: 7m 25s
18:	learn: 0.5342691	total: 4.32s	remaining: 7m 30s
19:	learn: 0.5337061	total:

159:	learn: 0.5235052	total: 40.3s	remaining: 7m 42s
160:	learn: 0.5234705	total: 40.5s	remaining: 7m 42s
161:	learn: 0.5234520	total: 40.7s	remaining: 7m 41s
162:	learn: 0.5234278	total: 40.9s	remaining: 7m 40s
163:	learn: 0.5234099	total: 41.2s	remaining: 7m 40s
164:	learn: 0.5233851	total: 41.4s	remaining: 7m 40s
165:	learn: 0.5233734	total: 41.7s	remaining: 7m 40s
166:	learn: 0.5233619	total: 41.9s	remaining: 7m 39s
167:	learn: 0.5233408	total: 42.1s	remaining: 7m 39s
168:	learn: 0.5233275	total: 42.4s	remaining: 7m 39s
169:	learn: 0.5233076	total: 42.7s	remaining: 7m 39s
170:	learn: 0.5232791	total: 42.9s	remaining: 7m 39s
171:	learn: 0.5232547	total: 43.2s	remaining: 7m 38s
172:	learn: 0.5232243	total: 43.5s	remaining: 7m 38s
173:	learn: 0.5232015	total: 43.7s	remaining: 7m 38s
174:	learn: 0.5231736	total: 43.9s	remaining: 7m 38s
175:	learn: 0.5231344	total: 44.2s	remaining: 7m 37s
176:	learn: 0.5231192	total: 44.4s	remaining: 7m 37s
177:	learn: 0.5230873	total: 44.7s	remaining: 

315:	learn: 0.5208997	total: 1m 18s	remaining: 6m 59s
316:	learn: 0.5208889	total: 1m 19s	remaining: 6m 59s
317:	learn: 0.5208769	total: 1m 19s	remaining: 6m 59s
318:	learn: 0.5208581	total: 1m 19s	remaining: 6m 59s
319:	learn: 0.5208427	total: 1m 19s	remaining: 6m 58s
320:	learn: 0.5208369	total: 1m 20s	remaining: 6m 58s
321:	learn: 0.5208243	total: 1m 20s	remaining: 6m 58s
322:	learn: 0.5208098	total: 1m 20s	remaining: 6m 58s
323:	learn: 0.5207983	total: 1m 20s	remaining: 6m 57s
324:	learn: 0.5207855	total: 1m 21s	remaining: 6m 57s
325:	learn: 0.5207762	total: 1m 21s	remaining: 6m 57s
326:	learn: 0.5207640	total: 1m 21s	remaining: 6m 56s
327:	learn: 0.5207558	total: 1m 21s	remaining: 6m 56s
328:	learn: 0.5207477	total: 1m 21s	remaining: 6m 56s
329:	learn: 0.5207350	total: 1m 22s	remaining: 6m 55s
330:	learn: 0.5207252	total: 1m 22s	remaining: 6m 55s
331:	learn: 0.5207151	total: 1m 22s	remaining: 6m 55s
332:	learn: 0.5206980	total: 1m 22s	remaining: 6m 55s
333:	learn: 0.5206845	total:

467:	learn: 0.5191551	total: 1m 56s	remaining: 6m 20s
468:	learn: 0.5191468	total: 1m 56s	remaining: 6m 20s
469:	learn: 0.5191306	total: 1m 56s	remaining: 6m 20s
470:	learn: 0.5191160	total: 1m 57s	remaining: 6m 20s
471:	learn: 0.5191073	total: 1m 57s	remaining: 6m 20s
472:	learn: 0.5190965	total: 1m 57s	remaining: 6m 19s
473:	learn: 0.5190870	total: 1m 57s	remaining: 6m 19s
474:	learn: 0.5190762	total: 1m 58s	remaining: 6m 19s
475:	learn: 0.5190644	total: 1m 58s	remaining: 6m 18s
476:	learn: 0.5190547	total: 1m 58s	remaining: 6m 18s
477:	learn: 0.5190416	total: 1m 58s	remaining: 6m 18s
478:	learn: 0.5190303	total: 1m 59s	remaining: 6m 18s
479:	learn: 0.5190156	total: 1m 59s	remaining: 6m 17s
480:	learn: 0.5189946	total: 1m 59s	remaining: 6m 17s
481:	learn: 0.5189799	total: 1m 59s	remaining: 6m 17s
482:	learn: 0.5189719	total: 2m	remaining: 6m 17s
483:	learn: 0.5189657	total: 2m	remaining: 6m 16s
484:	learn: 0.5189569	total: 2m	remaining: 6m 16s
485:	learn: 0.5189504	total: 2m	remainin

621:	learn: 0.5177628	total: 2m 33s	remaining: 5m 39s
622:	learn: 0.5177564	total: 2m 33s	remaining: 5m 39s
623:	learn: 0.5177500	total: 2m 33s	remaining: 5m 39s
624:	learn: 0.5177390	total: 2m 34s	remaining: 5m 39s
625:	learn: 0.5177352	total: 2m 34s	remaining: 5m 38s
626:	learn: 0.5177259	total: 2m 34s	remaining: 5m 38s
627:	learn: 0.5177149	total: 2m 34s	remaining: 5m 38s
628:	learn: 0.5177061	total: 2m 35s	remaining: 5m 38s
629:	learn: 0.5176990	total: 2m 35s	remaining: 5m 37s
630:	learn: 0.5176881	total: 2m 35s	remaining: 5m 37s
631:	learn: 0.5176795	total: 2m 35s	remaining: 5m 37s
632:	learn: 0.5176739	total: 2m 36s	remaining: 5m 37s
633:	learn: 0.5176652	total: 2m 36s	remaining: 5m 36s
634:	learn: 0.5176579	total: 2m 36s	remaining: 5m 36s
635:	learn: 0.5176500	total: 2m 36s	remaining: 5m 36s
636:	learn: 0.5176405	total: 2m 37s	remaining: 5m 36s
637:	learn: 0.5176309	total: 2m 37s	remaining: 5m 35s
638:	learn: 0.5176170	total: 2m 37s	remaining: 5m 35s
639:	learn: 0.5176092	total:

775:	learn: 0.5164031	total: 3m 11s	remaining: 5m 1s
776:	learn: 0.5163953	total: 3m 11s	remaining: 5m 1s
777:	learn: 0.5163881	total: 3m 11s	remaining: 5m
778:	learn: 0.5163838	total: 3m 11s	remaining: 5m
779:	learn: 0.5163735	total: 3m 11s	remaining: 5m
780:	learn: 0.5163647	total: 3m 12s	remaining: 5m
781:	learn: 0.5163548	total: 3m 12s	remaining: 4m 59s
782:	learn: 0.5163485	total: 3m 12s	remaining: 4m 59s
783:	learn: 0.5163409	total: 3m 12s	remaining: 4m 59s
784:	learn: 0.5163324	total: 3m 13s	remaining: 4m 59s
785:	learn: 0.5163207	total: 3m 13s	remaining: 4m 58s
786:	learn: 0.5163118	total: 3m 13s	remaining: 4m 58s
787:	learn: 0.5163038	total: 3m 13s	remaining: 4m 58s
788:	learn: 0.5162935	total: 3m 14s	remaining: 4m 58s
789:	learn: 0.5162865	total: 3m 14s	remaining: 4m 57s
790:	learn: 0.5162755	total: 3m 14s	remaining: 4m 57s
791:	learn: 0.5162671	total: 3m 14s	remaining: 4m 57s
792:	learn: 0.5162612	total: 3m 15s	remaining: 4m 57s
793:	learn: 0.5162562	total: 3m 15s	remaining:

928:	learn: 0.5152076	total: 3m 48s	remaining: 4m 23s
929:	learn: 0.5152040	total: 3m 48s	remaining: 4m 22s
930:	learn: 0.5151962	total: 3m 48s	remaining: 4m 22s
931:	learn: 0.5151907	total: 3m 48s	remaining: 4m 22s
932:	learn: 0.5151831	total: 3m 49s	remaining: 4m 22s
933:	learn: 0.5151789	total: 3m 49s	remaining: 4m 21s
934:	learn: 0.5151687	total: 3m 49s	remaining: 4m 21s
935:	learn: 0.5151621	total: 3m 49s	remaining: 4m 21s
936:	learn: 0.5151552	total: 3m 50s	remaining: 4m 21s
937:	learn: 0.5151469	total: 3m 50s	remaining: 4m 20s
938:	learn: 0.5151410	total: 3m 50s	remaining: 4m 20s
939:	learn: 0.5151346	total: 3m 50s	remaining: 4m 20s
940:	learn: 0.5151242	total: 3m 51s	remaining: 4m 20s
941:	learn: 0.5151172	total: 3m 51s	remaining: 4m 19s
942:	learn: 0.5151093	total: 3m 51s	remaining: 4m 19s
943:	learn: 0.5151022	total: 3m 51s	remaining: 4m 19s
944:	learn: 0.5150980	total: 3m 52s	remaining: 4m 19s
945:	learn: 0.5150917	total: 3m 52s	remaining: 4m 18s
946:	learn: 0.5150694	total:

1081:	learn: 0.5141180	total: 4m 24s	remaining: 3m 44s
1082:	learn: 0.5141106	total: 4m 25s	remaining: 3m 44s
1083:	learn: 0.5141056	total: 4m 25s	remaining: 3m 44s
1084:	learn: 0.5140972	total: 4m 25s	remaining: 3m 44s
1085:	learn: 0.5140905	total: 4m 25s	remaining: 3m 43s
1086:	learn: 0.5140821	total: 4m 26s	remaining: 3m 43s
1087:	learn: 0.5140768	total: 4m 26s	remaining: 3m 43s
1088:	learn: 0.5140672	total: 4m 26s	remaining: 3m 43s
1089:	learn: 0.5140581	total: 4m 26s	remaining: 3m 42s
1090:	learn: 0.5140533	total: 4m 27s	remaining: 3m 42s
1091:	learn: 0.5140451	total: 4m 27s	remaining: 3m 42s
1092:	learn: 0.5140360	total: 4m 27s	remaining: 3m 42s
1093:	learn: 0.5140274	total: 4m 27s	remaining: 3m 41s
1094:	learn: 0.5140226	total: 4m 28s	remaining: 3m 41s
1095:	learn: 0.5140176	total: 4m 28s	remaining: 3m 41s
1096:	learn: 0.5140121	total: 4m 28s	remaining: 3m 41s
1097:	learn: 0.5140067	total: 4m 28s	remaining: 3m 40s
1098:	learn: 0.5139972	total: 4m 29s	remaining: 3m 40s
1099:	lear

1231:	learn: 0.5130846	total: 5m 1s	remaining: 3m 8s
1232:	learn: 0.5130783	total: 5m 2s	remaining: 3m 7s
1233:	learn: 0.5130679	total: 5m 2s	remaining: 3m 7s
1234:	learn: 0.5130589	total: 5m 2s	remaining: 3m 7s
1235:	learn: 0.5130494	total: 5m 2s	remaining: 3m 7s
1236:	learn: 0.5130410	total: 5m 3s	remaining: 3m 6s
1237:	learn: 0.5130321	total: 5m 3s	remaining: 3m 6s
1238:	learn: 0.5130261	total: 5m 3s	remaining: 3m 6s
1239:	learn: 0.5130196	total: 5m 3s	remaining: 3m 6s
1240:	learn: 0.5130113	total: 5m 3s	remaining: 3m 5s
1241:	learn: 0.5130038	total: 5m 4s	remaining: 3m 5s
1242:	learn: 0.5129964	total: 5m 4s	remaining: 3m 5s
1243:	learn: 0.5129888	total: 5m 4s	remaining: 3m 5s
1244:	learn: 0.5129851	total: 5m 4s	remaining: 3m 4s
1245:	learn: 0.5129761	total: 5m 5s	remaining: 3m 4s
1246:	learn: 0.5129713	total: 5m 5s	remaining: 3m 4s
1247:	learn: 0.5129614	total: 5m 5s	remaining: 3m 4s
1248:	learn: 0.5129531	total: 5m 5s	remaining: 3m 3s
1249:	learn: 0.5129465	total: 5m 6s	remaining:

1382:	learn: 0.5120749	total: 5m 38s	remaining: 2m 30s
1383:	learn: 0.5120714	total: 5m 38s	remaining: 2m 30s
1384:	learn: 0.5120653	total: 5m 38s	remaining: 2m 30s
1385:	learn: 0.5120570	total: 5m 38s	remaining: 2m 30s
1386:	learn: 0.5120519	total: 5m 39s	remaining: 2m 29s
1387:	learn: 0.5120463	total: 5m 39s	remaining: 2m 29s
1388:	learn: 0.5120438	total: 5m 39s	remaining: 2m 29s
1389:	learn: 0.5120385	total: 5m 39s	remaining: 2m 29s
1390:	learn: 0.5120342	total: 5m 39s	remaining: 2m 28s
1391:	learn: 0.5120277	total: 5m 40s	remaining: 2m 28s
1392:	learn: 0.5120195	total: 5m 40s	remaining: 2m 28s
1393:	learn: 0.5120147	total: 5m 40s	remaining: 2m 28s
1394:	learn: 0.5120081	total: 5m 40s	remaining: 2m 27s
1395:	learn: 0.5120020	total: 5m 40s	remaining: 2m 27s
1396:	learn: 0.5119950	total: 5m 41s	remaining: 2m 27s
1397:	learn: 0.5119920	total: 5m 41s	remaining: 2m 27s
1398:	learn: 0.5119820	total: 5m 41s	remaining: 2m 26s
1399:	learn: 0.5119762	total: 5m 41s	remaining: 2m 26s
1400:	lear

1533:	learn: 0.5110613	total: 6m 14s	remaining: 1m 53s
1534:	learn: 0.5110511	total: 6m 15s	remaining: 1m 53s
1535:	learn: 0.5110468	total: 6m 15s	remaining: 1m 53s
1536:	learn: 0.5110411	total: 6m 15s	remaining: 1m 53s
1537:	learn: 0.5110363	total: 6m 15s	remaining: 1m 52s
1538:	learn: 0.5110300	total: 6m 16s	remaining: 1m 52s
1539:	learn: 0.5110272	total: 6m 16s	remaining: 1m 52s
1540:	learn: 0.5110224	total: 6m 16s	remaining: 1m 52s
1541:	learn: 0.5110178	total: 6m 16s	remaining: 1m 51s
1542:	learn: 0.5110128	total: 6m 17s	remaining: 1m 51s
1543:	learn: 0.5110045	total: 6m 17s	remaining: 1m 51s
1544:	learn: 0.5110024	total: 6m 17s	remaining: 1m 51s
1545:	learn: 0.5109899	total: 6m 17s	remaining: 1m 50s
1546:	learn: 0.5109841	total: 6m 18s	remaining: 1m 50s
1547:	learn: 0.5109778	total: 6m 18s	remaining: 1m 50s
1548:	learn: 0.5109731	total: 6m 18s	remaining: 1m 50s
1549:	learn: 0.5109684	total: 6m 18s	remaining: 1m 50s
1550:	learn: 0.5109623	total: 6m 19s	remaining: 1m 49s
1551:	lear

1682:	learn: 0.5101334	total: 6m 52s	remaining: 1m 17s
1683:	learn: 0.5101263	total: 6m 52s	remaining: 1m 17s
1684:	learn: 0.5101206	total: 6m 52s	remaining: 1m 17s
1685:	learn: 0.5101148	total: 6m 52s	remaining: 1m 16s
1686:	learn: 0.5101092	total: 6m 53s	remaining: 1m 16s
1687:	learn: 0.5101054	total: 6m 53s	remaining: 1m 16s
1688:	learn: 0.5100992	total: 6m 53s	remaining: 1m 16s
1689:	learn: 0.5100900	total: 6m 54s	remaining: 1m 15s
1690:	learn: 0.5100818	total: 6m 54s	remaining: 1m 15s
1691:	learn: 0.5100734	total: 6m 54s	remaining: 1m 15s
1692:	learn: 0.5100658	total: 6m 55s	remaining: 1m 15s
1693:	learn: 0.5100606	total: 6m 55s	remaining: 1m 15s
1694:	learn: 0.5100525	total: 6m 55s	remaining: 1m 14s
1695:	learn: 0.5100452	total: 6m 55s	remaining: 1m 14s
1696:	learn: 0.5100391	total: 6m 56s	remaining: 1m 14s
1697:	learn: 0.5100310	total: 6m 56s	remaining: 1m 14s
1698:	learn: 0.5100235	total: 6m 56s	remaining: 1m 13s
1699:	learn: 0.5100201	total: 6m 56s	remaining: 1m 13s
1700:	lear

1835:	learn: 0.5091914	total: 7m 30s	remaining: 40.2s
1836:	learn: 0.5091861	total: 7m 30s	remaining: 40s
1837:	learn: 0.5091782	total: 7m 30s	remaining: 39.7s
1838:	learn: 0.5091734	total: 7m 30s	remaining: 39.5s
1839:	learn: 0.5091674	total: 7m 31s	remaining: 39.2s
1840:	learn: 0.5091625	total: 7m 31s	remaining: 39s
1841:	learn: 0.5091582	total: 7m 31s	remaining: 38.7s
1842:	learn: 0.5091540	total: 7m 31s	remaining: 38.5s
1843:	learn: 0.5091494	total: 7m 32s	remaining: 38.3s
1844:	learn: 0.5091372	total: 7m 32s	remaining: 38s
1845:	learn: 0.5091277	total: 7m 32s	remaining: 37.8s
1846:	learn: 0.5091191	total: 7m 32s	remaining: 37.5s
1847:	learn: 0.5091131	total: 7m 33s	remaining: 37.3s
1848:	learn: 0.5090999	total: 7m 33s	remaining: 37s
1849:	learn: 0.5090960	total: 7m 33s	remaining: 36.8s
1850:	learn: 0.5090878	total: 7m 33s	remaining: 36.5s
1851:	learn: 0.5090788	total: 7m 34s	remaining: 36.3s
1852:	learn: 0.5090720	total: 7m 34s	remaining: 36s
1853:	learn: 0.5090672	total: 7m 34s	r

1989:	learn: 0.5082452	total: 8m 7s	remaining: 2.45s
1990:	learn: 0.5082401	total: 8m 8s	remaining: 2.21s
1991:	learn: 0.5082364	total: 8m 8s	remaining: 1.96s
1992:	learn: 0.5082271	total: 8m 8s	remaining: 1.72s
1993:	learn: 0.5082249	total: 8m 8s	remaining: 1.47s
1994:	learn: 0.5082195	total: 8m 9s	remaining: 1.23s
1995:	learn: 0.5082131	total: 8m 9s	remaining: 981ms
1996:	learn: 0.5082095	total: 8m 9s	remaining: 736ms
1997:	learn: 0.5082015	total: 8m 9s	remaining: 490ms
1998:	learn: 0.5081952	total: 8m 10s	remaining: 245ms
1999:	learn: 0.5081885	total: 8m 10s	remaining: 0us


<catboost.core.CatBoostClassifier at 0x7f87283e1cf8>

In [18]:
print("train AUC: ", roc_auc_score(y, cb.predict_proba(X)[:, 1]))

train AUC:  0.6684793021220455


In [19]:
X_submit = preprocessing(df_test)

In [20]:
y_submit_pred = cb.predict_proba(X_submit)[:, 1]

In [21]:
product_id = df_test['product_id'].values
df_test = pd.DataFrame.from_dict({'product_id' : product_id, 'score' : y_submit_pred})
df_test.to_csv('./to_submit', sep = ',', index = False)