In [1]:
import pandas as pd
import numpy as np

import random

import datetime

from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_absolute_error
from sklearn.decomposition import TruncatedSVD

from catboost import CatBoostRegressor

from transformers import AutoTokenizer, AutoModel
from sentence_transformers import SentenceTransformer

import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn

from tqdm import tqdm

seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
random.seed(seed)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [2]:
df = pd.read_csv("./data/youtube/youtube_train.csv", index_col="id")
df_test = pd.read_csv("./data/youtube/youtube_test.csv", index_col="id")
df.head()

Unnamed: 0_level_0,Channel,Subtitles,PublishDate,Category,ViewCount
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,toplesofficial,('Я миллениал и я не занимаюсь сексом так част...,2017-12-31T01:35:44-08:00,Education,3019
1,postnauka,"(""сервитуты это одно из прямых наследие римско...",2014-09-04T08:24:47-07:00,Science & Technology,25
2,NaukaPRO,('[музыка] существуют сенсоры измеряющие магни...,2023-11-23T21:00:08-08:00,Science & Technology,7
3,postnauka,('в наше время только ленивые не говорит о том...,2014-06-23T03:49:31-07:00,Science & Technology,5
4,user-rb8ux1no6j,('дорогие друзья это честный рекламный ролик н...,2020-10-09T01:30:04-07:00,Education,10


In [3]:
min_date = pd.to_datetime(datetime.datetime.strptime('01012010', "%d%m%Y").date())
df["PublishDate"] = df["PublishDate"].apply(lambda x: datetime.datetime.strptime(x[:-6], "%Y-%m-%dT%H:%M:%S"))
df["PublishDateDays"] = (df["PublishDate"] - min_date).dt.days
df["PublishDateYear"] = df["PublishDate"].dt.year
df["PublishDateMonth"] = df["PublishDate"].dt.month
df["PublishDateDay"] = df["PublishDate"].dt.day
df["PublishDateDay"] = df["PublishDate"].dt.day_of_week
df["PublishDateDay"] = df["PublishDate"].dt.day_of_year

In [4]:
df_test["PublishDate"] = df_test["PublishDate"].apply(lambda x: datetime.datetime.strptime(x[:-6], "%Y-%m-%dT%H:%M:%S"))
df_test["PublishDateDays"] = (df_test["PublishDate"] - min_date).dt.days
df_test["PublishDateYear"] = df_test["PublishDate"].dt.year
df_test["PublishDateMonth"] = df_test["PublishDate"].dt.month
df_test["PublishDateDay"] = df_test["PublishDate"].dt.day
df_test["PublishDateDay"] = df_test["PublishDate"].dt.day_of_week
df_test["PublishDateDay"] = df_test["PublishDate"].dt.day_of_year

In [5]:
X = df.drop(columns=["ViewCount", "Subtitles"])
y = df["ViewCount"]

# X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=seed)
# X_train = X_train.reset_index()
# X_val = X_val.reset_index()

In [6]:
X_test = df_test.drop(columns=["Subtitles"])

In [7]:
model = SentenceTransformer("BAAI/bge-m3", device=device)

In [8]:
model.max_seq_length = 512

In [9]:
embs = model.encode(X["Subtitles"].tolist(), batch_size=8, show_progress_bar=True, normalize_embeddings=True, device=device)
# embs_val = model.encode(X_val["Subtitles"].tolist(), batch_size=8, show_progress_bar=True, normalize_embeddings=True, device=device)
embs_test = model.encode(X_test["Subtitles"].tolist(), batch_size=8, show_progress_bar=True, normalize_embeddings=True, device=device)

Batches:   0%|          | 0/732 [00:00<?, ?it/s]

Batches:   0%|          | 0/250 [00:00<?, ?it/s]

In [10]:
n_components = 16
svd = TruncatedSVD(n_components=n_components, random_state=seed)
svd.fit(embs.tolist())
embs_svd = svd.transform(embs.tolist())
embs_test_svd = svd.transform(embs_test.tolist())

In [11]:
X = pd.concat([X, pd.DataFrame(embs_svd.tolist(), columns=[f"emb_{i}" for i in range(n_components)])], axis=1)
X_test = pd.concat([X_test, pd.DataFrame(embs_test_svd.tolist(), columns=[f"emb_{i}" for i in range(n_components)], index=X_test.index)], axis=1)

In [7]:
cat_features = list()

for col_name, dtype in X.dtypes.to_dict().items():
    if dtype == "object" and col_name != "Subtitles":
        cat_features.append(col_name)

cat_features

['Channel', 'Category']

In [8]:
n_splits = 100
kf = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
val_scores = list()
test_preds = np.zeros((X_test.shape[0], n_splits))

for i, (train_index, val_index) in enumerate(tqdm(kf.split(X))):
    print(f"=== FOLD {i + 1} ===")

    X_train = X.iloc[train_index]
    X_val = X.iloc[val_index]
    y_train = y.iloc[train_index]
    y_val = y.iloc[val_index]

    model = CatBoostRegressor(
        iterations=1500,
        cat_features=cat_features,
        # text_features=["Subtitles"],
        eval_metric="MAE",
        learning_rate=0.1,
        verbose=250,
        early_stopping_rounds=200,
        use_best_model=True,
        random_seed=seed,
        # depth=8,
    )
    model.fit(X_train, y_train, eval_set=(X_val, y_val))

    preds = model.predict(X_val)
    test_preds[:, i] = model.predict(X_test)
    
    val_scores.append(mean_absolute_error(y_val, preds))

sum(val_scores) / len(val_scores)

0it [00:00, ?it/s]

=== FOLD 1 ===
0:	learn: 468.6390245	test: 380.0739796	best: 380.0739796 (0)	total: 50.7ms	remaining: 1m 16s
250:	learn: 174.0726355	test: 193.9261651	best: 193.3972115 (244)	total: 397ms	remaining: 1.97s


1it [00:00,  1.21it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 192.8422942
bestIteration = 282

Shrink model to first 283 iterations.
=== FOLD 2 ===
0:	learn: 475.3306644	test: 366.8837654	best: 366.8837654 (0)	total: 2.01ms	remaining: 3.01s


2it [00:01,  1.72it/s]

250:	learn: 161.9682999	test: 131.4388794	best: 114.8612242 (55)	total: 349ms	remaining: 1.74s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 114.8612242
bestIteration = 55

Shrink model to first 56 iterations.
=== FOLD 3 ===
0:	learn: 470.1147460	test: 382.4119324	best: 382.4119324 (0)	total: 1.11ms	remaining: 1.66s
250:	learn: 163.2113117	test: 157.5185851	best: 156.5071552 (237)	total: 350ms	remaining: 1.74s
500:	learn: 145.7010298	test: 153.4982905	best: 153.0260346 (466)	total: 708ms	remaining: 1.41s


3it [00:02,  1.21it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 151.127649
bestIteration = 526

Shrink model to first 527 iterations.
=== FOLD 4 ===
0:	learn: 464.0634611	test: 658.8645798	best: 658.8645798 (0)	total: 2.04ms	remaining: 3.06s


4it [00:02,  1.38it/s]

250:	learn: 159.2600123	test: 317.1951742	best: 312.3683638 (165)	total: 342ms	remaining: 1.7s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 312.3683638
bestIteration = 165

Shrink model to first 166 iterations.
=== FOLD 5 ===
0:	learn: 468.7854281	test: 424.1289808	best: 424.1289808 (0)	total: 1.56ms	remaining: 2.34s


5it [00:03,  1.56it/s]

250:	learn: 160.8109400	test: 246.0916072	best: 152.0216444 (77)	total: 396ms	remaining: 1.97s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 152.0216444
bestIteration = 77

Shrink model to first 78 iterations.
=== FOLD 6 ===
0:	learn: 468.9192355	test: 397.1418813	best: 397.1418813 (0)	total: 1.63ms	remaining: 2.44s
250:	learn: 169.2877676	test: 167.6057402	best: 167.6057402 (250)	total: 352ms	remaining: 1.75s
500:	learn: 146.8686308	test: 156.2639404	best: 154.8294482 (479)	total: 711ms	remaining: 1.42s
750:	learn: 135.0666178	test: 155.1142160	best: 152.5288523 (669)	total: 1.07s	remaining: 1.07s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 152.5288523
bestIteration = 669

Shrink model to first 670 iterations.


6it [00:04,  1.14it/s]

=== FOLD 7 ===
0:	learn: 463.9798812	test: 552.8467759	best: 552.8467759 (0)	total: 1.68ms	remaining: 2.51s
250:	learn: 155.9347163	test: 223.4918999	best: 213.1961737 (178)	total: 347ms	remaining: 1.73s


7it [00:05,  1.26it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 213.1961737
bestIteration = 178

Shrink model to first 179 iterations.
=== FOLD 8 ===
0:	learn: 475.5278457	test: 348.8444053	best: 348.8444053 (0)	total: 1.78ms	remaining: 2.67s


8it [00:05,  1.53it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 168.0451614
bestIteration = 25

Shrink model to first 26 iterations.
=== FOLD 9 ===
0:	learn: 469.8239951	test: 335.4833429	best: 335.4833429 (0)	total: 1.52ms	remaining: 2.27s


9it [00:06,  1.73it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 191.6620555
bestIteration = 41

Shrink model to first 42 iterations.
=== FOLD 10 ===
0:	learn: 466.6857254	test: 507.5759671	best: 507.5759671 (0)	total: 1.67ms	remaining: 2.5s


10it [00:06,  1.86it/s]

250:	learn: 161.3301738	test: 178.0443089	best: 160.6484497 (83)	total: 351ms	remaining: 1.75s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 160.6484497
bestIteration = 83

Shrink model to first 84 iterations.
=== FOLD 11 ===
0:	learn: 462.7310194	test: 764.1952083	best: 764.1952083 (0)	total: 1.75ms	remaining: 2.62s
250:	learn: 160.1857685	test: 347.3539172	best: 346.8548726 (247)	total: 346ms	remaining: 1.72s
500:	learn: 142.5779814	test: 330.7422792	best: 330.7422792 (500)	total: 707ms	remaining: 1.41s
750:	learn: 129.6941114	test: 316.5324389	best: 315.3099199 (729)	total: 1.08s	remaining: 1.08s
1000:	learn: 122.4940735	test: 312.4241722	best: 312.3972318 (999)	total: 1.45s	remaining: 723ms
1250:	learn: 115.9134413	test: 308.3444876	best: 308.1095696 (1245)	total: 1.82s	remaining: 362ms


11it [00:08,  1.09s/it]

1499:	learn: 110.3695993	test: 308.5017444	best: 304.5800212 (1432)	total: 2.19s	remaining: 0us

bestTest = 304.5800212
bestIteration = 1432

Shrink model to first 1433 iterations.
=== FOLD 12 ===
0:	learn: 470.8949524	test: 287.7701268	best: 287.7701268 (0)	total: 1.94ms	remaining: 2.9s
250:	learn: 159.5796467	test: 106.6801130	best: 106.6801130 (250)	total: 344ms	remaining: 1.71s


12it [00:09,  1.02it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 103.8647239
bestIteration = 293

Shrink model to first 294 iterations.
=== FOLD 13 ===
0:	learn: 466.7410170	test: 428.2299882	best: 428.2299882 (0)	total: 1.59ms	remaining: 2.38s


13it [00:10,  1.22it/s]

250:	learn: 162.0246626	test: 189.6058920	best: 176.0778856 (78)	total: 347ms	remaining: 1.73s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 176.0778856
bestIteration = 78

Shrink model to first 79 iterations.
=== FOLD 14 ===
0:	learn: 470.2876643	test: 322.6679033	best: 322.6679033 (0)	total: 1.14ms	remaining: 1.71s


14it [00:10,  1.46it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 101.5384694
bestIteration = 30

Shrink model to first 31 iterations.
=== FOLD 15 ===
0:	learn: 466.2837421	test: 555.0690943	best: 555.0690943 (0)	total: 1.75ms	remaining: 2.62s


15it [00:10,  1.64it/s]

250:	learn: 168.5922381	test: 212.7209518	best: 195.8541804 (81)	total: 334ms	remaining: 1.66s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 195.8541804
bestIteration = 81

Shrink model to first 82 iterations.
=== FOLD 16 ===
0:	learn: 472.1035065	test: 527.8979308	best: 527.8979308 (0)	total: 2.05ms	remaining: 3.07s
250:	learn: 156.1469060	test: 205.3298195	best: 203.3188789 (235)	total: 346ms	remaining: 1.72s
500:	learn: 140.6987543	test: 199.2325934	best: 197.9439204 (462)	total: 695ms	remaining: 1.39s
750:	learn: 129.6126037	test: 184.3673959	best: 183.9896210 (738)	total: 1.04s	remaining: 1.04s


16it [00:12,  1.15it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 179.2820858
bestIteration = 780

Shrink model to first 781 iterations.
=== FOLD 17 ===
0:	learn: 469.5515324	test: 344.5071454	best: 344.5071454 (0)	total: 1.09ms	remaining: 1.64s


17it [00:12,  1.40it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 82.92075644
bestIteration = 21

Shrink model to first 22 iterations.
=== FOLD 18 ===
0:	learn: 462.8945588	test: 751.7660954	best: 751.7660954 (0)	total: 1.51ms	remaining: 2.27s


18it [00:13,  1.56it/s]

250:	learn: 156.8397565	test: 383.7057932	best: 372.5312494 (97)	total: 351ms	remaining: 1.75s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 372.5312494
bestIteration = 97

Shrink model to first 98 iterations.
=== FOLD 19 ===
0:	learn: 471.2562805	test: 321.2743696	best: 321.2743696 (0)	total: 1.69ms	remaining: 2.54s


19it [00:13,  1.76it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 133.6202152
bestIteration = 45

Shrink model to first 46 iterations.
=== FOLD 20 ===
0:	learn: 470.1319647	test: 385.7425280	best: 385.7425280 (0)	total: 1.16ms	remaining: 1.74s


20it [00:14,  1.84it/s]

250:	learn: 158.0927834	test: 219.9901834	best: 214.9162790 (94)	total: 371ms	remaining: 1.85s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 214.916279
bestIteration = 94

Shrink model to first 95 iterations.
=== FOLD 21 ===
0:	learn: 473.1069605	test: 475.5887425	best: 475.5887425 (0)	total: 1.75ms	remaining: 2.62s


21it [00:14,  1.95it/s]

250:	learn: 153.0339877	test: 178.6559774	best: 128.9334274 (85)	total: 342ms	remaining: 1.7s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 128.9334274
bestIteration = 85

Shrink model to first 86 iterations.
=== FOLD 22 ===
0:	learn: 467.1256344	test: 500.9177809	best: 500.9177809 (0)	total: 1.53ms	remaining: 2.29s


22it [00:15,  1.93it/s]

250:	learn: 162.7168425	test: 172.0196494	best: 162.3533113 (140)	total: 339ms	remaining: 1.69s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 162.3533113
bestIteration = 140

Shrink model to first 141 iterations.
=== FOLD 23 ===
0:	learn: 474.1861325	test: 438.5073006	best: 438.5073006 (0)	total: 1.75ms	remaining: 2.62s


23it [00:15,  2.01it/s]

250:	learn: 164.6355033	test: 175.9425432	best: 155.6631599 (90)	total: 355ms	remaining: 1.76s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 155.6631599
bestIteration = 90

Shrink model to first 91 iterations.
=== FOLD 24 ===
0:	learn: 469.1120590	test: 304.7646916	best: 304.7646916 (0)	total: 1.65ms	remaining: 2.47s
250:	learn: 166.9925321	test: 77.2272005	best: 69.2972887 (183)	total: 338ms	remaining: 1.68s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 69.29728871
bestIteration = 183

Shrink model to first 184 iterations.


24it [00:16,  1.92it/s]

=== FOLD 25 ===
0:	learn: 470.9367525	test: 325.4873179	best: 325.4873179 (0)	total: 1.56ms	remaining: 2.34s
250:	learn: 159.0040737	test: 123.6340586	best: 123.5252803 (249)	total: 346ms	remaining: 1.72s
500:	learn: 142.2316086	test: 114.7890661	best: 114.7254666 (497)	total: 703ms	remaining: 1.4s
750:	learn: 130.8084735	test: 110.4832349	best: 109.1865849 (721)	total: 1.06s	remaining: 1.06s


25it [00:17,  1.26it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 109.1865849
bestIteration = 721

Shrink model to first 722 iterations.
=== FOLD 26 ===
0:	learn: 466.0961005	test: 573.1489327	best: 573.1489327 (0)	total: 1.72ms	remaining: 2.58s
250:	learn: 161.2129002	test: 224.4231236	best: 224.4231236 (250)	total: 348ms	remaining: 1.73s


26it [00:18,  1.30it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 224.4231236
bestIteration = 250

Shrink model to first 251 iterations.
=== FOLD 27 ===
0:	learn: 465.1794849	test: 572.5539794	best: 572.5539794 (0)	total: 1.86ms	remaining: 2.79s
250:	learn: 166.9792076	test: 289.3525842	best: 274.3534799 (168)	total: 351ms	remaining: 1.75s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 274.3534799
bestIteration = 168

Shrink model to first 169 iterations.


27it [00:18,  1.40it/s]

=== FOLD 28 ===
0:	learn: 467.5719473	test: 504.7388749	best: 504.7388749 (0)	total: 1.76ms	remaining: 2.65s


28it [00:19,  1.59it/s]

250:	learn: 167.8181503	test: 166.7725998	best: 154.7578102 (80)	total: 350ms	remaining: 1.74s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 154.7578102
bestIteration = 80

Shrink model to first 81 iterations.
=== FOLD 29 ===
0:	learn: 470.0386335	test: 695.8136886	best: 695.8136886 (0)	total: 1.81ms	remaining: 2.72s
250:	learn: 157.5347430	test: 292.0283925	best: 289.1717089 (87)	total: 353ms	remaining: 1.75s
500:	learn: 138.2795053	test: 282.5518659	best: 282.3965946 (484)	total: 715ms	remaining: 1.43s


29it [00:20,  1.28it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 281.4441064
bestIteration = 524

Shrink model to first 525 iterations.
=== FOLD 30 ===
0:	learn: 467.2469773	test: 509.2993907	best: 509.2993907 (0)	total: 1.6ms	remaining: 2.4s


30it [00:20,  1.52it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 160.5515107
bestIteration = 38

Shrink model to first 39 iterations.
=== FOLD 31 ===
0:	learn: 467.0766718	test: 467.8889423	best: 467.8889423 (0)	total: 1.74ms	remaining: 2.62s


31it [00:21,  1.70it/s]

250:	learn: 164.5298856	test: 155.7305965	best: 137.9275508 (70)	total: 338ms	remaining: 1.68s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 137.9275508
bestIteration = 70

Shrink model to first 71 iterations.
=== FOLD 32 ===
0:	learn: 464.6983574	test: 618.5086598	best: 618.5086598 (0)	total: 1.63ms	remaining: 2.45s


32it [00:21,  1.91it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 184.2476023
bestIteration = 36

Shrink model to first 37 iterations.
=== FOLD 33 ===
0:	learn: 465.7744026	test: 635.6531273	best: 635.6531273 (0)	total: 1.55ms	remaining: 2.33s


33it [00:22,  1.98it/s]

250:	learn: 163.9624779	test: 176.4081172	best: 156.3873698 (84)	total: 358ms	remaining: 1.78s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 156.3873698
bestIteration = 84

Shrink model to first 85 iterations.
=== FOLD 34 ===
0:	learn: 470.6009863	test: 610.5776269	best: 610.5776269 (0)	total: 1.76ms	remaining: 2.64s


34it [00:22,  2.06it/s]

250:	learn: 154.3934357	test: 294.3453052	best: 288.0536067 (98)	total: 339ms	remaining: 1.69s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 288.0536067
bestIteration = 98

Shrink model to first 99 iterations.
=== FOLD 35 ===
0:	learn: 472.6555440	test: 461.8389652	best: 461.8389652 (0)	total: 1.74ms	remaining: 2.6s


35it [00:22,  2.22it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 264.2721523
bestIteration = 18

Shrink model to first 19 iterations.
=== FOLD 36 ===
0:	learn: 462.6760440	test: 703.8024965	best: 703.8024965 (0)	total: 7.82ms	remaining: 11.7s
250:	learn: 162.1153960	test: 260.6641154	best: 252.5778909 (172)	total: 367ms	remaining: 1.83s
500:	learn: 137.8966836	test: 247.1659896	best: 246.0980962 (455)	total: 723ms	remaining: 1.44s
750:	learn: 124.6454625	test: 240.7876096	best: 240.7876096 (750)	total: 1.07s	remaining: 1.07s


36it [00:24,  1.31it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 239.0784665
bestIteration = 770

Shrink model to first 771 iterations.
=== FOLD 37 ===
0:	learn: 466.7406699	test: 483.6781530	best: 483.6781530 (0)	total: 1.67ms	remaining: 2.51s


37it [00:24,  1.57it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 234.7225516
bestIteration = 26

Shrink model to first 27 iterations.
=== FOLD 38 ===
0:	learn: 465.3839184	test: 535.5422223	best: 535.5422223 (0)	total: 1.02ms	remaining: 1.53s
250:	learn: 153.2932051	test: 123.0575693	best: 123.0575693 (250)	total: 350ms	remaining: 1.74s


38it [00:25,  1.40it/s]

500:	learn: 138.4254751	test: 128.5583264	best: 113.8932120 (382)	total: 702ms	remaining: 1.4s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 113.893212
bestIteration = 382

Shrink model to first 383 iterations.
=== FOLD 39 ===
0:	learn: 475.7978318	test: 345.3830707	best: 345.3830707 (0)	total: 1.73ms	remaining: 2.59s
250:	learn: 173.2122789	test: 119.9930612	best: 119.9930612 (250)	total: 341ms	remaining: 1.7s


39it [00:26,  1.34it/s]

500:	learn: 142.5730024	test: 119.6247884	best: 116.1307564 (338)	total: 694ms	remaining: 1.38s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 116.1307564
bestIteration = 338

Shrink model to first 339 iterations.
=== FOLD 40 ===
0:	learn: 473.3259061	test: 485.8618602	best: 485.8618602 (0)	total: 1.94ms	remaining: 2.91s


40it [00:26,  1.53it/s]

250:	learn: 162.4376853	test: 228.7345019	best: 191.7269985 (72)	total: 350ms	remaining: 1.74s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 191.7269985
bestIteration = 72

Shrink model to first 73 iterations.
=== FOLD 41 ===
0:	learn: 468.1754658	test: 417.7125121	best: 417.7125121 (0)	total: 1.56ms	remaining: 2.35s
250:	learn: 157.5969585	test: 235.4282490	best: 235.1998486 (249)	total: 331ms	remaining: 1.65s
500:	learn: 139.7434122	test: 227.5837275	best: 226.5063638 (481)	total: 688ms	remaining: 1.37s
750:	learn: 129.1860975	test: 224.7905143	best: 224.2524972 (737)	total: 1.05s	remaining: 1.05s
1000:	learn: 121.5226206	test: 222.2177660	best: 220.2797169 (911)	total: 1.41s	remaining: 704ms


41it [00:28,  1.04s/it]

1250:	learn: 114.7284404	test: 219.0619514	best: 217.9773463 (1094)	total: 1.77s	remaining: 353ms
Stopped by overfitting detector  (200 iterations wait)

bestTest = 217.9773463
bestIteration = 1094

Shrink model to first 1095 iterations.
=== FOLD 42 ===
0:	learn: 471.9849176	test: 271.0286866	best: 271.0286866 (0)	total: 2.49ms	remaining: 3.73s


42it [00:29,  1.11it/s]

250:	learn: 159.2361021	test: 90.1036813	best: 85.6934044 (149)	total: 357ms	remaining: 1.78s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 85.69340443
bestIteration = 149

Shrink model to first 150 iterations.
=== FOLD 43 ===
0:	learn: 465.8867848	test: 585.4588423	best: 585.4588423 (0)	total: 1.84ms	remaining: 2.75s


43it [00:29,  1.32it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 212.0584966
bestIteration = 43

Shrink model to first 44 iterations.
=== FOLD 44 ===
0:	learn: 473.4437158	test: 452.0471548	best: 452.0471548 (0)	total: 2.23ms	remaining: 3.34s


44it [00:30,  1.52it/s]

250:	learn: 155.7878955	test: 264.2312881	best: 242.1531407 (62)	total: 356ms	remaining: 1.77s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 242.1531407
bestIteration = 62

Shrink model to first 63 iterations.
=== FOLD 45 ===
0:	learn: 469.1843465	test: 714.2233670	best: 714.2233670 (0)	total: 1.73ms	remaining: 2.59s
250:	learn: 158.2090426	test: 411.0834798	best: 406.0269025 (223)	total: 343ms	remaining: 1.71s
500:	learn: 138.0300415	test: 400.5196545	best: 400.3202229 (499)	total: 701ms	remaining: 1.4s
750:	learn: 126.4565274	test: 396.0205559	best: 395.9724318 (748)	total: 1.07s	remaining: 1.06s


45it [00:31,  1.10it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 395.0350863
bestIteration = 763

Shrink model to first 764 iterations.
=== FOLD 46 ===
0:	learn: 469.7506731	test: 368.8502641	best: 368.8502641 (0)	total: 1.56ms	remaining: 2.34s
250:	learn: 163.5683237	test: 176.9066590	best: 170.9401286 (152)	total: 343ms	remaining: 1.71s
500:	learn: 144.7380114	test: 169.7635466	best: 165.9660524 (458)	total: 690ms	remaining: 1.38s
750:	learn: 131.8557368	test: 162.6434924	best: 161.9283244 (624)	total: 1.05s	remaining: 1.05s


46it [00:33,  1.12s/it]

1000:	learn: 123.5301187	test: 165.1010269	best: 161.4868009 (854)	total: 1.43s	remaining: 711ms
Stopped by overfitting detector  (200 iterations wait)

bestTest = 161.4868009
bestIteration = 854

Shrink model to first 855 iterations.
=== FOLD 47 ===
0:	learn: 473.9381614	test: 453.3650550	best: 453.3650550 (0)	total: 1.91ms	remaining: 2.87s
250:	learn: 167.4424380	test: 137.5679206	best: 131.3023582 (230)	total: 348ms	remaining: 1.73s


47it [00:33,  1.02it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 131.3023582
bestIteration = 230

Shrink model to first 231 iterations.
=== FOLD 48 ===
0:	learn: 472.3199640	test: 517.5951643	best: 517.5951643 (0)	total: 1.91ms	remaining: 2.86s


48it [00:34,  1.20it/s]

250:	learn: 154.7965739	test: 264.5977107	best: 239.3460018 (90)	total: 378ms	remaining: 1.88s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 239.3460018
bestIteration = 90

Shrink model to first 91 iterations.
=== FOLD 49 ===
0:	learn: 469.9832244	test: 332.7113467	best: 332.7113467 (0)	total: 2.03ms	remaining: 3.04s


49it [00:34,  1.41it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 92.92238541
bestIteration = 27

Shrink model to first 28 iterations.
=== FOLD 50 ===
0:	learn: 470.5522635	test: 632.4934881	best: 632.4934881 (0)	total: 1.78ms	remaining: 2.67s
250:	learn: 163.9898284	test: 237.6806880	best: 237.5493425 (249)	total: 342ms	remaining: 1.7s
500:	learn: 143.7474309	test: 226.9741092	best: 225.5684535 (479)	total: 701ms	remaining: 1.4s


50it [00:35,  1.21it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 224.2621415
bestIteration = 522

Shrink model to first 523 iterations.
=== FOLD 51 ===
0:	learn: 471.8912184	test: 289.4845851	best: 289.4845851 (0)	total: 1.4ms	remaining: 2.1s


51it [00:36,  1.43it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 157.477611
bestIteration = 47

Shrink model to first 48 iterations.
=== FOLD 52 ===
0:	learn: 467.5052807	test: 465.8263446	best: 465.8263446 (0)	total: 1.57ms	remaining: 2.36s


52it [00:36,  1.67it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 172.3680044
bestIteration = 36

Shrink model to first 37 iterations.
=== FOLD 53 ===
0:	learn: 475.6635286	test: 331.9954829	best: 331.9954829 (0)	total: 1.81ms	remaining: 2.72s
250:	learn: 160.0517064	test: 71.2034244	best: 70.9402103 (246)	total: 349ms	remaining: 1.74s


53it [00:37,  1.59it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 70.7150378
bestIteration = 253

Shrink model to first 254 iterations.
=== FOLD 54 ===
0:	learn: 468.0253449	test: 436.6989946	best: 436.6989946 (0)	total: 1.51ms	remaining: 2.27s


54it [00:37,  1.74it/s]

250:	learn: 161.6760244	test: 98.0761166	best: 75.6247623 (89)	total: 338ms	remaining: 1.68s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 75.62476234
bestIteration = 89

Shrink model to first 90 iterations.
=== FOLD 55 ===
0:	learn: 473.2237770	test: 470.2332227	best: 470.2332227 (0)	total: 1.76ms	remaining: 2.65s


55it [00:38,  1.91it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 223.0964749
bestIteration = 48

Shrink model to first 49 iterations.
=== FOLD 56 ===
0:	learn: 472.7811590	test: 494.3238103	best: 494.3238103 (0)	total: 1.82ms	remaining: 2.73s
250:	learn: 167.7516485	test: 162.5144669	best: 162.0002062 (249)	total: 345ms	remaining: 1.72s
500:	learn: 144.7930999	test: 159.8668707	best: 154.3255894 (480)	total: 691ms	remaining: 1.38s


56it [00:39,  1.49it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 154.3255894
bestIteration = 480

Shrink model to first 481 iterations.
=== FOLD 57 ===
0:	learn: 475.7998019	test: 342.0151245	best: 342.0151245 (0)	total: 1.75ms	remaining: 2.62s
250:	learn: 159.8214494	test: 75.4290047	best: 73.4560085 (185)	total: 345ms	remaining: 1.72s


57it [00:40,  1.44it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 70.49012652
bestIteration = 289

Shrink model to first 290 iterations.
=== FOLD 58 ===
0:	learn: 475.9370508	test: 309.7195609	best: 309.7195609 (0)	total: 1.9ms	remaining: 2.84s


58it [00:40,  1.61it/s]

250:	learn: 165.7697169	test: 90.0045067	best: 82.1604516 (87)	total: 367ms	remaining: 1.83s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 82.1604516
bestIteration = 87

Shrink model to first 88 iterations.
=== FOLD 59 ===
0:	learn: 475.3935848	test: 357.1936360	best: 357.1936360 (0)	total: 1.77ms	remaining: 2.65s
250:	learn: 158.0743908	test: 104.5732153	best: 101.9559120 (175)	total: 347ms	remaining: 1.73s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 101.955912
bestIteration = 175

Shrink model to first 176 iterations.


59it [00:41,  1.64it/s]

=== FOLD 60 ===
0:	learn: 467.8532563	test: 766.4251947	best: 766.4251947 (0)	total: 2.01ms	remaining: 3.01s
250:	learn: 157.1745030	test: 288.1737946	best: 288.1737946 (250)	total: 342ms	remaining: 1.7s
500:	learn: 138.8739682	test: 263.4338756	best: 261.6743528 (466)	total: 705ms	remaining: 1.41s
750:	learn: 129.3341560	test: 254.4199067	best: 253.8591923 (718)	total: 1.06s	remaining: 1.06s
1000:	learn: 120.0349016	test: 249.8962056	best: 248.4090024 (947)	total: 1.44s	remaining: 715ms
1250:	learn: 112.6058458	test: 244.3184613	best: 243.6849639 (1247)	total: 1.81s	remaining: 361ms


60it [00:43,  1.12s/it]

1499:	learn: 106.5477825	test: 240.1278862	best: 239.7486214 (1475)	total: 2.19s	remaining: 0us

bestTest = 239.7486214
bestIteration = 1475

Shrink model to first 1476 iterations.
=== FOLD 61 ===
0:	learn: 468.1171746	test: 476.9000408	best: 476.9000408 (0)	total: 1.46ms	remaining: 2.18s


61it [00:43,  1.05it/s]

250:	learn: 161.0500821	test: 196.9560667	best: 187.8758789 (162)	total: 347ms	remaining: 1.73s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 187.8758789
bestIteration = 162

Shrink model to first 163 iterations.
=== FOLD 62 ===
0:	learn: 461.8825073	test: 774.0181128	best: 774.0181128 (0)	total: 1.5ms	remaining: 2.25s


62it [00:44,  1.24it/s]

250:	learn: 160.6931809	test: 332.1237875	best: 323.9160277 (85)	total: 353ms	remaining: 1.76s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 323.9160277
bestIteration = 85

Shrink model to first 86 iterations.
=== FOLD 63 ===
0:	learn: 468.5328558	test: 384.3156357	best: 384.3156357 (0)	total: 1.59ms	remaining: 2.38s


63it [00:44,  1.43it/s]

250:	learn: 155.8144051	test: 101.7201862	best: 95.9287242 (86)	total: 343ms	remaining: 1.71s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 95.9287242
bestIteration = 86

Shrink model to first 87 iterations.
=== FOLD 64 ===
0:	learn: 474.4295580	test: 410.1200783	best: 410.1200783 (0)	total: 1.81ms	remaining: 2.71s


64it [00:45,  1.65it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 170.4736105
bestIteration = 48

Shrink model to first 49 iterations.
=== FOLD 65 ===
0:	learn: 476.5310406	test: 277.5901711	best: 277.5901711 (0)	total: 1.79ms	remaining: 2.69s


65it [00:45,  1.85it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 107.8515677
bestIteration = 42

Shrink model to first 43 iterations.
=== FOLD 66 ===
0:	learn: 472.4572642	test: 506.8215048	best: 506.8215048 (0)	total: 2.56ms	remaining: 3.83s


66it [00:46,  1.99it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 128.9366531
bestIteration = 30

Shrink model to first 31 iterations.
=== FOLD 67 ===
0:	learn: 469.0409126	test: 716.8455984	best: 716.8455984 (0)	total: 1.78ms	remaining: 2.67s


67it [00:46,  2.08it/s]

250:	learn: 160.1177155	test: 311.8254870	best: 297.9654395 (71)	total: 351ms	remaining: 1.75s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 297.9654395
bestIteration = 71

Shrink model to first 72 iterations.
=== FOLD 68 ===
0:	learn: 475.0280183	test: 371.6099089	best: 371.6099089 (0)	total: 1.84ms	remaining: 2.76s


68it [00:46,  2.09it/s]

250:	learn: 165.1529477	test: 138.1602761	best: 131.7093355 (106)	total: 342ms	remaining: 1.7s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 131.7093355
bestIteration = 106

Shrink model to first 107 iterations.
=== FOLD 69 ===
0:	learn: 472.3168129	test: 283.1016738	best: 283.1016738 (0)	total: 1.48ms	remaining: 2.22s


69it [00:47,  2.17it/s]

250:	learn: 167.2121371	test: 79.1270653	best: 74.4737256 (66)	total: 344ms	remaining: 1.71s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 74.47372556
bestIteration = 66

Shrink model to first 67 iterations.
=== FOLD 70 ===
0:	learn: 471.9412989	test: 289.9508284	best: 289.9508284 (0)	total: 1.54ms	remaining: 2.3s
250:	learn: 168.9102589	test: 85.4967314	best: 84.8592433 (247)	total: 343ms	remaining: 1.71s


70it [00:48,  1.81it/s]

500:	learn: 152.4355672	test: 85.0597997	best: 84.6923480 (304)	total: 691ms	remaining: 1.38s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 84.69234798
bestIteration = 304

Shrink model to first 305 iterations.
=== FOLD 71 ===
0:	learn: 470.4210992	test: 384.2083966	best: 384.2083966 (0)	total: 1.51ms	remaining: 2.26s


71it [00:48,  1.96it/s]

250:	learn: 166.6300070	test: 111.8384080	best: 106.4209285 (54)	total: 352ms	remaining: 1.75s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 106.4209285
bestIteration = 54

Shrink model to first 55 iterations.
=== FOLD 72 ===
0:	learn: 468.0902833	test: 471.7954284	best: 471.7954284 (0)	total: 1.45ms	remaining: 2.18s
250:	learn: 164.5770417	test: 179.0691894	best: 174.7276675 (125)	total: 349ms	remaining: 1.74s


72it [00:49,  1.63it/s]

500:	learn: 146.0092394	test: 170.0018818	best: 165.8376402 (352)	total: 710ms	remaining: 1.42s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 165.8376402
bestIteration = 352

Shrink model to first 353 iterations.
=== FOLD 73 ===
0:	learn: 475.5628472	test: 334.7381795	best: 334.7381795 (0)	total: 1.78ms	remaining: 2.67s


73it [00:49,  1.76it/s]

250:	learn: 161.3747370	test: 111.3925395	best: 102.4156366 (91)	total: 352ms	remaining: 1.75s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 102.4156366
bestIteration = 91

Shrink model to first 92 iterations.
=== FOLD 74 ===
0:	learn: 467.3892888	test: 464.5710604	best: 464.5710604 (0)	total: 1.5ms	remaining: 2.25s


74it [00:50,  1.87it/s]

250:	learn: 167.8099414	test: 158.2405218	best: 150.8440888 (70)	total: 371ms	remaining: 1.84s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 150.8440888
bestIteration = 70

Shrink model to first 71 iterations.
=== FOLD 75 ===
0:	learn: 473.0771289	test: 469.1616881	best: 469.1616881 (0)	total: 2.02ms	remaining: 3.03s


75it [00:50,  1.95it/s]

250:	learn: 162.7405686	test: 242.0447006	best: 227.9813132 (93)	total: 350ms	remaining: 1.74s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 227.9813132
bestIteration = 93

Shrink model to first 94 iterations.
=== FOLD 76 ===
0:	learn: 475.3601693	test: 383.1582248	best: 383.1582248 (0)	total: 1.93ms	remaining: 2.89s
250:	learn: 162.0968184	test: 102.5635296	best: 89.9018713 (177)	total: 348ms	remaining: 1.73s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 89.90187128
bestIteration = 177

Shrink model to first 178 iterations.


76it [00:51,  1.86it/s]

=== FOLD 77 ===
0:	learn: 473.7505921	test: 455.5500389	best: 455.5500389 (0)	total: 1.91ms	remaining: 2.86s


77it [00:51,  2.00it/s]

250:	learn: 157.3672343	test: 175.1234606	best: 158.9448095 (71)	total: 348ms	remaining: 1.73s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 158.9448095
bestIteration = 71

Shrink model to first 72 iterations.
=== FOLD 78 ===
0:	learn: 468.4269727	test: 412.7866092	best: 412.7866092 (0)	total: 1.55ms	remaining: 2.33s
250:	learn: 154.7194854	test: 184.8499835	best: 179.5277294 (176)	total: 357ms	remaining: 1.77s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 179.5277294
bestIteration = 176

Shrink model to first 177 iterations.


78it [00:52,  1.90it/s]

=== FOLD 79 ===
0:	learn: 472.4088765	test: 508.1974962	best: 508.1974962 (0)	total: 1.79ms	remaining: 2.68s


79it [00:52,  2.01it/s]

250:	learn: 156.4542440	test: 176.6205645	best: 163.9359556 (68)	total: 349ms	remaining: 1.74s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 163.9359556
bestIteration = 68

Shrink model to first 69 iterations.
=== FOLD 80 ===
0:	learn: 473.0729616	test: 491.5006489	best: 491.5006489 (0)	total: 1.97ms	remaining: 2.95s


80it [00:53,  2.09it/s]

250:	learn: 165.4920633	test: 186.9263794	best: 172.6306878 (71)	total: 351ms	remaining: 1.75s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 172.6306878
bestIteration = 71

Shrink model to first 72 iterations.
=== FOLD 81 ===
0:	learn: 468.0186960	test: 464.4041466	best: 464.4041466 (0)	total: 1.57ms	remaining: 2.35s


81it [00:53,  2.14it/s]

250:	learn: 155.1184832	test: 192.0280611	best: 166.8526812 (55)	total: 376ms	remaining: 1.87s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 166.8526812
bestIteration = 55

Shrink model to first 56 iterations.
=== FOLD 82 ===
0:	learn: 475.0096825	test: 380.2739381	best: 380.2739381 (0)	total: 1.81ms	remaining: 2.72s
250:	learn: 158.6798467	test: 198.8806529	best: 196.7560768 (228)	total: 376ms	remaining: 1.87s
500:	learn: 141.8745261	test: 192.2225962	best: 192.2225962 (500)	total: 732ms	remaining: 1.46s
750:	learn: 130.1062390	test: 188.9738255	best: 187.8633772 (712)	total: 1.09s	remaining: 1.09s


82it [00:55,  1.27it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 186.5173011
bestIteration = 783

Shrink model to first 784 iterations.
=== FOLD 83 ===
0:	learn: 463.3056114	test: 705.0132594	best: 705.0132594 (0)	total: 1.57ms	remaining: 2.36s
250:	learn: 167.5520798	test: 239.8522525	best: 236.7294074 (181)	total: 376ms	remaining: 1.87s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 236.7294074
bestIteration = 181

Shrink model to first 182 iterations.


83it [00:55,  1.35it/s]

=== FOLD 84 ===
0:	learn: 473.8410293	test: 426.9162131	best: 426.9162131 (0)	total: 1.82ms	remaining: 2.72s


84it [00:56,  1.55it/s]

250:	learn: 162.0316522	test: 185.8289816	best: 175.4943420 (63)	total: 353ms	remaining: 1.75s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 175.494342
bestIteration = 63

Shrink model to first 64 iterations.
=== FOLD 85 ===
0:	learn: 475.7561694	test: 320.1115941	best: 320.1115941 (0)	total: 1.76ms	remaining: 2.63s


85it [00:56,  1.71it/s]

250:	learn: 159.0807555	test: 162.0615909	best: 149.8667269 (71)	total: 360ms	remaining: 1.79s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 149.8667269
bestIteration = 71

Shrink model to first 72 iterations.
=== FOLD 86 ===
0:	learn: 468.7673508	test: 404.3111264	best: 404.3111264 (0)	total: 1.5ms	remaining: 2.25s
250:	learn: 158.0924984	test: 250.3432092	best: 246.0108063 (177)	total: 339ms	remaining: 1.69s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 246.0108063
bestIteration = 177

Shrink model to first 178 iterations.


86it [00:57,  1.72it/s]

=== FOLD 87 ===
0:	learn: 468.0467007	test: 758.1471645	best: 758.1471645 (0)	total: 1.94ms	remaining: 2.9s


87it [00:57,  1.76it/s]

250:	learn: 159.5309078	test: 221.2213070	best: 212.5016854 (144)	total: 350ms	remaining: 1.74s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 212.5016854
bestIteration = 144

Shrink model to first 145 iterations.
=== FOLD 88 ===
0:	learn: 465.2057788	test: 563.1655674	best: 563.1655674 (0)	total: 1.52ms	remaining: 2.27s


88it [00:58,  1.82it/s]

250:	learn: 158.2209967	test: 146.9598848	best: 129.4301352 (91)	total: 388ms	remaining: 1.93s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 129.4301352
bestIteration = 91

Shrink model to first 92 iterations.
=== FOLD 89 ===
0:	learn: 468.8191373	test: 375.5293416	best: 375.5293416 (0)	total: 1.45ms	remaining: 2.17s


89it [00:58,  1.97it/s]

250:	learn: 160.5687521	test: 148.8365523	best: 124.0302934 (65)	total: 340ms	remaining: 1.69s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 124.0302934
bestIteration = 65

Shrink model to first 66 iterations.
=== FOLD 90 ===
0:	learn: 474.7054723	test: 383.0013989	best: 383.0013989 (0)	total: 2.11ms	remaining: 3.17s
250:	learn: 158.8413035	test: 108.1457512	best: 104.5052409 (210)	total: 338ms	remaining: 1.68s


90it [00:59,  1.83it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 104.5052409
bestIteration = 210

Shrink model to first 211 iterations.
=== FOLD 91 ===
0:	learn: 469.2829015	test: 457.3845691	best: 457.3845691 (0)	total: 1.48ms	remaining: 2.22s
250:	learn: 163.4143915	test: 100.9562370	best: 99.1163457 (231)	total: 352ms	remaining: 1.75s


91it [01:00,  1.64it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 95.51376966
bestIteration = 289

Shrink model to first 290 iterations.
=== FOLD 92 ===
0:	learn: 474.8633433	test: 390.9929392	best: 390.9929392 (0)	total: 2.02ms	remaining: 3.02s


92it [01:00,  1.74it/s]

250:	learn: 158.0222167	test: 144.8101212	best: 136.9928038 (118)	total: 346ms	remaining: 1.72s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 136.9928038
bestIteration = 118

Shrink model to first 119 iterations.
=== FOLD 93 ===
0:	learn: 475.0630012	test: 361.6118084	best: 361.6118084 (0)	total: 1.88ms	remaining: 2.82s
250:	learn: 157.5026971	test: 174.7122210	best: 168.6689650 (160)	total: 342ms	remaining: 1.7s
500:	learn: 140.8479672	test: 168.4824066	best: 167.9677706 (340)	total: 689ms	remaining: 1.37s


93it [01:01,  1.37it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 166.3980872
bestIteration = 527

Shrink model to first 528 iterations.
=== FOLD 94 ===
0:	learn: 468.6069798	test: 450.0932480	best: 450.0932480 (0)	total: 1.45ms	remaining: 2.17s


94it [01:02,  1.57it/s]

250:	learn: 161.5956323	test: 101.6021092	best: 100.6399138 (64)	total: 352ms	remaining: 1.75s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 100.6399138
bestIteration = 64

Shrink model to first 65 iterations.
=== FOLD 95 ===
0:	learn: 463.9697303	test: 770.7246005	best: 770.7246005 (0)	total: 1.66ms	remaining: 2.48s
250:	learn: 163.1223934	test: 387.1256176	best: 385.6874951 (242)	total: 350ms	remaining: 1.74s
500:	learn: 144.3209080	test: 367.1547949	best: 366.6736118 (490)	total: 704ms	remaining: 1.4s


95it [01:03,  1.31it/s]

Stopped by overfitting detector  (200 iterations wait)

bestTest = 366.6736118
bestIteration = 490

Shrink model to first 491 iterations.
=== FOLD 96 ===
0:	learn: 472.9768002	test: 487.2444017	best: 487.2444017 (0)	total: 1.8ms	remaining: 2.7s


96it [01:03,  1.48it/s]

250:	learn: 157.1651165	test: 253.1111486	best: 242.6811253 (107)	total: 341ms	remaining: 1.7s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 242.6811253
bestIteration = 107

Shrink model to first 108 iterations.
=== FOLD 97 ===
0:	learn: 468.1471666	test: 768.9844938	best: 768.9844938 (0)	total: 1.85ms	remaining: 2.78s


97it [01:04,  1.56it/s]

250:	learn: 160.8913591	test: 306.3823496	best: 295.8754203 (149)	total: 350ms	remaining: 1.74s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 295.8754203
bestIteration = 149

Shrink model to first 150 iterations.
=== FOLD 98 ===
0:	learn: 469.5064253	test: 669.8749852	best: 669.8749852 (0)	total: 2.11ms	remaining: 3.16s


98it [01:04,  1.65it/s]

250:	learn: 158.0932718	test: 362.4117723	best: 348.7616151 (133)	total: 350ms	remaining: 1.74s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 348.7616151
bestIteration = 133

Shrink model to first 134 iterations.
=== FOLD 99 ===
0:	learn: 469.9570684	test: 344.8380065	best: 344.8380065 (0)	total: 1.56ms	remaining: 2.35s
250:	learn: 161.7786948	test: 87.0617021	best: 82.9316239 (176)	total: 371ms	remaining: 1.85s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 82.93162392
bestIteration = 176

Shrink model to first 177 iterations.


99it [01:05,  1.65it/s]

=== FOLD 100 ===
0:	learn: 474.1986060	test: 404.6392725	best: 404.6392725 (0)	total: 1.81ms	remaining: 2.71s


100it [01:05,  1.52it/s]

250:	learn: 164.4408445	test: 177.2570605	best: 165.5944357 (51)	total: 342ms	remaining: 1.7s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 165.5944357
bestIteration = 51

Shrink model to first 52 iterations.





175.2835665566453

In [9]:
test_preds = test_preds.mean(axis=1)
test_preds.shape

(2000,)

In [10]:
sample_submission = pd.read_csv("./data/youtube/youtube_sample_submission.csv", index_col="id")
sample_submission["ViewCount"] = test_preds
sample_submission.to_csv("./submissions/youtube/sub6.csv")

In [3]:
# min_date = pd.to_datetime(datetime.datetime.strptime('01012010', "%d%m%Y").date())
# df["PublishDate"] = df["PublishDate"].apply(lambda x: datetime.datetime.strptime(x[:-6], "%Y-%m-%dT%H:%M:%S"))
# df["PublishDateDays"] = (df["PublishDate"] - min_date).dt.days
# df["PublishDateYear"] = df["PublishDate"].dt.year
# df["PublishDateMonth"] = df["PublishDate"].dt.month
# df["PublishDateDay"] = df["PublishDate"].dt.day

In [4]:
# df_test["PublishDate"] = df_test["PublishDate"].apply(lambda x: datetime.datetime.strptime(x[:-6], "%Y-%m-%dT%H:%M:%S"))
# df_test["PublishDateDays"] = (df_test["PublishDate"] - min_date).dt.days
# df_test["PublishDateYear"] = df_test["PublishDate"].dt.year
# df_test["PublishDateMonth"] = df_test["PublishDate"].dt.month
# df_test["PublishDateDay"] = df_test["PublishDate"].dt.day

In [5]:
# df.isna().sum()

Channel             0
Subtitles           0
PublishDate         0
Category            0
ViewCount           0
PublishDateDays     0
PublishDateYear     0
PublishDateMonth    0
PublishDateDay      0
dtype: int64

In [6]:
# X = df.drop(columns=["ViewCount", "Subtitles"])
# y = df["ViewCount"]

In [7]:
# X_test = df_test.drop(columns=["Subtitles"])

In [8]:
# cat_features = list()

# for col_name, dtype in X.dtypes.to_dict().items():
#     if dtype == "object" and col_name != "Subtitles":
#         cat_features.append(col_name)

# cat_features

['Channel', 'Category']

In [9]:
# n_splits = 5
# kf = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
# val_scores = list()
# test_preds = np.zeros((X_test.shape[0], n_splits))

# for i, (train_index, val_index) in enumerate(tqdm(kf.split(X))):
#     print(f"=== FOLD {i + 1} ===")

#     X_train = X.iloc[train_index]
#     X_val = X.iloc[val_index]
#     y_train = y.iloc[train_index]
#     y_val = y.iloc[val_index]

#     model = CatBoostRegressor(
#         iterations=500,
#         cat_features=cat_features,
#         # text_features=["Subtitles"],
#         eval_metric="MAE",
#         # learning_rate=0.07,
#         verbose=250,
#         early_stopping_rounds=200,
#         use_best_model=True,
#         random_seed=seed,
#         # depth=8,
#     )
#     model.fit(X_train, y_train, eval_set=(X_val, y_val))

#     preds = model.predict(X_val)
#     test_preds[:, i] = model.predict(X_test)
    
#     val_scores.append(mean_absolute_error(y_val, preds))

# sum(val_scores) / len(val_scores)

0it [00:00, ?it/s]

=== FOLD 1 ===
Learning rate set to 0.099023
0:	learn: 474.6785850	test: 455.6021404	best: 455.6021404 (0)	total: 50.6ms	remaining: 25.3s
250:	learn: 155.7788338	test: 201.1125630	best: 201.0719664 (249)	total: 367ms	remaining: 364ms


1it [00:00,  1.27it/s]

499:	learn: 133.6669297	test: 203.8270172	best: 199.9057328 (361)	total: 702ms	remaining: 0us

bestTest = 199.9057328
bestIteration = 361

Shrink model to first 362 iterations.
=== FOLD 2 ===
Learning rate set to 0.099023
0:	learn: 454.7121915	test: 512.4080574	best: 512.4080574 (0)	total: 1.77ms	remaining: 886ms


2it [00:01,  1.79it/s]

250:	learn: 158.4131262	test: 216.7476868	best: 206.1990357 (71)	total: 318ms	remaining: 316ms
Stopped by overfitting detector  (200 iterations wait)

bestTest = 206.1990357
bestIteration = 71

Shrink model to first 72 iterations.
=== FOLD 3 ===
Learning rate set to 0.099023
0:	learn: 478.4011859	test: 452.5670566	best: 452.5670566 (0)	total: 1.85ms	remaining: 924ms


3it [00:01,  2.05it/s]

250:	learn: 153.4036972	test: 187.7742600	best: 180.3753788 (77)	total: 317ms	remaining: 314ms
Stopped by overfitting detector  (200 iterations wait)

bestTest = 180.3753788
bestIteration = 77

Shrink model to first 78 iterations.
=== FOLD 4 ===
Learning rate set to 0.099023
0:	learn: 487.4133552	test: 450.2770043	best: 450.2770043 (0)	total: 1.67ms	remaining: 835ms


4it [00:02,  2.04it/s]

250:	learn: 167.2952062	test: 178.5031845	best: 175.0625532 (142)	total: 312ms	remaining: 310ms
Stopped by overfitting detector  (200 iterations wait)

bestTest = 175.0625532
bestIteration = 142

Shrink model to first 143 iterations.
=== FOLD 5 ===
Learning rate set to 0.099027
0:	learn: 457.6805473	test: 487.2536470	best: 487.2536470 (0)	total: 1.57ms	remaining: 784ms


5it [00:02,  2.00it/s]

250:	learn: 151.6950821	test: 195.0853830	best: 190.8870395 (89)	total: 317ms	remaining: 315ms
Stopped by overfitting detector  (200 iterations wait)

bestTest = 190.8870395
bestIteration = 89

Shrink model to first 90 iterations.





190.4859490048181