In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.base import BaseEstimator
import nltk
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
from scipy.sparse import hstack

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.multioutput import MultiOutputRegressor
from catboost import CatBoostRegressor, Pool


pd.options.display.float_format = '{:,.2f}'.format
sns.set_theme()
nltk.download("stopwords");

RANDOM_STATE = 44

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
train_df = pd.read_csv("data/train_dataset_train.csv", index_col=0)
test_df = pd.read_csv("data/test_dataset_test.csv", index_col=0)

train_df["is_train"] = 1
test_df["is_train"] = 0

df = pd.concat([train_df, test_df])

In [3]:
class RBKpreprocessor(BaseEstimator):
    def __init__(self):
        self._vectorizer_tags = TfidfVectorizer()
        self._vectorizer_authors = TfidfVectorizer()
        self._category_ohe = OneHotEncoder()
        self._category_from_title_ohe = OneHotEncoder()
        self._stop_words = stopwords.words("russian")
        self._stemmer = SnowballStemmer("russian")
        self._vectorizer_title = TfidfVectorizer()
        
        
    def _clean_list(self, title):
        return(title.
           replace("[", "").
           replace("]", "").
           replace(".", "").
           replace("'", "").
           replace(",", " ")
          )
    
    def _clean_title(self, title):
        if title.find("\n")>0:
            title = title[0:title.find("\n\n")].lower()
        title = " ".join([self._stemmer.stem(w) for w in title.split() if w not in self._stop_words])
        return title

    def _find_category_in_title(self, title):
        if title.find("\n")>0:
            title = title[title.find("\n\n"):].lower().strip()
        else:
            title = ""
        if "," in title:
            title = title[0:title.index(",")]
        else:
            title = ""

        return title
    
    def fit(self, df):
        self._category_ohe.fit(df.category.values.reshape(-1,1))
        
        authors_clean = df.authors.apply(self._clean_list)
        tags_clean = df.tags.apply(self._clean_list)
        self._vectorizer_tags.fit(tags_clean)
        self._vectorizer_authors.fit(authors_clean);
        
        title_clean = df.title.apply(self._clean_title)
        category_from_title =  df.title.apply(self._find_category_in_title)
        self._category_from_title_ohe.fit(category_from_title.values.reshape(-1,1))
        self._vectorizer_title.fit(title_clean)
        
        return(self)
        
        
    def transform(self, df):
        ctr_zero = (df.ctr == 0)
        ctr_log = np.log(df.ctr)
        mean_ctr_log = np.mean(ctr_log.values, where=(ctr_log != -np.inf))
        ctr_log = np.where(df["ctr"] == 0, mean_ctr_log, ctr_log)
        
        category_sparse = self._category_ohe.transform(df.category.values.reshape(-1,1))
        
        authors_clean = df.authors.apply(self._clean_list)
        tags_clean = df.tags.apply(self._clean_list)
        
        authors_count = authors_clean.apply(lambda x: len(x.split()))
        tags_count = tags_clean.apply(lambda x: len(x.split()))
        
        tags_sparse = self._vectorizer_tags.transform(tags_clean)
        authors_sparse = self._vectorizer_authors.transform(authors_clean)
        
        publish_date = pd.to_datetime(df.publish_date)
        publish_year = publish_date.dt.year * 100 + publish_date.dt.month
        publish_day = publish_date.dt.day
        publish_weekday = publish_date.dt.weekday
        publish_hour = publish_date.dt.hour
        
        title_clean = df.title.apply(self._clean_title)
        title_sparse  = self._vectorizer_title.transform(title_clean)
        
        category_from_title =  df.title.apply(self._find_category_in_title)
        category_from_title_sparse = self._category_from_title_ohe.transform(category_from_title.values.reshape(-1,1))
        
        return hstack([
            ctr_zero.values[:,None],
            ctr_log[:,None],
            category_sparse,
            authors_count.values[:,None],
            tags_count.values[:,None],
            tags_sparse,
            authors_sparse,
            publish_year.values[:,None],
            publish_day.values[:,None],
            publish_weekday.values[:,None],
            publish_hour.values[:,None],
            title_sparse,
            category_from_title_sparse
        ])
        
def my_metric(y_real, preds, detail=True):
    overall = (
            0.4*r2_score(y_real.iloc[:,0], preds[:,0]) + 
            0.3*r2_score(y_real.iloc[:,1], preds[:,1]) + 
            0.3*r2_score(y_real.iloc[:,2], preds[:,2])
        )
    
    if not detail:
        return overall
    else:
        return (
            overall,
            0.4*r2_score(y_real.iloc[:,0], preds[:,0]), 
            0.3*r2_score(y_real.iloc[:,1], preds[:,1]), 
            0.3*r2_score(y_real.iloc[:,2], preds[:,2])
        )
    
def write_down_predictions(preds, output_file = "output.csv"):
    solution = pd.read_csv("data\sample_solution.csv")
    solution.iloc[:,1:4] = preds
    solution.to_csv(output_file, index=False)

In [4]:
%%time
preprocess = RBKpreprocessor()
preprocess.fit(df)

Wall time: 1min 9s


RBKpreprocessor()

## Локальная валидация

In [5]:
valid_train_df, valid_test_df = train_test_split(train_df, test_size=0.2, random_state=RANDOM_STATE, shuffle=True)

In [6]:
%%time
X_train = preprocess.transform(valid_train_df)
y_train = valid_train_df[["views", "depth", "full_reads_percent"]]

X_test = preprocess.transform(valid_test_df)
y_test = valid_test_df[["views", "depth", "full_reads_percent"]]



  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


Wall time: 44.9 s


## CatboostRegressor

In [19]:
model = CatBoostRegressor(iterations=20,
                          early_stopping_rounds=3,
                          random_seed=RANDOM_STATE,
                          depth=10, 
                          loss_function='MultiRMSE',
                          verbose=True)

In [20]:
%%time
model.fit(X_train, y_train, eval_set=Pool(data=X_test, label=y_test))

Custom logger is already specified. Specify more than one logger at same time is not thread safe.Got unsafe target value = 2.5542e+06 at object #334 of dataset learn
Got unsafe target value = 2.5542e+06 at object #164 of dataset test #0


0:	learn: 85530.7791244	test: 125539.0041544	best: 125539.0041544 (0)	total: 1.79s	remaining: 33.9s
1:	learn: 85024.6884090	test: 125272.6614669	best: 125272.6614669 (1)	total: 7.08s	remaining: 1m 3s
2:	learn: 84356.0603643	test: 125010.3365896	best: 125010.3365896 (2)	total: 12.4s	remaining: 1m 10s
3:	learn: 84014.8571342	test: 124872.1330171	best: 124872.1330171 (3)	total: 17s	remaining: 1m 7s
4:	learn: 83358.3165231	test: 124596.8324311	best: 124596.8324311 (4)	total: 21.6s	remaining: 1m 4s
5:	learn: 82504.1805773	test: 124291.3635973	best: 124291.3635973 (5)	total: 26s	remaining: 1m
6:	learn: 81882.9984517	test: 124203.0536857	best: 124203.0536857 (6)	total: 30.9s	remaining: 57.4s
7:	learn: 81132.5591418	test: 123852.8156653	best: 123852.8156653 (7)	total: 35.1s	remaining: 52.7s
8:	learn: 80584.3516082	test: 123219.5671855	best: 123219.5671855 (8)	total: 39.4s	remaining: 48.1s
9:	learn: 80403.7560937	test: 122968.7477808	best: 122968.7477808 (9)	total: 43.5s	remaining: 43.5s
10:	le

<catboost.core.CatBoostRegressor at 0x239824b1f40>

In [21]:
preds = model.predict(X_test)
my_metric(y_test, preds)

(0.08884265310537756,
 0.033680461713825154,
 0.046408845992440474,
 0.008753345399111934)

In [22]:
model = CatBoostRegressor(iterations=50,
                          early_stopping_rounds=3,
                          random_seed=RANDOM_STATE,
                          depth=10, 
                          loss_function='MultiRMSE')

In [23]:
%%time
model.fit(X_train, y_train, eval_set=Pool(data=X_test, label=y_test))
preds = model.predict(X_test)
my_metric(y_test, preds)

Got unsafe target value = 2.5542e+06 at object #334 of dataset learn
Got unsafe target value = 2.5542e+06 at object #164 of dataset test #0


0:	learn: 85530.7791244	test: 125539.0041544	best: 125539.0041544 (0)	total: 1.09s	remaining: 53.3s
1:	learn: 85024.6884090	test: 125272.6614669	best: 125272.6614669 (1)	total: 2.31s	remaining: 55.3s
2:	learn: 84356.0603643	test: 125010.3365896	best: 125010.3365896 (2)	total: 7.32s	remaining: 1m 54s
3:	learn: 84014.8571342	test: 124872.1330171	best: 124872.1330171 (3)	total: 13.5s	remaining: 2m 35s
4:	learn: 83358.3165231	test: 124596.8324311	best: 124596.8324311 (4)	total: 18.3s	remaining: 2m 44s
5:	learn: 82504.1805773	test: 124291.3635973	best: 124291.3635973 (5)	total: 22.7s	remaining: 2m 46s
6:	learn: 81882.9984517	test: 124203.0536857	best: 124203.0536857 (6)	total: 26.8s	remaining: 2m 44s
7:	learn: 81132.5591418	test: 123852.8156653	best: 123852.8156653 (7)	total: 31.6s	remaining: 2m 45s
8:	learn: 80584.3516082	test: 123219.5671855	best: 123219.5671855 (8)	total: 35.8s	remaining: 2m 43s
9:	learn: 80403.7560937	test: 122968.7477808	best: 122968.7477808 (9)	total: 40.2s	remaining:

(0.17971151254219747,
 0.06627755569599025,
 0.09320289876036893,
 0.020231058085838315)

## А если сделать по одному бустинку на каждый выход, и метрику r2 оптимизировать

In [35]:
model1 = CatBoostRegressor(iterations=20,
                          early_stopping_rounds=3,
                          random_seed=RANDOM_STATE,
                          depth=10, 
                          eval_metric="R2",
                          loss_function="RMSE")
model2 = model1.copy()
model3 = model1.copy()

In [36]:
%%time
model1.fit(X_train, y_train.iloc[:,0], eval_set=Pool(data=X_test, label=y_test.iloc[:,0]))

Learning rate set to 0.5
0:	learn: 0.1457455	test: 0.0690903	best: 0.0690903 (0)	total: 450ms	remaining: 8.55s
1:	learn: 0.3192358	test: 0.1102355	best: 0.1102355 (1)	total: 867ms	remaining: 7.8s
2:	learn: 0.5451028	test: 0.2205832	best: 0.2205832 (2)	total: 1.28s	remaining: 7.27s
3:	learn: 0.6191707	test: 0.2545967	best: 0.2545967 (3)	total: 1.68s	remaining: 6.71s
4:	learn: 0.7028892	test: 0.3093535	best: 0.3093535 (4)	total: 2.1s	remaining: 6.31s
5:	learn: 0.7255360	test: 0.3272761	best: 0.3272761 (5)	total: 2.58s	remaining: 6.01s
6:	learn: 0.7407832	test: 0.3408906	best: 0.3408906 (6)	total: 3.44s	remaining: 6.4s
7:	learn: 0.7551583	test: 0.3519802	best: 0.3519802 (7)	total: 5.25s	remaining: 7.87s
8:	learn: 0.7656924	test: 0.3593509	best: 0.3593509 (8)	total: 7.54s	remaining: 9.22s
9:	learn: 0.7752481	test: 0.3665365	best: 0.3665365 (9)	total: 9.5s	remaining: 9.5s
10:	learn: 0.8156690	test: 0.4046898	best: 0.4046898 (10)	total: 11.7s	remaining: 9.57s
11:	learn: 0.8353679	test: 0.417

<catboost.core.CatBoostRegressor at 0x23982105a00>

In [37]:
%%time
model2.fit(X_train, y_train.iloc[:,1], eval_set=Pool(data=X_test, label=y_test.iloc[:,1]))

Learning rate set to 0.5
0:	learn: 0.4617745	test: 0.4599649	best: 0.4599649 (0)	total: 803ms	remaining: 15.3s
1:	learn: 0.6096927	test: 0.5611509	best: 0.5611509 (1)	total: 2.1s	remaining: 18.9s
2:	learn: 0.6751672	test: 0.6270159	best: 0.6270159 (2)	total: 3.53s	remaining: 20s
3:	learn: 0.7353214	test: 0.7108072	best: 0.7108072 (3)	total: 5.01s	remaining: 20s
4:	learn: 0.7431803	test: 0.7170892	best: 0.7170892 (4)	total: 6.92s	remaining: 20.8s
5:	learn: 0.7481745	test: 0.7170847	best: 0.7170892 (4)	total: 8.34s	remaining: 19.5s
6:	learn: 0.7532388	test: 0.7217736	best: 0.7217736 (6)	total: 9.8s	remaining: 18.2s
7:	learn: 0.7761962	test: 0.7355344	best: 0.7355344 (7)	total: 11.3s	remaining: 17s
8:	learn: 0.7800880	test: 0.7388071	best: 0.7388071 (8)	total: 12.7s	remaining: 15.5s
9:	learn: 0.7897370	test: 0.7443755	best: 0.7443755 (9)	total: 14.4s	remaining: 14.4s
10:	learn: 0.7932726	test: 0.7466024	best: 0.7466024 (10)	total: 15.7s	remaining: 12.9s
11:	learn: 0.7969714	test: 0.747523

<catboost.core.CatBoostRegressor at 0x239821059a0>

In [38]:
%%time
model3.fit(X_train, y_train.iloc[:,2], eval_set=Pool(data=X_test, label=y_test.iloc[:,2]))

Learning rate set to 0.5
0:	learn: 0.1899170	test: 0.1629200	best: 0.1629200 (0)	total: 1.81s	remaining: 34.5s
1:	learn: 0.2140387	test: 0.1630573	best: 0.1630573 (1)	total: 3.64s	remaining: 32.8s
2:	learn: 0.2328228	test: 0.1627615	best: 0.1630573 (1)	total: 5.54s	remaining: 31.4s
3:	learn: 0.3018429	test: 0.2168630	best: 0.2168630 (3)	total: 6.94s	remaining: 27.7s
4:	learn: 0.3139694	test: 0.2140174	best: 0.2168630 (3)	total: 8.49s	remaining: 25.5s
5:	learn: 0.3501655	test: 0.2402478	best: 0.2402478 (5)	total: 9.99s	remaining: 23.3s
6:	learn: 0.3574774	test: 0.2404505	best: 0.2404505 (6)	total: 11.6s	remaining: 21.5s
7:	learn: 0.3630384	test: 0.2403459	best: 0.2404505 (6)	total: 13.1s	remaining: 19.7s
8:	learn: 0.3931606	test: 0.2643137	best: 0.2643137 (8)	total: 14.5s	remaining: 17.8s
9:	learn: 0.3969076	test: 0.2588657	best: 0.2643137 (8)	total: 16.1s	remaining: 16.1s
10:	learn: 0.4003125	test: 0.2523575	best: 0.2643137 (8)	total: 17.6s	remaining: 14.4s
11:	learn: 0.4278480	test: 0

<catboost.core.CatBoostRegressor at 0x23982105d60>

In [42]:
preds = np.c_[
    model1.predict(X_test),
    model2.predict(X_test),
    model3.predict(X_test)
]
my_metric(y_test, preds)

(0.5000721823769023,
 0.17981116553200197,
 0.2285315081366026,
 0.09172950870829773)

## Выводы
Ну кажется, что оптимизация каждого параметра по отдельности более перспективна, чем считать одну модель на 3 предсказания

> Попробуем сделать обучение модели на этих параметрах, но на 2000 итерациях каждую (В разных ноутбуках)
> А потом можно будет  подобрать оптимальные гиперпараметры  для каждого признака

## Обучение дерева

In [7]:
GOAL_NUM = 2

In [8]:
model = model1 = CatBoostRegressor(iterations=1000,
                          early_stopping_rounds=30,
                          random_seed=RANDOM_STATE,
                          depth=10, 
                          eval_metric="R2",
                          loss_function="RMSE")

In [9]:
model.fit(X_train, 
          y_train.iloc[:,GOAL_NUM], 
          eval_set=Pool(
              data=X_test, 
              label=y_test.iloc[:,GOAL_NUM])
         )

Learning rate set to 0.063811
0:	learn: 0.0302774	test: 0.0260862	best: 0.0260862 (0)	total: 5.28s	remaining: 1h 27m 57s
1:	learn: 0.0571963	test: 0.0507386	best: 0.0507386 (1)	total: 11.6s	remaining: 1h 36m 17s
2:	learn: 0.0820307	test: 0.0726763	best: 0.0726763 (2)	total: 16.6s	remaining: 1h 32m 8s
3:	learn: 0.1035751	test: 0.0908567	best: 0.0908567 (3)	total: 21.7s	remaining: 1h 29m 52s
4:	learn: 0.1236488	test: 0.1077133	best: 0.1077133 (4)	total: 27.7s	remaining: 1h 31m 49s
5:	learn: 0.1433692	test: 0.1242434	best: 0.1242434 (5)	total: 33.8s	remaining: 1h 33m 26s
6:	learn: 0.1584868	test: 0.1365850	best: 0.1365850 (6)	total: 39.7s	remaining: 1h 33m 44s
7:	learn: 0.1722411	test: 0.1468264	best: 0.1468264 (7)	total: 45.8s	remaining: 1h 34m 38s
8:	learn: 0.1849386	test: 0.1569947	best: 0.1569947 (8)	total: 50.1s	remaining: 1h 31m 59s
9:	learn: 0.1973622	test: 0.1662315	best: 0.1662315 (9)	total: 54.2s	remaining: 1h 29m 28s
10:	learn: 0.2004274	test: 0.1661092	best: 0.1662315 (9)	tota

88:	learn: 0.3977224	test: 0.2311720	best: 0.2319230 (84)	total: 7m 15s	remaining: 1h 14m 13s
89:	learn: 0.3981498	test: 0.2298039	best: 0.2319230 (84)	total: 7m 19s	remaining: 1h 14m 1s
90:	learn: 0.3985146	test: 0.2298167	best: 0.2319230 (84)	total: 7m 23s	remaining: 1h 13m 48s
91:	learn: 0.4010838	test: 0.2318083	best: 0.2319230 (84)	total: 7m 27s	remaining: 1h 13m 33s
92:	learn: 0.4049653	test: 0.2346876	best: 0.2346876 (92)	total: 7m 31s	remaining: 1h 13m 19s
93:	learn: 0.4072776	test: 0.2358235	best: 0.2358235 (93)	total: 7m 35s	remaining: 1h 13m 7s
94:	learn: 0.4076846	test: 0.2345728	best: 0.2358235 (93)	total: 7m 39s	remaining: 1h 12m 55s
95:	learn: 0.4080777	test: 0.2346080	best: 0.2358235 (93)	total: 7m 43s	remaining: 1h 12m 42s
96:	learn: 0.4083749	test: 0.2346636	best: 0.2358235 (93)	total: 7m 47s	remaining: 1h 12m 30s
97:	learn: 0.4089088	test: 0.2347708	best: 0.2358235 (93)	total: 7m 51s	remaining: 1h 12m 18s
98:	learn: 0.4092488	test: 0.2334354	best: 0.2358235 (93)	tota

175:	learn: 0.5213150	test: 0.3013220	best: 0.3013220 (175)	total: 13m 8s	remaining: 1h 1m 29s
176:	learn: 0.5216114	test: 0.3013983	best: 0.3013983 (176)	total: 13m 12s	remaining: 1h 1m 23s
177:	learn: 0.5231839	test: 0.3026185	best: 0.3026185 (177)	total: 13m 15s	remaining: 1h 1m 15s
178:	learn: 0.5234632	test: 0.3026977	best: 0.3026977 (178)	total: 13m 19s	remaining: 1h 1m 9s
179:	learn: 0.5246063	test: 0.3035129	best: 0.3035129 (179)	total: 13m 23s	remaining: 1h 1m 2s
180:	learn: 0.5259017	test: 0.3042418	best: 0.3042418 (180)	total: 13m 27s	remaining: 1h 55s
181:	learn: 0.5276415	test: 0.3051975	best: 0.3051975 (181)	total: 13m 31s	remaining: 1h 48s
182:	learn: 0.5288040	test: 0.3060636	best: 0.3060636 (182)	total: 13m 35s	remaining: 1h 42s
183:	learn: 0.5304427	test: 0.3071612	best: 0.3071612 (183)	total: 13m 40s	remaining: 1h 37s
184:	learn: 0.5320596	test: 0.3079357	best: 0.3079357 (184)	total: 13m 44s	remaining: 1h 30s
185:	learn: 0.5335015	test: 0.3086779	best: 0.3086779 (185

263:	learn: 0.6064752	test: 0.3397673	best: 0.3397673 (263)	total: 17m 44s	remaining: 49m 28s
264:	learn: 0.6072697	test: 0.3400490	best: 0.3400490 (264)	total: 17m 47s	remaining: 49m 20s
265:	learn: 0.6079260	test: 0.3399734	best: 0.3400490 (264)	total: 17m 50s	remaining: 49m 12s
266:	learn: 0.6085909	test: 0.3403387	best: 0.3403387 (266)	total: 17m 53s	remaining: 49m 5s
267:	learn: 0.6099539	test: 0.3408547	best: 0.3408547 (267)	total: 17m 55s	remaining: 48m 58s
268:	learn: 0.6101395	test: 0.3408991	best: 0.3408991 (268)	total: 17m 58s	remaining: 48m 50s
269:	learn: 0.6113384	test: 0.3413975	best: 0.3413975 (269)	total: 18m 1s	remaining: 48m 43s
270:	learn: 0.6118859	test: 0.3415145	best: 0.3415145 (270)	total: 18m 4s	remaining: 48m 37s
271:	learn: 0.6125380	test: 0.3415208	best: 0.3415208 (271)	total: 18m 7s	remaining: 48m 29s
272:	learn: 0.6131672	test: 0.3419043	best: 0.3419043 (272)	total: 18m 9s	remaining: 48m 22s
273:	learn: 0.6134496	test: 0.3412482	best: 0.3419043 (272)	total

351:	learn: 0.6512811	test: 0.3517559	best: 0.3517559 (351)	total: 21m 46s	remaining: 40m 6s
352:	learn: 0.6517744	test: 0.3519181	best: 0.3519181 (352)	total: 21m 49s	remaining: 40m
353:	learn: 0.6521479	test: 0.3520783	best: 0.3520783 (353)	total: 21m 52s	remaining: 39m 55s
354:	learn: 0.6523722	test: 0.3520381	best: 0.3520783 (353)	total: 21m 55s	remaining: 39m 50s
355:	learn: 0.6526012	test: 0.3520640	best: 0.3520783 (353)	total: 21m 58s	remaining: 39m 45s
356:	learn: 0.6531043	test: 0.3522148	best: 0.3522148 (356)	total: 22m 1s	remaining: 39m 39s
357:	learn: 0.6534655	test: 0.3521221	best: 0.3522148 (356)	total: 22m 3s	remaining: 39m 34s
358:	learn: 0.6537891	test: 0.3520929	best: 0.3522148 (356)	total: 22m 6s	remaining: 39m 28s
359:	learn: 0.6539459	test: 0.3521536	best: 0.3522148 (356)	total: 22m 9s	remaining: 39m 23s
360:	learn: 0.6542371	test: 0.3522983	best: 0.3522983 (360)	total: 22m 12s	remaining: 39m 18s
361:	learn: 0.6545171	test: 0.3523440	best: 0.3523440 (361)	total: 22

439:	learn: 0.6828984	test: 0.3569668	best: 0.3571654 (437)	total: 25m 55s	remaining: 32m 59s
440:	learn: 0.6832760	test: 0.3573006	best: 0.3573006 (440)	total: 25m 57s	remaining: 32m 54s
441:	learn: 0.6835204	test: 0.3572803	best: 0.3573006 (440)	total: 26m	remaining: 32m 50s
442:	learn: 0.6836956	test: 0.3572821	best: 0.3573006 (440)	total: 26m 3s	remaining: 32m 45s
443:	learn: 0.6839754	test: 0.3572216	best: 0.3573006 (440)	total: 26m 6s	remaining: 32m 41s
444:	learn: 0.6843478	test: 0.3571367	best: 0.3573006 (440)	total: 26m 9s	remaining: 32m 37s
445:	learn: 0.6847697	test: 0.3573053	best: 0.3573053 (445)	total: 26m 12s	remaining: 32m 33s
446:	learn: 0.6849657	test: 0.3573180	best: 0.3573180 (446)	total: 26m 15s	remaining: 32m 29s
447:	learn: 0.6852522	test: 0.3574533	best: 0.3574533 (447)	total: 26m 18s	remaining: 32m 24s
448:	learn: 0.6862512	test: 0.3574638	best: 0.3574638 (448)	total: 26m 21s	remaining: 32m 20s
449:	learn: 0.6867546	test: 0.3577383	best: 0.3577383 (449)	total: 

527:	learn: 0.7106572	test: 0.3628086	best: 0.3628983 (526)	total: 30m 21s	remaining: 27m 8s
528:	learn: 0.7112571	test: 0.3629643	best: 0.3629643 (528)	total: 30m 24s	remaining: 27m 4s
529:	learn: 0.7114686	test: 0.3628886	best: 0.3629643 (528)	total: 30m 28s	remaining: 27m 1s
530:	learn: 0.7115782	test: 0.3628507	best: 0.3629643 (528)	total: 30m 30s	remaining: 26m 57s
531:	learn: 0.7120116	test: 0.3631195	best: 0.3631195 (531)	total: 30m 32s	remaining: 26m 52s
532:	learn: 0.7125329	test: 0.3630828	best: 0.3631195 (531)	total: 30m 35s	remaining: 26m 47s
533:	learn: 0.7126751	test: 0.3630686	best: 0.3631195 (531)	total: 30m 37s	remaining: 26m 43s
534:	learn: 0.7130764	test: 0.3628743	best: 0.3631195 (531)	total: 30m 39s	remaining: 26m 38s
535:	learn: 0.7132208	test: 0.3629760	best: 0.3631195 (531)	total: 30m 41s	remaining: 26m 33s
536:	learn: 0.7134064	test: 0.3629006	best: 0.3631195 (531)	total: 30m 43s	remaining: 26m 29s
537:	learn: 0.7139757	test: 0.3633511	best: 0.3633511 (537)	tot

615:	learn: 0.7344073	test: 0.3673907	best: 0.3674492 (614)	total: 33m 35s	remaining: 20m 56s
616:	learn: 0.7348014	test: 0.3672211	best: 0.3674492 (614)	total: 33m 36s	remaining: 20m 51s
617:	learn: 0.7350895	test: 0.3670396	best: 0.3674492 (614)	total: 33m 38s	remaining: 20m 47s
618:	learn: 0.7352950	test: 0.3671643	best: 0.3674492 (614)	total: 33m 39s	remaining: 20m 43s
619:	learn: 0.7356007	test: 0.3671880	best: 0.3674492 (614)	total: 33m 41s	remaining: 20m 39s
620:	learn: 0.7356999	test: 0.3672137	best: 0.3674492 (614)	total: 33m 43s	remaining: 20m 35s
621:	learn: 0.7360543	test: 0.3671589	best: 0.3674492 (614)	total: 33m 45s	remaining: 20m 31s
622:	learn: 0.7363458	test: 0.3674524	best: 0.3674524 (622)	total: 33m 47s	remaining: 20m 27s
623:	learn: 0.7364423	test: 0.3674555	best: 0.3674555 (623)	total: 33m 49s	remaining: 20m 23s
624:	learn: 0.7367912	test: 0.3678056	best: 0.3678056 (624)	total: 33m 52s	remaining: 20m 19s
625:	learn: 0.7369993	test: 0.3678486	best: 0.3678486 (625)	

<catboost.core.CatBoostRegressor at 0x1626eb932b0>

In [10]:
with open(f"models/cb1000-{GOAL_NUM}.pkl", "wb") as pkl_file:
     pickle.dump(model, pkl_file)