In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.base import BaseEstimator
import nltk
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
from scipy.sparse import hstack

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.multioutput import MultiOutputRegressor
from catboost import CatBoostRegressor, Pool


pd.options.display.float_format = '{:,.2f}'.format
sns.set_theme()
nltk.download("stopwords");

RANDOM_STATE = 44

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
train_df = pd.read_csv("data/train_dataset_train.csv", index_col=0)
test_df = pd.read_csv("data/test_dataset_test.csv", index_col=0)

train_df["is_train"] = 1
test_df["is_train"] = 0

df = pd.concat([train_df, test_df])

In [3]:
class RBKpreprocessor(BaseEstimator):
    def __init__(self):
        self._vectorizer_tags = TfidfVectorizer()
        self._vectorizer_authors = TfidfVectorizer()
        self._category_ohe = OneHotEncoder()
        self._category_from_title_ohe = OneHotEncoder()
        self._stop_words = stopwords.words("russian")
        self._stemmer = SnowballStemmer("russian")
        self._vectorizer_title = TfidfVectorizer()
        
        
    def _clean_list(self, title):
        return(title.
           replace("[", "").
           replace("]", "").
           replace(".", "").
           replace("'", "").
           replace(",", " ")
          )
    
    def _clean_title(self, title):
        if title.find("\n")>0:
            title = title[0:title.find("\n\n")].lower()
        title = " ".join([self._stemmer.stem(w) for w in title.split() if w not in self._stop_words])
        return title

    def _find_category_in_title(self, title):
        if title.find("\n")>0:
            title = title[title.find("\n\n"):].lower().strip()
        else:
            title = ""
        if "," in title:
            title = title[0:title.index(",")]
        else:
            title = ""

        return title
    
    def fit(self, df):
        self._category_ohe.fit(df.category.values.reshape(-1,1))
        
        authors_clean = df.authors.apply(self._clean_list)
        tags_clean = df.tags.apply(self._clean_list)
        self._vectorizer_tags.fit(tags_clean)
        self._vectorizer_authors.fit(authors_clean);
        
        title_clean = df.title.apply(self._clean_title)
        category_from_title =  df.title.apply(self._find_category_in_title)
        self._category_from_title_ohe.fit(category_from_title.values.reshape(-1,1))
        self._vectorizer_title.fit(title_clean)
        
        return(self)
        
        
    def transform(self, df):
        ctr_zero = (df.ctr == 0)
        ctr_log = np.log(df.ctr)
        mean_ctr_log = np.mean(ctr_log.values, where=(ctr_log != -np.inf))
        ctr_log = np.where(df["ctr"] == 0, mean_ctr_log, ctr_log)
        
        category_sparse = self._category_ohe.transform(df.category.values.reshape(-1,1))
        
        authors_clean = df.authors.apply(self._clean_list)
        tags_clean = df.tags.apply(self._clean_list)
        
        authors_count = authors_clean.apply(lambda x: len(x.split()))
        tags_count = tags_clean.apply(lambda x: len(x.split()))
        
        tags_sparse = self._vectorizer_tags.transform(tags_clean)
        authors_sparse = self._vectorizer_authors.transform(authors_clean)
        
        publish_date = pd.to_datetime(df.publish_date)
        publish_year = publish_date.dt.year * 100 + publish_date.dt.month
        publish_day = publish_date.dt.day
        publish_weekday = publish_date.dt.weekday
        publish_hour = publish_date.dt.hour
        
        title_clean = df.title.apply(self._clean_title)
        title_sparse  = self._vectorizer_title.transform(title_clean)
        
        category_from_title =  df.title.apply(self._find_category_in_title)
        category_from_title_sparse = self._category_from_title_ohe.transform(category_from_title.values.reshape(-1,1))
        
        return hstack([
            ctr_zero.values[:,None],
            ctr_log[:,None],
            category_sparse,
            authors_count.values[:,None],
            tags_count.values[:,None],
            tags_sparse,
            authors_sparse,
            publish_year.values[:,None],
            publish_day.values[:,None],
            publish_weekday.values[:,None],
            publish_hour.values[:,None],
            title_sparse,
            category_from_title_sparse
        ])
        
def my_metric(y_real, preds, detail=True):
    overall = (
            0.4*r2_score(y_real.iloc[:,0], preds[:,0]) + 
            0.3*r2_score(y_real.iloc[:,1], preds[:,1]) + 
            0.3*r2_score(y_real.iloc[:,2], preds[:,2])
        )
    
    if not detail:
        return overall
    else:
        return (
            overall,
            0.4*r2_score(y_real.iloc[:,0], preds[:,0]), 
            0.3*r2_score(y_real.iloc[:,1], preds[:,1]), 
            0.3*r2_score(y_real.iloc[:,2], preds[:,2])
        )
    
def write_down_predictions(preds, output_file = "output.csv"):
    solution = pd.read_csv("data\sample_solution.csv")
    solution.iloc[:,1:4] = preds
    solution.to_csv(output_file, index=False)

In [4]:
%%time
preprocess = RBKpreprocessor()
preprocess.fit(df)

Wall time: 37.2 s


RBKpreprocessor()

## Локальная валидация

In [5]:
valid_train_df, valid_test_df = train_test_split(train_df, test_size=0.2, random_state=RANDOM_STATE, shuffle=True)

In [6]:
%%time
X_train = preprocess.transform(valid_train_df)
y_train = valid_train_df[["views", "depth", "full_reads_percent"]]

X_test = preprocess.transform(valid_test_df)
y_test = valid_test_df[["views", "depth", "full_reads_percent"]]



  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


Wall time: 30.7 s


## CatboostRegressor

In [19]:
model = CatBoostRegressor(iterations=20,
                          early_stopping_rounds=3,
                          random_seed=RANDOM_STATE,
                          depth=10, 
                          loss_function='MultiRMSE',
                          verbose=True)

In [20]:
%%time
model.fit(X_train, y_train, eval_set=Pool(data=X_test, label=y_test))

Custom logger is already specified. Specify more than one logger at same time is not thread safe.Got unsafe target value = 2.5542e+06 at object #334 of dataset learn
Got unsafe target value = 2.5542e+06 at object #164 of dataset test #0


0:	learn: 85530.7791244	test: 125539.0041544	best: 125539.0041544 (0)	total: 1.79s	remaining: 33.9s
1:	learn: 85024.6884090	test: 125272.6614669	best: 125272.6614669 (1)	total: 7.08s	remaining: 1m 3s
2:	learn: 84356.0603643	test: 125010.3365896	best: 125010.3365896 (2)	total: 12.4s	remaining: 1m 10s
3:	learn: 84014.8571342	test: 124872.1330171	best: 124872.1330171 (3)	total: 17s	remaining: 1m 7s
4:	learn: 83358.3165231	test: 124596.8324311	best: 124596.8324311 (4)	total: 21.6s	remaining: 1m 4s
5:	learn: 82504.1805773	test: 124291.3635973	best: 124291.3635973 (5)	total: 26s	remaining: 1m
6:	learn: 81882.9984517	test: 124203.0536857	best: 124203.0536857 (6)	total: 30.9s	remaining: 57.4s
7:	learn: 81132.5591418	test: 123852.8156653	best: 123852.8156653 (7)	total: 35.1s	remaining: 52.7s
8:	learn: 80584.3516082	test: 123219.5671855	best: 123219.5671855 (8)	total: 39.4s	remaining: 48.1s
9:	learn: 80403.7560937	test: 122968.7477808	best: 122968.7477808 (9)	total: 43.5s	remaining: 43.5s
10:	le

<catboost.core.CatBoostRegressor at 0x239824b1f40>

In [21]:
preds = model.predict(X_test)
my_metric(y_test, preds)

(0.08884265310537756,
 0.033680461713825154,
 0.046408845992440474,
 0.008753345399111934)

In [22]:
model = CatBoostRegressor(iterations=50,
                          early_stopping_rounds=3,
                          random_seed=RANDOM_STATE,
                          depth=10, 
                          loss_function='MultiRMSE')

In [23]:
%%time
model.fit(X_train, y_train, eval_set=Pool(data=X_test, label=y_test))
preds = model.predict(X_test)
my_metric(y_test, preds)

Got unsafe target value = 2.5542e+06 at object #334 of dataset learn
Got unsafe target value = 2.5542e+06 at object #164 of dataset test #0


0:	learn: 85530.7791244	test: 125539.0041544	best: 125539.0041544 (0)	total: 1.09s	remaining: 53.3s
1:	learn: 85024.6884090	test: 125272.6614669	best: 125272.6614669 (1)	total: 2.31s	remaining: 55.3s
2:	learn: 84356.0603643	test: 125010.3365896	best: 125010.3365896 (2)	total: 7.32s	remaining: 1m 54s
3:	learn: 84014.8571342	test: 124872.1330171	best: 124872.1330171 (3)	total: 13.5s	remaining: 2m 35s
4:	learn: 83358.3165231	test: 124596.8324311	best: 124596.8324311 (4)	total: 18.3s	remaining: 2m 44s
5:	learn: 82504.1805773	test: 124291.3635973	best: 124291.3635973 (5)	total: 22.7s	remaining: 2m 46s
6:	learn: 81882.9984517	test: 124203.0536857	best: 124203.0536857 (6)	total: 26.8s	remaining: 2m 44s
7:	learn: 81132.5591418	test: 123852.8156653	best: 123852.8156653 (7)	total: 31.6s	remaining: 2m 45s
8:	learn: 80584.3516082	test: 123219.5671855	best: 123219.5671855 (8)	total: 35.8s	remaining: 2m 43s
9:	learn: 80403.7560937	test: 122968.7477808	best: 122968.7477808 (9)	total: 40.2s	remaining:

(0.17971151254219747,
 0.06627755569599025,
 0.09320289876036893,
 0.020231058085838315)

## А если сделать по одному бустинку на каждый выход, и метрику r2 оптимизировать

In [35]:
model1 = CatBoostRegressor(iterations=20,
                          early_stopping_rounds=3,
                          random_seed=RANDOM_STATE,
                          depth=10, 
                          eval_metric="R2",
                          loss_function="RMSE")
model2 = model1.copy()
model3 = model1.copy()

In [36]:
%%time
model1.fit(X_train, y_train.iloc[:,0], eval_set=Pool(data=X_test, label=y_test.iloc[:,0]))

Learning rate set to 0.5
0:	learn: 0.1457455	test: 0.0690903	best: 0.0690903 (0)	total: 450ms	remaining: 8.55s
1:	learn: 0.3192358	test: 0.1102355	best: 0.1102355 (1)	total: 867ms	remaining: 7.8s
2:	learn: 0.5451028	test: 0.2205832	best: 0.2205832 (2)	total: 1.28s	remaining: 7.27s
3:	learn: 0.6191707	test: 0.2545967	best: 0.2545967 (3)	total: 1.68s	remaining: 6.71s
4:	learn: 0.7028892	test: 0.3093535	best: 0.3093535 (4)	total: 2.1s	remaining: 6.31s
5:	learn: 0.7255360	test: 0.3272761	best: 0.3272761 (5)	total: 2.58s	remaining: 6.01s
6:	learn: 0.7407832	test: 0.3408906	best: 0.3408906 (6)	total: 3.44s	remaining: 6.4s
7:	learn: 0.7551583	test: 0.3519802	best: 0.3519802 (7)	total: 5.25s	remaining: 7.87s
8:	learn: 0.7656924	test: 0.3593509	best: 0.3593509 (8)	total: 7.54s	remaining: 9.22s
9:	learn: 0.7752481	test: 0.3665365	best: 0.3665365 (9)	total: 9.5s	remaining: 9.5s
10:	learn: 0.8156690	test: 0.4046898	best: 0.4046898 (10)	total: 11.7s	remaining: 9.57s
11:	learn: 0.8353679	test: 0.417

<catboost.core.CatBoostRegressor at 0x23982105a00>

In [37]:
%%time
model2.fit(X_train, y_train.iloc[:,1], eval_set=Pool(data=X_test, label=y_test.iloc[:,1]))

Learning rate set to 0.5
0:	learn: 0.4617745	test: 0.4599649	best: 0.4599649 (0)	total: 803ms	remaining: 15.3s
1:	learn: 0.6096927	test: 0.5611509	best: 0.5611509 (1)	total: 2.1s	remaining: 18.9s
2:	learn: 0.6751672	test: 0.6270159	best: 0.6270159 (2)	total: 3.53s	remaining: 20s
3:	learn: 0.7353214	test: 0.7108072	best: 0.7108072 (3)	total: 5.01s	remaining: 20s
4:	learn: 0.7431803	test: 0.7170892	best: 0.7170892 (4)	total: 6.92s	remaining: 20.8s
5:	learn: 0.7481745	test: 0.7170847	best: 0.7170892 (4)	total: 8.34s	remaining: 19.5s
6:	learn: 0.7532388	test: 0.7217736	best: 0.7217736 (6)	total: 9.8s	remaining: 18.2s
7:	learn: 0.7761962	test: 0.7355344	best: 0.7355344 (7)	total: 11.3s	remaining: 17s
8:	learn: 0.7800880	test: 0.7388071	best: 0.7388071 (8)	total: 12.7s	remaining: 15.5s
9:	learn: 0.7897370	test: 0.7443755	best: 0.7443755 (9)	total: 14.4s	remaining: 14.4s
10:	learn: 0.7932726	test: 0.7466024	best: 0.7466024 (10)	total: 15.7s	remaining: 12.9s
11:	learn: 0.7969714	test: 0.747523

<catboost.core.CatBoostRegressor at 0x239821059a0>

In [38]:
%%time
model3.fit(X_train, y_train.iloc[:,2], eval_set=Pool(data=X_test, label=y_test.iloc[:,2]))

Learning rate set to 0.5
0:	learn: 0.1899170	test: 0.1629200	best: 0.1629200 (0)	total: 1.81s	remaining: 34.5s
1:	learn: 0.2140387	test: 0.1630573	best: 0.1630573 (1)	total: 3.64s	remaining: 32.8s
2:	learn: 0.2328228	test: 0.1627615	best: 0.1630573 (1)	total: 5.54s	remaining: 31.4s
3:	learn: 0.3018429	test: 0.2168630	best: 0.2168630 (3)	total: 6.94s	remaining: 27.7s
4:	learn: 0.3139694	test: 0.2140174	best: 0.2168630 (3)	total: 8.49s	remaining: 25.5s
5:	learn: 0.3501655	test: 0.2402478	best: 0.2402478 (5)	total: 9.99s	remaining: 23.3s
6:	learn: 0.3574774	test: 0.2404505	best: 0.2404505 (6)	total: 11.6s	remaining: 21.5s
7:	learn: 0.3630384	test: 0.2403459	best: 0.2404505 (6)	total: 13.1s	remaining: 19.7s
8:	learn: 0.3931606	test: 0.2643137	best: 0.2643137 (8)	total: 14.5s	remaining: 17.8s
9:	learn: 0.3969076	test: 0.2588657	best: 0.2643137 (8)	total: 16.1s	remaining: 16.1s
10:	learn: 0.4003125	test: 0.2523575	best: 0.2643137 (8)	total: 17.6s	remaining: 14.4s
11:	learn: 0.4278480	test: 0

<catboost.core.CatBoostRegressor at 0x23982105d60>

In [42]:
preds = np.c_[
    model1.predict(X_test),
    model2.predict(X_test),
    model3.predict(X_test)
]
my_metric(y_test, preds)

(0.5000721823769023,
 0.17981116553200197,
 0.2285315081366026,
 0.09172950870829773)

## Выводы
Ну кажется, что оптимизация каждого параметра по отдельности более перспективна, чем считать одну модель на 3 предсказания

> Попробуем сделать обучение модели на этих параметрах, но на 2000 итерациях каждую (В разных ноутбуках)
> А потом можно будет  подобрать оптимальные гиперпараметры  для каждого признака

## Обучение дерева

In [7]:
GOAL_NUM = 1

In [8]:
model = model1 = CatBoostRegressor(iterations=1000,
                          early_stopping_rounds=30,
                          random_seed=RANDOM_STATE,
                          depth=10, 
                          eval_metric="R2",
                          loss_function="RMSE")

In [9]:
model.fit(X_train, 
          y_train.iloc[:,GOAL_NUM], 
          eval_set=Pool(
              data=X_test, 
              label=y_test.iloc[:,GOAL_NUM])
         )

Learning rate set to 0.063811
0:	learn: 0.0749527	test: 0.0753353	best: 0.0753353 (0)	total: 3.62s	remaining: 1h 15s
1:	learn: 0.1431257	test: 0.1423073	best: 0.1423073 (1)	total: 6.87s	remaining: 57m 10s
2:	learn: 0.2054815	test: 0.2034981	best: 0.2034981 (2)	total: 11.1s	remaining: 1h 1m 16s
3:	learn: 0.2586756	test: 0.2559163	best: 0.2559163 (3)	total: 15.2s	remaining: 1h 2m 57s
4:	learn: 0.3085164	test: 0.3103539	best: 0.3103539 (4)	total: 18.6s	remaining: 1h 1m 44s
5:	learn: 0.3487976	test: 0.3517140	best: 0.3517140 (5)	total: 22.2s	remaining: 1h 1m 24s
6:	learn: 0.3904519	test: 0.3943495	best: 0.3943495 (6)	total: 26.2s	remaining: 1h 2m 1s
7:	learn: 0.4268837	test: 0.4252959	best: 0.4252959 (7)	total: 29.5s	remaining: 1h 55s
8:	learn: 0.4590417	test: 0.4603266	best: 0.4603266 (8)	total: 34.2s	remaining: 1h 2m 44s
9:	learn: 0.4884992	test: 0.4900121	best: 0.4900121 (9)	total: 38.5s	remaining: 1h 3m 26s
10:	learn: 0.5099689	test: 0.5097157	best: 0.5097157 (10)	total: 42.2s	remainin

88:	learn: 0.7987599	test: 0.7755735	best: 0.7755735 (88)	total: 7m 10s	remaining: 1h 13m 22s
89:	learn: 0.8008138	test: 0.7777884	best: 0.7777884 (89)	total: 7m 14s	remaining: 1h 13m 11s
90:	learn: 0.8025087	test: 0.7790838	best: 0.7790838 (90)	total: 7m 18s	remaining: 1h 12m 59s
91:	learn: 0.8037181	test: 0.7811931	best: 0.7811931 (91)	total: 7m 22s	remaining: 1h 12m 46s
92:	learn: 0.8055121	test: 0.7819872	best: 0.7819872 (92)	total: 7m 27s	remaining: 1h 12m 44s
93:	learn: 0.8068646	test: 0.7828630	best: 0.7828630 (93)	total: 7m 32s	remaining: 1h 12m 41s
94:	learn: 0.8071763	test: 0.7829105	best: 0.7829105 (94)	total: 7m 37s	remaining: 1h 12m 38s
95:	learn: 0.8082766	test: 0.7837013	best: 0.7837013 (95)	total: 7m 42s	remaining: 1h 12m 34s
96:	learn: 0.8088317	test: 0.7838572	best: 0.7838572 (96)	total: 7m 47s	remaining: 1h 12m 30s
97:	learn: 0.8092953	test: 0.7836757	best: 0.7838572 (96)	total: 7m 52s	remaining: 1h 12m 24s
98:	learn: 0.8096549	test: 0.7836760	best: 0.7838572 (96)	to

175:	learn: 0.8519684	test: 0.8032731	best: 0.8032731 (175)	total: 13m 21s	remaining: 1h 2m 31s
176:	learn: 0.8521539	test: 0.8032707	best: 0.8032731 (175)	total: 13m 25s	remaining: 1h 2m 23s
177:	learn: 0.8526761	test: 0.8037562	best: 0.8037562 (177)	total: 13m 29s	remaining: 1h 2m 17s
178:	learn: 0.8528666	test: 0.8037441	best: 0.8037562 (177)	total: 13m 33s	remaining: 1h 2m 10s
179:	learn: 0.8534012	test: 0.8037718	best: 0.8037718 (179)	total: 13m 36s	remaining: 1h 2m 1s
180:	learn: 0.8535939	test: 0.8037108	best: 0.8037718 (179)	total: 13m 41s	remaining: 1h 1m 55s
181:	learn: 0.8544064	test: 0.8044697	best: 0.8044697 (181)	total: 13m 45s	remaining: 1h 1m 48s
182:	learn: 0.8545872	test: 0.8044476	best: 0.8044697 (181)	total: 13m 49s	remaining: 1h 1m 42s
183:	learn: 0.8551012	test: 0.8045344	best: 0.8045344 (183)	total: 13m 53s	remaining: 1h 1m 35s
184:	learn: 0.8559921	test: 0.8050237	best: 0.8050237 (184)	total: 13m 57s	remaining: 1h 1m 28s
185:	learn: 0.8561677	test: 0.8050033	bes

263:	learn: 0.8772172	test: 0.8115444	best: 0.8115444 (263)	total: 18m 44s	remaining: 52m 14s
264:	learn: 0.8773557	test: 0.8114738	best: 0.8115444 (263)	total: 18m 47s	remaining: 52m 6s
265:	learn: 0.8774553	test: 0.8114609	best: 0.8115444 (263)	total: 18m 49s	remaining: 51m 57s
266:	learn: 0.8775932	test: 0.8113151	best: 0.8115444 (263)	total: 18m 52s	remaining: 51m 48s
267:	learn: 0.8776924	test: 0.8112690	best: 0.8115444 (263)	total: 18m 55s	remaining: 51m 40s
268:	learn: 0.8777886	test: 0.8112681	best: 0.8115444 (263)	total: 18m 58s	remaining: 51m 32s
269:	learn: 0.8780719	test: 0.8113294	best: 0.8115444 (263)	total: 19m 1s	remaining: 51m 25s
270:	learn: 0.8782803	test: 0.8113871	best: 0.8115444 (263)	total: 19m 4s	remaining: 51m 17s
271:	learn: 0.8784157	test: 0.8113872	best: 0.8115444 (263)	total: 19m 7s	remaining: 51m 10s
272:	learn: 0.8787351	test: 0.8115711	best: 0.8115711 (272)	total: 19m 10s	remaining: 51m 3s
273:	learn: 0.8788321	test: 0.8115377	best: 0.8115711 (272)	total

351:	learn: 0.8916324	test: 0.8150089	best: 0.8152452 (347)	total: 22m 48s	remaining: 41m 59s
352:	learn: 0.8917003	test: 0.8149732	best: 0.8152452 (347)	total: 22m 51s	remaining: 41m 53s
353:	learn: 0.8918310	test: 0.8150474	best: 0.8152452 (347)	total: 22m 54s	remaining: 41m 48s
354:	learn: 0.8920632	test: 0.8152159	best: 0.8152452 (347)	total: 22m 57s	remaining: 41m 41s
355:	learn: 0.8925186	test: 0.8154215	best: 0.8154215 (355)	total: 23m	remaining: 41m 36s
356:	learn: 0.8925833	test: 0.8154040	best: 0.8154215 (355)	total: 23m 2s	remaining: 41m 30s
357:	learn: 0.8927089	test: 0.8154191	best: 0.8154215 (355)	total: 23m 5s	remaining: 41m 25s
358:	learn: 0.8927760	test: 0.8153715	best: 0.8154215 (355)	total: 23m 8s	remaining: 41m 19s
359:	learn: 0.8928403	test: 0.8153843	best: 0.8154215 (355)	total: 23m 11s	remaining: 41m 13s
360:	learn: 0.8929467	test: 0.8153872	best: 0.8154215 (355)	total: 23m 13s	remaining: 41m 7s
361:	learn: 0.8934338	test: 0.8155975	best: 0.8155975 (361)	total: 2

439:	learn: 0.9044387	test: 0.8174728	best: 0.8174728 (439)	total: 26m 55s	remaining: 34m 15s
440:	learn: 0.9046056	test: 0.8174491	best: 0.8174728 (439)	total: 26m 59s	remaining: 34m 12s
441:	learn: 0.9048763	test: 0.8177345	best: 0.8177345 (441)	total: 27m 1s	remaining: 34m 7s
442:	learn: 0.9049275	test: 0.8177324	best: 0.8177345 (441)	total: 27m 4s	remaining: 34m 2s
443:	learn: 0.9051504	test: 0.8176997	best: 0.8177345 (441)	total: 27m 7s	remaining: 33m 58s
444:	learn: 0.9052625	test: 0.8176781	best: 0.8177345 (441)	total: 27m 10s	remaining: 33m 53s
445:	learn: 0.9054003	test: 0.8176196	best: 0.8177345 (441)	total: 27m 12s	remaining: 33m 48s
446:	learn: 0.9054798	test: 0.8175065	best: 0.8177345 (441)	total: 27m 15s	remaining: 33m 43s
447:	learn: 0.9057611	test: 0.8175474	best: 0.8177345 (441)	total: 27m 18s	remaining: 33m 38s
448:	learn: 0.9058330	test: 0.8175303	best: 0.8177345 (441)	total: 27m 21s	remaining: 33m 33s
449:	learn: 0.9059504	test: 0.8175692	best: 0.8177345 (441)	total

527:	learn: 0.9139923	test: 0.8186495	best: 0.8187473 (516)	total: 31m 2s	remaining: 27m 45s
528:	learn: 0.9140330	test: 0.8186443	best: 0.8187473 (516)	total: 31m 6s	remaining: 27m 41s
529:	learn: 0.9141102	test: 0.8186682	best: 0.8187473 (516)	total: 31m 9s	remaining: 27m 38s
530:	learn: 0.9142687	test: 0.8187911	best: 0.8187911 (530)	total: 31m 14s	remaining: 27m 35s
531:	learn: 0.9143091	test: 0.8187912	best: 0.8187912 (531)	total: 31m 17s	remaining: 27m 31s
532:	learn: 0.9144725	test: 0.8188182	best: 0.8188182 (532)	total: 31m 20s	remaining: 27m 27s
533:	learn: 0.9146937	test: 0.8188835	best: 0.8188835 (533)	total: 31m 24s	remaining: 27m 24s
534:	learn: 0.9147353	test: 0.8188571	best: 0.8188835 (533)	total: 31m 27s	remaining: 27m 20s
535:	learn: 0.9148758	test: 0.8190000	best: 0.8190000 (535)	total: 31m 30s	remaining: 27m 16s
536:	learn: 0.9150302	test: 0.8190115	best: 0.8190115 (536)	total: 31m 33s	remaining: 27m 12s
537:	learn: 0.9151821	test: 0.8190331	best: 0.8190331 (537)	tot

<catboost.core.CatBoostRegressor at 0x26786c29c40>

In [10]:
with open(f"models/cb1000-{GOAL_NUM}.pkl", "wb") as pkl_file:
     pickle.dump(model, pkl_file)