In [69]:
from sklearn.pipeline import Pipeline
import numpy as np
import pandas as pd
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor
from sklearn.feature_selection import RFE
from catboost import CatBoostRegressor

In [48]:
X, y = make_regression(n_samples=3000, n_features=1000, n_targets=3, n_informative=250)

In [81]:
X[:, :1].flatten()

array([-1.38663094, -1.39473964, -1.11871211, ..., -0.17257963,
       -1.6368848 ,  0.52534067])

In [82]:
# Latest

"""fs_est = CatBoostRegressor(n_estimators=500, verbose=0,
                           early_stopping_rounds=20, loss_function="RMSE",
                           allow_writing_files=False)"""
fs_est = LinearRegression(n_jobs=-1)
fs = RFE(fs_est, n_features_to_select=500, step=0.2, verbose=1)
estimator = HistGradientBoostingRegressor(early_stopping=True, validation_fraction=0.1, verbose=1)

pipe = Pipeline([
    ("feature_selection", fs),
    ("estimator", estimator),
])

eval_set = (X[:400], y[:400])
fit_params = {"estimator__sample_weight": X[:, :1].flatten()}

mo_estimator = MultiOutputRegressor(pipe, n_jobs=-1)

In [83]:
mo_estimator.fit(X, y, **fit_params)

Fitting estimator with 1000 features.
Fitting estimator with 800 features.
Fitting estimator with 600 features.
Binning 0.011 GB of training data: Fitting estimator with 1000 features.
Fitting estimator with 800 features.
Fitting estimator with 600 features.
Binning 0.011 GB of training data: Fitting estimator with 1000 features.
Fitting estimator with 800 features.
Fitting estimator with 600 features.
Binning 0.011 GB of training data: 0.229 s
Binning 0.001 GB of validation data: 0.003 s
Fitting gradient boosted rounds:
[1/100] 1 tree, 16 leaves, max depth = 13, train loss: -4786220332.37802, val loss: 9016006126511.31445, in 0.017s
[2/100] 0.227 s
Binning 0.001 GB of validation data: 0.227 s
Binning 0.001 GB of validation data: 0.003 s
Fitting gradient boosted rounds:
[1/100] 0.003 s
Fitting gradient boosted rounds:
[1/100] 1 tree, 22 leaves, max depth = 16, train loss: -25625259512048096.00000, val loss: 27288120574440579072.00000, in 0.018s
[3/100] 1 tree, 15 leaves, max depth = 12

In [85]:
len(mo_estimator.estimators_[1].named_steps.feature_selection.get_feature_names_out())

500

1 tree, 26 leaves, max depth = 15, train loss: -1648178288852615262065634420612085365244529611706458816467826530332966912.00000, val loss: 1759693174978142885129472552315756164267347377167697021757692884301161431040.00000, in 0.022s
Fit 10 trees in 0.429 s, (207 total leaves)
Time spent computing histograms: 0.086s
Time spent finding best splits:  0.066s
Time spent applying splits:      0.003s
Time spent predicting:           0.000s
1 tree, 1 leaves, max depth = 0, train loss: -176691615578002211816379006015662311140751437328767181149728814672486858752.00000, val loss: 796348717870812687322082197170722313336956960725359241881839469421637912231936.00000, in 0.001s
Fit 14 trees in 0.439 s, (233 total leaves)
Time spent computing histograms: 0.094s
Time spent finding best splits:  0.070s
Time spent applying splits:      0.002s
Time spent predicting:           0.000s
1 tree, 1 leaves, max depth = 0, train loss: -37159037571523863485017686550692237574612319185678262590201473310528634880.000

In [93]:
mo_estimator.estimators_[0].named_steps.estimator.n_iter_

14

In [47]:
fs.

array([3, 5, 1, ..., 1, 4, 3])

In [22]:
mo_estimator.estimators_

[Pipeline(steps=[('feature_selection',
                  RFE(estimator=RandomForestRegressor(n_estimators=500,
                                                      n_jobs=-1),
                      n_features_to_select=500, step=50, verbose=2)),
                 ('clf_model',
                  <catboost.core.CatBoostRegressor object at 0x16e3e5df0>)]),
 Pipeline(steps=[('feature_selection',
                  RFE(estimator=RandomForestRegressor(n_estimators=500,
                                                      n_jobs=-1),
                      n_features_to_select=500, step=50, verbose=2)),
                 ('clf_model',
                  <catboost.core.CatBoostRegressor object at 0x154a60b20>)]),
 Pipeline(steps=[('feature_selection',
                  RFE(estimator=RandomForestRegressor(n_estimators=500,
                                                      n_jobs=-1),
                      n_features_to_select=500, step=50, verbose=2)),
                 ('clf_model',
          

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [None]:
cb_estimator = CatBoostRegressor(iterations=1000, loss_function='MultiRMSE')

In [None]:
cb_estimator.fit(X, y)

In [None]:
cb_estimator.predict(X[:2])

In [None]:
x = np.random.randint(0,2, 1000)
X = pd.Series(data=x)

In [None]:
X.shift(1).rolling(5).max()

In [None]:
def contains_str(df):
    if df.

In [None]:
y = pd.Series(['test', 'no_test', 'test', 'no_test', 'test', 'no_test', 'test', 'no_test', 'test', 'no_test', 'test', 'no_test'])

In [None]:
y.shift(1).rolling(5).apply(contains_str)

In [None]:
s = 'hello'

In [None]:
s.find('hoe')