## Advanced options for multioutput handling

### Imports

In [1]:
import os

# Select the GPU before any CUDA-backed library is imported: order devices by
# PCI bus id and expose only the device with index 1 to this process.
# NOTE(review): the index is machine specific — adjust it for your hardware.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# folder the dataset is downloaded into
os.makedirs('../data', exist_ok=True)

# `cp` and `np` are used by later cells; import them by name so that those
# cells do not depend on names leaking in through wildcard imports.
import cupy as cp
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

from sklearn.model_selection import train_test_split

from py_boost import GradientBoosting, SketchBoost

# strategies to deal with multiple outputs (explicit names instead of `import *`)
from py_boost.multioutput.sketching import (
    RandomProjectionSketch,
    RandomSamplingSketch,
    SVDSketch,
    TopOutputsSketch,
)
from py_boost.multioutput.target_splitter import RandomGroupsSplitter

### Downloading data from OpenML

In [2]:
# Download the Helena dataset CSV from OpenML and save it as ../data/helena.csv.
!wget https://www.openml.org/data/get_csv/19335692/file1c556677f875.csv -O ../data/helena.csv

--2023-03-17 13:51:18--  https://www.openml.org/data/get_csv/19335692/file1c556677f875.csv
Resolving www.openml.org (www.openml.org)... 131.155.11.11
Connecting to www.openml.org (www.openml.org)|131.155.11.11|:443... connected.
HTTP request sent, awaiting response... 307 Temporary Redirect
Location: https://api.openml.org/data/get_csv/19335692/file1c556677f875.csv [following]
--2023-03-17 13:51:18--  https://api.openml.org/data/get_csv/19335692/file1c556677f875.csv
Resolving api.openml.org (api.openml.org)... 131.155.11.11
Connecting to api.openml.org (api.openml.org)|131.155.11.11|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/plain]
Saving to: ‘../data/helena.csv’

../data/helena.csv      [      <=>           ]  14.56M  12.0MB/s    in 1.2s    

2023-03-17 13:51:20 (12.0 MB/s) - ‘../data/helena.csv’ saved [15271704]



In [3]:
# Load the downloaded Helena table into a DataFrame.
data = pd.read_csv(filepath_or_buffer='../data/helena.csv')

# ending the cell with the frame renders a (truncated) preview
data

Unnamed: 0,class,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27
0,41,0.005521,0.080556,0.110417,0.490822,0.586406,0.066414,0.092206,0.116352,0.379310,...,-0.342986,78.6894,17.237800,21.504200,14.43730,17.378000,9.61674,-0.609370,1.044830,1.481790
1,92,0.049398,0.147917,0.541667,0.542865,0.515608,0.105128,0.475550,0.049555,0.383460,...,2.639370,59.7879,5.393410,3.819610,11.49240,3.929470,5.91423,1.409210,4.749540,1.103820
2,24,0.548663,1.000000,1.000000,0.397029,0.627398,1.023440,1.004220,0.027381,0.451337,...,0.137427,58.1429,-3.365980,-0.037489,10.63470,2.660180,3.93377,-0.898220,2.137790,1.054470
3,29,0.023073,0.206250,0.238889,0.622998,0.764067,0.202599,0.177892,0.071232,0.531712,...,0.477009,55.4798,-1.051090,-4.755360,13.36710,2.852060,9.65162,0.224397,-0.220216,-0.273287
4,91,0.224427,0.433333,0.902083,0.814199,0.576879,0.344413,0.822975,0.026121,0.425875,...,0.521306,76.8475,-19.371700,32.270700,9.41442,4.343450,8.67710,-1.587580,1.117870,-0.545338
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65191,88,0.007292,0.152083,0.061111,0.114431,0.406104,0.143170,0.053086,0.129365,0.215442,...,1.265300,53.2951,-1.416430,2.173900,13.66950,1.588520,2.02855,0.619052,0.622377,-0.363035
65192,77,0.411279,1.000000,0.430556,0.503805,0.207163,1.003740,0.412067,0.017673,0.044771,...,-2.842380,91.1178,-0.009207,-2.224830,1.30504,0.898489,1.80362,-2.726140,-0.184329,-0.476441
65193,71,0.999352,1.000000,1.000000,0.501360,0.501075,0.999384,0.999414,0.009636,0.000648,...,0.213472,84.4141,2.042450,13.849800,7.24428,1.443890,4.00495,-0.749115,1.025130,0.096257
65194,24,0.206175,0.383333,0.944444,0.749915,0.550936,0.292477,0.830921,0.033542,0.430515,...,0.879472,61.4110,17.354200,5.566660,16.22600,10.049400,6.04195,0.400956,0.375599,0.644575


In [4]:
# Count the rows per class label: shows how many distinct classes exist and how balanced they are.
data['class'].value_counts()

78    4005
55    3063
40    2992
39    2623
38    2216
      ... 
75     121
56     121
32     119
34     116
10     111
Name: class, Length: 100, dtype: int64

Looks like we have 100 classes here ...

In [5]:
# Feature matrix: every column except the label, cast to float32.
X = data.drop(columns='class').values.astype('float32')
# Target vector: the class label as int32.
y = data['class'].values.astype('int32')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X, X_test, y, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

### Training a multiclass model

Training a multiclass model works the same way as training a regression model. By default, py_boost builds multioutput trees to handle multioutput problems (for a 100-class task, a single tree outputs a vector of length 100).


In [6]:
%%time
# Baseline multiclass model: no sketching strategy and no target splitter are passed.
model = GradientBoosting(
    'crossentropy',
    ntrees=10000,         # upper bound on boosting iterations
    lr=0.03,
    verbose=100,          # log the evaluation metric every 100 iterations
    es=300,               # early stopping rounds
    lambda_l2=1,
    gd_steps=1,
    subsample=1,
    colsample=1,
    min_data_in_leaf=10,
    use_hess=True,
    max_bin=256,
    max_depth=6,
)

# the held-out split is passed as the evaluation set
model.fit(X, y, eval_sets=[{'X': X_test, 'y': y_test}])

[13:51:26] Stdout logging level is INFO.
[13:51:26] GDBT train starts. Max iter 10000, early stopping rounds 300
[13:51:26] Iter 0; Sample 0, Crossentropy = 4.285501684715281; 
[13:51:33] Iter 100; Sample 0, Crossentropy = 2.7762957662313448; 
[13:51:41] Iter 200; Sample 0, Crossentropy = 2.653432020829007; 
[13:51:49] Iter 300; Sample 0, Crossentropy = 2.612068716864302; 
[13:51:57] Iter 400; Sample 0, Crossentropy = 2.591455786897159; 
[13:52:05] Iter 500; Sample 0, Crossentropy = 2.5793296898387608; 
[13:52:14] Iter 600; Sample 0, Crossentropy = 2.5728900071483705; 
[13:52:23] Iter 700; Sample 0, Crossentropy = 2.5679543805480676; 
[13:52:32] Iter 800; Sample 0, Crossentropy = 2.565229903765837; 
[13:52:41] Iter 900; Sample 0, Crossentropy = 2.563097641548163; 
[13:52:50] Iter 1000; Sample 0, Crossentropy = 2.561640682449919; 
[13:53:00] Iter 1100; Sample 0, Crossentropy = 2.5606844985705646; 
[13:53:09] Iter 1200; Sample 0, Crossentropy = 2.560342937220701; 
[13:53:18] Iter 1300; S

<py_boost.gpu.boosting.GradientBoosting at 0x7f7791029850>

In [9]:
%%time
# Predict on the held-out rows and show the shape of the result
# (one row per test sample, one column per class).
pred = model.predict(X_test)
pred.shape

CPU times: user 44.1 ms, sys: 3.88 ms, total: 48 ms
Wall time: 46.7 ms


(13040, 100)

In [20]:
# Demonstration: the legacy `_deprecated` predictors are rejected for this
# model (the library asks for debug=True). Catch and display the error so
# that "Restart & Run All" can continue past this cell.
try:
    model._predict_deprecated(X_test)
except Exception as err:
    print(f'{type(err).__name__}: {err}')

Exception: To use _deprecated funcs pass debug=True to .reformat

### Sketching strategies to speedup training

The computational cost of training multioutput models increases drastically as the number of outputs grows. We implemented a few strategies that simplify the tree structure search via gradient matrix sketching:

* ***RandomSamplingSketch*** (recommended for use_hess=True)
* ***RandomProjectionSketch*** (recommended for use_hess=False)
* ***TopOutputsSketch***
* ***SVDSketch*** (needs RAPIDS (cuml) to be installed)

Let us check, how it works.

In [83]:
%%time

# Choose one sketching strategy; the alternatives are kept for experimentation.
sketch = RandomProjectionSketch(1)
# sketch = RandomSamplingSketch(10)
# sketch = TopOutputsSketch(10)
# sketch = SVDSketch(n_components=1)

model = GradientBoosting(
    'crossentropy',
    ntrees=10000,
    lr=0.03,
    verbose=100,
    es=300,
    lambda_l2=1,
    gd_steps=1,
    subsample=1,
    colsample=1,
    min_data_in_leaf=10,
    use_hess=False,
    max_bin=256,
    max_depth=6,
    multioutput_sketch=sketch,
    debug=True,  # required for the `_deprecated` comparison checks further down
)

model.fit(X, y, eval_sets=[{'X': X_test, 'y': y_test}])

[15:03:59] Stdout logging level is INFO.
[15:03:59] GDBT train starts. Max iter 10000, early stopping rounds 300
[15:03:59] Iter 0; Sample 0, Crossentropy = 4.3778085723456694; 
[15:04:01] Iter 100; Sample 0, Crossentropy = 2.883051295425266; 
[15:04:02] Iter 200; Sample 0, Crossentropy = 2.7344353187776522; 
[15:04:03] Iter 300; Sample 0, Crossentropy = 2.6736021979622886; 
[15:04:04] Iter 400; Sample 0, Crossentropy = 2.6400149395512034; 
[15:04:06] Iter 500; Sample 0, Crossentropy = 2.61634384367524; 
[15:04:07] Iter 600; Sample 0, Crossentropy = 2.600246497753561; 
[15:04:08] Iter 700; Sample 0, Crossentropy = 2.5880620732922472; 
[15:04:09] Iter 800; Sample 0, Crossentropy = 2.578024176804096; 
[15:04:11] Iter 900; Sample 0, Crossentropy = 2.5706890512590435; 
[15:04:12] Iter 1000; Sample 0, Crossentropy = 2.5651760386463276; 
[15:04:13] Iter 1100; Sample 0, Crossentropy = 2.5599363816837255; 
[15:04:14] Iter 1200; Sample 0, Crossentropy = 2.5562656463980735; 
[15:04:16] Iter 1300

<py_boost.gpu.boosting.GradientBoosting at 0x7f75240954c0>

In [85]:
%%time
# Predict with the sketch-trained model; the output shape is the same as for the baseline.
pred = model.predict(X_test)
pred.shape

CPU times: user 67.9 ms, sys: 191 µs, total: 68 ms
Wall time: 67.2 ms


(13040, 100)

In [24]:
# Sanity check: `predict` must match the legacy `_predict_deprecated` element for element.
(model.predict(X_test) == model._predict_deprecated(X_test)).all()

True

In [26]:
# Same exact-equality check for `predict_leaves`, restricted to iterations 100, 500 and 999.
(model.predict_leaves(X_test, iterations=[100, 500, 999]) 
     == model._predict_leaves_deprecated(X_test, iterations=[100, 500, 999])).all()

True

In [27]:
# Same exact-equality check for `predict_staged`, restricted to iterations 100, 500 and 999.
(model.predict_staged(X_test, iterations=[100, 500, 999]) 
     == model._predict_staged_deprecated(X_test, iterations=[100, 500, 999])).all()

True

In [86]:
# Wrap the trained model in a TLPredictor; construction shows a progress bar.
# NOTE(review): "TL" presumably stands for treelite — confirm against the py_boost docs.
from py_boost import TLPredictor

tl = TLPredictor(model)

100%|███████████████████████████████████████████████████████████████████████████████| 2162/2162 [01:18<00:00, 27.56it/s]


In [87]:
# Write the predictor to the `temp_mod` directory.
tl.dump('temp_mod')

In [88]:
# Archive the dumped predictor directory recursively into temp_mod.zip.
!zip -r temp_mod.zip temp_mod

  adding: temp_mod/ (stored 0%)
  adding: temp_mod/model.mod (deflated 29%)
  adding: temp_mod/predictor.pkl (deflated 16%)


In [35]:
# TLPredictor output is compared with a tolerance (`allclose`) rather than exact equality.
import numpy as np
np.allclose(tl.predict(X_test), model.predict(X_test))

True

In [49]:
%%time
# Predict on the training matrix with an explicit batch size of 100000.
model.predict(X, batch_size=100000)

CPU times: user 171 ms, sys: 32 ms, total: 203 ms
Wall time: 202 ms


array([[7.92986975e-05, 1.03937680e-04, 4.89788590e-06, ...,
        1.27210194e-04, 6.00873791e-05, 5.13385312e-05],
       [1.79671217e-04, 8.87905946e-04, 1.01042126e-04, ...,
        2.33678424e-04, 3.01559427e-04, 1.79110240e-04],
       [1.88973208e-03, 5.58319362e-03, 2.97509570e-04, ...,
        9.49864276e-03, 3.40211438e-03, 1.86627824e-03],
       ...,
       [5.76336775e-03, 4.13657224e-04, 9.53435301e-05, ...,
        1.68929156e-03, 7.25755747e-03, 2.87839514e-03],
       [5.16558357e-04, 1.85979545e-04, 3.45449334e-05, ...,
        4.18666936e-03, 1.17539717e-02, 1.48052573e-02],
       [1.65079802e-03, 1.00071644e-04, 2.73768292e-05, ...,
        5.56070218e-03, 3.71819420e-04, 4.82756505e-03]], dtype=float32)

In [60]:
# local import so this cell does not rely on `cp` leaking in from elsewhere
import cupy as cp

# Integer-typed CuPy copy of the test features, used by the following cells to
# check how `predict` handles non-float32 device input.
# `np.int` was only an alias of the builtin `int` and was removed in NumPy 1.24,
# so the builtin is used directly (same resulting dtype).
X_gpu = cp.asarray(X_test).astype(int)

In [65]:
# Explicitly casting the integer GPU array (int32, then float32) must give the same predictions as passing it as is.
(model.predict(X_gpu.astype(np.int32).astype(np.float32)) == model.predict(X_gpu)).all()

array(True)

In [66]:
# Same check with a float64 copy of the GPU array.
(model.predict(X_gpu.astype(np.float64)) == model.predict(X_gpu)).all()

array(True)

In [63]:
%%time
# Show the type of the dtype object attached to predictions made from the GPU input.
type(model.predict(X_gpu).dtype)

CPU times: user 67 ms, sys: 191 µs, total: 67.2 ms
Wall time: 66.1 ms


numpy.dtype[float32]

In [59]:
# Predictions from the GPU input (copied to host with `.get()`) compared with predictions from `X_test`.
(model.predict(X_gpu).get() == model.predict(X_test)).all()

True

In [69]:
# Only the splitter is needed in this cell; import it by name rather than
# repeating a wildcard import in the middle of the notebook.
from py_boost.multioutput.target_splitter import RandomGroupsSplitter

sketch = RandomProjectionSketch(1)

model = GradientBoosting(
    'crossentropy',
    ntrees=10000,
    lr=0.03,
    verbose=100,
    es=300,
    lambda_l2=1,
    gd_steps=1,
    subsample=1,
    colsample=1,
    min_data_in_leaf=10,
    use_hess=False,
    max_bin=256,
    max_depth=6,
    # NOTE(review): presumably partitions the outputs into 3 random groups — confirm in the py_boost docs
    target_splitter=RandomGroupsSplitter(3),
    multioutput_sketch=sketch,
    debug=True,  # required for the `_deprecated` comparison checks further down
)

model.fit(X, y, eval_sets=[{'X': X_test, 'y': y_test}])

[14:55:46] Stdout logging level is INFO.
[14:55:46] GDBT train starts. Max iter 10000, early stopping rounds 300
[14:55:46] Iter 0; Sample 0, Crossentropy = 4.341088863432013; 
[14:55:49] Iter 100; Sample 0, Crossentropy = 2.8523654162821717; 
[14:55:53] Iter 200; Sample 0, Crossentropy = 2.7148256158758324; 
[14:55:56] Iter 300; Sample 0, Crossentropy = 2.6592810472134083; 
[14:56:00] Iter 400; Sample 0, Crossentropy = 2.627524283137471; 
[14:56:03] Iter 500; Sample 0, Crossentropy = 2.6067315133499114; 
[14:56:07] Iter 600; Sample 0, Crossentropy = 2.592725673805568; 
[14:56:10] Iter 700; Sample 0, Crossentropy = 2.5819170865334966; 
[14:56:14] Iter 800; Sample 0, Crossentropy = 2.5733000947590847; 
[14:56:17] Iter 900; Sample 0, Crossentropy = 2.5662782993664712; 
[14:56:21] Iter 1000; Sample 0, Crossentropy = 2.5608393948765977; 
[14:56:24] Iter 1100; Sample 0, Crossentropy = 2.556933511002062; 
[14:56:27] Iter 1200; Sample 0, Crossentropy = 2.553822672816694; 
[14:56:31] Iter 1300

<py_boost.gpu.boosting.GradientBoosting at 0x7f75248e4340>

In [73]:
%%time
# Predict on the training matrix with the target-splitter model.
model.predict(X)

CPU times: user 159 ms, sys: 40 ms, total: 199 ms
Wall time: 198 ms


array([[4.4835768e-05, 5.7420519e-05, 1.4245013e-06, ..., 5.9342256e-05,
        4.3078064e-05, 2.4062110e-05],
       [1.4327238e-04, 5.5339001e-04, 5.9456692e-05, ..., 1.8374207e-04,
        2.4802223e-04, 1.0591723e-04],
       [2.3480167e-03, 4.0593329e-03, 3.1008717e-04, ..., 9.8660719e-03,
        4.6063806e-03, 1.4488546e-03],
       ...,
       [4.9772295e-03, 7.4964983e-04, 1.4158348e-04, ..., 3.4838906e-03,
        1.0539060e-02, 4.0259738e-03],
       [4.1143541e-04, 1.1095267e-04, 3.0307368e-05, ..., 4.4401903e-03,
        8.4680347e-03, 1.1251144e-02],
       [1.6046517e-03, 1.1743393e-04, 1.9439613e-05, ..., 3.1640921e-03,
        4.5393794e-04, 3.1366977e-03]], dtype=float32)

In [75]:
%%time

# Exact-equality check of `predict` against `_predict_deprecated` for the target-splitter model.
(model.predict(X_test) == model._predict_deprecated(X_test)).all()

CPU times: user 1.69 s, sys: 636 ms, total: 2.33 s
Wall time: 2.34 s


True

In [77]:
%%time
# Exact-equality check of `predict_leaves` against its `_deprecated` counterpart (iterations 100, 500, 999).
(model.predict_leaves(X_test, iterations=[100, 500, 999]) 
     == model._predict_leaves_deprecated(X_test, iterations=[100, 500, 999])).all()

CPU times: user 7.82 ms, sys: 0 ns, total: 7.82 ms
Wall time: 6.84 ms


True

In [78]:
%%time

# Exact-equality check of `predict_staged` against its `_deprecated` counterpart (iterations 100, 500, 999).
(model.predict_staged(X_test, iterations=[100, 500, 999]) 
     == model._predict_staged_deprecated(X_test, iterations=[100, 500, 999])).all()

CPU times: user 859 ms, sys: 316 ms, total: 1.17 s
Wall time: 1.18 s


True

In [79]:
# Demonstration of a current limitation: TLPredictor rejects models with more
# than one target group. Catch and display the assertion so that
# "Restart & Run All" can continue; on failure `tl` keeps its previous value.
try:
    tl = TLPredictor(model)
except AssertionError as err:
    print(f'{type(err).__name__}: {err}')

  0%|                                                                                          | 0/2020 [00:00<?, ?it/s]


AssertionError: Models with more than 1 group are not currently supported

### SketchBoost

Alternatively, you can use the SketchBoost class, which has built-in sketching strategies. Just define the number of outputs to keep.

In [80]:
%%time
# SketchBoost with its built-in sketching defaults: no sketch object is passed explicitly.
model = SketchBoost(
    'crossentropy',
    ntrees=10000,
    lr=0.03,
    verbose=100,
    es=300,
    lambda_l2=1,
    gd_steps=1,
    subsample=1,
    colsample=1,
    min_data_in_leaf=10,
    max_bin=256,
    max_depth=6,
)

model.fit(X, y, eval_sets=[{'X': X_test, 'y': y_test}])

[15:02:03] Stdout logging level is INFO.
[15:02:03] GDBT train starts. Max iter 10000, early stopping rounds 300
[15:02:03] Iter 0; Sample 0, Crossentropy = 4.373896923333266; 
[15:02:04] Iter 100; Sample 0, Crossentropy = 2.9071790400138897; 
[15:02:05] Iter 200; Sample 0, Crossentropy = 2.751372790040917; 
[15:02:07] Iter 300; Sample 0, Crossentropy = 2.689612644762575; 
[15:02:08] Iter 400; Sample 0, Crossentropy = 2.654457912345847; 
[15:02:09] Iter 500; Sample 0, Crossentropy = 2.6289987805125206; 
[15:02:10] Iter 600; Sample 0, Crossentropy = 2.6110376247041267; 
[15:02:12] Iter 700; Sample 0, Crossentropy = 2.5964840952865718; 
[15:02:13] Iter 800; Sample 0, Crossentropy = 2.58601272934291; 
[15:02:14] Iter 900; Sample 0, Crossentropy = 2.577037757546524; 
[15:02:16] Iter 1000; Sample 0, Crossentropy = 2.57100911166211; 
[15:02:17] Iter 1100; Sample 0, Crossentropy = 2.5650401636088813; 
[15:02:18] Iter 1200; Sample 0, Crossentropy = 2.560716934282162; 
[15:02:19] Iter 1300; Sam

<py_boost.gpu.sketch_boost.SketchBoost at 0x7f751c7ec400>

In [82]:
# Predict with the SketchBoost model and show the output shape.
pred = model.predict(X_test)
pred.shape

(13040, 100)

#### We see a nice speed up and sometimes even a better accuracy!

#### These modifications allow us to train a model faster than CatBoost does (with a similar setup).

In [None]:
%%time
from catboost import CatBoostClassifier

# CatBoost reference run on GPU, configured to resemble the py_boost setup above
# (same depth, learning rate, L2 regularisation, bin count and iteration cap).
model = CatBoostClassifier(
    task_type='GPU',
    devices='0:0',
    verbose=100,
    iterations=10000,
    od_wait=300,
    learning_rate=0.03,
    max_depth=6,
    grow_policy='Depthwise',
    min_data_in_leaf=10,
    l2_leaf_reg=1,
    border_count=256,
    bootstrap_type='Bernoulli',
    subsample=1.,
    score_function='L2',
    model_shrink_mode='Constant',
)

model.fit(X, y, eval_set=(X_test, y_test))