In [1]:
import os
# Choose the GPU this notebook runs on: enumerate devices in PCI bus order
# and expose only the device with index "1". Both variables are set before
# py_boost is imported further down in this cell.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1",
})

import numpy as np
from sklearn.datasets import make_classification, make_regression

from py_boost import GradientBoosting 

In [2]:
%%time
X, y = make_regression(150000, 100, n_targets=10, random_state=42)
X = X.astype(np.float32)
X_test, y_test = X[:50000], y[:50000]
X, y = X[-50000:], y[-50000:]

CPU times: user 2.15 s, sys: 1.57 s, total: 3.72 s
Wall time: 831 ms


In [4]:
%%time
model = GradientBoosting('mse', 'r2_score',
                         ntrees=1000, lr=.1, verbose=100, es=200, lambda_l2=1,
                         subsample=.8, colsample=.8, min_data_in_leaf=10,
                         max_bin=256, max_depth=6)
model.fit(X, y[:, 0], 
          eval_sets = [
              {'X': X_test, 'y': y_test[:, 0]}, 
          ]
         )

[13:41:46] Stdout logging level is INFO.
[13:41:46] GDBT train starts. Max iter 1000, early stopping rounds 200
[13:41:47] Iter 0; Sample 0, R2_score = 0.07438634372555475; 
[13:41:48] Iter 100; Sample 0, R2_score = 0.9877549755188597; 
[13:41:49] Iter 200; Sample 0, R2_score = 0.9906494384457265; 
[13:41:50] Iter 300; Sample 0, R2_score = 0.9909321804947996; 
[13:41:51] Iter 400; Sample 0, R2_score = 0.9910298856821315; 
[13:41:52] Iter 500; Sample 0, R2_score = 0.9911125353816248; 
[13:41:53] Iter 600; Sample 0, R2_score = 0.9911402694434043; 
[13:41:54] Iter 700; Sample 0, R2_score = 0.9911714811053071; 
[13:41:55] Iter 800; Sample 0, R2_score = 0.9911579905713459; 
[13:41:57] Iter 900; Sample 0, R2_score = 0.9911550964716098; 
[13:41:57] Early stopping at iter 922, best iter 722, best_score 0.9911740777172078
CPU times: user 12.7 s, sys: 1.16 s, total: 13.9 s
Wall time: 12.8 s


<py_boost.gpu.boosting.GradientBoosting at 0x7f138029fca0>

In [None]:
%%time
model = GradientBoosting('mse', 'r2', 
                         ntrees=2000, lr=.01, verbose=100, es=200, lambda_l2=1,
                         subsample=.8, colsample=.8, min_data_in_leaf=10,
                         max_bin=256, max_depth=8)
model.fit(X, y, 
          eval_sets = [
              {'X': X, 'y': y}, 
              {'X': X_test, 'y': y_test}
          ]
         )

[13:41:09] Stdout logging level is INFO2.
[13:41:09] GDBT train starts. Max iter 2000, early stopping rounds 200
[13:41:09] Iter 0; Sample 0, R2_score = 0.010256408618130087;  Sample 1, R2_score = 0.009789836832384413; 
[13:41:12] Iter 100; Sample 0, R2_score = 0.6237097084436416;  Sample 1, R2_score = 0.5949179752753461; 
[13:41:15] Iter 200; Sample 0, R2_score = 0.8299463376505928;  Sample 1, R2_score = 0.7972185467858299; 
[13:41:18] Iter 300; Sample 0, R2_score = 0.9177264578290203;  Sample 1, R2_score = 0.8887807155839937; 
[13:41:21] Iter 400; Sample 0, R2_score = 0.9584496629748219;  Sample 1, R2_score = 0.9344900582799159; 
[13:41:24] Iter 500; Sample 0, R2_score = 0.9780890043254086;  Sample 1, R2_score = 0.9586314490380425; 


In [5]:
%%time
model = GradientBoosting('mse', 
                         ntrees=2000, lr=.01, verbose=100, es=200, lambda_l2=1,
                         subsample=.8, colsample=.8, min_data_in_leaf=10,
                         max_bin=256, max_depth=8, target_splitter='OneVsAll')
model.fit(X, y, 
          eval_sets = [
              {'X': X, 'y': y}, 
              {'X': X_test, 'y': y_test}
          ]
         )

Iter 0; Sample 0, score = 179.12324045227626;  Sample 1, score = 178.5527616037698; 
Iter 100; Sample 0, score = 99.5956700642039;  Sample 1, score = 103.33447884698649; 
Iter 200; Sample 0, score = 61.255816755925096;  Sample 1, score = 67.87146591156528; 
Iter 300; Sample 0, score = 40.146739327207335;  Sample 1, score = 48.50921889753441; 
Iter 400; Sample 0, score = 27.544164811371594;  Sample 1, score = 36.93368816492511; 
Iter 500; Sample 0, score = 19.651848143801097;  Sample 1, score = 29.642689281600365; 
Iter 600; Sample 0, score = 14.630266789584756;  Sample 1, score = 24.937504836623113; 
Iter 700; Sample 0, score = 11.456814563574161;  Sample 1, score = 21.862548617819463; 
Iter 800; Sample 0, score = 9.470113084280912;  Sample 1, score = 19.825650839050073; 
Iter 900; Sample 0, score = 8.25229718448769;  Sample 1, score = 18.468577195323558; 
Iter 1000; Sample 0, score = 7.49936808439699;  Sample 1, score = 17.547041370324568; 
Iter 1100; Sample 0, score = 7.0252230711530

<py_boost.gpu.boosting.GradientBoosting at 0x7f8f520ec6a0>

In [6]:
# NOTE(review): wildcard import. RandomGroupsSplitter, used in a later cell,
# is not imported anywhere else in the visible notebook, so it presumably
# comes from this module - prefer an explicit import once confirmed.
from py_boost.sampling.target_splitter import *

In [7]:
%%time
model = GradientBoosting('mse', 
                         ntrees=2000, lr=.01, verbose=100, es=200, lambda_l2=1,
                         subsample=.8, colsample=.8, min_data_in_leaf=10,
                         max_bin=256, max_depth=8, target_splitter=RandomGroupsSplitter(2))
model.fit(X, y, 
          eval_sets = [
              {'X': X, 'y': y}, 
              {'X': X_test, 'y': y_test}
          ])

Iter 0; Sample 0, score = 179.2460777763383;  Sample 1, score = 178.67696388647525; 
Iter 100; Sample 0, score = 108.08098298391276;  Sample 1, score = 111.88127933113428; 
Iter 200; Sample 0, score = 69.64128866636518;  Sample 1, score = 76.20619580333553; 
Iter 300; Sample 0, score = 46.600844949160475;  Sample 1, score = 54.78188608355994; 
Iter 400; Sample 0, score = 31.954479946143096;  Sample 1, score = 41.1110594490837; 
Iter 500; Sample 0, score = 22.49775869073082;  Sample 1, score = 32.23539407266256; 
Iter 600; Sample 0, score = 16.444437587243776;  Sample 1, score = 26.472972072640065; 
Iter 700; Sample 0, score = 12.6941105297289;  Sample 1, score = 22.763459655435106; 
Iter 800; Sample 0, score = 10.405959311366699;  Sample 1, score = 20.3429295053605; 
Iter 900; Sample 0, score = 9.036731029616549;  Sample 1, score = 18.741360726529646; 
Iter 1000; Sample 0, score = 8.229365330541649;  Sample 1, score = 17.68068729977043; 
Iter 1100; Sample 0, score = 7.735863616925175; 

<py_boost.gpu.boosting.GradientBoosting at 0x7f8f5230a280>