# データ準備

In [1]:
import xgboost as xgb
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression

In [2]:

# Synthetic multi-target regression problem: 1000 samples, 10 features (all
# informative) and 5 targets. coef=True additionally returns the coefficient
# matrix used to generate the targets; the fixed random_state keeps the data
# reproducible across runs.
X, y, coef = make_regression(
    n_samples=1000, n_features=10, n_informative=10,
    bias=1, n_targets=5, shuffle=True, coef=True, random_state=42
)

In [3]:
# Sanity-check the generated shapes: features, targets, generating coefficients.
print(X.shape)
print(y.shape)
print(coef.shape)

(1000, 10)
(1000, 5)
(10, 5)


In [4]:
def add_dummy_feature(X):
    """Return a copy of ``X`` with a leading column of ones prepended.

    ``X`` must expose ``.shape``; the result has one more column than ``X``
    and the same number of rows.
    """
    n_rows = X.shape[0]
    ones_column = np.ones(n_rows)
    return np.column_stack((ones_column, X))

def trainTestSplit(X, y):
    """Split ``X`` and ``y`` into train/test parts with a fixed 80/20 ratio.

    Delegates to ``train_test_split`` with ``test_size=0.2`` and
    ``random_state=42`` and returns its result unchanged.
    """
    split_options = {"test_size": .2, "random_state": 42}
    return train_test_split(X, y, **split_options)

def preprocess(X, y):
    """Prepend the constant column to ``X`` and split the data.

    Returns the 4-tuple ``(X_train, X_test, y_train, y_test)``.
    """
    splits = trainTestSplit(add_dummy_feature(X), y)
    # Unpack explicitly so an unexpected number of pieces still fails loudly.
    X_train, X_test, y_train, y_test = splits
    return X_train, X_test, y_train, y_test

In [5]:
# Add the constant column to X and split into train/test sets (test_size=0.2).
X_train, X_test, y_train, y_test = preprocess(X, y)

# 学習・評価

In [6]:
def rmse_model(
    X_train, y_train, X_test, y_test, strategy: str
) -> "tuple[np.ndarray, np.ndarray]":
    """Fit a small XGBoost regressor and report its RMSE on the test split.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``fit``.
    X_test, y_test
        Held-out features and targets. They are also supplied as ``eval_set``,
        so the validation RMSE is logged for every boosting round during fit.
    strategy
        Forwarded to ``XGBRegressor(multi_strategy=...)``, e.g.
        ``"one_output_per_tree"`` or ``"multi_output_tree"``.

    Returns
    -------
    (y_pred, rmse_values)
        ``y_pred`` is the prediction for ``X_test``; ``rmse_values`` is the
        RMSE reduced over axis 0, i.e. one value per target column when the
        targets are 2-D.
    """
    reg = xgb.XGBRegressor(
        tree_method="hist",
        n_estimators=10,
        n_jobs=16,
        max_depth=8,
        multi_strategy=strategy,
        subsample=0.6,
    )
    reg.fit(X_train, y_train, eval_set=[(X_test, y_test)])

    y_pred = reg.predict(X_test)

    # Coerce the targets to an ndarray so the result is always a NumPy array,
    # even when y_test is a DataFrame or a nested list.
    residuals = y_pred - np.asarray(y_test)
    rmse_values = np.sqrt(np.mean(residuals ** 2, axis=0))

    return y_pred, rmse_values

## one_output_per_treeモード

In [7]:
%%time

# Baseline run with multi_strategy='one_output_per_tree' (per the XGBoost
# docs: one model per target). The per-round validation RMSE is printed by fit.
y_pred_one_output, rmse_one_output = (
    rmse_model(X_train, y_train, X_test, y_test, 'one_output_per_tree')
)

[0]	validation_0-rmse:142.39924
[1]	validation_0-rmse:125.13344
[2]	validation_0-rmse:113.81898
[3]	validation_0-rmse:104.44637
[4]	validation_0-rmse:97.31021
[5]	validation_0-rmse:91.56373
[6]	validation_0-rmse:87.15631
[7]	validation_0-rmse:84.14150
[8]	validation_0-rmse:81.83047
[9]	validation_0-rmse:79.94199
CPU times: user 687 ms, sys: 1.8 s, total: 2.49 s
Wall time: 1.51 s


In [8]:
# Peek at the first three test predictions (one column per target).
y_pred_one_output[:3]

array([[  39.61588  ,  -86.344864 ,  189.01514  ,   66.54641  ,
         204.35556  ],
       [-189.94342  ,   49.701653 ,   -4.8221784,    3.07233  ,
         -32.576298 ],
       [   4.4479485,  -17.94181  ,  -27.173563 , -111.497055 ,
           8.070973 ]], dtype=float32)

In [9]:
# Test-set RMSE of the one_output_per_tree model, one value per target column.

rmse_one_output

array([ 94.13737619,  45.89503974,  87.60233596,  48.79260519,
       104.54912595])

## multi_output_treeモード：XGBoost v2.0で追加された機能

In [10]:
%%time

# Same data and hyperparameters, but with multi_strategy='multi_output_tree'
# (per the XGBoost docs: multi-target trees), for comparison with the
# one_output_per_tree run.
y_pred_multi_output, rmse_multi_output = (
    rmse_model(X_train, y_train, X_test, y_test, 'multi_output_tree')
)

[0]	validation_0-rmse:145.68274
[1]	validation_0-rmse:131.97485
[2]	validation_0-rmse:119.99593
[3]	validation_0-rmse:112.19433
[4]	validation_0-rmse:105.89406
[5]	validation_0-rmse:99.73904
[6]	validation_0-rmse:95.55826
[7]	validation_0-rmse:92.93062
[8]	validation_0-rmse:90.29302
[9]	validation_0-rmse:88.30450
CPU times: user 965 ms, sys: 986 ms, total: 1.95 s
Wall time: 1.08 s


In [11]:
# Peek at the first three test predictions (one column per target).
y_pred_multi_output[:3]

array([[ 125.61726 ,  -38.236664,   59.608208,   30.832848,  140.96844 ],
       [ -99.362465,   23.78672 ,  -27.81465 ,   21.297115,  -51.148952],
       [ -26.781939,  -48.608788,  -54.31098 , -126.385284,   31.205887]],
      dtype=float32)

In [12]:
# Test-set RMSE of the multi_output_tree model, one value per target column.

rmse_multi_output

array([100.42834274,  54.99396456,  93.91773181,  60.69883529,
       115.64320304])