In [1]:
from ngboost import NGBRegressor

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [2]:
X, y = load_boston(True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)



In [3]:
ngb = NGBRegressor().fit(X_train, y_train)
y_preds = ngb.predict(X_test)
y_dists = ngb.pred_dist(X_test)

[iter 0] loss=3.6580 val_loss=0.0000 scale=1.0000 norm=6.9011
[iter 100] loss=2.7713 val_loss=0.0000 scale=2.0000 norm=5.2672
[iter 200] loss=2.2075 val_loss=0.0000 scale=2.0000 norm=3.5009
[iter 300] loss=1.9162 val_loss=0.0000 scale=2.0000 norm=2.9835
[iter 400] loss=1.7729 val_loss=0.0000 scale=1.0000 norm=1.3712


In [4]:
test_mse = mean_squared_error(y_preds, y_test)
print('test MSE: ', test_mse)

test MSE:  6.286404645599724


In [6]:
test_nll = -y_dists.logpdf(y_test).mean()
print('test NLL: ', test_nll)

test NLL:  2.7606787244753614


In [7]:
# テストセットの最初の5つの予測値の平均と標準偏差
y_dists[:5].params

{'loc': array([17.10336786, 23.11443264, 20.38513284, 14.75620587, 16.93061225]),
 'scale': array([1.1952456 , 1.44391403, 1.34923582, 2.51317375, 1.74519129])}

In [8]:
from ngboost.distns import Exponential, Normal

ngb_norm = NGBRegressor(Dist=Normal).fit(X_train, y_train)
ngb_exp = NGBRegressor(Dist=Exponential).fit(X_train, y_train)

[iter 0] loss=3.6580 val_loss=0.0000 scale=1.0000 norm=6.9011
[iter 100] loss=2.7713 val_loss=0.0000 scale=2.0000 norm=5.2672
[iter 200] loss=2.2075 val_loss=0.0000 scale=2.0000 norm=3.5009
[iter 300] loss=1.9162 val_loss=0.0000 scale=2.0000 norm=2.9835
[iter 400] loss=1.7729 val_loss=0.0000 scale=1.0000 norm=1.3712
[iter 0] loss=4.1275 val_loss=0.0000 scale=1.0000 norm=0.2995
[iter 100] loss=4.0557 val_loss=0.0000 scale=2.0000 norm=0.2133
[iter 200] loss=4.0499 val_loss=0.0000 scale=2.0000 norm=0.1588
[iter 300] loss=4.0485 val_loss=0.0000 scale=1.0000 norm=0.0698
[iter 400] loss=4.0477 val_loss=0.0000 scale=2.0000 norm=0.1257


In [10]:
ngb_norm.predict(X_test)[:5]

array([16.92633767, 23.44031883, 20.03124883, 15.55709207, 16.90218892])

In [11]:
ngb_exp.predict(X_test)[:5]

array([16.92633767, 23.44031883, 20.03124883, 15.55709207, 16.90218892])

In [12]:
ngb_exp.pred_dist(X_test)[:5].params

{'scale': array([16.92633767, 23.44031883, 20.03124883, 15.55709207, 16.90218892])}

In [13]:
import numpy as np
from ngboost import NGBSurvival
from ngboost.distns import LogNormal

X, Y = load_boston(True)
X_surv_train, X_surv_test, Y_surv_train, Y_surv_test = train_test_split(X, Y, test_size=0.2)

# introduce administrative censoring to simulate survival data
T_surv_train = np.minimum(Y_surv_train, 30) # time of an event or censoring
E_surv_train = Y_surv_train > 30 # 1 if T[i] is the time of an event, 0 if it's a time of censoring

ngb = NGBSurvival(Dist=LogNormal).fit(X_surv_train, T_surv_train, E_surv_train)



[iter 0] loss=1.2801 val_loss=0.0000 scale=4.0000 norm=2.3218
[iter 100] loss=0.5722 val_loss=0.0000 scale=2.0000 norm=0.8470
[iter 200] loss=0.3160 val_loss=0.0000 scale=2.0000 norm=0.5059
[iter 300] loss=0.1231 val_loss=0.0000 scale=2.0000 norm=0.2619
[iter 400] loss=0.0033 val_loss=0.0000 scale=2.0000 norm=0.2021
