[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/sunyeul/playground/blob/main/Interval_estimator/CatBoostで区間推定.ipynb)

In [None]:
!pip install --upgrade catboost --quiet

[K     |████████████████████████████████| 76.3 MB 1.3 MB/s 
[?25h

In [None]:
import numpy as np
import plotly.graph_objects as go

from scipy.stats import norm
from catboost import Pool, CatBoostRegressor

In [None]:
"""学習用データの作成
"""
np.random.seed(32)  # fix the global NumPy seed so the data is reproducible
n = 30  # number of distinct x locations

# Draw n locations in [-5, 5) and repeat the whole set 10 times, i.e. every
# location is observed 10 times.
x_locations = np.random.uniform(low=-5, high=5, size=n)
x_flat = np.tile(x_locations, reps=10)

# Observations follow sin(x) plus Gaussian noise with standard deviation 0.1.
# The noise is drawn while x is still 1-D so that shapes line up element-wise.
noise = 0.1 * np.random.randn(x_flat.size)
y_flat = np.sin(x_flat) + noise

# Store both as column vectors of shape (n * 10, 1).
x_train = np.reshape(x_flat, (-1, 1))
y_train = np.reshape(y_flat, (-1, 1))

# Quantile position in percent: 2.5 is the setting for a 95% confidence interval.
alpha = 2.5

In [None]:
# Wrap the training features and targets in a CatBoost Pool.
train_dataset = Pool(x_train, label=y_train)

In [None]:
"""モデル作成、学習
"""
random_seed = 0
iterations = 100

# CatBoost hyper-parameters, gathered in one place so they are easy to tweak.
catboost_params = dict(
    iterations=iterations,
    learning_rate=0.1,
    loss_function='RMSEWithUncertainty',
    random_seed=random_seed,
    verbose=False,
)
model_cat = CatBoostRegressor(**catboost_params)

# Train on the Pool; it already carries the labels, so no separate y is passed.
model_cat.fit(train_dataset)

<catboost.core.CatBoostRegressor at 0x7fa8b0d0db50>

In [None]:
"""予測
"""
# Grid of x values to predict on, as a column vector.
x_pred = np.linspace(-10, 10, num=500).reshape(-1, 1)

# CatBoost prediction: column 0 is used as the predicted mean and the square
# root of column 1 as the spread around it.
y_pred_cat = model_cat.predict(x_pred)
pred_mean = y_pred_cat[:, 0]
pred_spread = np.sqrt(y_pred_cat[:, 1])

# Standard-normal quantile for the upper tail of `alpha` percent, and the
# resulting half-width of the interval.
z_score = norm.ppf(q=1 - alpha / 100)
half_width = z_score * pred_spread

y_cat_lower = pred_mean - half_width
y_cat_middle = pred_mean
y_cat_upper = pred_mean + half_width

In [None]:
""" 可視化のため縦ベクトルを横ベクトルに直す
"""
# Flatten every array to 1-D in a single pass so they can be plotted.
x_pred, x_train, y_train = (
    column.flatten() for column in (x_pred, x_train, y_train)
)

In [None]:
"""CatBoostの結果可視化
"""
fig = go.Figure()

# Interval band. The lower bound is drawn as an invisible line (width 0) and
# the upper bound uses fill='tonexty', which shades the area down to the trace
# added immediately before it -- so `lower` must directly precede `upper`.
go_scatter_lower = go.Scatter(x=x_pred, y=y_cat_lower, line=dict(width=0), showlegend=False)
go_scatter_upper = go.Scatter(x=x_pred, y=y_cat_upper, fill='tonexty', fillcolor='rgba(30, 144, 255, 0.3)', line=dict(width=0), name=str(100-2*alpha) + '%信頼区間', legendgroup=1)
# Predicted mean; shares a legend group with the band so both toggle together.
go_scatter_middle = go.Scatter(x=x_pred, y=y_cat_middle, name='予測平均値', line=dict(color='DodgerBlue'), legendgroup=1)

# Observed training points and the noise-free generating function sin(x).
go_scatter_data = go.Scatter(x=x_train, y=y_train, mode='markers', name='観測データ', marker=dict(color='Black'))
go_scatter_func = go.Scatter(x=x_pred, y=np.sin(x_pred), name='元の関数', line=dict(color='Red', dash='dash'))

# Order matters for the fill (see above): lower, then upper, then the rest.
data = [go_scatter_lower, go_scatter_upper, go_scatter_middle, go_scatter_data, go_scatter_func]
fig.add_traces(data=data)

# Label the axes so the figure is readable on its own.
fig.update_layout(xaxis_title='x', yaxis_title='y')
fig