In [3]:
!pip install git+https://github.com/StatMixedML/XGBoostLSS.git

Defaulting to user installation because normal site-packages is not writeable
Collecting git+https://github.com/StatMixedML/XGBoostLSS.git
  Cloning https://github.com/StatMixedML/XGBoostLSS.git to /private/var/folders/m5/qjbw9mwx66n97zmrlsfzns380000gn/T/pip-req-build-sn498swt
  Running command git clone --filter=blob:none --quiet https://github.com/StatMixedML/XGBoostLSS.git /private/var/folders/m5/qjbw9mwx66n97zmrlsfzns380000gn/T/pip-req-build-sn498swt
  Resolved https://github.com/StatMixedML/XGBoostLSS.git to commit 03a860da4de1a6a4d6c2db55be879bf6ea681f36
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting xgboost~=2.0.3 (from xgboostlss==0.4.0)
  Downloading xgboost-2.0.3-py3-none-macosx_12_0_arm64.whl.metadata (2.0 kB)
Collecting torch~=2.1.2 (from xgboostlss==0.4.0)
  Downloading torch-2.1.2-cp39-none-macosx_11_0_arm64.whl.metadata (25 kB)
Collecting pyro-ppl~=1.8.6 (from xgboostlss==0.4.0)
  Downloading pyro_ppl-1.8.6-py3-none-any.whl.metadata (7.8 kB)
Collecting o

In [4]:
!pip install fbm

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip[0m


In [5]:
import multiprocessing

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from fbm import FBM
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import xgboost as xgb
from xgboostlss.model import XGBoostLSS
from xgboostlss.distributions.Gaussian import Gaussian

import plotnine
from plotnine import *
plotnine.options.figure_size = (12, 8)

from xgboostlss.distributions.Gaussian import Gaussian

def plot_fbm(data, H):
    """Show one fBm sample path as a line plot.

    Args:
        data: Sequence of path values; plotted against its positional index.
        H: Hurst exponent; only used to build the legend label and the title.
    """
    series_label = f'fBm with H={H}'
    heading = f'Fractional Brownian Motion with H={H}'

    plt.figure(figsize=(10, 6))
    plt.plot(data, label=series_label)
    plt.legend()
    plt.grid(True)
    plt.xlabel('Time Step')
    plt.ylabel('Value')
    plt.title(heading)
    plt.show()

def generate_fbm(H, N):
    """Sample one fractional Brownian motion path using the `fbm` package.

    Args:
        H: Hurst exponent, forwarded to FBM as `hurst`.
        N: Requested number of points; FBM is configured with n = N - 1.
            NOTE(review): this presumably makes fbm() return N values
            (n increments plus the starting point) - confirm against the fbm docs.

    Returns:
        The result of `FBM.fbm()` for a process of length 1 simulated with
        the 'daviesharte' method.
    """
    path_generator = FBM(n=N - 1, hurst=H, length=1, method='daviesharte')
    return path_generator.fbm()

def prepare_data(data, past_steps, future_steps):
    """Build a sliding-window supervised dataset from a series.

    Every sample pairs `past_steps` consecutive values (the features) with the
    mean and the standard deviation (np.std defaults) of the `future_steps`
    values that immediately follow them. Windows advance one step at a time.

    Args:
        data: Sequence of observations (anything np.asarray accepts).
        past_steps: Number of values in each feature window; must be >= 1.
        future_steps: Number of values summarised into each target; must be >= 1.

    Returns:
        Tuple (X, y_mean, y_std) of NumPy arrays. With
        n_windows = max(len(data) - past_steps - future_steps + 1, 0), X holds
        n_windows rows of `past_steps` values and y_mean / y_std hold one entry
        per row. When no complete window fits, X has shape (0, past_steps).

    Raises:
        ValueError: If past_steps or future_steps is smaller than 1.
    """
    if past_steps < 1 or future_steps < 1:
        raise ValueError("past_steps and future_steps must both be >= 1")

    data = np.asarray(data)
    total_steps = past_steps + future_steps
    # +1 keeps the final complete window, i.e. the one whose future part ends
    # exactly on the last observation (the previous bound silently dropped it).
    n_windows = max(len(data) - total_steps + 1, 0)

    if n_windows == 0:
        # Keep the feature matrix two-dimensional even when it is empty.
        return np.empty((0, past_steps)), np.empty(0), np.empty(0)

    starts = range(n_windows)
    X = [data[i:i + past_steps] for i in starts]
    futures = [data[i + past_steps:i + total_steps] for i in starts]
    y_mean = [np.mean(window) for window in futures]
    y_std = [np.std(window) for window in futures]
    return np.array(X), np.array(y_mean), np.array(y_std)

ModuleNotFoundError: No module named 'xgboostlss'

In [None]:
# Parameter settings
SEED = 42  # fixed so the simulated path, and every number derived from it, is reproducible
H = 0.8  # Hurst exponent
N = 10000  # number of samples
past_steps = 128
future_steps = 128


def _plot_true_vs_pred(y_true, y_pred, quantity, title, ylabel):
    """Overlay the true and predicted values of one target, indexed by test sample."""
    plt.figure(figsize=(10, 5))
    plt.plot(y_true, label=f"True {quantity}")
    plt.plot(y_pred, label=f"Predicted {quantity}", linestyle="--")
    plt.title(title)
    plt.xlabel("Sample")
    plt.ylabel(ylabel)
    plt.legend()
    plt.show()


# Generate the fBm path and build the sliding-window dataset.
# NOTE(review): the fbm package appears to draw from NumPy's global RNG, which is
# why the global seed is set here - confirm against the installed fbm version.
np.random.seed(SEED)
data = generate_fbm(H, N)
X, y_mean, y_std = prepare_data(data, past_steps, future_steps)
plot_fbm(data, H)

# Train/test split.
# Consecutive windows overlap almost entirely (stride 1), so a shuffled split would
# place near-duplicates of every test window in the training set and make the test
# error meaningless. Split chronologically instead: first 80% train, last 20% test.
# NOTE(review): windows right at the boundary still overlap; consider dropping
# past_steps + future_steps windows between the two sets for a strict evaluation.
X_train, X_test, y_train_mean, y_test_mean, y_train_std, y_test_std = train_test_split(
    X, y_mean, y_std, test_size=0.2, shuffle=False
)

# Booster hyper-parameters.
# No 'objective' entry: XGBoostLSS trains with the likelihood-based objective of
# the chosen distribution, so 'reg:squarederror' would only be misleading here.
params = {
    'max_depth': 6,
    'eta': 0.01,
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'nthread': -1
}

# Create the model with a Gaussian response distribution
model = XGBoostLSS(dist=Gaussian())

# Train the model; the label is the mean of each future window
dtrain = xgb.DMatrix(X_train, label=y_train_mean)
dtest = xgb.DMatrix(X_test, label=y_test_mean)
model.train(params=params, dtrain=dtrain, num_boost_round=100)

# Predict the distribution parameters for the test windows
predictions = model.predict(dtest)
y_pred_mean = predictions['loc']
y_pred_std = predictions['scale']

# Evaluate and visualise the results
_plot_true_vs_pred(
    y_test_mean, y_pred_mean, "Mean",
    title="Predicted vs. True Mean of Future Data",
    ylabel="Mean Value",
)

# NOTE(review): the model is fitted on the future-window MEAN only, so 'scale' is
# presumably the predictive spread of that mean, whereas y_test_std is the standard
# deviation of the values inside the future window. These are different quantities;
# treat the comparison below as indicative only - confirm this is what is intended.
_plot_true_vs_pred(
    y_test_std, y_pred_std, "Std Dev",
    title="Predicted vs. True Standard Deviation of Future Data",
    ylabel="Standard Deviation",
)

# MSE of the mean prediction
mse_mean = mean_squared_error(y_test_mean, y_pred_mean)
print("Mean Squared Error for Mean Prediction:", mse_mean)
# MSE of the standard-deviation prediction
mse_std = mean_squared_error(y_test_std, y_pred_std)
print("Mean Squared Error for Standard Deviation Prediction:", mse_std)