In [2]:
!pip install numpy==1.26.4
!pip install scikit-learn catboost

Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl.metadata (114 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.8/114.8 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl (14.0 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.0/14.0 MB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m00:01[0m:01[0m
[?25hInstalling collected packages: numpy
Successfully installed numpy-1.26.4

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Collecting scikit-learn
  Downloading scikit_learn-1.5.1-cp311-cp311-macosx_12_0_arm64.whl.metadata (12 kB)
Collecting catboost
  Downloading catboost-1.2.5-cp311-cp311-macosx_11_0_universal

In [2]:
import pandas as pd
from catboost import CatBoostRegressor, Pool
from catboost.utils import eval_metric
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import root_mean_squared_error, mean_squared_error


In [5]:
df = pd.read_csv('tmp/data.csv', header=None)

# 重複行を削除
df = df.drop_duplicates()

# 説明変数 (特徴量) と目的変数 (ターゲット) に分ける
# X = df.iloc[:, :-1]  # 最後の列を除いたものが説明変数
# y = df.iloc[:, -1]   # 最後の列が目的変数
X = df.iloc[:, :-1].values  # 最後の列以外が特徴量
y = df.iloc[:, -1].values   # 最後の列が正規化EV

# 学習データとテストデータに分ける
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# トレーニングデータをCatBoostのPoolに変換
train_pool = Pool(X_train, y_train)
test_pool = Pool(X_test, y_test)


In [11]:
model = CatBoostRegressor(iterations=1500, depth=4, learning_rate=0.1, l2_leaf_reg=7, loss_function='RMSE', eval_metric='RMSE', verbose=500)

# モデルを学習
model.fit(train_pool, eval_set=test_pool)

predictions = model.predict(X_test)



metrics = {
    'RMSE': eval_metric(predictions, y_test, 'RMSE'),
    'MAE': eval_metric(predictions, y_test, 'MAE'),
    'MAPE': eval_metric(predictions, y_test, 'MAPE'),
    'R2': eval_metric(predictions, y_test, 'R2')
}

# 各評価指標を表示
for metric_name, metric_value in metrics.items():
    print(f'{metric_name}: {metric_value}')

# Shrink model to first 1489 iterations.
# RMSE: [0.010081507810215133]
# MAE: [0.007347661887280395]
# MAPE: [0.007348661887280393]
# R2: [0.9823416229149526]


0:	learn: 0.0712025	test: 0.0747232	best: 0.0747232 (0)	total: 1.43ms	remaining: 2.15s
500:	learn: 0.0032227	test: 0.0118061	best: 0.0118061 (500)	total: 248ms	remaining: 494ms
1000:	learn: 0.0016611	test: 0.0111323	best: 0.0111323 (1000)	total: 513ms	remaining: 256ms
1499:	learn: 0.0010805	test: 0.0109508	best: 0.0109502 (1497)	total: 806ms	remaining: 0us

bestTest = 0.01095021089
bestIteration = 1497

Shrink model to first 1498 iterations.
RMSE: [0.010950211340870722]
MAE: [0.006169094505068774]
MAPE: [0.006170093100602041]
R2: [0.9775851561544532]


In [7]:
model.save_model("tmp/bjc.onnx",
           format="onnx",
           export_parameters=None,
           pool=None)