In [4]:
# 基本的なライブラリ
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
import random

# モデル関連
import lightgbm as lgb
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
import xgboost as xgb

# モデル評価・前処理関連
from sklearn.model_selection import train_test_split, GridSearchCV, learning_curve, cross_val_score, KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

# ハイパーパラメータ最適化関連
import optuna
from optuna.integration import LightGBMPruningCallback, XGBoostPruningCallback
import optuna.visualization as vis

# その他のユーティリティ
import shap
import joblib

# 乱数のシードを固定
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# 警告を無視
warnings.filterwarnings("ignore")


In [5]:
# データの読み込み
all_data = pd.read_csv('../data_processed/all_data1.csv')

train_data = all_data[0:27532]
X = train_data.drop(['price'], axis=1)
y = train_data['price']

# データの分割
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

In [7]:
# CatBoost
cat_model = CatBoostRegressor(random_seed=42, verbose=0)
cat_model.fit(X_train, y_train, cat_features=[])
y_pred_cat = cat_model.predict(X_val)
mape_cat = mean_absolute_percentage_error(y_val, y_pred_cat)
print(f"1. CatBoost:\n   - Mean Absolute Percentage Error (MAPE): {mape_cat:.2f}%\n")

# LightGBM
lgb_model = LGBMRegressor(random_state=42, verbose=-1)  
lgb_model.fit(X_train, y_train)
y_pred_lgb = lgb_model.predict(X_val)
mape_lgb = mean_absolute_percentage_error(y_val, y_pred_lgb)
print(f"2. LightGBM:\n   - Mean Absolute Percentage Error (MAPE): {mape_lgb:.2f}%\n")

# XGBoost
dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val)
xgb_model = xgb.train({}, dtrain, verbose_eval=False)  
y_pred_xgb = xgb_model.predict(dval)
mape_xgb = mean_absolute_percentage_error(y_val, y_pred_xgb)
print(f"3. XGBoost:\n   - Mean Absolute Percentage Error (MAPE): {mape_xgb:.2f}%\n")

# 平均のMAPE
average_mape = (mape_cat + mape_lgb + mape_xgb) / 3
print(f"Average MAPE across all models: {average_mape:.2f}%")


1. CatBoost:
   - Mean Absolute Percentage Error (MAPE): 0.66%

2. LightGBM:
   - Mean Absolute Percentage Error (MAPE): 0.66%

3. XGBoost:
   - Mean Absolute Percentage Error (MAPE): 0.65%

Average MAPE across all models: 0.66%
