In [None]:
# Mount Google Drive at /content/drive; every data and output path used in this
# notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-3.3.0-py3-none-any.whl (404 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.2/404.2 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.11.3-py3-none-any.whl (225 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.4/225.4 kB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cmaes>=0.10.0 (from optuna)
  Downloading cmaes-0.10.0-py3-none-any.whl (29 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, cmaes, alembic, optuna
Successfully installed Mako-1.2.4 alembic-1.11.3 cmaes-0.10.0 colorlog-6.7.0 optuna-3.3.0


In [None]:
# ===================================================================
#  Library
# ===================================================================
import pandas as pd
from sklearn.metrics import mean_absolute_percentage_error

import warnings
warnings.simplefilter("ignore")

import optuna
import numpy as np

In [None]:
# ===================================================================
#  CFG
# ===================================================================
class CFG:
    """Experiment configuration shared by the cells of this notebook."""

    # --- search budget / reproducibility ---
    seed = 42        # first sampler seed; later studies use seed + 1, seed + 2, ...
    n_seeds = 3      # number of consecutive seeds, one Optuna study per seed
    n_trials = 2000  # trials per study

    # --- locations on the mounted Drive ---
    data_dir = "/content/drive/MyDrive/Colab Notebooks/signate2023/"
    save_dir = data_dir + "exp/"

    # --- output naming ---
    filename = "exp00054"  # used to build the name of the saved prediction CSV

In [None]:
# ===================================================================
#  Utils
# ===================================================================
def get_score(y_true, y_pred):
    """Return the MAPE between ``y_true`` and ``y_pred`` expressed as a percentage.

    The value returned by ``mean_absolute_percentage_error`` is multiplied by 100.
    """
    return 100 * mean_absolute_percentage_error(y_true, y_pred)

In [None]:
# Load the training labels plus each member's test predictions and out-of-fold (OOF)
# predictions. Paths are built from CFG so the data locations are defined in one place.
df_train = pd.read_csv(CFG.data_dir + "train.csv")
kun_test = pd.read_csv(CFG.save_dir + "kun_exp00052.csv")
kun_oof = pd.read_csv(CFG.save_dir + "kun_exp00052_oof_pred.csv")
yuji_test = pd.read_csv(CFG.save_dir + "exp050.csv")
yuji_oof = pd.read_csv(CFG.save_dir + "oof_df_exp050.csv")

# validate= makes pandas raise if an id is duplicated on either side, which would
# otherwise silently multiply rows in the merged frame.
oof_merged = kun_oof.merge(yuji_oof, on="id", validate="one_to_one")
test = kun_test.merge(yuji_test, on="id", validate="one_to_one")

# The target is attached by position (row i of train.csv <-> row i of the merged OOF
# frame), so a row-count mismatch would shift or drop labels; fail loudly instead.
# NOTE(review): this still assumes the OOF rows are in train.csv order -- confirm.
if len(oof_merged) != len(df_train):
    raise ValueError(
        f"OOF rows ({len(oof_merged)}) do not match train rows ({len(df_train)}); "
        "cannot attach 'price' by position"
    )
df = pd.concat([oof_merged, df_train["price"]], axis=1)

In [None]:
# Display the `pred_0` column of the merged OOF frame as a quick sanity check.
df['pred_0']

0         6881.892301
1         3740.634027
2         2954.247573
3         8430.949224
4         3972.418866
             ...     
27527    12650.381284
27528     6494.944663
27529    12231.954239
27530     6832.933932
27531     9925.656545
Name: pred_0, Length: 27532, dtype: float64

In [None]:
# ===================================================================
#  optuna
# ===================================================================
def objective(trial):
    """Optuna objective: MAPE (%) of a weighted sum of the OOF prediction columns.

    One weight per prediction column is sampled log-uniformly from [1e-8, 1]:
    ``w_kun_0 .. w_kun_9`` for the ``kun_pred_*`` columns and
    ``w_other_0 .. w_other_29`` for the ``pred_*`` columns of ``df``.

    The blended prediction is kept in a local variable instead of being written to
    ``df["pred"]``. Optuna's ``n_jobs`` runs trials in threads, and a shared column
    could be overwritten by another trial between the write and the scoring read,
    so a trial could be scored on weights that are not its own.

    Args:
        trial: the Optuna trial used to sample the weights.

    Returns:
        The value of ``get_score`` for the blend against ``df["price"]``.
    """
    # (prediction-column prefix, number of columns, Optuna parameter prefix)
    groups = (
        ("kun_pred_", 10, "w_kun_"),   # your own predictions
        ("pred_", 30, "w_other_"),     # the other member's predictions
    )

    # Accumulate in the same order as the parameters are suggested (kun first, then other).
    blended = 0
    for column_prefix, n_columns, param_prefix in groups:
        for idx in range(n_columns):
            weight = trial.suggest_float(f"{param_prefix}{idx}", 1e-8, 1, log=True)
            blended = blended + df[f"{column_prefix}{idx}"] * weight

    return get_score(y_true=df["price"], y_pred=blended)


optuna.logging.set_verbosity(optuna.logging.WARNING)

# One study per seed: CFG.seed, CFG.seed + 1, ...
seeds = list(range(CFG.seed, CFG.seed + CFG.n_seeds))

# A seeded sampler only yields reproducible results when trials run sequentially;
# with thread-parallel trials the order in which they consume the sampler's random
# stream is nondeterministic, which defeats the purpose of fixing seeds. Sequential
# execution also rules out interference between concurrently running trials.
# Set CFG.n_jobs to opt back into parallelism (e.g. -1) when speed matters more.
n_jobs = getattr(CFG, "n_jobs", 1)

best_values = []
best_params_list = []

for seed in seeds:
    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=seed),
    )
    study.optimize(
        objective,
        n_trials=CFG.n_trials,
        n_jobs=n_jobs,
        show_progress_bar=True,
    )

    best_value = study.best_value
    best_params = study.best_params

    best_values.append(best_value)
    best_params_list.append(best_params)

    print(f"Seed: {seed}, Best Value: {best_value}, Best Params: {best_params}")


# Keep the parameters of the study that reached the smallest best value.
best_index = int(np.argmin(best_values))
best_params_final = best_params_list[best_index]
best_value_final = best_values[best_index]

print("Final Best Value:", best_value_final)
print("Final Best Params:", best_params_final)

  0%|          | 0/2000 [00:00<?, ?it/s]

Seed: 42, Best Value: 43.71064136959435, Best Params: {'w_kun_0': 1.7694236509772962e-06, 'w_kun_1': 0.0024842406528704777, 'w_kun_2': 0.05278100841038327, 'w_kun_3': 6.0789373289108714e-05, 'w_kun_4': 6.802368512732099e-07, 'w_kun_5': 2.136662152779773e-06, 'w_kun_6': 0.0019498499133363512, 'w_kun_7': 1.730065505141195e-08, 'w_kun_8': 0.0033603529614627697, 'w_kun_9': 0.2430195556291224, 'w_other_0': 0.02922695801686354, 'w_other_1': 0.0007770389602813432, 'w_other_2': 0.00015152681147707705, 'w_other_3': 0.0005863192021648081, 'w_other_4': 0.011971562477146752, 'w_other_5': 4.3079078182749995e-08, 'w_other_6': 7.819115247247191e-06, 'w_other_7': 0.012283002062184802, 'w_other_8': 0.00015136045985770084, 'w_other_9': 0.017016779988806977, 'w_other_10': 3.937670710248774e-05, 'w_other_11': 1.3867300097494924e-08, 'w_other_12': 0.00029508873623179595, 'w_other_13': 3.745723684665617e-06, 'w_other_14': 6.914569864025614e-08, 'w_other_15': 0.0004949194551507616, 'w_other_16': 0.0003815554

  0%|          | 0/2000 [00:00<?, ?it/s]

Seed: 43, Best Value: 43.70290765986362, Best Params: {'w_kun_0': 2.0118508526849078e-07, 'w_kun_1': 5.071207363614973e-08, 'w_kun_2': 1.0072855645242633e-08, 'w_kun_3': 6.898590878712066e-08, 'w_kun_4': 0.004417936573129908, 'w_kun_5': 1.541981518121278e-05, 'w_kun_6': 0.024250631658890753, 'w_kun_7': 1.4035626409679278e-06, 'w_kun_8': 0.0005543206460396077, 'w_kun_9': 2.4926945595655434e-08, 'w_other_0': 2.519990876304773e-06, 'w_other_1': 0.09791418038241322, 'w_other_2': 2.5287722245548474e-08, 'w_other_3': 0.0017720358457928196, 'w_other_4': 1.782394537971028e-06, 'w_other_5': 0.1313570368172729, 'w_other_6': 0.0022307350709383704, 'w_other_7': 2.9407542615022264e-06, 'w_other_8': 0.0041894772870649435, 'w_other_9': 1.8603402468359464e-06, 'w_other_10': 0.0011728268881912676, 'w_other_11': 0.015637799224078763, 'w_other_12': 0.2973878442118673, 'w_other_13': 0.01254591192873523, 'w_other_14': 2.616886499635297e-07, 'w_other_15': 5.54039599848125e-06, 'w_other_16': 0.01635220366813

  0%|          | 0/2000 [00:00<?, ?it/s]

Seed: 44, Best Value: 43.68156816487425, Best Params: {'w_kun_0': 0.02688117478780046, 'w_kun_1': 0.10415961960220019, 'w_kun_2': 8.023526204623011e-07, 'w_kun_3': 0.0005112706033871676, 'w_kun_4': 8.087485823943694e-05, 'w_kun_5': 0.01551431763048836, 'w_kun_6': 0.0019826614003664055, 'w_kun_7': 0.0002598770441063838, 'w_kun_8': 2.496524661498506e-06, 'w_kun_9': 0.004260027789180974, 'w_other_0': 0.0005357685068235267, 'w_other_1': 1.563671766624388e-06, 'w_other_2': 0.0002485136351309052, 'w_other_3': 3.531337833017823e-05, 'w_other_4': 3.58626699414083e-06, 'w_other_5': 0.0008540232421422873, 'w_other_6': 1.6240163606254575e-05, 'w_other_7': 2.4458768777906662e-05, 'w_other_8': 0.002047715853358741, 'w_other_9': 1.2020514228447977e-07, 'w_other_10': 0.19262562408825878, 'w_other_11': 1.3397521073392131e-05, 'w_other_12': 1.0417297027764192e-08, 'w_other_13': 0.14910284758185324, 'w_other_14': 3.5624858847077254e-05, 'w_other_15': 3.6145310773702675e-08, 'w_other_16': 1.7197279091531

In [14]:
# Apply the best weights found by Optuna to the test-set predictions.
# Each parameter-name prefix maps to the prefix of the column it weights. Iterating over
# best_params_final keeps this cell in sync with the parameters that were actually
# searched, instead of repeating the per-member model counts here.
param_to_column_prefix = {"w_kun_": "kun_pred_", "w_other_": "pred_"}

if not best_params_final:
    raise ValueError("best_params_final is empty; run the Optuna search first")

ensemble_pred = 0
for param_name, weight in best_params_final.items():
    for param_prefix, column_prefix in param_to_column_prefix.items():
        if param_name.startswith(param_prefix):
            # "w_kun_3" -> "kun_pred_3", "w_other_12" -> "pred_12"
            column = column_prefix + param_name[len(param_prefix):]
            ensemble_pred = ensemble_pred + test[column] * weight
            break
    else:
        raise KeyError(f"Unexpected weight parameter: {param_name}")
test["pred"] = ensemble_pred

# Save the result as a CSV file with neither index nor header row.
test[["id", "pred"]].to_csv(CFG.save_dir + f"kun_{CFG.filename}.csv", index=False, header=False)